Files
elasticsearch/searching/__init__.py
2023-05-14 18:15:10 +02:00

95 lines
2.5 KiB
Python

import os
from elasticsearch import Elasticsearch, helpers
import contextlib
import logging
# Connection and query settings; each one can be overridden through an
# environment variable of the same name.
ELASTIC_HOST = os.getenv("ELASTIC_HOST", "http://localhost:9200")
# NOTE(review): the fallback password is hard-coded in source control;
# consider requiring ELASTIC_PASSWORD to be set in the environment instead.
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "*l9qNGoojiCC4n9KcZhj")
# Search text used when the module is executed as a script.
ELASTIC_QUERY = os.getenv("ELASTIC_QUERY", "Anwesend")
# Name of the index that es_search() queries.
ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "legacy")
# Connect to the Elasticsearch client
@contextlib.contextmanager
def es_client():
    """Yield an Elasticsearch client for ``ELASTIC_HOST`` and close it on exit.

    The client authenticates as user ``elastic`` with ``ELASTIC_PASSWORD``.
    It is closed when the ``with`` block ends, including when the block
    raises an exception.

    Yields:
        The connected ``Elasticsearch`` client instance.
    """
    logging.debug("ELASIC HOST:%s", ELASTIC_HOST)
    # NOTE(review): verify_certs=False disables TLS certificate verification.
    client = Elasticsearch(
        ELASTIC_HOST, verify_certs=False, basic_auth=("elastic", ELASTIC_PASSWORD)
    )
    try:
        yield client
    finally:
        # Without try/finally an exception raised inside the ``with`` body
        # would skip close() and leak the client's connections.
        client.close()
def es_query(query: str):
    """Build the ``multi_match`` query clause for the search text *query*.

    Args:
        query: The text to search for.

    Returns:
        A dict with a single ``"multi_match"`` key holding the search text,
        the list of searched fields (with their ``^`` weight suffixes) and a
        ``tie_breaker`` of 0.3.
    """
    weighted_fields = ["title^20", "title.ngrams^10", "text^5", "text.ngrams"]
    multi_match = {
        "query": query,
        "fields": weighted_fields,
        "tie_breaker": 0.3,
        # "type": "most_fields"  (disabled)
    }
    return {"multi_match": multi_match}
def es_highlight():
    """Return the highlight settings passed along with a search request.

    Returns:
        A dict whose ``"fields"`` entry lists the highlighted fields: the
        title fields with empty options and the text fields with a
        ``fragment_size`` of 150.
    """
    text_options = {"fragment_size": 150}
    highlighted_fields = {
        "title": {},
        "text": dict(text_options),
        "title.ngrams": {},
        # A separate copy, so the two text entries never share one dict.
        "text.ngrams": dict(text_options),
    }
    return {"fields": highlighted_fields}
# Plain field-based sort specification: every listed field in descending order.
sorting = {
    field: {"order": "desc"} for field in ("updated_at", "_score", "prio")
}
def es_sorting():
    """Return the script-based sort specification used by ``es_search``.

    The Painless script adds three logarithmic terms built from the
    document's ``updated_at`` timestamp, the ``_score`` and the ``prio``
    field; results are ordered by that number, descending.

    Returns:
        A dict with a single ``"_script"`` sort entry.
    """
    # NOTE(review): the "/" operations may be integer divisions in Painless
    # if the operands are integer typed -- confirm the resulting coarse
    # granularity is intended.
    painless_source = (
        "Math.log10(1+doc['updated_at'].value.toInstant().toEpochMilli()/1000000000/100)"
        " + Math.log10(1+_score)/10"
        " + Math.log10(1+doc['prio'].value/1000) "
    )
    script_sort = {
        "type": "number",
        "script": {"lang": "painless", "source": painless_source},
        "order": "desc",
    }
    return {"_script": script_sort}
def es_search(query: str):
    """Search ``ELASTIC_INDEX`` for *query* and return the raw response.

    A client is opened through ``es_client()`` for this one request. The
    request asks for at most 30 results and combines the clauses built by
    ``es_query``, ``es_sorting`` and ``es_highlight``.

    Args:
        query: The text to search for.

    Returns:
        Whatever ``client.search`` returns; the individual hits are found
        under ``result["hits"]["hits"]``.
    """
    with es_client() as client:
        search_args = {
            "index": ELASTIC_INDEX,
            "size": 30,
            "query": es_query(query),
            "sort": es_sorting(),
            "highlight": es_highlight(),
        }
        return client.search(**search_args)
# Script entry point: run one search for ELASTIC_QUERY and print, per hit,
# its URL, title and the highlighted title/text fragments.
if __name__ == "__main__":
    resp = es_search(ELASTIC_QUERY)
    hits = resp["hits"]["hits"]
    # %d needs a number: log the hit count. Formatting the list itself with
    # %d raised TypeError before any result was printed.
    logging.info("Found %d recorts in hits", len(hits))
    for hit in hits:
        source = hit.get("_source", {})
        highlight = hit.get("highlight", {})
        url = source.get("url", "")
        title = source.get("title", "")
        # Missing highlight entries fall back to a single empty fragment.
        title_fragments = " ".join(highlight.get("title", [""]))
        text_fragments = " ".join(highlight.get("text", [""]))
        print(f"\n\n{url}\n{title}\n{title_fragments} - {text_fragments}")