95 lines
2.5 KiB
Python
95 lines
2.5 KiB
Python
import os
|
|
from elasticsearch import Elasticsearch, helpers
|
|
import contextlib
|
|
import logging
|
|
|
|
# Connection and query settings. Each value is read from the environment
# variable of the same name and falls back to the default given here.
ELASTIC_HOST = os.getenv("ELASTIC_HOST", "http://localhost:9200")
# NOTE(review): a credential is hard-coded as the fallback value; consider
# rotating it and requiring the environment variable instead.
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "*l9qNGoojiCC4n9KcZhj")
ELASTIC_QUERY = os.getenv("ELASTIC_QUERY", "Anwesend")
ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "legacy")
|
|
|
|
|
|
# Connect to the Elasticsearch client
@contextlib.contextmanager
def es_client():
    """Yield an Elasticsearch client and close it when the ``with`` block ends.

    The client is created for ``ELASTIC_HOST`` with basic auth as the
    ``elastic`` user and ``ELASTIC_PASSWORD``.

    Yields:
        The configured ``Elasticsearch`` client instance.
    """
    # Lazy %-style arguments: the message is only formatted if DEBUG is enabled.
    logging.debug("ELASTIC HOST:%s", ELASTIC_HOST)
    # NOTE(review): verify_certs=False turns off TLS certificate verification;
    # confirm this is intended outside of local development.
    client = Elasticsearch(
        ELASTIC_HOST,
        verify_certs=False,
        basic_auth=("elastic", ELASTIC_PASSWORD),
    )
    try:
        yield client
    finally:
        # Close the client even when the caller's ``with`` body raises;
        # without the finally the close would be skipped on an exception.
        client.close()
|
|
|
|
|
|
def es_query(query: str) -> dict:
    """Build the ``multi_match`` query body for a search term.

    Args:
        query: Text to search for.

    Returns:
        A dict holding one ``multi_match`` clause over the ``title`` and
        ``text`` fields and their ``.ngrams`` sub-fields, with per-field
        boosts and a ``tie_breaker`` of 0.3.
    """
    boosted_fields = ["title^20", "title.ngrams^10", "text^5", "text.ngrams"]
    # No explicit "type" is set on the clause ("most_fields" was tried and
    # left disabled in the original code).
    multi_match = {
        "query": query,
        "fields": boosted_fields,
        "tie_breaker": 0.3,
    }
    return {"multi_match": multi_match}
|
|
|
|
|
|
def es_highlight() -> dict:
    """Return the highlight settings sent with a search request.

    Returns:
        A dict whose ``fields`` entry lists ``title``, ``text`` and their
        ``.ngrams`` sub-fields; both text variants use a ``fragment_size``
        of 150, the title variants use default options.
    """
    field_options = {}
    # Plain fields first, then the ".ngrams" sub-fields, keeping the
    # original key order. A fresh options dict is created for every field.
    for suffix in ("", ".ngrams"):
        field_options["title" + suffix] = {}
        field_options["text" + suffix] = {"fragment_size": 150}
    return {"fields": field_options}
|
|
|
|
|
|
# Plain field-based sort specification: newest first, then relevance score,
# then priority, all descending. It is not referenced in the visible part
# of this file.
sorting = {
    sort_field: {"order": "desc"}
    for sort_field in ("updated_at", "_score", "prio")
}
|
|
|
|
|
|
def es_sorting() -> dict:
    """Return a script-based sort that ranks hits by a combined value.

    The Painless script adds three log10-damped terms computed from the
    document's ``updated_at`` timestamp, the relevance ``_score`` and the
    ``prio`` field. Hits are ordered by that number, descending.

    Returns:
        A dict with a single ``_script`` sort clause.
    """
    # NOTE(review): the "/" divisions on the epoch-millis and prio values
    # look like integer divisions in Painless; confirm truncation is intended.
    painless_source = (
        "Math.log10(1+doc['updated_at'].value.toInstant().toEpochMilli()/1000000000/100)"
        " + Math.log10(1+_score)/10"
        " + Math.log10(1+doc['prio'].value/1000) "
    )
    script = {"lang": "painless", "source": painless_source}
    script_sort = {"type": "number", "script": script, "order": "desc"}
    return {"_script": script_sort}
|
|
|
|
|
|
def es_search(query: str):
    """Search ``ELASTIC_INDEX`` for *query* and return the raw response.

    Args:
        query: Text to search for.

    Returns:
        Whatever ``client.search`` returns for a request of at most 30 hits,
        built from ``es_query(query)``, ``es_sorting()`` and
        ``es_highlight()``.
    """
    with es_client() as client:
        search_kwargs = dict(
            index=ELASTIC_INDEX,
            size=30,
            query=es_query(query),
            sort=es_sorting(),
            highlight=es_highlight(),
        )
        # Returning from inside the ``with`` still runs the context
        # manager's cleanup before the caller receives the response.
        return client.search(**search_kwargs)
|
|
|
|
|
|
# for hit in resp["hits"]["hits"]:
|
|
# print(hit)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the configured query and print one entry per hit.
    resp = es_search(ELASTIC_QUERY)
    hits = resp["hits"]["hits"]
    # %d needs a number: pass the hit count, not the hit list itself (the
    # original formatted the list with %d and raised TypeError).
    logging.info("Found %d records in hits", len(hits))
    for hit in hits:
        source = hit.get("_source", {})
        highlight = hit.get("highlight", {})
        # Output per hit: URL, title, then the highlighted title and text
        # fragments separated by " - ". Missing parts print as empty strings.
        print(
            "\n\n%s\n%s\n%s - %s"
            % (
                source.get("url", ""),
                source.get("title", ""),
                " ".join(highlight.get("title", [""])),
                " ".join(highlight.get("text", [""])),
            )
        )
|