first_commit

This commit is contained in:
2023-02-19 10:04:10 +01:00
commit ed610970cb
23 changed files with 11434 additions and 0 deletions

57
searching/__init__.py Normal file
View File

@@ -0,0 +1,57 @@
import os
from elasticsearch import Elasticsearch, helpers
import contextlib
# Connection and query settings. Each value is read from the environment
# variable of the same name and falls back to the default given here.
ELASTIC_HOST = os.getenv("ELASTIC_HOST", "http://localhost:9200")
# NOTE(review): a concrete password is committed as the fallback value;
# consider rotating it and requiring ELASTIC_PASSWORD to be set explicitly.
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "*l9qNGoojiCC4n9KcZhj")
ELASTIC_QUERY = os.getenv("ELASTIC_QUERY", "Anwesend")
ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "legacy")
# Connect to the Elasticsearch cluster.
@contextlib.contextmanager
def es_client():
    """Yield an Elasticsearch client and close it when the block exits.

    The client is created for ``ELASTIC_HOST`` and authenticates as the
    ``elastic`` user with ``ELASTIC_PASSWORD``.

    Yields:
        The open ``Elasticsearch`` client.
    """
    # NOTE(review): verify_certs=False disables TLS certificate verification;
    # confirm this is intended outside local development.
    client = Elasticsearch(
        ELASTIC_HOST,
        verify_certs=False,
        basic_auth=("elastic", ELASTIC_PASSWORD),
    )
    try:
        yield client
    finally:
        # Close the client even when the body of the ``with`` block raises,
        # otherwise the connection would be leaked on errors.
        client.close()
def es_query(query: str) -> dict:
    """Build the ``multi_match`` query body for a free-text search.

    The search text is matched against the title and text fields and their
    n-gram sub-fields; the ``^20`` / ``^10`` suffixes boost the title fields.

    Args:
        query: The text to search for.

    Returns:
        The query dict to pass as the ``query`` argument of a search call.
    """
    # Return the body explicitly: without the return the caller received None.
    return {
        "multi_match": {
            "query": query,
            "fields": ["title^20", "title.ngrams^10", "text", "text.ngrams"],
            "type": "most_fields",
        }
    }
def es_highlight() -> dict:
    """Build the highlight specification for search requests.

    Returns:
        A dict requesting highlights for the ``title`` and ``text`` fields
        with no per-field options.
    """
    # Return the spec explicitly: without the return the caller received None.
    return {
        "fields": {
            "title": {},
            "text": {},
        }
    }
def es_search(query: str):
    """Search ``ELASTIC_INDEX`` for *query* and return the client's response.

    The request asks for at most 10 hits, using the query body produced by
    ``es_query`` and the highlight specification produced by ``es_highlight``.
    The client is closed before the response is handed back.
    """
    with es_client() as es:
        search_kwargs = {
            "index": ELASTIC_INDEX,
            "size": 10,
            "query": es_query(query),
            "highlight": es_highlight(),
        }
        return es.search(**search_kwargs)
if __name__ == "__main__":
    # Run the default query and print URL, title and highlight snippets
    # for every hit.
    resp = es_search(ELASTIC_QUERY)
    for hit in resp["hits"]["hits"]:
        source = hit.get("_source", {})
        highlights = hit.get("highlight", {})
        url = source.get("url", "")
        title = source.get("title", "")
        # A missing highlight falls back to a single empty snippet.
        title_snippets = " ".join(highlights.get("title", [""]))
        text_snippets = " ".join(highlights.get("text", [""]))
        print(
            "\n\n%s\n%s\n%s - %s"
            % (url, title, title_snippets, text_snippets)
        )

71
searching/index.py Normal file
View File

@@ -0,0 +1,71 @@
from . import ELASTIC_INDEX
from . import es_client
import elasticsearch
# N-gram tokenizer: splits runs of letters and digits into 3..6 character grams.
_NGRAM_TOKENIZER = {
    "type": "ngram",
    "min_gram": 3,
    "max_gram": 6,
    "token_chars": [
        "letter",
        "digit",
    ],
}

# Analyzer that applies the n-gram tokenizer and lowercases the tokens.
_NGRAM_ANALYZER = {
    "tokenizer": "my_tokenizer",
    "filter": [
        "lowercase",
    ],
}

# Index settings passed to index creation.
settings = {
    "index": {
        # Equals max_gram - min_gram of the tokenizer above (6 - 3).
        "max_ngram_diff": 3,
    },
    "analysis": {
        "analyzer": {
            "my_analyzer": _NGRAM_ANALYZER,
        },
        "tokenizer": {
            "my_tokenizer": _NGRAM_TOKENIZER,
        },
    },
}
def _text_with_ngrams():
    """Return a fresh text-field mapping that carries an ``ngrams`` sub-field.

    The sub-field is indexed with ``my_analyzer`` and searched with the
    ``standard`` analyzer.
    """
    return {
        "type": "text",
        "fields": {
            "ngrams": {
                "type": "text",
                "analyzer": "my_analyzer",
                "search_analyzer": "standard",
            },
        },
    }


# Field mappings passed to index creation.
mapping = {
    "properties": {
        # Searchable fields, each with an additional n-gram sub-field.
        "title": _text_with_ngrams(),
        "text": _text_with_ngrams(),
        # Stored but not indexed.
        "url": {"type": "text", "index": False},
        "published": {"type": "date", "format": "date_optional_time"},
        "updated_at": {"type": "date", "format": "date_optional_time"},
        # Stored but not indexed.
        "raw": {
            "type": "text",
            "index": False,
        },
    },
}
def reset_index():
    """Delete ``ELASTIC_INDEX`` if it exists and create it again.

    The new index is created with the module-level ``settings`` and
    ``mapping``. A message is printed when there was no index to delete.
    """
    with es_client() as es:
        index_api = es.indices
        try:
            index_api.delete(index=ELASTIC_INDEX)
        except elasticsearch.NotFoundError:
            # Nothing to delete; carry on and create the index.
            print("Index already removed")
        index_api.create(
            index=ELASTIC_INDEX,
            settings=settings,
            mappings=mapping,
        )