first_commit
This commit is contained in:
57
searching/__init__.py
Normal file
57
searching/__init__.py
Normal file
@@ -0,0 +1,57 @@
|
||||
|
||||
|
||||
|
||||
import os
|
||||
from elasticsearch import Elasticsearch, helpers
|
||||
import contextlib
|
||||
# Connection and query defaults. Each value can be overridden through the
# environment variable of the same name.
ELASTIC_HOST = os.getenv("ELASTIC_HOST", "http://localhost:9200")
# NOTE(review): a password is hard-coded as the fallback value here; consider
# requiring ELASTIC_PASSWORD to be provided by the environment instead.
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "*l9qNGoojiCC4n9KcZhj")
ELASTIC_QUERY = os.getenv("ELASTIC_QUERY", "Anwesend")
ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "legacy")
|
||||
|
||||
|
||||
# Connect to the Elasticsearch client
@contextlib.contextmanager
def es_client():
    """Yield an Elasticsearch client and close it when the block exits.

    The client connects to ``ELASTIC_HOST`` and authenticates as user
    ``elastic`` with ``ELASTIC_PASSWORD``. It is created with
    ``verify_certs=False``.

    Yields:
        The connected ``Elasticsearch`` client instance.
    """
    client = Elasticsearch(
        ELASTIC_HOST,
        verify_certs=False,
        basic_auth=("elastic", ELASTIC_PASSWORD),
    )
    try:
        yield client
    finally:
        # An exception raised inside the ``with`` body is re-raised at the
        # ``yield``; ``finally`` guarantees the client is still closed.
        client.close()
|
||||
|
||||
|
||||
def es_query(query: str):
    """Build the ``multi_match`` query body for a search term.

    The term is matched against ``title``, ``title.ngrams``, ``text`` and
    ``text.ngrams`` using the ``most_fields`` type; ``title`` is boosted
    by 20 and ``title.ngrams`` by 10.

    Args:
        query: The search term entered by the user.

    Returns:
        A dict usable as the ``query`` argument of a search request.
    """
    # The dict must be returned: without it the function yields None and
    # the caller sends a search request with no query at all.
    return {
        "multi_match": {
            "query": query,
            "fields": ["title^20", "title.ngrams^10", "text", "text.ngrams"],
            "type": "most_fields",
        }
    }
|
||||
def es_highlight():
    """Build the highlight body requesting fragments for title and text.

    Returns:
        A dict usable as the ``highlight`` argument of a search request,
        with empty per-field options for the ``title`` and ``text`` fields.
    """
    # The dict must be returned: without it the function yields None and
    # no highlighting is requested.
    return {
        "fields": {
            "title": {},
            "text": {},
        }
    }
|
||||
|
||||
def es_search(query: str):
    """Search ``ELASTIC_INDEX`` for *query* and return the raw response.

    A client is opened through ``es_client()`` for the duration of the call.
    The request asks for at most 10 hits, using the bodies produced by
    ``es_query`` and ``es_highlight``.

    Args:
        query: The search term.

    Returns:
        Whatever ``client.search`` returns for the request.
    """
    with es_client() as es:
        return es.search(
            index=ELASTIC_INDEX,
            size=10,
            query=es_query(query),
            highlight=es_highlight(),
        )
|
||||
#for hit in resp["hits"]["hits"]:
|
||||
# print(hit)
|
||||
|
||||
if __name__ == "__main__":
    # Run the default query and print, for every hit, its URL, its title and
    # the highlighted title/text fragments.
    resp = es_search(ELASTIC_QUERY)
    for hit in resp["hits"]["hits"]:
        source = hit.get("_source", {})
        highlight = hit.get("highlight", {})
        # Hits without highlight fragments fall back to an empty string.
        title_fragments = " ".join(highlight.get("title", [""]))
        text_fragments = " ".join(highlight.get("text", [""]))
        print(
            f"\n\n{source.get('url', '')}\n{source.get('title', '')}\n"
            f"{title_fragments} - {text_fragments}"
        )
|
||||
71
searching/index.py
Normal file
71
searching/index.py
Normal file
@@ -0,0 +1,71 @@
|
||||
from . import ELASTIC_INDEX
|
||||
from . import es_client
|
||||
import elasticsearch
|
||||
|
||||
# Index settings: a custom analyzer that lowercases n-gram tokens.
settings = {
    # max_gram - min_gram below is 6 - 3 = 3, which this setting allows.
    "index": {"max_ngram_diff": 3},
    "analysis": {
        "analyzer": {
            "my_analyzer": {
                "tokenizer": "my_tokenizer",
                "filter": ["lowercase"],
            },
        },
        "tokenizer": {
            "my_tokenizer": {
                "type": "ngram",
                "min_gram": 3,
                "max_gram": 6,
                "token_chars": ["letter", "digit"],
            },
        },
    },
}
|
||||
# Field mapping: title and text are full-text fields, each with an extra
# "ngrams" sub-field indexed with my_analyzer and searched with the standard
# analyzer. url and raw are stored as text with "index" set to False.
mapping = {
    "properties": {
        "title": {
            "type": "text",
            "fields": {
                "ngrams": {
                    "type": "text",
                    "analyzer": "my_analyzer",
                    "search_analyzer": "standard",
                },
            },
        },
        "text": {
            "type": "text",
            "fields": {
                "ngrams": {
                    "type": "text",
                    "analyzer": "my_analyzer",
                    "search_analyzer": "standard",
                },
            },
        },
        "url": {"type": "text", "index": False},
        "published": {"type": "date", "format": "date_optional_time"},
        "updated_at": {"type": "date", "format": "date_optional_time"},
        "raw": {"type": "text", "index": False},
    },
}
|
||||
def reset_index():
    """Delete ``ELASTIC_INDEX`` and create it again from scratch.

    If the index does not exist, the ``NotFoundError`` from the delete call
    is caught and a message is printed instead. The index is then created
    with the module-level ``settings`` and ``mapping``.
    """
    with es_client() as es:
        try:
            es.indices.delete(index=ELASTIC_INDEX)
        except elasticsearch.NotFoundError:
            print("Index already removed")

        es.indices.create(
            index=ELASTIC_INDEX,
            settings=settings,
            mappings=mapping,
        )
|
||||
Reference in New Issue
Block a user