From 4c92e00899609ea4866354b99854a3f7b350ce91 Mon Sep 17 00:00:00 2001 From: Andi Date: Thu, 13 Apr 2023 09:46:53 +0200 Subject: [PATCH] highlights and weights --- searching/__init__.py | 17 +++++++++++++---- searching/index.py | 8 ++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/searching/__init__.py b/searching/__init__.py index 3ebae2c..7e60198 100644 --- a/searching/__init__.py +++ b/searching/__init__.py @@ -24,17 +24,25 @@ def es_query(query:str): query ={ "multi_match":{ "query": query, - "fields": ["title^20","title.ngrams^10","text","text.ngrams"], - "type": "most_fields" + "fields": ["title^20","title.ngrams^10","text^5","text.ngrams"], + "tie_breaker": 0.3 + #"type": "most_fields" } } + return query + + def es_highlight(): highlight = { "fields": { "title": {}, - "text": {} + "text": {"fragment_size" : 150}, + "title.ngrams": {}, + "text.ngrams": {"fragment_size" : 150}, + } } + return highlight def es_search(query:str): with es_client() as client: @@ -57,4 +65,5 @@ if __name__ =="__main__": hit.get("_source",{}).get("title",""), " ".join(hit.get("highlight",{}).get("title",[""])), " ".join(hit.get("highlight",{}).get("text",[""])) - )) \ No newline at end of file + )) + \ No newline at end of file diff --git a/searching/index.py b/searching/index.py index 4aade9d..1927489 100644 --- a/searching/index.py +++ b/searching/index.py @@ -4,7 +4,7 @@ import elasticsearch settings = { "index": - {"max_ngram_diff": 3 + {"max_ngram_diff": 7 }, "analysis": { "analyzer": { @@ -19,10 +19,10 @@ settings = { "my_tokenizer": { "type": "ngram", "min_gram": 3, - "max_gram": 6, + "max_gram": 10, "token_chars": [ "letter", - "digit" + "digit", "symbol" ] } } @@ -55,7 +55,7 @@ mapping = { "published": {"type": "date", "format": "date_optional_time"}, "updated_at": {"type": "date", "format": "date_optional_time"}, "raw": { - "type": "text", + "type": "text", "index": False }, }