72 lines
1.6 KiB
Python
72 lines
1.6 KiB
Python
from . import ELASTIC_INDEX
|
|
from . import es_client
|
|
import elasticsearch
|
|
|
|
# Length bounds (inclusive) of the substrings emitted by the ngram tokenizer.
_NGRAM_MIN_LENGTH = 3
_NGRAM_MAX_LENGTH = 10

# Index-level settings handed to ``indices.create`` by ``reset_index``.
settings = {
    "index": {
        # ``index.max_ngram_diff`` caps ``max_gram - min_gram`` for ngram
        # tokenizers; it is derived from the bounds above so the two can
        # never drift apart when one of them is tuned.
        "max_ngram_diff": _NGRAM_MAX_LENGTH - _NGRAM_MIN_LENGTH,
    },
    "analysis": {
        "analyzer": {
            # Index-time analyzer: ngram tokens, lower-cased.
            "my_analyzer": {
                "tokenizer": "my_tokenizer",
                "filter": [
                    "lowercase",
                ],
            },
        },
        "tokenizer": {
            "my_tokenizer": {
                "type": "ngram",
                "min_gram": _NGRAM_MIN_LENGTH,
                "max_gram": _NGRAM_MAX_LENGTH,
                # Character classes kept inside a token; any other character
                # acts as a token boundary.
                "token_chars": [
                    "letter",
                    "digit",
                    "symbol",
                ],
            },
        },
    },
}
|
|
def _ngram_text_field():
    """Return a new ``text`` field definition carrying an ``ngrams`` sub-field.

    The sub-field is indexed with ``my_analyzer`` (declared in the index
    analysis settings) and searched with the ``standard`` analyzer. A new
    dict is built on every call so the fields using it share no state.
    """
    return {
        "type": "text",
        "fields": {
            "ngrams": {
                "type": "text",
                "analyzer": "my_analyzer",
                "search_analyzer": "standard",
            },
        },
    }


# Field mapping handed to ``indices.create`` by ``reset_index``.
mapping = {
    "properties": {
        # Full-text fields, each with an additional ngram-analyzed sub-field.
        "title": _ngram_text_field(),
        "text": _ngram_text_field(),
        # Stored in the document but not indexed.
        "url": {"type": "text", "index": False},
        "published": {"type": "date", "format": "date_optional_time"},
        "updated_at": {"type": "date", "format": "date_optional_time"},
        # Stored in the document but not indexed.
        "raw": {
            "type": "text",
            "index": False,
        },
    },
}
|
|
def reset_index():
    """Drop the ``ELASTIC_INDEX`` index if present, then create it again.

    The index is recreated with the module-level ``settings`` and
    ``mapping``. A missing index is not treated as an error: the
    ``elasticsearch.NotFoundError`` raised by the delete call is caught and
    a notice is printed. Any other exception propagates to the caller.
    """
    with es_client() as client:
        try:
            client.indices.delete(index=ELASTIC_INDEX)
        except elasticsearch.NotFoundError:
            # Nothing to delete; carry on and create the index below.
            print("Index already removed")

        client.indices.create(
            index=ELASTIC_INDEX,
            settings=settings,
            mappings=mapping,
        )
|