Files
elasticsearch/html_scrapy/elastic_publish.py
2023-05-14 18:15:10 +02:00

26 lines
900 B
Python

import os
from elasticsearch import Elasticsearch
from searching import es_client, ELASTIC_INDEX
def push_to_index(id, element):
    """Validate ``element`` and index it under ``id`` in ``ELASTIC_INDEX``.

    The element is first passed through ``check_elastic_document``. That call
    happens before the ``try`` block, so an exception raised by the
    validation propagates to the caller.

    Any exception raised while obtaining the client or while indexing is
    caught and printed; in that case the function simply returns ``None``.
    """
    document = check_elastic_document(element)
    try:
        with es_client() as es:
            es.index(index=ELASTIC_INDEX, id=id, document=document)
    except Exception as err:
        # Best-effort: report the failure and carry on instead of raising.
        print(err)
def check_elastic_document(element):
    """Return a normalised copy of ``element`` ready for indexing.

    Args:
        element: Mapping that must contain the keys ``url``, ``title``,
            ``text``, ``published`` and ``updated_at``; ``prio`` is optional.

    Returns:
        A new dict with exactly the keys ``published``, ``text``, ``title``,
        ``prio``, ``url`` and ``updated_at``. ``published`` and
        ``updated_at`` are converted with ``str()``; ``prio`` defaults to
        1000 when absent. Any other keys of ``element`` are dropped.

    Raises:
        AttributeError: If a required key is missing. Keys are checked in the
            order ``url``, ``title``, ``text``, ``published``,
            ``updated_at`` and the first missing one is named in the message.
    """
    required_keys = ("url", "title", "text", "published", "updated_at")
    for key in required_keys:
        if key not in element:
            # AttributeError (not KeyError) is kept so that callers catching
            # the existing exception type keep working.
            raise AttributeError(f"A {key} is needed for the Elastic Element")

    return {
        "published": str(element["published"]),
        "text": element["text"],
        "title": element["title"],
        "prio": element.get("prio", 1000),
        "url": element["url"],
        "updated_at": str(element["updated_at"]),
    }