refactor major
solrfet2020/__init__.py (new file, 69 lines added)
@@ -0,0 +1,69 @@
from lxml.html.clean import clean_html, Cleaner
import environ
import pysolr
from .convert import post_to_solr
from urllib.parse import urljoin
from fet2020api import fet2020postapi
import yaml

# Environment configuration with defaults for the local Solr instance
# and the fet2020 target site.
env = environ.Env(
    SOLR_HOST=(str, "http://localhost:8980"),
    TARGET=(str, "https://alpha.2020.fet.at")
)

# Client for the post API of the target site.
fet = fet2020postapi(urljoin(env('TARGET'), "api/posts/"))


def search_post(text=""):
    # Placeholder, not implemented yet.
    pass


def reindex():
    # Placeholder, not implemented yet.
    pass


def replace_special(t):
    # Drop newlines, carriage returns, tabs and backslashes.
    return t.replace("\n", "").replace("\r", "").replace("\t", "").replace("\\", "")


def strip_html(text):
    # Keep only a small set of inline tags; <p> and <div> are removed
    # but their content is kept.
    c = Cleaner(
        allow_tags=['i', 'em', 'p'],
        remove_tags=['p', 'div'])
    if type(text) is list:
        # Join the cleaned fragments of a highlight list.
        h = ""
        for item in text:
            h = h + " " + strip_html(item) + ";"
        return h
    # clean_html wraps its result in <div>...</div>; strip that wrapper.
    return c.clean_html(replace_special(text))[5:-6]


def result_to_posts(result):
    docs = result.docs
    highlights = result.highlighting
    # Look up the full post for every Solr hit (the Solr id is the slug).
    posts = [fet.find_one({"slug": rr["id"]}) for rr in docs]

    def create_text(p):
        # Render one hit as an HTML snippet: title, date, URL and the
        # highlighted fragments.
        return "<b>" + \
            p["title"] + "</b>: " + \
            "(%s) " % p["public_date"] + \
            urljoin(env('TARGET'),
                    p["url"]).rstrip("/") + " " + \
            str(strip_html(highlights[p["slug"]]["text_txt"]))

    for post in posts:
        post["text"] = create_text(post)
    return posts


class SolrFet2020():

    def __init__(self):
        self.solr = pysolr.Solr(
            urljoin(env('SOLR_HOST'), '/solr/core'),
            always_commit=True
        )

    def reindex(self):
        # Wipe the core and re-add every post fetched from the API.
        self.solr.delete(q='*:*')
        p = post_to_solr(fet.find({"slug": ""}))
        self.solr.add(p)

    def search(self, query):
        # Wildcard search over text_txt with highlighting enabled.
        r = self.solr.search("text_txt:*%s*" % query, sort="date_dt desc", **{
            'hl': 'true',
            'hl.fragsize': 100,
            'hl.fl': '*',
            'hl.maxAnalyzedChars': -1,
            'hl.snippets': 100, })
        links = result_to_posts(r)
        # print(yaml.dump(r))
        return links, r.hits
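Usage note (not part of the diff): a minimal sketch of how the new SolrFet2020 class might be driven, assuming the package is importable as solrfet2020, the SOLR_HOST and TARGET environment variables point at a running Solr core and fet2020 instance, and "budget" is just a placeholder query.

# Illustrative sketch only; requires a reachable Solr core and fet2020 API.
from solrfet2020 import SolrFet2020

sf = SolrFet2020()
sf.reindex()                        # wipe the core and re-add all posts
links, hits = sf.search("budget")   # posts with highlighted snippets, plus hit count
for link in links:
    print(link["text"])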
solrfet2020/convert.py (new file, 30 lines added)
@@ -0,0 +1,30 @@
from lxml.html.clean import clean_html, Cleaner


class PostKeyError(KeyError):
    # Raised when a post dict is missing a required key.
    pass


def post_to_solr(p):
    # If a list is given, convert each element.
    if type(p) is list:
        return [post_to_solr(o) for o in p]

    def get_text(l):
        # Strip markup from a body fragment; empty input stays empty.
        if not l:
            return ""
        if len(l) < 1:
            return ""
        c = Cleaner(allow_tags=['i', 'em', 'p'], remove_tags=['p', 'div'])
        h = c.clean_html(l.replace("\n", " ").replace("\r", " ").replace("\t", " ").replace("\\", " "))
        return h

    # Check dict and required keys
    assert type(p) is dict, "Argument for post_to_solr needs to be a dict or list of dicts"
    for k in ["url", "body"]:
        if not k in p:
            raise PostKeyError("Post needs to have key '%s'" % k)
    # Return the Solr document structure
    return {
        "id": p["slug"],
        "date_dt": p["public_date"],
        "text_txt": (get_text(p.get("body", "")) or "") + (p.get("agenda_html", "") or "")
    }
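Usage note (not part of the diff): an illustrative call to post_to_solr with a made-up post dict; the field values below are placeholders, only the key names match what the function reads.

# Illustrative sketch only; the post contents are invented.
from solrfet2020.convert import post_to_solr

post = {
    "slug": "example-post",                 # becomes the Solr id
    "url": "/posts/example-post/",
    "public_date": "2020-01-01T00:00:00Z",
    "body": "<p>Hello <em>world</em></p>",
    "agenda_html": "",
}
doc = post_to_solr(post)
# doc has the keys "id", "date_dt" and "text_txt" and can be passed to
# pysolr's Solr.add(), e.g. as done in SolrFet2020.reindex().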