from lxml.html.clean import clean_html, Cleaner
import environ
import pysolr
from .convert import post_to_solr
from urllib.parse import urljoin,urlparse
from fet2020api import fet2020postapi, fet2020memberapi
import yaml
# Runtime settings read through `environ.Env`; each entry is declared as a
# (cast, default) pair, so both values fall back to the literals below.
env = environ.Env(
    SOLR_HOST=(str, "http://localhost:8980"),
    TARGET=(str, "https://alpha.2020.fet.at"),
)

# API clients for the posts and members endpoints below the TARGET site.
fet = fet2020postapi(urljoin(env('TARGET'), "api/posts/"))
fetmember = fet2020memberapi(urljoin(env('TARGET'), "api/members/"))
def search_post(text=""):
    """Placeholder for a post search; currently does nothing.

    Args:
        text: Search text (unused).

    Returns:
        None.
    """
    return None
def reindex():
    """Placeholder for a module-level reindex; currently does nothing.

    Returns:
        None.
    """
    return None
def replace_special(t):
    """Return *t* with newlines, carriage returns, tabs and backslashes removed.

    Args:
        t: The string to clean.

    Returns:
        A copy of *t* without any "\\n", "\\r", "\\t" or "\\\\" characters.
    """
    cleaned = t
    # Same characters, in the same order, as the original chained replaces.
    for unwanted in ("\n", "\r", "\t", "\\"):
        cleaned = cleaned.replace(unwanted, "")
    return cleaned
def strip_html(text):
    """Reduce HTML (or a list of HTML fragments) to lightly formatted text.

    Args:
        text: A string of HTML, or a list of such strings (lists may nest).
            Falsy input (``""``, ``None``, ``[]``) yields ``""``.

    Returns:
        For a string: the markup after ``replace_special`` and an lxml
        ``Cleaner`` pass, with the first 5 and last 6 characters removed.
        For a list: every item processed recursively and emitted as
        ``" <item>;"``, concatenated in order.
    """
    # Guard clause: nothing to clean, and avoids calling .replace on None.
    if not text:
        return ""

    if isinstance(text, list):
        # One fragment per item, each prefixed with a space and closed by ';'.
        return "".join(" " + strip_html(item) + ";" for item in text)

    # Build the cleaner only on the path that actually uses it.
    cleaner = Cleaner(
        allow_tags=['i', 'em', 'p'],
        remove_tags=['p', 'div'])
    # NOTE(review): the [5:-6] slice presumably strips a wrapper element that
    # lxml adds around the fragment (e.g. "<div>" / "</div>") -- TODO confirm.
    return cleaner.clean_html(replace_special(text))[5:-6]
def result_to_posts(result):
    """Turn a Solr search result into a list of enriched post dicts.

    For every document in ``result.docs`` the post is looked up through
    ``fet.find_one`` using the third "/"-separated component of the document
    id as slug. Each post that was found is then extended in place:

    * ``text``: "<title>: (<public_date>) <absolute url> <highlight text>"
    * ``highlights``: the stripped ``text_txt`` highlight for the post
    * ``url``: made absolute against TARGET, trailing "/" removed (if set)
    * ``image``: path of ``imageurl`` made absolute against TARGET (if set)

    Args:
        result: Search result exposing ``docs`` and ``highlighting``.

    Returns:
        One entry per document, in document order. Entries for which the
        lookup returned a falsy value are left untouched.
    """
    documents = result.docs
    snippets = result.highlighting

    # Resolve all posts first; enrichment happens in a second pass.
    found = []
    for doc in documents:
        slug = doc["id"].split("/")[2]
        found.append(fet.find_one({"slug": slug}))

    def absolute(path):
        # Resolve a site-relative path against the configured TARGET host.
        return urljoin(env('TARGET'), path)

    def snippet_for(entry):
        # Highlighting is keyed by "/posts/<slug>"; a missing text_txt gives "".
        fragments = snippets["/posts/" + entry["slug"]].get("text_txt", "")
        return str(strip_html(fragments))

    for entry in found:
        if not entry:
            continue

        heading = "" + entry["title"] + ": " + "(%s) " % entry["public_date"]
        link = absolute(entry["url"]).rstrip("/")
        entry["text"] = heading + link + " " + snippet_for(entry)

        # NOTE(review): debug output kept as-is; consider logging instead.
        print(snippets["/posts/" + entry["slug"]])
        entry["highlights"] = snippet_for(entry)

        if entry["url"]:
            entry["url"] = absolute(entry["url"]).rstrip("/")
        if entry["imageurl"]:
            entry["image"] = absolute(urlparse(entry["imageurl"]).path)

    return found
class SolrFet2020():
    """Thin wrapper around a pysolr connection for indexing and searching posts."""

    def __init__(self):
        """Connect to the "/solr/core" endpoint of the configured SOLR_HOST."""
        core_url = urljoin(env('SOLR_HOST'), '/solr/core')
        self.solr = pysolr.Solr(core_url, always_commit=True)

    def reindex(self):
        """Delete every document in the index and re-add all posts.

        Also fetches the members list and prints it; members are not indexed.
        """
        self.solr.delete(q='*:*')

        all_posts = fet.find({"slug": ""})
        self.solr.add(post_to_solr(all_posts))

        members = fetmember.find({"nickname": ""})
        print(members)

    def search(self, query):
        """Search text, title and tags for *query* with highlighting enabled.

        Args:
            query: Search term; wrapped in ``*`` wildcards for each field,
                with title and tag matches boosted by 2.

        Returns:
            A tuple ``(posts, hits)``: the posts produced by
            ``result_to_posts`` and the hit count reported by Solr.
        """
        # NOTE(review): query is interpolated unescaped into the Solr query
        # string -- special characters may change or break the query.
        template = "text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2"
        querystring = template % (query, query, query)

        # Highlighting options forwarded to Solr as-is.
        highlight_options = {
            'hl': 'true',
            'hl.fragsize': 100,
            'hl.fl': '*',
            'hl.maxAnalyzedChars': -1,
            'hl.snippets': 100,
        }
        response = self.solr.search(
            querystring,
            sort="score desc, date_dt desc",
            **highlight_options
        )
        return result_to_posts(response), response.hits