"""Solr-backed search helpers for fet2020 posts."""

from urllib.parse import urljoin, urlparse

import environ
import pysolr
import yaml
from fet2020api import fet2020memberapi, fet2020postapi
from lxml.html.clean import Cleaner, clean_html

from .convert import post_to_solr

# Both settings can be overridden through environment variables; the values
# given here are the defaults used when they are not set.
env = environ.Env(
    SOLR_HOST=(str, "http://localhost:8980"),
    TARGET=(str, "https://alpha.2020.fet.at"),
)

# API clients bound to the post and member endpoints of the TARGET site.
fet = fet2020postapi(urljoin(env("TARGET"), "api/posts/"))
fetmember = fet2020memberapi(urljoin(env("TARGET"), "api/members/"))


def search_post(text=""):
    """Placeholder: not implemented, always returns ``None``."""


def reindex():
    """Placeholder: not implemented, always returns ``None``."""


def replace_special(t):
    """Return *t* with newlines, carriage returns, tabs and backslashes removed."""
    return (
        t.replace("\n", "")
        .replace("\r", "")
        .replace("\t", "")
        .replace("\\", "")
    )


def strip_html(text):
    """Clean highlight markup down to a small set of inline tags.

    Args:
        text: Either a single HTML string or a list of such strings.

    Returns:
        ``""`` for an empty string. For a list, every item is cleaned
        individually and emitted as ``" <item>;"``, all concatenated. For a
        single string, the cleaned markup with its outer wrapper removed.
    """
    if text == "":
        return ""
    if isinstance(text, list):
        return "".join(" " + strip_html(item) + ";" for item in text)
    cleaner = Cleaner(
        allow_tags=["i", "em", "p"],
        remove_tags=["p", "div"],
    )
    # The slice drops 5 leading and 6 trailing characters of the cleaned
    # output -- presumably the "<div>" / "</div>" wrapper lxml puts around the
    # fragment; verify against the installed lxml version.
    return cleaner.clean_html(replace_special(text))[5:-6]


def result_to_posts(result):
    """Turn a Solr search result into a list of enriched post dicts.

    Each Solr document id is split on ``"/"`` and its third component is used
    as the slug to look the post up through the post API. Every post that was
    found is extended in place with:

    * ``text``: ``"<title>: (<public_date>) <absolute url> <highlight>"``
    * ``highlights``: the cleaned ``text_txt`` highlight snippet(s)
    * ``url``: rewritten to an absolute URL when it is truthy
    * ``image``: absolute URL built from the path of ``imageurl`` when truthy

    Args:
        result: Search result exposing ``docs`` and ``highlighting``.

    Returns:
        One entry per Solr document, in result order. Entries are whatever
        ``fet.find_one`` returned, so falsy entries (post not found) are kept
        untouched.
    """
    highlights = result.highlighting
    target = env("TARGET")
    posts = [
        fet.find_one({"slug": doc["id"].split("/")[2]}) for doc in result.docs
    ]

    for post in posts:
        if not post:
            continue

        # A document without a highlight entry must not abort the whole
        # search, so fall back to an empty entry instead of raising KeyError.
        entry = highlights.get("/posts/" + post["slug"], {})
        # NOTE(review): debug output kept for backward compatibility;
        # consider replacing it with the logging module.
        print(entry)
        snippet = str(strip_html(entry.get("text_txt", "")))

        absolute_url = urljoin(target, post["url"]).rstrip("/")
        post["text"] = "%s: (%s) %s %s" % (
            post["title"],
            post["public_date"],
            absolute_url,
            snippet,
        )
        post["highlights"] = snippet
        if post["url"]:
            post["url"] = absolute_url
        if post["imageurl"]:
            # Only the path of the image URL is kept, so the image is always
            # served from TARGET regardless of the host stored in the post.
            post["image"] = urljoin(target, urlparse(post["imageurl"]).path)
    return posts


class SolrFet2020:
    """Thin wrapper around the Solr core that indexes fet2020 posts."""

    def __init__(self):
        """Connect to ``/solr/core`` on SOLR_HOST, committing on every change."""
        self.solr = pysolr.Solr(
            urljoin(env("SOLR_HOST"), "/solr/core"),
            always_commit=True,
        )

    def reindex(self):
        """Delete every document in the core and re-add the posts.

        The posts come from ``fet.find({"slug": ""})`` (presumably a filter
        that matches all posts -- confirm against the API client) and are
        converted with ``post_to_solr`` before being added.
        """
        self.solr.delete(q="*:*")
        documents = post_to_solr(fet.find({"slug": ""}))
        self.solr.add(documents)
        # NOTE(review): members are fetched and printed but not indexed;
        # kept as-is for backward compatibility.
        members = fetmember.find({"nickname": ""})
        print(members)

    def search(self, query):
        """Search text, title and tags for *query*.

        Title and tag matches are boosted by a factor of 2. Results are
        sorted by score, then by ``date_dt``, both descending, and
        highlighting is requested for all fields.

        Args:
            query: Search term; it is wrapped in ``*`` wildcards per field.

        Returns:
            A tuple ``(posts, hits)`` where ``posts`` is the output of
            ``result_to_posts`` and ``hits`` is the total hit count reported
            by Solr.
        """
        # NOTE(review): `query` is interpolated unescaped, so Solr query
        # syntax in user input is interpreted by Solr. Escape it here if the
        # value comes from untrusted users.
        querystring = "text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2" % (
            query,
            query,
            query,
        )
        highlight_options = {
            "hl": "true",
            "hl.fragsize": 100,
            "hl.fl": "*",
            "hl.maxAnalyzedChars": -1,
            "hl.snippets": 100,
        }
        result = self.solr.search(
            querystring,
            sort="score desc, date_dt desc",
            **highlight_options
        )
        links = result_to_posts(result)
        return links, result.hits