77 lines
2.3 KiB
Python
77 lines
2.3 KiB
Python
from lxml.html.clean import clean_html, Cleaner
|
|
import environ
|
|
import pysolr
|
|
from .convert import post_to_solr
|
|
from urllib.parse import urljoin,urlparse
|
|
from fet2020api import fet2020postapi
|
|
import yaml
|
|
# Settings are read from the process environment. Each entry is declared as a
# (type, default) tuple; the defaults point at a local Solr and the alpha site.
env = environ.Env(
    SOLR_HOST=(str, "http://localhost:8980"),
    TARGET=(str, "https://alpha.2020.fet.at"),
)

# Client for the posts API, addressed by joining "api/posts/" onto TARGET.
fet = fet2020postapi(urljoin(env("TARGET"), "api/posts/"))
|
|
def search_post(text=""):
    """Placeholder: accepts a search text but performs no work yet.

    Args:
        text: Search text; currently ignored.

    Returns:
        None, always.
    """
    return None
|
|
def reindex():
    """Placeholder: takes no arguments and performs no work yet.

    Returns:
        None, always.
    """
    return None
|
|
|
|
def replace_special(t):
    """Return *t* with newlines, carriage returns, tabs and backslashes removed.

    The characters are deleted outright (not replaced by a space), so text on
    either side of them is joined together.

    Args:
        t: The string to clean.

    Returns:
        A copy of *t* without any "\\n", "\\r", "\\t" or "\\" characters.
    """
    # A translation table with only a "delete" set strips all four characters
    # in a single pass.
    unwanted = str.maketrans("", "", "\n\r\t\\")
    return t.translate(unwanted)
|
|
|
|
def strip_html(text):
    """Strip HTML down to text with only a little inline markup left.

    Args:
        text: An HTML string, or a list/tuple of such strings (nested
            sequences are processed recursively).

    Returns:
        For a string: the cleaned markup with the first 5 and last 6
        characters of the cleaner's output sliced off.
        For a sequence: every cleaned item rendered as " <item>;" and
        concatenated, so an empty sequence yields "".
    """
    if isinstance(text, (list, tuple)):
        return "".join(" " + strip_html(item) + ";" for item in text)

    # Built only on the string path so list input does not construct a
    # throw-away cleaner at every recursion level.
    cleaner = Cleaner(
        # NOTE(review): 'p' is listed both as allowed and as removed; confirm
        # which of the two is actually intended.
        allow_tags=['i', 'em', 'p'],
        remove_tags=['p', 'div'],
        # lxml refuses to combine allow_tags with remove_unknown_tags (which
        # is enabled by default) and raises ValueError when cleaning, so the
        # default has to be switched off for allow_tags to be usable.
        remove_unknown_tags=False,
    )
    # The slice presumably removes the "<div>" / "</div>" wrapper (5 and 6
    # characters) that lxml leaves around the cleaned fragment -- TODO confirm
    # against real cleaner output.
    return cleaner.clean_html(replace_special(text))[5:-6]
|
|
|
|
|
|
def result_to_posts(result):
    """Turn a Solr search result into a list of display-ready posts.

    Every document in ``result.docs`` is looked up with ``fet.find_one`` using
    the document ``id`` as the post slug. Each post that was found is then
    extended in place:

    * ``text``: ``"<b>title</b>: (public_date) link snippets"``
    * ``highlights``: the cleaned highlight snippets on their own
    * ``url``: joined onto TARGET with trailing "/" removed (only if set)
    * ``image``: its URL path joined onto TARGET (only if set)

    Args:
        result: Object exposing ``docs`` (iterable of mappings with an "id"
            key) and ``highlighting`` (mapping of slug -> field -> snippets).

    Returns:
        list: One entry per document, in document order. Entries for which
        the lookup returned a falsy value are left as they are.
    """
    base_url = env('TARGET')
    highlights = result.highlighting or {}
    posts = [fet.find_one({"slug": doc["id"]}) for doc in result.docs]

    for post in posts:
        if not post:
            continue

        # A document may come back without a "text_txt" highlight (or without
        # any highlighting entry); treat that as "no snippets" rather than
        # failing the whole search with a KeyError.
        fragments = highlights.get(post["slug"], {}).get("text_txt", [])
        # Cleaned once and shared by "text" and "highlights".
        snippet = str(strip_html(fragments))

        # Computed from the original (possibly relative) url before the post
        # is modified; also used for "text" when the url itself is empty.
        link = urljoin(base_url, post["url"]).rstrip("/")

        post["text"] = (
            "<b>" + post["title"] + "</b>: "
            + "(%s) " % post["public_date"]
            + link + " "
            + snippet
        )
        post["highlights"] = snippet
        if post["url"]:
            post["url"] = link
        if post["image"]:
            # Only the path part of the image URL is kept and re-rooted on
            # TARGET; any original host is discarded.
            post["image"] = urljoin(base_url, urlparse(post["image"]).path)
    return posts
|
|
|
|
class SolrFet2020:
    """Wrapper around the Solr core used to index and search posts."""

    def __init__(self):
        """Create the Solr client for the '/solr/core' path on SOLR_HOST.

        The client is created with ``always_commit=True``.
        """
        self.solr = pysolr.Solr(
            urljoin(env('SOLR_HOST'), '/solr/core'),
            always_commit=True,
        )

    def reindex(self):
        """Replace the whole index content with freshly fetched posts.

        Raises:
            Whatever the post API, the conversion or pysolr raise. Fetching
            and converting happen first, so a failure there leaves the
            existing index untouched.
        """
        # Fetch and convert BEFORE deleting: if this step fails, the old
        # index is still in place instead of being left empty.
        # NOTE(review): the {"slug": ""} filter presumably selects all posts
        # -- confirm against the fet2020postapi client.
        documents = post_to_solr(fet.find({"slug": ""}))
        self.solr.delete(q='*:*')
        self.solr.add(documents)

    def search(self, query):
        """Search the ``text_txt`` field for *query* as a substring match.

        Args:
            query: Text placed between wildcards in the Solr query.
                NOTE(review): it is interpolated as-is, so Lucene query
                syntax and spaces in it are interpreted by Solr; escape it
                upstream if it comes from untrusted input.

        Returns:
            tuple: ``(posts, hits)`` where ``posts`` is the output of
            ``result_to_posts`` and ``hits`` is the hit count of the result.
        """
        highlight_options = {
            'hl': 'true',
            'hl.fragsize': 100,
            'hl.fl': '*',
            'hl.maxAnalyzedChars': -1,
            'hl.snippets': 100,
        }
        result = self.solr.search(
            "text_txt:*%s*" % query,
            sort="date_dt desc",
            **highlight_options
        )
        return result_to_posts(result), result.hits