index member

This commit is contained in:
www
2020-12-14 20:16:49 +00:00
parent ca753097b0
commit 14f641ec08
6 changed files with 126 additions and 57 deletions

View File

@@ -179,6 +179,7 @@ class Chat(BaseChat):
self.mode=txt self.mode=txt
self.reply_msg("Mode: %s" % txt) self.reply_msg("Mode: %s" % txt)
return True return True
elif cmd == "/debug": elif cmd == "/debug":
if not u.fet_user: if not u.fet_user:
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist") self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
@@ -196,6 +197,7 @@ class Chat(BaseChat):
return True return True
self.workflows[get_from_id(update)]=CreatePostWorkflow(chat=self) self.workflows[get_from_id(update)]=CreatePostWorkflow(chat=self)
return True return True
elif cmd == "/reindex": elif cmd == "/reindex":
if not u.fet_user: if not u.fet_user:
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist") self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
@@ -204,6 +206,14 @@ class Chat(BaseChat):
solr.reindex() solr.reindex()
self.send_msg("Fertig mit dem neuen Index") self.send_msg("Fertig mit dem neuen Index")
return True return True
elif cmd == "/reindextest":
if not u.fet_user:
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
return True
self.reply_msg("Das kann ein bissl dauern...")
solr.reindextest()
self.send_msg("Fertig mit dem neuen Index")
return True
elif cmd == "/auth": elif cmd == "/auth":
if u.fet_user: if u.fet_user:
self.reply_msg("Du bist schon authentifiziert...") self.reply_msg("Du bist schon authentifiziert...")

View File

@@ -20,7 +20,7 @@ class django_crud_api():
if r is None: if r is None:
return None return None
if len(r)>1: if len(r)>1:
raise LookupError("Mehr als ein Objekt von der API zurückgegeben") raise LookupError("Mehr als ein Objekt von der API zurückgegeben filter: %s" % str(filter))
if len(r)==0: if len(r)==0:
return None return None
return r[0] return r[0]

View File

@@ -1,74 +1,41 @@
from lxml.html.clean import clean_html, Cleaner from lxml.html.clean import clean_html, Cleaner
import environ import environ
import pysolr
from .convert import post_to_solr
from urllib.parse import urljoin,urlparse
from fet2020api import fet2020postapi, fet2020memberapi
import yaml
env=environ.Env( env=environ.Env(
SOLR_HOST=(str,"http://localhost:8980"), SOLR_HOST=(str,"http://localhost:8980"),
TARGET=(str,"https://alpha.2020.fet.at") TARGET=(str,"https://alpha.2020.fet.at")
) )
import pysolr
from .convert import post_to_solr, member_to_solr
from urllib.parse import urljoin
from fet2020api import fet2020postapi, fet2020memberapi
import yaml
from .solr_to_objects import result_to_object
fet=fet2020postapi(urljoin(env('TARGET'),"api/posts/")) fet=fet2020postapi(urljoin(env('TARGET'),"api/posts/"))
fetmember=fet2020memberapi(urljoin(env('TARGET'),"api/members/")) fetmember=fet2020memberapi(urljoin(env('TARGET'),"api/members/"))
def search_post(text=""):
pass
def reindex():
pass
def replace_special(t):
return t.replace("\n","").replace("\r","").replace("\t","").replace("\\","")
def strip_html(text):
if text=="": return ""
c=Cleaner(
allow_tags=['i','em','p'],
remove_tags=['p','div'])
if type(text) is list:
h=""
for item in text:
h=h+" "+strip_html(item)+";"
return h
return c.clean_html(replace_special(text))[5:-6]
def result_to_posts(result):
docs = result.docs
highlights =result.highlighting
posts = [fet.find_one({"slug": rr["id"].split("/")[2]}) for rr in docs ]
def create_text(p):
return "<b>" + \
p["title"]+ "</b>: "+ \
"(%s) " % p["public_date"] + \
urljoin(env('TARGET'),
p["url"]).rstrip("/")+" "+\
str(strip_html(highlights["/posts/"+p["slug"]].get("text_txt","")))
def create_highlights(p):
print(highlights["/posts/"+p["slug"]])
return str(strip_html(highlights["/posts/"+p["slug"]].get("text_txt","")))
for post in posts:
if post:
post["text"] = create_text(post)
post["highlights"]=create_highlights(post)
if post["url"]: post["url"]= urljoin(env('TARGET'),post["url"]).rstrip("/")
if post["imageurl"]:post["image"]=urljoin(env('TARGET'),urlparse(post["imageurl"]).path)
return posts
class SolrFet2020(): class SolrFet2020():
def __init__(self): def __init__(self):
self.solr=pysolr.Solr( self.solr=pysolr.Solr(
urljoin(env('SOLR_HOST'),'/solr/core'), urljoin(env('SOLR_HOST'),'/solr/core'),
always_commit=True always_commit=True
) )
def reindex(self): def reindex(self):
self.solr.delete(q='*:*') self.solr.delete(q='*:*')
p=post_to_solr(fet.find({"slug":""})) p=post_to_solr(fet.find({"slug":""}))
self.solr.add(p) self.solr.add(p)
m=member_to_solr(fetmember.find({"nickname":""})) # search all members
self.solr.add(m)
#print(m)
def reindextest(self):
m=fetmember.find({"nickname":""}) m=fetmember.find({"nickname":""})
print(m) print(m)
def search(self,query): def search(self,query):
querystring="text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2" % (query,query,query) querystring="text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2" % (query,query,query)
@@ -78,6 +45,7 @@ class SolrFet2020():
'hl.fl': '*', 'hl.fl': '*',
'hl.maxAnalyzedChars': -1, 'hl.maxAnalyzedChars': -1,
'hl.snippets': 100, }) 'hl.snippets': 100, })
links = result_to_posts(r)
links = result_to_object(r)
#print(yaml.dump(r)) #print(yaml.dump(r))
return links, r.hits return links, r.hits

View File

@@ -7,14 +7,6 @@ def post_to_solr(p):
# if a list is given call for each element # if a list is given call for each element
if type(p) is list: if type(p) is list:
return [post_to_solr(o) for o in p] return [post_to_solr(o) for o in p]
def get_text(l):
if not l:
return ""
if len(l)<1:
return ""
c=Cleaner(allow_tags=['i','em'], remove_tags=['p','div','ul','li']) #
h=c.clean_html(l.replace("\n"," ").replace("\r"," ").replace("\t"," ").replace("\\"," ")).text_content()
return h
def get_text2(l): def get_text2(l):
if not l: return "" if not l: return ""
soup=BeautifulSoup(l,features="lxml") soup=BeautifulSoup(l,features="lxml")
@@ -34,3 +26,14 @@ def post_to_solr(p):
"title_txt": p["title"], "title_txt": p["title"],
"text_txt": (get_text2(p.get("body","")) or "")+" "+get_text2(p.get("agenda_html","") or "") "text_txt": (get_text2(p.get("body","")) or "")+" "+get_text2(p.get("agenda_html","") or "")
} }
def member_to_solr(m):
    """Map a member record (or a list of them) onto a Solr document dict.

    Recurses element-wise when handed a list. The document id is the
    member's site path ("/member/<id>"); the nickname doubles as a
    search token.
    """
    if type(m) is list:
        docs = []
        for entry in m:
            docs.append(member_to_solr(entry))
        return docs
    full_name = m["firstname"] + " " + m["surname"]
    return {
        "id": "/member/" + str(m["id"]),
        "date_dt": None,  # members carry no publication date
        "tags_tkn": m["nickname"],
        # NOTE: original output has no space before the parenthesis
        "title_txt": full_name + "(" + m["nickname"] + ")",
        "text_txt": m["description"],
    }

View File

@@ -0,0 +1,60 @@
from .utils import fet, fetmember
from .utils import strip_html
from urllib.parse import urljoin,urlparse
from . import env
def pull_post(slug, o):
    """Fetch the original post for a Solr hit and decorate it for display.

    slug: post slug taken from the Solr document id ("/posts/<slug>").
    o:    intermediate hit dict; o["highlights"] holds the Solr
          highlighting entry for this document (assumed to expose
          .get("text_txt", ...) — TODO confirm it is never None).
    Returns the API post dict enriched with typ/text/highlights and
    absolutized url/image, or the falsy API result when lookup fails.
    """
    # Removed: dead commented-out code and an unused local binding of
    # o["highlights"] that was never read.
    post = fet.find_one({"slug": slug})

    def create_text(p):
        # "<b>title</b>: (date) url highlights" — the message body.
        return "<b>" + \
            p["title"] + "</b>: " + \
            "(%s) " % p["public_date"] + \
            urljoin(env('TARGET'),
                    p["url"]).rstrip("/") + " " + \
            p["highlights"]

    def create_highlights(h):
        # Strip markup from the highlighted snippet(s).
        return str(strip_html(h.get("text_txt", "")))

    if post:
        post["typ"] = "posts"
        # highlights must be set before create_text, which reads them back.
        post["highlights"] = create_highlights(o["highlights"])
        post["text"] = create_text(post)
        if post["url"]:
            post["url"] = urljoin(env('TARGET'), post["url"]).rstrip("/")
        if post["imageurl"]:
            post["image"] = urljoin(env('TARGET'), urlparse(post["imageurl"]).path)
    return post
def pull_member(id, o):
    """Look up a member by id and decorate it for display.

    Returns None when the API knows no such member. `o` (the Solr hit)
    is accepted for dispatch symmetry with pull_post but not used here.
    """
    member = fetmember.get(id)
    if not member:
        return None
    name = member["firstname"] + " " + member["surname"]
    member["url"] = urljoin(env('TARGET'), "/member/%s" % id)
    member["text"] = "<b>" + name + "</b>" + " " + member["url"]
    member["imageurl"] = member["image"]
    member["title"] = name
    return member
# Dispatch table: Solr id prefix ("/posts/...", "/member/...") -> fetch function.
pull_original={"posts": pull_post,"member": pull_member}
def result_to_object(result):
    """Turn a pysolr search result into a list of display objects.

    Each document id has the shape "/<typ>/<term>"; <typ> selects the
    fetch function in pull_original, which pulls the original record
    from the API and decorates it with the doc's highlighting entry.
    """
    highlighting = result.highlighting

    def split_id(doc):
        # "/<typ>/<term>" -> {"id", "typ", "term"}
        parts = doc["id"].split("/")
        return {
            "id": doc["id"],
            "typ": parts[1],
            "term": parts[2],
        }

    objects = []
    for doc in result.docs:
        hit = split_id(doc)
        hit["highlights"] = highlighting.get(hit["id"])
        objects.append(pull_original[hit["typ"]](hit["term"], hit))
    return objects

28
solrfet2020/utils.py Normal file
View File

@@ -0,0 +1,28 @@
from lxml.html.clean import clean_html, Cleaner
from fet2020api import fet2020postapi, fet2020memberapi
from urllib.parse import urljoin,urlparse
from . import env
fet=fet2020postapi(urljoin(env('TARGET'),"api/posts/"))
fetmember=fet2020memberapi(urljoin(env('TARGET'),"api/members/"))
def replace_special(t):
    """Remove newline, carriage-return, tab and backslash characters from t.

    Raises TypeError when handed a dict (a whole API record instead of a
    single field) so the mistake surfaces close to its cause.
    """
    if type(t) is dict:
        # Fixed typo in the error message ("git a" -> "got a").
        raise TypeError("String needed, got a %s" % str(type(t)))
    # One C-level pass instead of four chained .replace() calls.
    return t.translate(str.maketrans("", "", "\n\r\t\\"))
def strip_html(text):
    """Reduce an HTML fragment to near-plain text, keeping <i>/<em> emphasis.

    Accepts a string or a list of strings; a list is flattened into one
    string with " ...;" separators (recursing per item). Empty string
    maps to empty string.
    """
    if text == "":
        return ""
    cleaner = Cleaner(
        allow_tags=['i', 'em', 'p'],
        remove_tags=['p', 'div'])
    if type(text) is list:
        joined = ""
        for item in text:
            joined = joined + " " + strip_html(item) + ";"
        return joined
    # Removed leftover debug print(text) that spammed every search result.
    # [5:-6] drops the "<div>"/"</div>" wrapper clean_html puts around the
    # fragment — assumes lxml always wraps in a div; TODO confirm.
    return cleaner.clean_html(replace_special(text))[5:-6]