index member
This commit is contained in:
@@ -179,6 +179,7 @@ class Chat(BaseChat):
|
||||
self.mode=txt
|
||||
self.reply_msg("Mode: %s" % txt)
|
||||
return True
|
||||
|
||||
elif cmd == "/debug":
|
||||
if not u.fet_user:
|
||||
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
|
||||
@@ -196,6 +197,7 @@ class Chat(BaseChat):
|
||||
return True
|
||||
self.workflows[get_from_id(update)]=CreatePostWorkflow(chat=self)
|
||||
return True
|
||||
|
||||
elif cmd == "/reindex":
|
||||
if not u.fet_user:
|
||||
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
|
||||
@@ -204,6 +206,14 @@ class Chat(BaseChat):
|
||||
solr.reindex()
|
||||
self.send_msg("Fertig mit dem neuen Index")
|
||||
return True
|
||||
elif cmd == "/reindextest":
|
||||
if not u.fet_user:
|
||||
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
|
||||
return True
|
||||
self.reply_msg("Das kann ein bissl dauern...")
|
||||
solr.reindextest()
|
||||
self.send_msg("Fertig mit dem neuen Index")
|
||||
return True
|
||||
elif cmd == "/auth":
|
||||
if u.fet_user:
|
||||
self.reply_msg("Du bist schon authentifiziert...")
|
||||
|
||||
@@ -20,7 +20,7 @@ class django_crud_api():
|
||||
if r is None:
|
||||
return None
|
||||
if len(r)>1:
|
||||
raise LookupError("Mehr als ein Objekt von der API zurückgegeben")
|
||||
raise LookupError("Mehr als ein Objekt von der API zurückgegeben filter: %s" % str(filter))
|
||||
if len(r)==0:
|
||||
return None
|
||||
return r[0]
|
||||
|
||||
@@ -1,74 +1,41 @@
|
||||
from lxml.html.clean import clean_html, Cleaner
|
||||
import environ
|
||||
import pysolr
|
||||
from .convert import post_to_solr
|
||||
from urllib.parse import urljoin,urlparse
|
||||
from fet2020api import fet2020postapi, fet2020memberapi
|
||||
import yaml
|
||||
|
||||
env=environ.Env(
|
||||
SOLR_HOST=(str,"http://localhost:8980"),
|
||||
TARGET=(str,"https://alpha.2020.fet.at")
|
||||
)
|
||||
|
||||
import pysolr
|
||||
from .convert import post_to_solr, member_to_solr
|
||||
from urllib.parse import urljoin
|
||||
from fet2020api import fet2020postapi, fet2020memberapi
|
||||
import yaml
|
||||
from .solr_to_objects import result_to_object
|
||||
|
||||
fet=fet2020postapi(urljoin(env('TARGET'),"api/posts/"))
|
||||
fetmember=fet2020memberapi(urljoin(env('TARGET'),"api/members/"))
|
||||
def search_post(text=""):
|
||||
pass
|
||||
def reindex():
|
||||
pass
|
||||
|
||||
def replace_special(t):
|
||||
return t.replace("\n","").replace("\r","").replace("\t","").replace("\\","")
|
||||
|
||||
def strip_html(text):
|
||||
if text=="": return ""
|
||||
c=Cleaner(
|
||||
allow_tags=['i','em','p'],
|
||||
remove_tags=['p','div'])
|
||||
if type(text) is list:
|
||||
h=""
|
||||
for item in text:
|
||||
h=h+" "+strip_html(item)+";"
|
||||
return h
|
||||
return c.clean_html(replace_special(text))[5:-6]
|
||||
|
||||
|
||||
def result_to_posts(result):
|
||||
docs = result.docs
|
||||
highlights =result.highlighting
|
||||
posts = [fet.find_one({"slug": rr["id"].split("/")[2]}) for rr in docs ]
|
||||
def create_text(p):
|
||||
return "<b>" + \
|
||||
p["title"]+ "</b>: "+ \
|
||||
"(%s) " % p["public_date"] + \
|
||||
urljoin(env('TARGET'),
|
||||
p["url"]).rstrip("/")+" "+\
|
||||
str(strip_html(highlights["/posts/"+p["slug"]].get("text_txt","")))
|
||||
def create_highlights(p):
|
||||
print(highlights["/posts/"+p["slug"]])
|
||||
return str(strip_html(highlights["/posts/"+p["slug"]].get("text_txt","")))
|
||||
|
||||
for post in posts:
|
||||
if post:
|
||||
post["text"] = create_text(post)
|
||||
post["highlights"]=create_highlights(post)
|
||||
if post["url"]: post["url"]= urljoin(env('TARGET'),post["url"]).rstrip("/")
|
||||
if post["imageurl"]:post["image"]=urljoin(env('TARGET'),urlparse(post["imageurl"]).path)
|
||||
return posts
|
||||
|
||||
class SolrFet2020():
|
||||
def __init__(self):
    """Create the pysolr client for the ``/solr/core`` path on ``SOLR_HOST``.

    The client is created with ``always_commit=True`` and stored on
    ``self.solr``.
    """
    core_url = urljoin(env('SOLR_HOST'), '/solr/core')
    self.solr = pysolr.Solr(core_url, always_commit=True)
|
||||
|
||||
def reindex(self):
    """Rebuild the Solr index from the posts and members APIs.

    All documents are fetched and converted first; only then is the
    existing index deleted and refilled. This way an exception raised while
    fetching or converting does not leave an empty index behind.
    """
    post_docs = post_to_solr(fet.find({"slug": ""}))
    # search all members
    member_docs = member_to_solr(fetmember.find({"nickname": ""}))

    self.solr.delete(q='*:*')
    self.solr.add(post_docs)
    self.solr.add(member_docs)
|
||||
|
||||
def reindextest(self):
    """Fetch the member list from the members API and print it.

    Nothing is written to Solr here.
    """
    members = fetmember.find({"nickname": ""})
    print(members)
|
||||
|
||||
def search(self,query):
|
||||
querystring="text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2" % (query,query,query)
|
||||
|
||||
@@ -78,6 +45,7 @@ class SolrFet2020():
|
||||
'hl.fl': '*',
|
||||
'hl.maxAnalyzedChars': -1,
|
||||
'hl.snippets': 100, })
|
||||
links = result_to_posts(r)
|
||||
|
||||
links = result_to_object(r)
|
||||
#print(yaml.dump(r))
|
||||
return links, r.hits
|
||||
@@ -7,14 +7,6 @@ def post_to_solr(p):
|
||||
# if a list is given call for each element
|
||||
if type(p) is list:
|
||||
return [post_to_solr(o) for o in p]
|
||||
def get_text(l):
|
||||
if not l:
|
||||
return ""
|
||||
if len(l)<1:
|
||||
return ""
|
||||
c=Cleaner(allow_tags=['i','em'], remove_tags=['p','div','ul','li']) #
|
||||
h=c.clean_html(l.replace("\n"," ").replace("\r"," ").replace("\t"," ").replace("\\"," ")).text_content()
|
||||
return h
|
||||
def get_text2(l):
|
||||
if not l: return ""
|
||||
soup=BeautifulSoup(l,features="lxml")
|
||||
@@ -34,3 +26,14 @@ def post_to_solr(p):
|
||||
"title_txt": p["title"],
|
||||
"text_txt": (get_text2(p.get("body","")) or "")+" "+get_text2(p.get("agenda_html","") or "")
|
||||
}
|
||||
def member_to_solr(m):
    """Convert a member record, or a list of them, into Solr document dicts.

    Args:
        m: A mapping with the keys ``id``, ``nickname``, ``firstname``,
            ``surname`` and ``description``, or a list of such mappings.

    Returns:
        One document dict for a single mapping, or a list of document dicts
        (same order as the input) when a list is given.

    Raises:
        KeyError: If a member mapping lacks one of the keys listed above.
    """
    # Lists (including list subclasses) are converted element by element.
    if isinstance(m, list):
        return [member_to_solr(o) for o in m]
    return {
        "id": "/member/" + str(m["id"]),
        "date_dt": None,
        "tags_tkn": m["nickname"],
        "title_txt": m["firstname"] + " " + m["surname"] + "(" + m["nickname"] + ")",
        "text_txt": m["description"],
    }
|
||||
|
||||
60
solrfet2020/solr_to_objects.py
Normal file
60
solrfet2020/solr_to_objects.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from .utils import fet, fetmember
|
||||
from .utils import strip_html
|
||||
from urllib.parse import urljoin,urlparse
|
||||
from . import env
|
||||
|
||||
def pull_post(slug, o):
    """Fetch the post with the given slug and decorate it for display.

    Args:
        slug: Slug passed to ``fet.find_one`` to look the post up.
        o: Search-hit dict. Its ``"highlights"`` entry is expected to be a
            mapping that may hold ``"text_txt"`` snippets; a missing or
            ``None`` entry is treated as "no snippets".

    Returns:
        The post dict extended with ``typ``, ``highlights`` and ``text``,
        plus an absolute ``url`` / ``image`` when the post has ``url`` /
        ``imageurl`` values. If the lookup yields nothing, that falsy
        result is returned unchanged.
    """
    post = fet.find_one({"slug": slug})
    if not post:
        return post

    # Without this guard a hit that has no highlighting entry would fail
    # with AttributeError on ``None.get``.
    hit_highlights = o.get("highlights") or {}
    target = env('TARGET')

    post["typ"] = "posts"
    post["highlights"] = str(strip_html(hit_highlights.get("text_txt", "")))
    # The text is built from the still-relative url, before it is rewritten
    # to an absolute one below.
    post["text"] = (
        "<b>" + post["title"] + "</b>: "
        + "(%s) " % post["public_date"]
        + urljoin(target, post["url"]).rstrip("/") + " "
        + post["highlights"]
    )
    if post["url"]:
        post["url"] = urljoin(target, post["url"]).rstrip("/")
    if post["imageurl"]:
        # Only the path of the image url is kept and re-rooted on TARGET.
        post["image"] = urljoin(target, urlparse(post["imageurl"]).path)
    return post
|
||||
|
||||
def pull_member(id, o):
    """Load a member via ``fetmember.get`` and add display fields to it.

    Args:
        id: Member identifier; also used to build the ``/member/<id>`` url.
        o: Search-hit dict; not used by this loader.

    Returns:
        The member dict with ``url``, ``text``, ``imageurl`` and ``title``
        added, or ``None`` when the lookup yields nothing.
    """
    record = fetmember.get(id)
    if not record:
        return None

    record["url"] = urljoin(env('TARGET'), "/member/%s" % id)
    full_name = record["firstname"] + " " + record["surname"]
    record["text"] = "<b>" + full_name + "</b>" + " " + record["url"]
    record["imageurl"] = record["image"]
    record["title"] = full_name
    return record
|
||||
|
||||
# Dispatch table: maps a type name to the loader that fetches the original
# object; each loader is called as loader(term, hit_dict).
pull_original={"posts": pull_post,"member": pull_member}
|
||||
|
||||
|
||||
def result_to_object(result):
    """Turn a Solr search result into the original objects it refers to.

    Args:
        result: Search result exposing ``docs`` (dicts whose ``"id"`` has
            the form ``/<typ>/<id>``) and ``highlighting`` (mapping from
            document id to a highlight dict).

    Returns:
        A list with one entry per document, in result order. Each entry is
        whatever the matching loader in ``pull_original`` returns.

    Raises:
        KeyError: If a document's ``<typ>`` has no loader in
            ``pull_original``.
        IndexError: If a document id has fewer than two ``/`` separators.
    """
    highlights = result.highlighting or {}

    def doc_to_object(doc):
        """Split the id ``/<typ>/<id>`` and load the object it refers to."""
        doc_id = doc["id"]
        parts = doc_id.split("/")
        hit = {
            "id": doc_id,
            "typ": parts[1],
            "term": parts[2],
            # Loaders get an empty mapping instead of None when the hit has
            # no highlighting entry.
            "highlights": highlights.get(doc_id) or {},
        }
        return pull_original[hit["typ"]](hit["term"], hit)

    return [doc_to_object(d) for d in result.docs]
|
||||
28
solrfet2020/utils.py
Normal file
28
solrfet2020/utils.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from lxml.html.clean import clean_html, Cleaner
|
||||
from fet2020api import fet2020postapi, fet2020memberapi
|
||||
from urllib.parse import urljoin,urlparse
|
||||
from . import env
|
||||
|
||||
|
||||
fet=fet2020postapi(urljoin(env('TARGET'),"api/posts/"))
|
||||
fetmember=fet2020memberapi(urljoin(env('TARGET'),"api/members/"))
|
||||
|
||||
|
||||
# Translation table that deletes newline, carriage return, tab and backslash.
_SPECIAL_CHARS = str.maketrans("", "", "\n\r\t\\")


def replace_special(t):
    """Return *t* without newlines, carriage returns, tabs and backslashes.

    Args:
        t: The string to clean.

    Returns:
        A copy of *t* with every ``\\n``, ``\\r``, ``\\t`` and backslash
        character removed.

    Raises:
        TypeError: If *t* is a dict.
    """
    if isinstance(t, dict):
        raise TypeError("String needed, got a %s" % str(type(t)))
    # One pass over the string instead of four chained replace() calls.
    return t.translate(_SPECIAL_CHARS)
|
||||
|
||||
def strip_html(text):
    """Clean HTML from a string, or from every string in a list.

    Args:
        text: An HTML string, or a list of such strings (lists may nest).

    Returns:
        ``""`` for an empty string. For a list, the cleaned items joined as
        ``" <item>;"`` each. Otherwise the output of the lxml ``Cleaner``
        with its first 5 and last 6 characters cut off.
    """
    if text == "":
        return ""
    if isinstance(text, list):
        # Same output as the former ``h = h + " " + item + ";"`` loop, and
        # no Cleaner is built for the list itself.
        return "".join(" " + strip_html(item) + ";" for item in text)
    # NOTE(review): 'p' appears in both allow_tags and remove_tags -- confirm
    # which of the two is intended.
    cleaner = Cleaner(
        allow_tags=['i', 'em', 'p'],
        remove_tags=['p', 'div'])
    # The slice presumably drops a wrapping element added by clean_html --
    # TODO confirm against the lxml version in use.
    return cleaner.clean_html(replace_special(text))[5:-6]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user