index member
This commit is contained in:
@@ -179,6 +179,7 @@ class Chat(BaseChat):
|
|||||||
self.mode=txt
|
self.mode=txt
|
||||||
self.reply_msg("Mode: %s" % txt)
|
self.reply_msg("Mode: %s" % txt)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
elif cmd == "/debug":
|
elif cmd == "/debug":
|
||||||
if not u.fet_user:
|
if not u.fet_user:
|
||||||
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
|
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
|
||||||
@@ -196,6 +197,7 @@ class Chat(BaseChat):
|
|||||||
return True
|
return True
|
||||||
self.workflows[get_from_id(update)]=CreatePostWorkflow(chat=self)
|
self.workflows[get_from_id(update)]=CreatePostWorkflow(chat=self)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
elif cmd == "/reindex":
|
elif cmd == "/reindex":
|
||||||
if not u.fet_user:
|
if not u.fet_user:
|
||||||
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
|
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
|
||||||
@@ -204,6 +206,14 @@ class Chat(BaseChat):
|
|||||||
solr.reindex()
|
solr.reindex()
|
||||||
self.send_msg("Fertig mit dem neuen Index")
|
self.send_msg("Fertig mit dem neuen Index")
|
||||||
return True
|
return True
|
||||||
|
elif cmd == "/reindextest":
|
||||||
|
if not u.fet_user:
|
||||||
|
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
|
||||||
|
return True
|
||||||
|
self.reply_msg("Das kann ein bissl dauern...")
|
||||||
|
solr.reindextest()
|
||||||
|
self.send_msg("Fertig mit dem neuen Index")
|
||||||
|
return True
|
||||||
elif cmd == "/auth":
|
elif cmd == "/auth":
|
||||||
if u.fet_user:
|
if u.fet_user:
|
||||||
self.reply_msg("Du bist schon authentifiziert...")
|
self.reply_msg("Du bist schon authentifiziert...")
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ class django_crud_api():
|
|||||||
if r is None:
|
if r is None:
|
||||||
return None
|
return None
|
||||||
if len(r)>1:
|
if len(r)>1:
|
||||||
raise LookupError("Mehr als ein Objekt von der API zurückgegeben")
|
raise LookupError("Mehr als ein Objekt von der API zurückgegeben filter: %s" % str(filter))
|
||||||
if len(r)==0:
|
if len(r)==0:
|
||||||
return None
|
return None
|
||||||
return r[0]
|
return r[0]
|
||||||
|
|||||||
@@ -1,74 +1,41 @@
|
|||||||
from lxml.html.clean import clean_html, Cleaner
|
from lxml.html.clean import clean_html, Cleaner
|
||||||
import environ
|
import environ
|
||||||
import pysolr
|
|
||||||
from .convert import post_to_solr
|
|
||||||
from urllib.parse import urljoin,urlparse
|
|
||||||
from fet2020api import fet2020postapi, fet2020memberapi
|
|
||||||
import yaml
|
|
||||||
# Deployment settings read through django-environ; each entry is declared
# as a (cast, default) pair.
env = environ.Env(
    SOLR_HOST=(str, "http://localhost:8980"),
    TARGET=(str, "https://alpha.2020.fet.at"),
)
|
||||||
|
|
||||||
|
import pysolr
|
||||||
|
from .convert import post_to_solr, member_to_solr
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
from fet2020api import fet2020postapi, fet2020memberapi
|
||||||
|
import yaml
|
||||||
|
from .solr_to_objects import result_to_object
|
||||||
|
|
||||||
# API clients for the posts and members endpoints below the TARGET site.
fet = fet2020postapi(
    urljoin(env('TARGET'), "api/posts/")
)
fetmember = fet2020memberapi(
    urljoin(env('TARGET'), "api/members/")
)
|
||||||
def search_post(text=""):
    """Placeholder for a post search; does nothing and returns None."""
    return None
|
|
||||||
def reindex():
    """Placeholder for a module-level reindex; does nothing and returns None."""
    return None
|
|
||||||
|
|
||||||
def replace_special(t):
    """Return *t* with newlines, carriage returns, tabs and backslashes removed."""
    for unwanted in ("\n", "\r", "\t", "\\"):
        t = t.replace(unwanted, "")
    return t
|
|
||||||
|
|
||||||
def strip_html(text):
    """Clean an HTML snippet, or a list of snippets, for display.

    An empty string is returned unchanged. A list is cleaned item by item
    and flattened into one string in which every item is preceded by a
    space and followed by ";". Any other value is passed through
    ``replace_special`` and the lxml ``Cleaner``.
    """
    if text == "":
        return ""
    cleaner = Cleaner(allow_tags=['i', 'em', 'p'], remove_tags=['p', 'div'])
    if type(text) is not list:
        # The slice drops the first 5 and last 6 characters of the cleaned
        # markup -- presumably the wrapper element lxml adds around the
        # fragment; TODO confirm.
        return cleaner.clean_html(replace_special(text))[5:-6]
    flattened = ""
    for snippet in text:
        flattened += " " + strip_html(snippet) + ";"
    return flattened
|
|
||||||
|
|
||||||
|
|
||||||
def result_to_posts(result):
    """Turn a Solr result into the list of post dicts it refers to.

    Every document id is split on "/" and its third segment is used as the
    slug for ``fet.find_one``; the highlighting of a post is looked up
    under ``/posts/<slug>``. Posts that were found get a ``text`` summary,
    their ``highlights`` snippet and absolute ``url`` / ``image`` values.
    Entries for which the lookup returned a falsy value stay in the list
    untouched.
    """
    hits = result.docs
    highlighting = result.highlighting

    def snippet(entry):
        per_post = highlighting["/posts/" + entry["slug"]]
        return str(strip_html(per_post.get("text_txt", "")))

    def summary(entry):
        heading = "<b>" + entry["title"] + "</b>: "
        published = "(%s) " % entry["public_date"]
        link = urljoin(env('TARGET'), entry["url"]).rstrip("/")
        return heading + published + link + " " + snippet(entry)

    def logged_snippet(entry):
        # Debug output of the raw highlighting entry is kept as-is.
        print(highlighting["/posts/" + entry["slug"]])
        return snippet(entry)

    posts = [fet.find_one({"slug": hit["id"].split("/")[2]}) for hit in hits]
    for entry in posts:
        if not entry:
            continue
        # The summary is built from the still-relative url, so it has to
        # be created before the url is rewritten below.
        entry["text"] = summary(entry)
        entry["highlights"] = logged_snippet(entry)
        if entry["url"]:
            entry["url"] = urljoin(env('TARGET'), entry["url"]).rstrip("/")
        if entry["imageurl"]:
            entry["image"] = urljoin(
                env('TARGET'), urlparse(entry["imageurl"]).path
            )
    return posts
|
|
||||||
|
|
||||||
class SolrFet2020():
|
class SolrFet2020():
|
||||||
def __init__(self):
    """Create the pysolr client for the ``/solr/core`` path on SOLR_HOST."""
    core_url = urljoin(env('SOLR_HOST'), '/solr/core')
    self.solr = pysolr.Solr(core_url, always_commit=True)
|
||||||
|
|
||||||
def reindex(self):
    """Rebuild the Solr index from the posts and members of the FET API.

    All documents are fetched and converted *before* the existing index
    is deleted, so a failing API call or conversion error no longer
    leaves Solr with an empty index.
    """
    # An empty filter value is used to search all members; the post query
    # presumably works the same way -- verify against the API client.
    post_docs = post_to_solr(fet.find({"slug": ""}))
    member_docs = member_to_solr(fetmember.find({"nickname": ""}))

    self.solr.delete(q='*:*')
    self.solr.add(post_docs)
    self.solr.add(member_docs)
|
||||||
|
|
||||||
|
def reindextest(self):
    """Fetch the members from the API and print them; Solr is not touched."""
    print(fetmember.find({"nickname": ""}))
|
||||||
|
|
||||||
def search(self,query):
|
def search(self,query):
|
||||||
querystring="text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2" % (query,query,query)
|
querystring="text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2" % (query,query,query)
|
||||||
|
|
||||||
@@ -78,6 +45,7 @@ class SolrFet2020():
|
|||||||
'hl.fl': '*',
|
'hl.fl': '*',
|
||||||
'hl.maxAnalyzedChars': -1,
|
'hl.maxAnalyzedChars': -1,
|
||||||
'hl.snippets': 100, })
|
'hl.snippets': 100, })
|
||||||
links = result_to_posts(r)
|
|
||||||
|
links = result_to_object(r)
|
||||||
#print(yaml.dump(r))
|
#print(yaml.dump(r))
|
||||||
return links, r.hits
|
return links, r.hits
|
||||||
@@ -7,14 +7,6 @@ def post_to_solr(p):
|
|||||||
# if a list is given call for each element
|
# if a list is given call for each element
|
||||||
if type(p) is list:
|
if type(p) is list:
|
||||||
return [post_to_solr(o) for o in p]
|
return [post_to_solr(o) for o in p]
|
||||||
def get_text(l):
|
|
||||||
if not l:
|
|
||||||
return ""
|
|
||||||
if len(l)<1:
|
|
||||||
return ""
|
|
||||||
c=Cleaner(allow_tags=['i','em'], remove_tags=['p','div','ul','li']) #
|
|
||||||
h=c.clean_html(l.replace("\n"," ").replace("\r"," ").replace("\t"," ").replace("\\"," ")).text_content()
|
|
||||||
return h
|
|
||||||
def get_text2(l):
|
def get_text2(l):
|
||||||
if not l: return ""
|
if not l: return ""
|
||||||
soup=BeautifulSoup(l,features="lxml")
|
soup=BeautifulSoup(l,features="lxml")
|
||||||
@@ -34,3 +26,14 @@ def post_to_solr(p):
|
|||||||
"title_txt": p["title"],
|
"title_txt": p["title"],
|
||||||
"text_txt": (get_text2(p.get("body","")) or "")+" "+get_text2(p.get("agenda_html","") or "")
|
"text_txt": (get_text2(p.get("body","")) or "")+" "+get_text2(p.get("agenda_html","") or "")
|
||||||
}
|
}
|
||||||
|
def member_to_solr(m):
    """Convert a member record, or a list/tuple of them, into Solr documents.

    Args:
        m: A mapping with the keys "id", "nickname", "firstname",
            "surname" and "description", or a list/tuple of such mappings.

    Returns:
        A Solr document dict (or a list of them) whose id has the form
        ``/member/<id>``.

    Raises:
        KeyError: if one of the required keys is missing.
    """
    if isinstance(m, (list, tuple)):
        return [member_to_solr(item) for item in m]

    def text_of(key):
        # Null or non-string name parts must not break the title.
        value = m[key]
        return "" if value is None else str(value)

    title = (
        text_of("firstname") + " " + text_of("surname")
        + "(" + text_of("nickname") + ")"
    )
    return {
        "id": "/member/" + str(m["id"]),
        "date_dt": None,
        "tags_tkn": m["nickname"],
        "title_txt": title,
        "text_txt": m["description"],
    }
|
||||||
|
|
||||||
60
solrfet2020/solr_to_objects.py
Normal file
60
solrfet2020/solr_to_objects.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
from .utils import fet, fetmember
|
||||||
|
from .utils import strip_html
|
||||||
|
from urllib.parse import urljoin,urlparse
|
||||||
|
from . import env
|
||||||
|
|
||||||
|
def pull_post(slug, o):
    """Fetch the post behind a search hit and enrich it for display.

    Args:
        slug: Slug of the post, used to look it up via ``fet.find_one``.
        o: Search-hit dict; only its "highlights" entry is read here.

    Returns:
        The post dict extended with "typ", "highlights", "text" and
        absolute "url" / "image" values, or the falsy value returned by
        ``fet.find_one`` when no post was found.
    """
    post = fet.find_one({"slug": slug})
    if not post:
        return post

    # The hit may carry no highlighting entry (None); treat it as empty
    # instead of failing on ``None.get``.
    hit_highlights = o.get("highlights") or {}

    post["typ"] = "posts"
    post["highlights"] = str(strip_html(hit_highlights.get("text_txt", "")))

    absolute_url = urljoin(env('TARGET'), post["url"]).rstrip("/")
    post["text"] = (
        "<b>" + post["title"] + "</b>: "
        + "(%s) " % post["public_date"]
        + absolute_url + " "
        + post["highlights"]
    )
    if post["url"]:
        post["url"] = absolute_url
    if post["imageurl"]:
        # Only the path of the image url is kept and re-rooted on TARGET.
        post["image"] = urljoin(env('TARGET'), urlparse(post["imageurl"]).path)
    return post
|
||||||
|
|
||||||
|
def pull_member(id, o):
    """Fetch a member through ``fetmember.get`` and add display fields.

    Returns None when the lookup yields a falsy value; otherwise the
    member dict with "url", "text", "imageurl" and "title" filled in.
    The hit dict *o* is accepted for a uniform loader signature but is
    not read.
    """
    member = fetmember.get(id)
    if not member:
        return None

    profile_url = urljoin(env('TARGET'), "/member/%s" % id)
    member["url"] = profile_url
    full_name = member["firstname"] + " " + member["surname"]
    member["text"] = "<b>" + full_name + "</b>" + " " + profile_url
    member["imageurl"] = member["image"]
    member["title"] = full_name
    return member
|
||||||
|
|
||||||
|
# Dispatch table: maps the <typ> segment of a Solr document id to the loader for that type.
pull_original={"posts": pull_post,"member": pull_member}
|
||||||
|
|
||||||
|
|
||||||
|
def result_to_object(result):
    """Resolve every document of a Solr result to its original object.

    Document ids have the form ``/<typ>/<term>``. For each document the
    matching loader from ``pull_original`` is called with the term and a
    hit dict holding "id", "typ", "term" and the document's "highlights"
    entry (None when Solr returned none). The loader results are returned
    in document order.
    """
    docs = result.docs
    highlights = result.highlighting

    def doc_to_object(doc):
        doc_id = doc["id"]
        segments = doc_id.split("/")
        hit = {"id": doc_id, "typ": segments[1], "term": segments[2]}
        hit["highlights"] = highlights.get(doc_id)
        loader = pull_original[hit["typ"]]
        return loader(hit["term"], hit)

    return [doc_to_object(doc) for doc in docs]
|
||||||
28
solrfet2020/utils.py
Normal file
28
solrfet2020/utils.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
from lxml.html.clean import clean_html, Cleaner
|
||||||
|
from fet2020api import fet2020postapi, fet2020memberapi
|
||||||
|
from urllib.parse import urljoin,urlparse
|
||||||
|
from . import env
|
||||||
|
|
||||||
|
|
||||||
|
# API clients for the posts and members endpoints below the TARGET site.
fet = fet2020postapi(
    urljoin(env('TARGET'), "api/posts/")
)
fetmember = fet2020memberapi(
    urljoin(env('TARGET'), "api/members/")
)
|
||||||
|
|
||||||
|
|
||||||
|
def replace_special(t):
    """Return *t* with newlines, carriage returns, tabs and backslashes removed.

    Raises:
        TypeError: if *t* is a dict instead of a string.
    """
    if isinstance(t, dict):
        raise TypeError("String needed, got a %s" % str(type(t)))
    for unwanted in ("\n", "\r", "\t", "\\"):
        t = t.replace(unwanted, "")
    return t
|
||||||
|
|
||||||
|
def strip_html(text):
    """Clean an HTML snippet, or a list/tuple of snippets, for display.

    Empty input (``""``, ``None``, an empty list) yields ``""``. A list is
    cleaned item by item and flattened into one string in which every
    item is preceded by a space and followed by ";". Any other value is
    passed through ``replace_special`` and the lxml ``Cleaner``.
    """
    if not text:
        return ""
    if isinstance(text, (list, tuple)):
        return "".join(" " + strip_html(item) + ";" for item in text)

    cleaner = Cleaner(allow_tags=['i', 'em', 'p'], remove_tags=['p', 'div'])
    # The slice drops the first 5 and last 6 characters of the cleaned
    # markup -- presumably the wrapper element lxml adds around the
    # fragment; TODO confirm.
    return cleaner.clean_html(replace_special(text))[5:-6]
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user