diff --git a/bot1/chats.py b/bot1/chats.py
index e87b240..9828e7b 100644
--- a/bot1/chats.py
+++ b/bot1/chats.py
@@ -179,6 +179,7 @@ class Chat(BaseChat):
self.mode=txt
self.reply_msg("Mode: %s" % txt)
return True
+
elif cmd == "/debug":
if not u.fet_user:
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
@@ -196,6 +197,7 @@ class Chat(BaseChat):
return True
self.workflows[get_from_id(update)]=CreatePostWorkflow(chat=self)
return True
+
elif cmd == "/reindex":
if not u.fet_user:
self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
@@ -204,6 +206,14 @@ class Chat(BaseChat):
solr.reindex()
self.send_msg("Fertig mit dem neuen Index")
return True
+ elif cmd == "/reindextest":
+ if not u.fet_user:
+ self.reply_msg("bitte vorher /auth ausführen wenn du ein FET Mitglied bist")
+ return True
+ self.reply_msg("Das kann ein bissl dauern...")
+ solr.reindextest()
+ self.send_msg("Fertig mit dem neuen Index")
+ return True
elif cmd == "/auth":
if u.fet_user:
self.reply_msg("Du bist schon authentifiziert...")
diff --git a/fet2020api/djangoapi.py b/fet2020api/djangoapi.py
index 83ff9cb..bf35533 100644
--- a/fet2020api/djangoapi.py
+++ b/fet2020api/djangoapi.py
@@ -20,7 +20,7 @@ class django_crud_api():
if r is None:
return None
if len(r)>1:
- raise LookupError("Mehr als ein Objekt von der API zurückgegeben")
+ raise LookupError("Mehr als ein Objekt von der API zurückgegeben filter: %s" % str(filter))
if len(r)==0:
return None
return r[0]
diff --git a/solrfet2020/__init__.py b/solrfet2020/__init__.py
index 72c85e7..79cedc2 100644
--- a/solrfet2020/__init__.py
+++ b/solrfet2020/__init__.py
@@ -1,74 +1,41 @@
from lxml.html.clean import clean_html, Cleaner
import environ
-import pysolr
-from .convert import post_to_solr
-from urllib.parse import urljoin,urlparse
-from fet2020api import fet2020postapi, fet2020memberapi
-import yaml
+
env=environ.Env(
SOLR_HOST=(str,"http://localhost:8980"),
TARGET=(str,"https://alpha.2020.fet.at")
)
+import pysolr
+from .convert import post_to_solr, member_to_solr
+from urllib.parse import urljoin
+from fet2020api import fet2020postapi, fet2020memberapi
+import yaml
+from .solr_to_objects import result_to_object
fet=fet2020postapi(urljoin(env('TARGET'),"api/posts/"))
fetmember=fet2020memberapi(urljoin(env('TARGET'),"api/members/"))
-def search_post(text=""):
- pass
-def reindex():
- pass
-
-def replace_special(t):
- return t.replace("\n","").replace("\r","").replace("\t","").replace("\\","")
-
-def strip_html(text):
- if text=="": return ""
- c=Cleaner(
- allow_tags=['i','em','p'],
- remove_tags=['p','div'])
- if type(text) is list:
- h=""
- for item in text:
- h=h+" "+strip_html(item)+";"
- return h
- return c.clean_html(replace_special(text))[5:-6]
-def result_to_posts(result):
- docs = result.docs
- highlights =result.highlighting
- posts = [fet.find_one({"slug": rr["id"].split("/")[2]}) for rr in docs ]
- def create_text(p):
- return "" + \
- p["title"]+ ": "+ \
- "(%s) " % p["public_date"] + \
- urljoin(env('TARGET'),
- p["url"]).rstrip("/")+" "+\
- str(strip_html(highlights["/posts/"+p["slug"]].get("text_txt","")))
- def create_highlights(p):
- print(highlights["/posts/"+p["slug"]])
- return str(strip_html(highlights["/posts/"+p["slug"]].get("text_txt","")))
-
- for post in posts:
- if post:
- post["text"] = create_text(post)
- post["highlights"]=create_highlights(post)
- if post["url"]: post["url"]= urljoin(env('TARGET'),post["url"]).rstrip("/")
- if post["imageurl"]:post["image"]=urljoin(env('TARGET'),urlparse(post["imageurl"]).path)
- return posts
-
class SolrFet2020():
def __init__(self):
self.solr=pysolr.Solr(
urljoin(env('SOLR_HOST'),'/solr/core'),
always_commit=True
)
+
def reindex(self):
self.solr.delete(q='*:*')
p=post_to_solr(fet.find({"slug":""}))
self.solr.add(p)
+ m=member_to_solr(fetmember.find({"nickname":""})) # search all members
+ self.solr.add(m)
+ #print(m)
+
+ def reindextest(self):
m=fetmember.find({"nickname":""})
print(m)
+
def search(self,query):
querystring="text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2" % (query,query,query)
@@ -78,6 +45,7 @@ class SolrFet2020():
'hl.fl': '*',
'hl.maxAnalyzedChars': -1,
'hl.snippets': 100, })
- links = result_to_posts(r)
+
+ links = result_to_object(r)
#print(yaml.dump(r))
return links, r.hits
\ No newline at end of file
diff --git a/solrfet2020/convert.py b/solrfet2020/convert.py
index 314268c..337554e 100644
--- a/solrfet2020/convert.py
+++ b/solrfet2020/convert.py
@@ -7,14 +7,6 @@ def post_to_solr(p):
# if a list is given call for each element
if type(p) is list:
return [post_to_solr(o) for o in p]
- def get_text(l):
- if not l:
- return ""
- if len(l)<1:
- return ""
- c=Cleaner(allow_tags=['i','em'], remove_tags=['p','div','ul','li']) #
- h=c.clean_html(l.replace("\n"," ").replace("\r"," ").replace("\t"," ").replace("\\"," ")).text_content()
- return h
def get_text2(l):
if not l: return ""
soup=BeautifulSoup(l,features="lxml")
@@ -34,3 +26,14 @@ def post_to_solr(p):
"title_txt": p["title"],
"text_txt": (get_text2(p.get("body","")) or "")+" "+get_text2(p.get("agenda_html","") or "")
}
+def member_to_solr(m):
+    """Convert one member record, or a list of them, into Solr documents.
+
+    Args:
+        m: A member mapping with the keys ``id``, ``nickname``,
+            ``firstname``, ``surname`` and ``description``, or a list of
+            such mappings.
+
+    Returns:
+        A dict with the fields ``id``, ``date_dt``, ``tags_tkn``,
+        ``title_txt`` and ``text_txt``; for a list input, a list of such
+        dicts in the same order.
+
+    Raises:
+        KeyError: If a record has no ``id``.
+    """
+    if isinstance(m, list):
+        return [member_to_solr(o) for o in m]
+
+    def text(key):
+        # Missing or null fields become "" so the concatenation below cannot
+        # fail with a TypeError; post_to_solr guards its fields the same way.
+        return m.get(key) or ""
+
+    nickname = text("nickname")
+    return {
+        "id": "/member/" + str(m["id"]),
+        "date_dt": None,
+        "tags_tkn": nickname,
+        "title_txt": text("firstname") + " " + text("surname") + "(" + nickname + ")",
+        "text_txt": text("description"),
+    }
+
\ No newline at end of file
diff --git a/solrfet2020/solr_to_objects.py b/solrfet2020/solr_to_objects.py
new file mode 100644
index 0000000..f3f247b
--- /dev/null
+++ b/solrfet2020/solr_to_objects.py
@@ -0,0 +1,60 @@
+from .utils import fet, fetmember
+from .utils import strip_html
+from urllib.parse import urljoin,urlparse
+from . import env
+
+def pull_post(slug, o):
+    """Fetch the post behind a search hit and enrich it for display.
+
+    Args:
+        slug: Slug used to look the post up via ``fet.find_one``.
+        o: Search-hit mapping. Its ``"highlights"`` entry (a mapping of
+            field name to snippets, or ``None``) supplies the highlight text.
+
+    Returns:
+        The post with ``typ``, ``highlights`` and ``text`` added, ``url``
+        made absolute against ``TARGET`` and ``image`` set when ``imageurl``
+        is present. When the lookup finds nothing, the falsy lookup result
+        is returned unchanged.
+    """
+    post = fet.find_one({"slug": slug})
+    if not post:
+        return post
+
+    target = env('TARGET')
+    # A hit can come back without a highlighting entry, in which case
+    # "highlights" is None; treat that as "no snippets" instead of failing.
+    snippets = (o.get("highlights") or {}).get("text_txt", "")
+    absolute_url = urljoin(target, post["url"]).rstrip("/")
+
+    post["typ"] = "posts"
+    post["highlights"] = str(strip_html(snippets))
+    post["text"] = "%s: (%s) %s %s" % (
+        post["title"],
+        post["public_date"],
+        absolute_url,
+        post["highlights"],
+    )
+    if post["url"]:
+        post["url"] = absolute_url
+    if post["imageurl"]:
+        # Only the path of the image URL is kept and re-rooted at TARGET.
+        post["image"] = urljoin(target, urlparse(post["imageurl"]).path)
+    return post
+
+def pull_member(id, o):
+    """Fetch a member by id and add the display fields used for search hits.
+
+    Args:
+        id: Member id passed to ``fetmember.get`` and used in the member URL.
+        o: Search-hit mapping; unused here, but part of the signature shared
+            by the handlers registered in ``pull_original``.
+
+    Returns:
+        The member with ``url``, ``text``, ``imageurl`` and ``title`` set, or
+        ``None`` when the lookup yields nothing.
+    """
+    record = fetmember.get(id)
+    if record:
+        record["url"] = urljoin(env('TARGET'), "/member/%s" % id)
+        full_name = record["firstname"] + " " + record["surname"]
+        record["text"] = full_name + " " + record["url"]
+        record["imageurl"] = record["image"]
+        record["title"] = full_name
+        return record
+    return None
+
+# Dispatch table: second path segment of a Solr document id -> function that
+# loads the original object for that document.
+pull_original = dict(posts=pull_post, member=pull_member)
+
+
+def result_to_object(result):
+    """Resolve every document of a Solr result into its original object.
+
+    Args:
+        result: Search result exposing ``docs`` (an iterable of mappings
+            with an ``"id"`` key) and ``highlighting`` (a mapping keyed by
+            document id).
+
+    Returns:
+        A list with one entry per document, in result order: whatever the
+        matching ``pull_original`` handler returns, or ``None`` for a
+        document whose id cannot be resolved.
+    """
+    highlights = result.highlighting
+
+    def split_id(doc):
+        """Split an id of the form ``/<typ>/<term>``; None if parts are missing."""
+        parts = doc["id"].split("/")
+        if len(parts) < 3:
+            return None
+        return {"id": doc["id"], "typ": parts[1], "term": parts[2]}
+
+    def doc_to_object(doc):
+        o = split_id(doc)
+        if o is None:
+            # Malformed id: yield None for this hit instead of aborting the
+            # whole search with an IndexError.
+            return None
+        handler = pull_original.get(o["typ"])
+        if handler is None:
+            # Document type without a registered handler.
+            return None
+        o["highlights"] = highlights.get(o["id"])
+        return handler(o["term"], o)
+
+    return [doc_to_object(d) for d in result.docs]
\ No newline at end of file
diff --git a/solrfet2020/utils.py b/solrfet2020/utils.py
new file mode 100644
index 0000000..5a05eb3
--- /dev/null
+++ b/solrfet2020/utils.py
@@ -0,0 +1,28 @@
+from lxml.html.clean import clean_html, Cleaner
+from fet2020api import fet2020postapi, fet2020memberapi
+from urllib.parse import urljoin,urlparse
+from . import env
+
+
+# API clients shared by the Solr helpers; both endpoints are resolved
+# against the configured TARGET base URL.
+_target = env('TARGET')
+fet = fet2020postapi(urljoin(_target, "api/posts/"))
+fetmember = fet2020memberapi(urljoin(_target, "api/members/"))
+
+
+# Deletes newline, carriage return, tab and backslash in one translate() pass.
+_SPECIAL_CHARS = str.maketrans("", "", "\n\r\t\\")
+
+
+def replace_special(t):
+    """Return *t* with newlines, carriage returns, tabs and backslashes removed.
+
+    Args:
+        t: The string to clean.
+
+    Returns:
+        A copy of *t* without the characters listed above.
+
+    Raises:
+        TypeError: If *t* is not a string.
+    """
+    if not isinstance(t, str):
+        raise TypeError("String needed, got a %s" % str(type(t)))
+    return t.translate(_SPECIAL_CHARS)
+
+def strip_html(text):
+    """Clean highlighted HTML with an lxml ``Cleaner``.
+
+    Args:
+        text: An HTML string, or a list of such strings (lists may nest).
+
+    Returns:
+        ``""`` for an empty string. For a list, every cleaned item rendered
+        as ``" <item>;"`` and concatenated. Otherwise the cleaned string
+        with special characters removed by ``replace_special``.
+    """
+    if text == "":
+        return ""
+    if isinstance(text, list):
+        return "".join(" " + strip_html(item) + ";" for item in text)
+    # The cleaner is only needed on the string path, so it is built here
+    # rather than once per list level.
+    c = Cleaner(
+        allow_tags=['i', 'em', 'p'],
+        remove_tags=['p', 'div'])
+    # NOTE(review): the slice drops 5 leading and 6 trailing characters,
+    # presumably the <div>...</div> wrapper clean_html puts around a
+    # fragment; confirm against the lxml version in use.
+    return c.clean_html(replace_special(text))[5:-6]
+
+