start indexing members

This commit is contained in:
www
2020-12-12 10:15:20 +00:00
parent 73b21cdb7a
commit ca753097b0
2 changed files with 16 additions and 8 deletions

View File

@@ -3,7 +3,7 @@ import environ
import pysolr import pysolr
from .convert import post_to_solr from .convert import post_to_solr
from urllib.parse import urljoin,urlparse from urllib.parse import urljoin,urlparse
from fet2020api import fet2020postapi from fet2020api import fet2020postapi, fet2020memberapi
import yaml import yaml
env=environ.Env( env=environ.Env(
SOLR_HOST=(str,"http://localhost:8980"), SOLR_HOST=(str,"http://localhost:8980"),
@@ -12,6 +12,7 @@ env=environ.Env(
fet=fet2020postapi(urljoin(env('TARGET'),"api/posts/")) fet=fet2020postapi(urljoin(env('TARGET'),"api/posts/"))
fetmember=fet2020memberapi(urljoin(env('TARGET'),"api/members/"))
def search_post(text=""): def search_post(text=""):
pass pass
def reindex(): def reindex():
@@ -21,6 +22,7 @@ def replace_special(t):
return t.replace("\n","").replace("\r","").replace("\t","").replace("\\","") return t.replace("\n","").replace("\r","").replace("\t","").replace("\\","")
def strip_html(text): def strip_html(text):
if text=="": return ""
c=Cleaner( c=Cleaner(
allow_tags=['i','em','p'], allow_tags=['i','em','p'],
remove_tags=['p','div']) remove_tags=['p','div'])
@@ -35,23 +37,24 @@ def strip_html(text):
def result_to_posts(result): def result_to_posts(result):
docs = result.docs docs = result.docs
highlights =result.highlighting highlights =result.highlighting
posts = [fet.find_one({"slug": rr["id"]}) for rr in docs ] posts = [fet.find_one({"slug": rr["id"].split("/")[2]}) for rr in docs ]
def create_text(p): def create_text(p):
return "<b>" + \ return "<b>" + \
p["title"]+ "</b>: "+ \ p["title"]+ "</b>: "+ \
"(%s) " % p["public_date"] + \ "(%s) " % p["public_date"] + \
urljoin(env('TARGET'), urljoin(env('TARGET'),
p["url"]).rstrip("/")+" "+\ p["url"]).rstrip("/")+" "+\
str(strip_html(highlights[p["slug"]]["text_txt"])) str(strip_html(highlights["/posts/"+p["slug"]].get("text_txt","")))
def create_highlights(p): def create_highlights(p):
return str(strip_html(highlights[p["slug"]]["text_txt"])) print(highlights["/posts/"+p["slug"]])
return str(strip_html(highlights["/posts/"+p["slug"]].get("text_txt","")))
for post in posts: for post in posts:
if post: if post:
post["text"] = create_text(post) post["text"] = create_text(post)
post["highlights"]=create_highlights(post) post["highlights"]=create_highlights(post)
if post["url"]: post["url"]= urljoin(env('TARGET'),post["url"]).rstrip("/") if post["url"]: post["url"]= urljoin(env('TARGET'),post["url"]).rstrip("/")
if post["image"]:post["image"]=urljoin(env('TARGET'),urlparse(post["image"]).path) if post["imageurl"]:post["image"]=urljoin(env('TARGET'),urlparse(post["imageurl"]).path)
return posts return posts
class SolrFet2020(): class SolrFet2020():
@@ -64,9 +67,12 @@ class SolrFet2020():
self.solr.delete(q='*:*') self.solr.delete(q='*:*')
p=post_to_solr(fet.find({"slug":""})) p=post_to_solr(fet.find({"slug":""}))
self.solr.add(p) self.solr.add(p)
m=fetmember.find({"nickname":""})
print(m)
def search(self,query): def search(self,query):
r=self.solr.search("text_txt:*%s*" % query,sort="date_dt desc",**{ querystring="text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2" % (query,query,query)
r=self.solr.search(querystring,sort="score desc, date_dt desc",**{
'hl':'true', 'hl':'true',
'hl.fragsize': 100, 'hl.fragsize': 100,
'hl.fl': '*', 'hl.fl': '*',

View File

@@ -28,7 +28,9 @@ def post_to_solr(p):
raise PostKeyError("Post needs to have key '%s'" % k) raise PostKeyError("Post needs to have key '%s'" % k)
# Return the solr structure # Return the solr structure
return { return {
"id": p["slug"], "id": p["url"],
"date_dt": p["public_date"], "date_dt": p["public_date"],
"tags_tkn": p["tag_string"],
"title_txt": p["title"],
"text_txt": (get_text2(p.get("body","")) or "")+" "+get_text2(p.get("agenda_html","") or "") "text_txt": (get_text2(p.get("body","")) or "")+" "+get_text2(p.get("agenda_html","") or "")
} }