From ca753097b0734f6b328afa7edf88c5f8b2256fcd Mon Sep 17 00:00:00 2001 From: www Date: Sat, 12 Dec 2020 10:15:20 +0000 Subject: [PATCH] start indexing members --- solrfet2020/__init__.py | 20 +++++++++++++------- solrfet2020/convert.py | 4 +++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/solrfet2020/__init__.py b/solrfet2020/__init__.py index b914136..72c85e7 100644 --- a/solrfet2020/__init__.py +++ b/solrfet2020/__init__.py @@ -3,7 +3,7 @@ import environ import pysolr from .convert import post_to_solr from urllib.parse import urljoin,urlparse -from fet2020api import fet2020postapi +from fet2020api import fet2020postapi, fet2020memberapi import yaml env=environ.Env( SOLR_HOST=(str,"http://localhost:8980"), @@ -12,6 +12,7 @@ env=environ.Env( fet=fet2020postapi(urljoin(env('TARGET'),"api/posts/")) +fetmember=fet2020memberapi(urljoin(env('TARGET'),"api/members/")) def search_post(text=""): pass def reindex(): @@ -21,6 +22,7 @@ def replace_special(t): return t.replace("\n","").replace("\r","").replace("\t","").replace("\\","") def strip_html(text): + if text=="": return "" c=Cleaner( allow_tags=['i','em','p'], remove_tags=['p','div']) @@ -35,23 +37,24 @@ def strip_html(text): def result_to_posts(result): docs = result.docs highlights =result.highlighting - posts = [fet.find_one({"slug": rr["id"]}) for rr in docs ] + posts = [fet.find_one({"slug": rr["id"].split("/")[2]}) for rr in docs ] def create_text(p): return "" + \ p["title"]+ ": "+ \ "(%s) " % p["public_date"] + \ urljoin(env('TARGET'), p["url"]).rstrip("/")+" "+\ - str(strip_html(highlights[p["slug"]]["text_txt"])) + str(strip_html(highlights["/posts/"+p["slug"]].get("text_txt",""))) def create_highlights(p): - return str(strip_html(highlights[p["slug"]]["text_txt"])) + print(highlights["/posts/"+p["slug"]]) + return str(strip_html(highlights["/posts/"+p["slug"]].get("text_txt",""))) for post in posts: if post: post["text"] = create_text(post) post["highlights"]=create_highlights(post) if post["url"]: post["url"]= urljoin(env('TARGET'),post["url"]).rstrip("/") - if post["image"]:post["image"]=urljoin(env('TARGET'),urlparse(post["image"]).path) + if post["imageurl"]:post["image"]=urljoin(env('TARGET'),urlparse(post["imageurl"]).path) return posts class SolrFet2020(): @@ -64,9 +67,12 @@ class SolrFet2020(): self.solr.delete(q='*:*') p=post_to_solr(fet.find({"slug":""})) self.solr.add(p) - + m=fetmember.find({"nickname":""}) + print(m) def search(self,query): - r=self.solr.search("text_txt:*%s*" % query,sort="date_dt desc",**{ + querystring="text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2" % (query,query,query) + + r=self.solr.search(querystring,sort="score desc, date_dt desc",**{ 'hl':'true', 'hl.fragsize': 100, 'hl.fl': '*', diff --git a/solrfet2020/convert.py b/solrfet2020/convert.py index 9458a64..314268c 100644 --- a/solrfet2020/convert.py +++ b/solrfet2020/convert.py @@ -28,7 +28,9 @@ def post_to_solr(p): raise PostKeyError("Post needs to have key '%s'" % k) # Return the solr structure return { - "id": p["slug"], + "id": p["url"], "date_dt": p["public_date"], + "tags_tkn": p["tag_string"], + "title_txt": p["title"], "text_txt": (get_text2(p.get("body","")) or "")+" "+get_text2(p.get("agenda_html","") or "") }