format solrfet
This commit is contained in:
@@ -1,50 +1,58 @@
|
|||||||
from lxml.html.clean import clean_html, Cleaner
|
from lxml.html.clean import clean_html, Cleaner
|
||||||
import environ
|
import environ
|
||||||
|
import settings
|
||||||
|
|
||||||
# Settings read from the process environment; each keyword is given as a
# (cast, default) pair.
env = environ.Env(
    SOLR_HOST=(str, "http://localhost:8980"),
    TARGET=(str, "https://alpha.2020.fet.at"),
)
|
|
||||||
|
|
||||||
import pysolr
|
import pysolr
|
||||||
from .convert import post_to_solr, member_to_solr
|
from .convert import post_to_solr, member_to_solr
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
from fet2020api import fet2020postapi, fet2020memberapi
|
from fet2020api import fet2020memberapi, fet2020api
|
||||||
import yaml
|
import yaml
|
||||||
from .solr_to_objects import result_to_object
|
from .solr_to_objects import result_to_object
|
||||||
|
|
||||||
# Module-level API clients, both rooted at the configured TARGET site.
fet = fet2020api(
    urljoin(env("TARGET"), "api/posts/"),
    pk="slug",
)
fetmember = fet2020memberapi(
    urljoin(env("TARGET"), "api/members/"),
)
|
||||||
|
|
||||||
|
|
||||||
class SolrFet2020:
    """Index fet2020 posts and members in Solr and search them.

    The Solr core is addressed as ``<SOLR_HOST>/solr/core`` and the client is
    created with ``always_commit=True``.
    """

    # Translation table that puts a backslash in front of every character the
    # Solr query parser treats as syntax. Whitespace is deliberately left
    # alone so multi-word input is tokenised exactly as before.
    _QUERY_ESCAPES = str.maketrans(
        {char: "\\" + char for char in '\\+-!():^[]"{}~*?|&;/'}
    )

    def __init__(self):
        """Create the pysolr client for the configured Solr host."""
        self.solr = pysolr.Solr(
            urljoin(env("SOLR_HOST"), "/solr/core"), always_commit=True
        )

    @staticmethod
    def _escape_query(text):
        """Return *text* with Solr query syntax characters backslash-escaped."""
        return text.translate(SolrFet2020._QUERY_ESCAPES)

    def reindex(self):
        """Drop every document from the core, then re-add all posts and members."""
        self.solr.delete(q="*:*")
        # NOTE(review): an empty "slug" filter presumably matches every post,
        # mirroring the member lookup below -- confirm against the API client.
        posts = post_to_solr(fet.find({"slug": ""}))
        self.solr.add(posts)
        members = member_to_solr(fetmember.find({"nickname": ""}))  # search all members
        self.solr.add(members)

    def reindextest(self):
        """Debug helper: fetch the member list and print it without indexing."""
        members = fetmember.find({"nickname": ""})
        print(members)

    def search(self, query):
        """Search text, title and tags for *query* as a substring.

        Title and tag matches are boosted (``^2``). Query-syntax characters
        in *query* are escaped first, so input such as ``a:b`` or ``c++`` is
        matched literally instead of being parsed as Solr syntax.

        Returns:
            A ``(links, hits)`` tuple: the result converted by
            ``result_to_object`` and the hit count reported by Solr.
        """
        term = self._escape_query(str(query))
        querystring = "text_txt:*%s* title_txt:*%s*^2 tags_tkn:*%s*^2" % (
            term,
            term,
            term,
        )
        # Highlighting options are passed through to Solr verbatim.
        highlight = {
            "hl": "true",
            "hl.fragsize": 100,
            "hl.fl": "*",
            "hl.maxAnalyzedChars": -1,
            "hl.snippets": 10,
        }
        r = self.solr.search(
            querystring,
            sort="score desc, date_dt desc",
            **highlight,
        )
        links = result_to_object(r)
        return links, r.hits
|
||||||
|
|||||||
@@ -1,39 +1,47 @@
|
|||||||
from lxml.html.clean import clean_html, Cleaner
|
from lxml.html.clean import clean_html, Cleaner
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
class PostKeyError(KeyError):
    """Raised when a post dict lacks a key required for Solr conversion.

    This must be an exception class, not a function: it is used with
    ``raise`` and, as a ``KeyError`` subclass, can be caught as one.
    """
|
||||||
|
|
||||||
|
|
||||||
def post_to_solr(p):
    """Convert a post dict, or a list of post dicts, into Solr document(s).

    Args:
        p: A dict describing one post, or a list of such dicts. Subclasses
            of ``list`` and ``dict`` are accepted as well.

    Returns:
        One Solr document dict with the keys ``id``, ``date_dt``,
        ``tags_tkn``, ``title_txt`` and ``text_txt``, or a list of such
        dicts when a list was passed in.

    Raises:
        TypeError: If ``p`` is neither a dict nor a list.
        PostKeyError: If ``"url"`` or ``"body"`` is missing.
            NOTE(review): this relies on ``PostKeyError`` being an
            exception class -- verify its definition.
        KeyError: If ``"public_date"``, ``"tag_string"`` or ``"title"`` is
            missing; those keys are read without a prior check.
    """
    # A list is converted element by element.
    if isinstance(p, list):
        return [post_to_solr(o) for o in p]
    if not isinstance(p, dict):
        raise TypeError("Argument for post_to_solr needs to be a dict or list of dicts")

    for k in ("url", "body"):
        if k not in p:
            raise PostKeyError("Post needs to have key '%s'" % k)

    def get_text2(html):
        """Strip markup from *html*; newlines, returns and tabs become spaces."""
        if not html:
            return ""
        soup = BeautifulSoup(html, features="lxml")
        return soup.get_text().replace("\n", " ").replace("\r", " ").replace("\t", " ")

    # Return the solr structure. get_text2 already maps None/"" to "", so the
    # body and the optional agenda can be joined directly.
    return {
        "id": p["url"],
        "date_dt": p["public_date"],
        "tags_tkn": p["tag_string"],
        "title_txt": p["title"],
        "text_txt": get_text2(p.get("body")) + " " + get_text2(p.get("agenda_html")),
    }
|
||||||
|
|
||||||
|
|
||||||
def member_to_solr(m):
    """Convert a member dict, or a list of member dicts, into Solr document(s).

    Args:
        m: A dict with the keys ``id``, ``nickname``, ``firstname``,
            ``surname`` and ``description``, or a list of such dicts.
            Subclasses of ``list`` are accepted as well.

    Returns:
        One Solr document dict, or a list of them when a list was passed in.
        ``date_dt`` is always ``None`` for members.

    Raises:
        KeyError: If one of the keys listed above is missing.
    """
    # A list is converted element by element.
    if isinstance(m, list):
        return [member_to_solr(o) for o in m]

    return {
        "id": "/member/" + str(m["id"]),
        "date_dt": None,
        "tags_tkn": m["nickname"],
        "title_txt": m["firstname"] + " " + m["surname"] + "(" + m["nickname"] + ")",
        "text_txt": m["description"],
    }
|
||||||
|
|
||||||
Reference in New Issue
Block a user