format solrfet
This commit is contained in:
@@ -1,39 +1,47 @@
|
||||
from lxml.html.clean import clean_html, Cleaner
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class PostKeyError(KeyError):
    """Raised when a post dict lacks a key required for Solr conversion.

    Declared as a class (not a function) so that it can actually be raised;
    it derives from KeyError so callers catching KeyError also catch it.
    """
|
||||
|
||||
|
||||
def post_to_solr(p):
    """Convert a post dict (or a list of post dicts) into Solr documents.

    Args:
        p: A dict describing one post, or a list of such dicts. The keys
            "url" and "body" are checked explicitly; "public_date",
            "tag_string" and "title" are read directly; "agenda_html" is
            optional.

    Returns:
        A dict with the keys "id", "date_dt", "tags_tkn", "title_txt" and
        "text_txt". If a list was given, a list with one such dict per
        element.

    Raises:
        TypeError: If ``p`` is neither a dict nor a list.
        PostKeyError: If "url" or "body" is missing from the post.
        KeyError: If "public_date", "tag_string" or "title" is missing.
    """
    # If a list is given, convert each element on its own.
    if isinstance(p, list):
        return [post_to_solr(post) for post in p]

    # Validate with a real exception: asserts are stripped under `python -O`.
    if not isinstance(p, dict):
        raise TypeError("Argument for post_to_solr needs to be a dict or list of dicts")
    for key in ("url", "body"):
        if key not in p:
            raise PostKeyError("Post needs to have key '%s'" % key)

    def get_text2(markup):
        """Return the text content of ``markup`` with newlines/tabs as spaces."""
        # Empty strings and None short-circuit to "" without parsing.
        if not markup:
            return ""
        soup = BeautifulSoup(markup, features="lxml")
        return soup.get_text().replace("\n", " ").replace("\r", " ").replace("\t", " ")

    # Return the solr structure. The searchable text is the body text and the
    # agenda text joined by a single space; a missing or empty part yields "".
    return {
        "id": p["url"],
        "date_dt": p["public_date"],
        "tags_tkn": p["tag_string"],
        "title_txt": p["title"],
        "text_txt": get_text2(p.get("body")) + " " + get_text2(p.get("agenda_html")),
    }
|
||||
|
||||
|
||||
def member_to_solr(m):
    """Convert a member dict (or a list of member dicts) into Solr documents.

    Args:
        m: A dict with the keys "id", "nickname", "firstname", "surname" and
            "description", or a list of such dicts.

    Returns:
        A dict with the keys "id", "date_dt", "tags_tkn", "title_txt" and
        "text_txt". If a list was given, a list with one such dict per
        element.

    Raises:
        KeyError: If one of the keys read from the member dict is missing.
    """
    # If a list is given, convert each element on its own.
    if isinstance(m, list):
        return [member_to_solr(member) for member in m]

    return {
        "id": "/member/" + str(m["id"]),
        # Members carry no date; the field is emitted as None.
        "date_dt": None,
        "tags_tkn": m["nickname"],
        "title_txt": m["firstname"] + " " + m["surname"] + "(" + m["nickname"] + ")",
        "text_txt": m["description"],
    }
|
||||
|
||||
Reference in New Issue
Block a user