introduce search interface
This commit is contained in:
@@ -2,7 +2,7 @@ from lxml.html.clean import clean_html, Cleaner
|
||||
import environ
|
||||
import pysolr
|
||||
from .convert import post_to_solr
|
||||
from urllib.parse import urljoin
|
||||
from urllib.parse import urljoin,urlparse
|
||||
from fet2020api import fet2020postapi
|
||||
import yaml
|
||||
env=environ.Env(
|
||||
@@ -43,9 +43,17 @@ def result_to_posts(result):
|
||||
urljoin(env('TARGET'),
|
||||
p["url"]).rstrip("/")+" "+\
|
||||
str(strip_html(highlights[p["slug"]]["text_txt"]))
|
||||
def create_highlights(p):
    """Return the highlight text for post *p* as a string.

    Looks up the entry for ``p["slug"]`` in the ``highlights`` mapping taken
    from the surrounding scope, passes its ``"text_txt"`` value through
    ``strip_html`` and converts the result with ``str``.
    """
    entry = highlights[p["slug"]]
    stripped = strip_html(entry["text_txt"])
    return str(stripped)
|
||||
|
||||
for post in posts:
|
||||
post["text"] = create_text(post)
|
||||
if post:
|
||||
post["text"] = create_text(post)
|
||||
post["highlights"]=create_highlights(post)
|
||||
if post["url"]: post["url"]= urljoin(env('TARGET'),post["url"]).rstrip("/")
|
||||
if post["image"]:post["image"]=urljoin(env('TARGET'),urlparse(post["image"]).path)
|
||||
return posts
|
||||
|
||||
class SolrFet2020():
|
||||
def __init__(self):
|
||||
self.solr=pysolr.Solr(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from lxml.html.clean import clean_html, Cleaner
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
class PostKeyError(KeyError):
    """Error type for a post that lacks an expected key.

    Declared as a ``KeyError`` subclass so it can be raised and so existing
    ``except KeyError`` handlers still catch it.
    NOTE(review): presumably meant for the dict/key check in ``post_to_solr``
    -- confirm against the callers.
    """
|
||||
|
||||
@@ -12,9 +12,13 @@ def post_to_solr(p):
|
||||
return ""
|
||||
if len(l)<1:
|
||||
return ""
|
||||
c=Cleaner(allow_tags=['i','em','p'], remove_tags=['p','div'])
|
||||
h=c.clean_html(l.replace("\n"," ").replace("\r"," ").replace("\t"," ").replace("\\"," "))
|
||||
c=Cleaner(allow_tags=['i','em'], remove_tags=['p','div','ul','li']) #
|
||||
h=c.clean_html(l.replace("\n"," ").replace("\r"," ").replace("\t"," ").replace("\\"," ")).text_content()
|
||||
return h
|
||||
def get_text2(l):
    """Return the text content of an HTML fragment with line breaks flattened.

    Falsy input (for example ``None`` or an empty string) yields ``""``.
    Otherwise the markup is parsed by BeautifulSoup using the ``lxml``
    parser, and every newline, carriage return and tab in the extracted
    text is replaced by a single space.
    """
    if l:
        plain = BeautifulSoup(l, features="lxml").get_text()
        for whitespace in ("\n", "\r", "\t"):
            plain = plain.replace(whitespace, " ")
        return plain
    return ""
|
||||
if type(p) is list:
|
||||
return [post_to_solr(pp) for pp in p]
|
||||
# Check Dict and keys
|
||||
@@ -26,5 +30,5 @@ def post_to_solr(p):
|
||||
return {
|
||||
"id": p["slug"],
|
||||
"date_dt": p["public_date"],
|
||||
"text_txt": (get_text(p.get("body","")) or "")+(p.get("agenda_html","") or "")
|
||||
"text_txt": (get_text2(p.get("body","")) or "")+" "+get_text2(p.get("agenda_html","") or "")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user