From 449a278d5825deada811504184780ba9abe2710b Mon Sep 17 00:00:00 2001
From: Andreas Stephanides
Date: Sun, 15 Jan 2017 08:05:40 +0100
Subject: [PATCH] bizarticle

---
 articles/controller.py |  4 ++--
 compiler/compiler.py   | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/articles/controller.py b/articles/controller.py
index a5157d3..e89d95c 100644
--- a/articles/controller.py
+++ b/articles/controller.py
@@ -7,7 +7,7 @@ from src.database import db_session, read_json
 
 
 def get_all():
-    return Article.query.all()
+    return Article.query.order_by(Article.published_date.desc()).all()
 
 def search(s):
-    return Article.query.filter(Article.title.like("%"+s+"%")).all()
+    return Article.query.filter(Article.title.like("%"+s+"%")).order_by(Article.published_date.desc()).all()
diff --git a/compiler/compiler.py b/compiler/compiler.py
index 6827e87..d227330 100644
--- a/compiler/compiler.py
+++ b/compiler/compiler.py
@@ -135,6 +135,38 @@ def fsarcharticle(url, raw):
         d["author"]=None
     return {"article": d}
 
+def fsbizarticle(url, raw):
+    """Build the article dict for a fsbiz article page.
+
+    url is stored unchanged under "url"; raw is the page markup handed
+    to BeautifulSoup.
+
+    Returns {"article": d}.  "title" and "text" are only set when their
+    element is found; "published" and "author" are None when they
+    cannot be extracted.
+    """
+    sp=BeautifulSoup(raw)
+    d={}
+    h=sp.find("h1", {"class": "entry-title"})
+    if h is not None:
+        d["title"]=h.text.strip()
+    d["url"]=url
+    h=sp.find("time", {"class": "entry-date"})
+    # A <time> tag without a datetime attribute must not raise KeyError.
+    stamp=h.get("datetime") if h is not None else None
+    d["published"]=parse(stamp) if stamp else None
+    h=sp.find("div", {"class": "entry-content"})
+    if h is not None:
+        d["text"]=h.encode_contents().strip()
+    d["image"]=""
+    d["sourcetype"]="fsbizarticle"
+    d["section"]="fsbiz"
+    h=sp.find("span", {"class": "author"})
+    # The author span may not wrap a link; find("a") is None then.
+    link=h.find("a") if h is not None else None
+    d["author"]=link.text.strip() if link is not None else None
+    return {"article": d}
+
 def fetindex(url, raw):
     if raw is None:
         raise Error