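various improvements (crawler: import db_session_process as db_session, roll back on any exception during commit, log session info instead of the full article, return True from process_article)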

This commit is contained in:
uwsgi
2017-02-21 18:05:51 +01:00
parent 158ab4236f
commit ae2f61485e
8 changed files with 40 additions and 14 deletions

View File

@@ -1,7 +1,7 @@
from src import clogger # Logger for crawler
from src.models import Article # Article model
from datetime import datetime
from src.database import db_session
from src.database import db_session_process as db_session
from mqueues import fetch_queue, compile_queue, put_fetch_queue
from fetching import fetch_page, downloadfile, announce_articleid
from fixing import fix_html, fix_file
@@ -23,7 +23,8 @@ def process_article(art):
art["text"] = fix_html(art["text"], art["url"])
if "image" in art:
art["image"]=fix_file(art["url"], art["image"])
clogger.info(art)
# clogger.info(art)
clogger.info(str(db_session.info))
aa = Article.from_hash(art)
aa.process_hash(art)
aa.last_fetched = datetime.now()
@@ -31,11 +32,11 @@ def process_article(art):
db_session.add(aa)
try:
db_session.commit()
except InvalidRequestError,e:
except (InvalidRequestError, Exception),e:
db_session.rollback()
clogger.error(e)
clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
return aa
return True
# app.logger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
# db_session.close()
# announce_articleid(aa.id)