various improvements
@@ -42,6 +42,7 @@ def fbfeed(url, raw, params={}):
         aa=fbfeedelement(m)
         if not aa.has_key("title"):
             aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M")
         if not aa.has_key("section"):
             aa["section"]="Facebook: "+u[1]
         arts.append(aa)
     nx=None
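The two has_key checks above give a feed item fallback metadata when the Facebook entry carries none: a title built from the source name (u[1]) plus publication time, and a "Facebook: <source>" section label. has_key is Python 2-only; a minimal sketch of the same default-if-missing pattern with dict.setdefault, assuming aa and u are shaped as in the hunk:

    # Sketch: same defaulting via setdefault (assumes aa["published"] is a
    # datetime and u[1] is the source name, as in the hunk above).
    fallback = u[1] + " at " + aa["published"].strftime("%Y-%m-%d %H:%M")
    aa.setdefault("title", fallback)
    aa.setdefault("section", "Facebook: " + u[1])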
@@ -46,4 +46,5 @@ def fscharticle(url,raw,params={}):
     d["pi"]=pi
     d["sourcetype"]="fscharticle"
+    d["section"]= "Fachschaft Chemie"
     d["url"]=url
     return {"article": d}
@@ -1,7 +1,7 @@
 from src import clogger # Logger for crawler
 from src.models import Article # Article model
 from datetime import datetime
-from src.database import db_session
+from src.database import db_session_process as db_session
 from mqueues import fetch_queue, compile_queue, put_fetch_queue
 from fetching import fetch_page, downloadfile, announce_articleid
 from fixing import fix_html, fix_file
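Aliasing db_session_process as db_session swaps the underlying session without touching any other line in the module. The repository's definition of db_session_process is not shown in this diff; one plausible shape, sketched with SQLAlchemy's scoped_session and a per-process scope function (engine URL hypothetical):

    import os
    from sqlalchemy import create_engine
    from sqlalchemy.orm import scoped_session, sessionmaker

    engine = create_engine("sqlite:///crawler.db")  # hypothetical URL
    # Scope sessions by OS process id instead of the default thread scope,
    # so each worker process holds its own session.
    db_session_process = scoped_session(sessionmaker(bind=engine),
                                        scopefunc=os.getpid)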
@@ -23,7 +23,8 @@ def process_article(art):
     art["text"] = fix_html(art["text"], art["url"])
     if "image" in art:
         art["image"]=fix_file(art["url"], art["image"])
-    clogger.info(art)
+    # clogger.info(art)
+    clogger.info(str(db_session.info))
     aa = Article.from_hash(art)
     aa.process_hash(art)
     aa.last_fetched = datetime.now()
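This swap replaces logging the whole article dict with logging db_session.info, SQLAlchemy's user-modifiable per-session dictionary, which makes it visible which session instance a worker currently holds. A tiny illustration (the "worker" key is an invented example):

    db_session.info["worker"] = os.getpid()  # arbitrary user data may be stashed here
    clogger.info(str(db_session.info))       # e.g. {'worker': 4711}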
@@ -31,11 +32,11 @@ def process_article(art):
     db_session.add(aa)
     try:
         db_session.commit()
-    except InvalidRequestError,e:
+    except (InvalidRequestError, Exception),e:
         db_session.rollback()
         clogger.error(e)
     clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
-    return aa
+    return True
     # app.logger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
     # db_session.close()
     # announce_articleid(aa.id)
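Because InvalidRequestError subclasses Exception, the widened tuple (InvalidRequestError, Exception) behaves exactly like catching Exception alone; the effect of the change is that any commit failure now triggers the rollback, not just SQLAlchemy's InvalidRequestError. The return value also changes from the Article object to True, so callers can only test for success. If the two error cases ever needed distinct handling, separate clauses would keep them apart (a sketch, in the codebase's Python 2 except syntax):

    try:
        db_session.commit()
    except InvalidRequestError, e:   # SQLAlchemy session misuse
        db_session.rollback()
        clogger.error("invalid request: %s" % e)
    except Exception, e:             # anything else commit() raises
        db_session.rollback()
        clogger.error(e)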
@@ -3,7 +3,7 @@ from mqueues import fetch_queue, compile_queue, process_queue
 from compiler import do_compile
 from mprocess import do_process
 from fetching import fetch_page
-from gevent import spawn
+from gevent import spawn,sleep
 from itertools import repeat
 from models import CrawlUrl
 from src import clogger
@@ -22,6 +22,7 @@ def start_workers(f,c,p):
     def work_fetch():
         try:
             while True:
+                sleep(2)
                 run_fetch()
         except Empty:
             clogger.info("Fetch - Worker died")
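The added sleep(2) is gevent's cooperative sleep: it throttles the fetch loop to one run every two seconds while yielding to the hub so other greenlets keep running, and the Empty handler implies run_fetch() pulls from a queue with a timeout. A self-contained sketch of that worker shape (queue contents and timeout values are assumptions):

    from gevent import spawn, sleep
    from gevent.queue import Queue, Empty

    fetch_queue = Queue()                    # stand-in for the repo's fetch_queue
    fetch_queue.put("http://example.org/feed")

    def run_fetch():
        url = fetch_queue.get(timeout=5)     # raises Empty once the queue stays dry
        print("fetching %s" % url)

    def work_fetch():
        try:
            while True:
                sleep(2)                     # cooperative: other greenlets run here
                run_fetch()
        except Empty:
            print("Fetch - Worker died")

    spawn(work_fetch).join()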