# Reviewer notes for this patch. Everything up to the first "diff --git" header is
# free text and is not part of any hunk.
#
# Formatting caveat: the patch text below is collapsed onto four physical lines, so
# the line breaks and indentation inside the hunks are not visible here. The "@@"
# hunk headers still carry the original old/new line counts.
#
# What each file section changes (as far as the visible +/- markers show):
#   articles/controller.py
#     - adds imports: sqlalchemy func, sqlalchemy exc, src.clogger.
#     - get_all_page: stores the query result in `arts` and returns it; a
#       rollback-and-retry handler for exc.InvalidRequestError is added but
#       commented out.
#     - get_organization_page: builds the query with db_session.query(Article)
#       instead of Article.query.
#     - section_count / organization_count / count: replace Query.count() with
#       db_session.query(func.count(Article.id)) ... .scalar().
#   compiler/comp/fb.py
#     - fbfeed: when the element dict has no "section" key, sets it to
#       "Facebook: "+u[1].
#   compiler/comp/fsch.py
#     - fscharticle: adds d["url"]=url to the article dict.
#   compiler/mprocess.py
#     - imports db_session_process from src.database under the name db_session.
#     - process_article: comments out clogger.info(art) and logs
#       str(db_session.info) instead; the except clause around commit() becomes
#       (InvalidRequestError, Exception); the function returns True instead of aa.
#   compiler/mworker.py
#     - imports sleep from gevent; work_fetch calls sleep(2) before each run_fetch().
#   controller.py
#     - BaseController.get_all: stores the query result in `ar` and returns it; an
#       error handler is added but commented out.
#   database.py
#     - adds a second scoped_session, db_session_process, built with the same
#       sessionmaker arguments (autocommit=False, bind=engine) as db_session.
#   sections/model.py
#     - imports IntegrityError and InvalidRequestError from sqlalchemy.exc.
#     - wraps the commit that follows db_session.add(s) for a newly created Section
#       in try/except; the handler rolls back and calls clogger.error(e).
#
# NOTE(review): compiler/mprocess.py - listing Exception in the except tuple makes
#   the handler effectively a catch-all. The "Updated/Added Article" log line and
#   `return True` appear to follow the try/except, so a failed commit would be
#   reported the same way as a successful one - confirm against the real indentation.
# NOTE(review): compiler/mprocess.py - process_article used to return the Article
#   (`return aa`); callers that use the return value need checking now that it is True.
# NOTE(review): sections/model.py - the new handler uses `clogger`, but no clogger
#   import is visible in this hunk's import context; confirm it is imported in that file.
# NOTE(review): sections/model.py - after the guarded commit the code still sets
#   s.foreign_name, re-adds s and commits again; that second commit does not appear
#   to be guarded - confirm whether that is intended after a rollback.
# NOTE(review): articles/controller.py - in the visible hunks `exc` and `clogger`
#   are referenced only from commented-out lines.
# NOTE(review): the code uses Python 2-only syntax (`except X, e`, dict.has_key).
diff --git a/articles/controller.py b/articles/controller.py index 6f5f37c..756dbed 100644 --- a/articles/controller.py +++ b/articles/controller.py @@ -2,7 +2,10 @@ from .model import Article from .model import ArticleSchema #import flask from datetime import datetime +from sqlalchemy import func +from sqlalchemy import exc import json +from src import clogger from src.sections.model import Section from src.organizations.model import Organization from src.database import db_session, read_json @@ -42,7 +45,13 @@ class ArtController(BaseController): def get_all_page(self,lim, off): - return Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all() +# try: + arts=Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all() +# except exc.InvalidRequestError, e: +# clogger.error(e) +# db_session.rollback() +# arts=Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all() + return arts def get_section_page(self,section_id, lim, off): @@ -52,21 +61,21 @@ class ArtController(BaseController): def get_organization_page(self,organization_id, lim, off): sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all() sid=map(lambda a:a[0], sid) - articles=Article.query.filter(Article.section_id.in_(sid)).order_by(Article.published_date.desc()).limit(lim).offset(off).all() + articles=db_session.query(Article).filter(Article.section_id.in_(sid)).order_by(Article.published_date.desc()).limit(lim).offset(off).all() return articles # # return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all() def section_count(self,section_id): - return Article.query.filter(Article.section_id==section_id).count() + return db_session.query(func.count(Article.id)).filter(Article.section_id==section_id).scalar() def organization_count(self,organization_id): 
sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all() sid=map(lambda a:a[0], sid) - return Article.query.filter(Article.section_id.in_(sid)).count() + return db_session.query(func.count(Article.id)).filter(Article.section_id.in_(sid)).scalar() def count(self): - return Article.query.count() + return db_session.query(func.count(Article.id)).scalar() controller=ArtController() diff --git a/compiler/comp/fb.py b/compiler/comp/fb.py index afb9e50..d92a095 100644 --- a/compiler/comp/fb.py +++ b/compiler/comp/fb.py @@ -42,6 +42,7 @@ def fbfeed(url, raw, params={}): aa=fbfeedelement(m) if not aa.has_key("title"): aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M") + if not aa.has_key("section"): aa["section"]="Facebook: "+u[1] arts.append(aa) nx=None diff --git a/compiler/comp/fsch.py b/compiler/comp/fsch.py index 59667a6..c0f89cc 100644 --- a/compiler/comp/fsch.py +++ b/compiler/comp/fsch.py @@ -46,4 +46,5 @@ def fscharticle(url,raw,params={}): d["pi"]=pi d["sourcetype"]="fscharticle" d["section"]= "Fachschaft Chemie" + d["url"]=url return {"article": d} diff --git a/compiler/mprocess.py b/compiler/mprocess.py index e8bd1f6..cd35e46 100644 --- a/compiler/mprocess.py +++ b/compiler/mprocess.py @@ -1,7 +1,7 @@ from src import clogger # Logger for crawler from src.models import Article # Article model from datetime import datetime -from src.database import db_session +from src.database import db_session_process as db_session from mqueues import fetch_queue, compile_queue, put_fetch_queue from fetching import fetch_page, downloadfile, announce_articleid from fixing import fix_html, fix_file @@ -23,7 +23,8 @@ def process_article(art): art["text"] = fix_html(art["text"], art["url"]) if "image" in art: art["image"]=fix_file(art["url"], art["image"]) - clogger.info(art) +# clogger.info(art) + clogger.info(str(db_session.info)) aa = Article.from_hash(art) aa.process_hash(art) aa.last_fetched = datetime.now() @@ -31,11 
+32,11 @@ def process_article(art): db_session.add(aa) try: db_session.commit() - except InvalidRequestError,e: + except (InvalidRequestError, Exception),e: db_session.rollback() clogger.error(e) clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8"))) - return aa + return True # app.logger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8"))) # db_session.close() # announce_articleid(aa.id) diff --git a/compiler/mworker.py b/compiler/mworker.py index 67c6a36..01dcfcd 100644 --- a/compiler/mworker.py +++ b/compiler/mworker.py @@ -3,7 +3,7 @@ from mqueues import fetch_queue, compile_queue, process_queue from compiler import do_compile from mprocess import do_process from fetching import fetch_page -from gevent import spawn +from gevent import spawn,sleep from itertools import repeat from models import CrawlUrl from src import clogger @@ -22,6 +22,7 @@ def start_workers(f,c,p): def work_fetch(): try: while True: + sleep(2) run_fetch() except Empty: clogger.info("Fetch - Worker died") diff --git a/controller.py b/controller.py index 18e0222..3506ae4 100644 --- a/controller.py +++ b/controller.py @@ -25,8 +25,14 @@ class BaseController(): return o, errors def get_all(self): - return self.__myclass__.query.all() - +# try: + ar = self.__myclass__.query.all() +# except Error,e: +# db_session.rollback() +# clogger.error(e) +# ar=[] + return ar + def delete(self,id): o=self.get(id) if o != None: diff --git a/database.py b/database.py index 024c9f4..188bf0f 100644 --- a/database.py +++ b/database.py @@ -30,6 +30,9 @@ else: db_session = scoped_session(sessionmaker(autocommit=False, +# autoflush=False, + bind=engine)) +db_session_process = scoped_session(sessionmaker(autocommit=False, # autoflush=False, bind=engine)) diff --git a/sections/model.py b/sections/model.py index cea6066..b37b435 100644 --- a/sections/model.py +++ b/sections/model.py @@ -1,6 +1,6 @@ from sqlalchemy import Column, Integer, String, Boolean, 
DateTime, Text, ForeignKey from sqlalchemy.orm import relationship - +from sqlalchemy.exc import IntegrityError, InvalidRequestError from datetime import datetime from src.database import Base,db_session from marshmallow import Schema, fields, post_load @@ -82,7 +82,11 @@ class Section(Base): if s is None: s=Section(fname) db_session.add(s) - db_session.commit() + try: + db_session.commit() + except (IntegrityError, InvalidRequestError),e: + db_session.rollback() + clogger.error(e) s.foreign_name=fname db_session.add(s) db_session.commit()