various improvements

This commit is contained in:
uwsgi
2017-02-21 18:05:51 +01:00
parent 158ab4236f
commit ae2f61485e
8 changed files with 40 additions and 14 deletions

View File

@@ -2,7 +2,10 @@ from .model import Article
from .model import ArticleSchema from .model import ArticleSchema
#import flask #import flask
from datetime import datetime from datetime import datetime
from sqlalchemy import func
from sqlalchemy import exc
import json import json
from src import clogger
from src.sections.model import Section from src.sections.model import Section
from src.organizations.model import Organization from src.organizations.model import Organization
from src.database import db_session, read_json from src.database import db_session, read_json
@@ -42,7 +45,13 @@ class ArtController(BaseController):
def get_all_page(self,lim, off): def get_all_page(self,lim, off):
return Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all() # try:
arts=Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all()
# except exc.InvalidRequestError, e:
# clogger.error(e)
# db_session.rollback()
# arts=Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all()
return arts
def get_section_page(self,section_id, lim, off): def get_section_page(self,section_id, lim, off):
@@ -52,21 +61,21 @@ class ArtController(BaseController):
def get_organization_page(self,organization_id, lim, off): def get_organization_page(self,organization_id, lim, off):
sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all() sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
sid=map(lambda a:a[0], sid) sid=map(lambda a:a[0], sid)
articles=Article.query.filter(Article.section_id.in_(sid)).order_by(Article.published_date.desc()).limit(lim).offset(off).all() articles=db_session.query(Article).filter(Article.section_id.in_(sid)).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
return articles return articles
# #
# return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all() # return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
def section_count(self,section_id): def section_count(self,section_id):
return Article.query.filter(Article.section_id==section_id).count() return db_session.query(func.count(Article.id)).filter(Article.section_id==section_id).scalar()
def organization_count(self,organization_id): def organization_count(self,organization_id):
sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all() sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
sid=map(lambda a:a[0], sid) sid=map(lambda a:a[0], sid)
return Article.query.filter(Article.section_id.in_(sid)).count() return db_session.query(func.count(Article.id)).filter(Article.section_id.in_(sid)).scalar()
def count(self): def count(self):
return Article.query.count() return db_session.query(func.count(Article.id)).scalar()
controller=ArtController() controller=ArtController()

View File

@@ -42,6 +42,7 @@ def fbfeed(url, raw, params={}):
aa=fbfeedelement(m) aa=fbfeedelement(m)
if not aa.has_key("title"): if not aa.has_key("title"):
aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M") aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M")
if not aa.has_key("section"):
aa["section"]="Facebook: "+u[1] aa["section"]="Facebook: "+u[1]
arts.append(aa) arts.append(aa)
nx=None nx=None

View File

@@ -46,4 +46,5 @@ def fscharticle(url,raw,params={}):
d["pi"]=pi d["pi"]=pi
d["sourcetype"]="fscharticle" d["sourcetype"]="fscharticle"
d["section"]= "Fachschaft Chemie" d["section"]= "Fachschaft Chemie"
d["url"]=url
return {"article": d} return {"article": d}

View File

@@ -1,7 +1,7 @@
from src import clogger # Logger for crawler from src import clogger # Logger for crawler
from src.models import Article # Article model from src.models import Article # Article model
from datetime import datetime from datetime import datetime
from src.database import db_session from src.database import db_session_process as db_session
from mqueues import fetch_queue, compile_queue, put_fetch_queue from mqueues import fetch_queue, compile_queue, put_fetch_queue
from fetching import fetch_page, downloadfile, announce_articleid from fetching import fetch_page, downloadfile, announce_articleid
from fixing import fix_html, fix_file from fixing import fix_html, fix_file
@@ -23,7 +23,8 @@ def process_article(art):
art["text"] = fix_html(art["text"], art["url"]) art["text"] = fix_html(art["text"], art["url"])
if "image" in art: if "image" in art:
art["image"]=fix_file(art["url"], art["image"]) art["image"]=fix_file(art["url"], art["image"])
clogger.info(art) # clogger.info(art)
clogger.info(str(db_session.info))
aa = Article.from_hash(art) aa = Article.from_hash(art)
aa.process_hash(art) aa.process_hash(art)
aa.last_fetched = datetime.now() aa.last_fetched = datetime.now()
@@ -31,11 +32,11 @@ def process_article(art):
db_session.add(aa) db_session.add(aa)
try: try:
db_session.commit() db_session.commit()
except InvalidRequestError,e: except (InvalidRequestError, Exception),e:
db_session.rollback() db_session.rollback()
clogger.error(e) clogger.error(e)
clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8"))) clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
return aa return True
# app.logger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8"))) # app.logger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
# db_session.close() # db_session.close()
# announce_articleid(aa.id) # announce_articleid(aa.id)

View File

@@ -3,7 +3,7 @@ from mqueues import fetch_queue, compile_queue, process_queue
from compiler import do_compile from compiler import do_compile
from mprocess import do_process from mprocess import do_process
from fetching import fetch_page from fetching import fetch_page
from gevent import spawn from gevent import spawn,sleep
from itertools import repeat from itertools import repeat
from models import CrawlUrl from models import CrawlUrl
from src import clogger from src import clogger
@@ -22,6 +22,7 @@ def start_workers(f,c,p):
def work_fetch(): def work_fetch():
try: try:
while True: while True:
sleep(2)
run_fetch() run_fetch()
except Empty: except Empty:
clogger.info("Fetch - Worker died") clogger.info("Fetch - Worker died")

View File

@@ -25,7 +25,13 @@ class BaseController():
return o, errors return o, errors
def get_all(self): def get_all(self):
return self.__myclass__.query.all() # try:
ar = self.__myclass__.query.all()
# except Error,e:
# db_session.rollback()
# clogger.error(e)
# ar=[]
return ar
def delete(self,id): def delete(self,id):
o=self.get(id) o=self.get(id)

View File

@@ -30,6 +30,9 @@ else:
db_session = scoped_session(sessionmaker(autocommit=False, db_session = scoped_session(sessionmaker(autocommit=False,
# autoflush=False,
bind=engine))
db_session_process = scoped_session(sessionmaker(autocommit=False,
# autoflush=False, # autoflush=False,
bind=engine)) bind=engine))

View File

@@ -1,6 +1,6 @@
from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text, ForeignKey from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text, ForeignKey
from sqlalchemy.orm import relationship from sqlalchemy.orm import relationship
from sqlalchemy.exc import IntegrityError, InvalidRequestError
from datetime import datetime from datetime import datetime
from src.database import Base,db_session from src.database import Base,db_session
from marshmallow import Schema, fields, post_load from marshmallow import Schema, fields, post_load
@@ -82,7 +82,11 @@ class Section(Base):
if s is None: if s is None:
s=Section(fname) s=Section(fname)
db_session.add(s) db_session.add(s)
db_session.commit() try:
db_session.commit()
except (IntegrityError, InvalidRequestError),e:
db_session.rollback()
clogger.error(e)
s.foreign_name=fname s.foreign_name=fname
db_session.add(s) db_session.add(s)
db_session.commit() db_session.commit()