various improvements

uwsgi
2017-02-21 18:05:51 +01:00
parent 158ab4236f
commit ae2f61485e
8 changed files with 40 additions and 14 deletions

View File

@@ -2,7 +2,10 @@ from .model import Article
from .model import ArticleSchema
#import flask
from datetime import datetime
from sqlalchemy import func
from sqlalchemy import exc
import json
from src import clogger
from src.sections.model import Section
from src.organizations.model import Organization
from src.database import db_session, read_json
@@ -42,7 +45,13 @@ class ArtController(BaseController):
def get_all_page(self,lim, off):
return Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all()
# try:
arts=Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all()
# except exc.InvalidRequestError, e:
# clogger.error(e)
# db_session.rollback()
# arts=Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all()
return arts
def get_section_page(self,section_id, lim, off):
@@ -52,21 +61,21 @@ class ArtController(BaseController):
def get_organization_page(self,organization_id, lim, off):
sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
sid=map(lambda a:a[0], sid)
articles=Article.query.filter(Article.section_id.in_(sid)).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
articles=db_session.query(Article).filter(Article.section_id.in_(sid)).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
return articles
#
# return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
def section_count(self,section_id):
return Article.query.filter(Article.section_id==section_id).count()
return db_session.query(func.count(Article.id)).filter(Article.section_id==section_id).scalar()
def organization_count(self,organization_id):
sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
sid=map(lambda a:a[0], sid)
return Article.query.filter(Article.section_id.in_(sid)).count()
return db_session.query(func.count(Article.id)).filter(Article.section_id.in_(sid)).scalar()
def count(self):
return Article.query.count()
return db_session.query(func.count(Article.id)).scalar()
controller=ArtController()
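The count methods above move from Article.query.filter(...).count() to db_session.query(func.count(Article.id)).scalar(). Query.count() wraps the full SELECT in a counting subquery, while func.count() issues a plain aggregate. A minimal sketch of the pattern (not this repo's file; the session and model are passed in as stand-ins):

# Sketch only: counting rows with func.count(Model.id) instead of Query.count().
from sqlalchemy import func

def count_articles_in_section(session, Article, section_id):
    # session -- a SQLAlchemy Session (e.g. the scoped db_session)
    # Article -- the mapped Article class with id and section_id columns
    # Emits roughly: SELECT count(articles.id) FROM articles WHERE section_id = :x
    return (
        session.query(func.count(Article.id))
        .filter(Article.section_id == section_id)
        .scalar()
    )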

View File

@@ -42,6 +42,7 @@ def fbfeed(url, raw, params={}):
aa=fbfeedelement(m)
if not aa.has_key("title"):
aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M")
if not aa.has_key("section"):
aa["section"]="Facebook: "+u[1]
arts.append(aa)
nx=None
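dict.has_key() is Python 2 only; the same defaulting works with "in" and setdefault. A small sketch of this block in Python 3 syntax, where entry and feed_name stand in for aa and u[1]:

# Sketch only: the title/section defaults from fbfeed(), written without has_key.
def apply_feed_defaults(entry, feed_name):
    if "title" not in entry:
        entry["title"] = feed_name + " at " + entry["published"].strftime("%Y-%m-%d %H:%M")
    entry.setdefault("section", "Facebook: " + feed_name)
    return entry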

View File

@@ -46,4 +46,5 @@ def fscharticle(url,raw,params={}):
d["pi"]=pi
d["sourcetype"]="fscharticle"
d["section"]= "Fachschaft Chemie"
d["url"]=url
return {"article": d}

View File

@@ -1,7 +1,7 @@
from src import clogger # Logger for crawler
from src.models import Article # Article model
from datetime import datetime
from src.database import db_session
from src.database import db_session_process as db_session
from mqueues import fetch_queue, compile_queue, put_fetch_queue
from fetching import fetch_page, downloadfile, announce_articleid
from fixing import fix_html, fix_file
@@ -23,7 +23,8 @@ def process_article(art):
art["text"] = fix_html(art["text"], art["url"])
if "image" in art:
art["image"]=fix_file(art["url"], art["image"])
clogger.info(art)
# clogger.info(art)
clogger.info(str(db_session.info))
aa = Article.from_hash(art)
aa.process_hash(art)
aa.last_fetched = datetime.now()
@@ -31,11 +32,11 @@ def process_article(art):
db_session.add(aa)
try:
db_session.commit()
except InvalidRequestError,e:
except (InvalidRequestError, Exception),e:
db_session.rollback()
clogger.error(e)
clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
return aa
return True
# app.logger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
# db_session.close()
# announce_articleid(aa.id)
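except (InvalidRequestError, Exception), e: uses the Python 2-only comma syntax, and Exception already covers InvalidRequestError, so the tuple adds nothing. A sketch of the same commit-or-rollback step in Python 3 form, with the session and logger passed in as stand-ins:

# Sketch of the commit step with "except ... as e" syntax.
# Catching SQLAlchemyError (the base class of InvalidRequestError) is usually
# enough; keep a bare Exception only if the broad net in this commit is intended.
from sqlalchemy.exc import SQLAlchemyError

def commit_or_rollback(session, logger):
    try:
        session.commit()
        return True
    except SQLAlchemyError as e:
        session.rollback()   # clears the failed transaction so the session stays usable
        logger.error(e)
        return False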

View File

@@ -3,7 +3,7 @@ from mqueues import fetch_queue, compile_queue, process_queue
from compiler import do_compile
from mprocess import do_process
from fetching import fetch_page
from gevent import spawn
from gevent import spawn,sleep
from itertools import repeat
from models import CrawlUrl
from src import clogger
@@ -22,6 +22,7 @@ def start_workers(f,c,p):
def work_fetch():
try:
while True:
sleep(2)
run_fetch()
except Empty:
clogger.info("Fetch - Worker died")

View File

@@ -25,7 +25,13 @@ class BaseController():
return o, errors
def get_all(self):
return self.__myclass__.query.all()
# try:
ar = self.__myclass__.query.all()
# except Error,e:
# db_session.rollback()
# clogger.error(e)
# ar=[]
return ar
def delete(self,id):
o=self.get(id)

View File

@@ -30,6 +30,9 @@ else:
db_session = scoped_session(sessionmaker(autocommit=False,
# autoflush=False,
bind=engine))
db_session_process = scoped_session(sessionmaker(autocommit=False,
# autoflush=False,
bind=engine))
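The new db_session_process is a second, independent scoped_session registry on the same engine, so the crawler worker (which now imports it as db_session) never shares a live Session with the web handlers. A compact sketch of the idea; the SQLite URL is a placeholder, not the project's real configuration:

# Sketch: two scoped_session registries bound to one engine.
from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session, sessionmaker

engine = create_engine("sqlite:///example.db")   # placeholder URL

db_session = scoped_session(sessionmaker(autocommit=False, bind=engine))
db_session_process = scoped_session(sessionmaker(autocommit=False, bind=engine))

# Crawler code then aliases the second registry, as in this commit:
#   from src.database import db_session_process as db_session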

View File

@@ -1,6 +1,6 @@
from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.exc import IntegrityError, InvalidRequestError
from datetime import datetime
from src.database import Base,db_session
from marshmallow import Schema, fields, post_load
@@ -82,7 +82,11 @@ class Section(Base):
if s is None:
s=Section(fname)
db_session.add(s)
try:
db_session.commit()
except (IntegrityError, InvalidRequestError),e:
db_session.rollback()
clogger.error(e)
s.foreign_name=fname
db_session.add(s)
db_session.commit()
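The except (IntegrityError, InvalidRequestError), e: clause again uses Python 2 comma syntax. A sketch of the whole get-or-create step in Python 3 form, assuming this module's existing imports (Section, db_session, clogger) and a foreign_name lookup roughly like the surrounding code; the retry after rollback mirrors this commit's behaviour rather than a general recipe:

# Sketch: get-or-create a Section by its foreign name, retrying the commit once
# after a rollback, as the commit does. Names are assumed from this module.
from sqlalchemy.exc import IntegrityError, InvalidRequestError

def get_or_create_section(fname):
    s = db_session.query(Section).filter(Section.foreign_name == fname).first()
    if s is None:
        s = Section(fname)
        db_session.add(s)
    try:
        db_session.commit()
    except (IntegrityError, InvalidRequestError) as e:
        db_session.rollback()        # discard the failed INSERT/flush
        clogger.error(e)
        s.foreign_name = fname       # re-apply the state dropped by the rollback
        db_session.add(s)
        db_session.commit()
    return s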