various improvements
This commit is contained in:
@@ -2,7 +2,10 @@ from .model import Article
|
|||||||
from .model import ArticleSchema
|
from .model import ArticleSchema
|
||||||
#import flask
|
#import flask
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from sqlalchemy import func
|
||||||
|
from sqlalchemy import exc
|
||||||
import json
|
import json
|
||||||
|
from src import clogger
|
||||||
from src.sections.model import Section
|
from src.sections.model import Section
|
||||||
from src.organizations.model import Organization
|
from src.organizations.model import Organization
|
||||||
from src.database import db_session, read_json
|
from src.database import db_session, read_json
|
||||||
@@ -42,7 +45,13 @@ class ArtController(BaseController):
|
|||||||
|
|
||||||
|
|
||||||
def get_all_page(self,lim, off):
|
def get_all_page(self,lim, off):
|
||||||
return Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all()
|
# try:
|
||||||
|
arts=Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all()
|
||||||
|
# except exc.InvalidRequestError, e:
|
||||||
|
# clogger.error(e)
|
||||||
|
# db_session.rollback()
|
||||||
|
# arts=Article.query.order_by(Article.published_date.desc()).limit(lim).offset(off).all()
|
||||||
|
return arts
|
||||||
|
|
||||||
|
|
||||||
def get_section_page(self,section_id, lim, off):
|
def get_section_page(self,section_id, lim, off):
|
||||||
@@ -52,21 +61,21 @@ class ArtController(BaseController):
|
|||||||
def get_organization_page(self,organization_id, lim, off):
|
def get_organization_page(self,organization_id, lim, off):
|
||||||
sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
|
sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
|
||||||
sid=map(lambda a:a[0], sid)
|
sid=map(lambda a:a[0], sid)
|
||||||
articles=Article.query.filter(Article.section_id.in_(sid)).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
|
articles=db_session.query(Article).filter(Article.section_id.in_(sid)).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
|
||||||
return articles
|
return articles
|
||||||
#
|
#
|
||||||
# return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
|
# return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
|
||||||
|
|
||||||
def section_count(self,section_id):
|
def section_count(self,section_id):
|
||||||
return Article.query.filter(Article.section_id==section_id).count()
|
return db_session.query(func.count(Article.id)).filter(Article.section_id==section_id).scalar()
|
||||||
|
|
||||||
def organization_count(self,organization_id):
|
def organization_count(self,organization_id):
|
||||||
sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
|
sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
|
||||||
sid=map(lambda a:a[0], sid)
|
sid=map(lambda a:a[0], sid)
|
||||||
return Article.query.filter(Article.section_id.in_(sid)).count()
|
return db_session.query(func.count(Article.id)).filter(Article.section_id.in_(sid)).scalar()
|
||||||
|
|
||||||
def count(self):
|
def count(self):
|
||||||
return Article.query.count()
|
return db_session.query(func.count(Article.id)).scalar()
|
||||||
|
|
||||||
|
|
||||||
controller=ArtController()
|
controller=ArtController()
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ def fbfeed(url, raw, params={}):
|
|||||||
aa=fbfeedelement(m)
|
aa=fbfeedelement(m)
|
||||||
if not aa.has_key("title"):
|
if not aa.has_key("title"):
|
||||||
aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M")
|
aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M")
|
||||||
|
if not aa.has_key("section"):
|
||||||
aa["section"]="Facebook: "+u[1]
|
aa["section"]="Facebook: "+u[1]
|
||||||
arts.append(aa)
|
arts.append(aa)
|
||||||
nx=None
|
nx=None
|
||||||
|
|||||||
@@ -46,4 +46,5 @@ def fscharticle(url,raw,params={}):
|
|||||||
d["pi"]=pi
|
d["pi"]=pi
|
||||||
d["sourcetype"]="fscharticle"
|
d["sourcetype"]="fscharticle"
|
||||||
d["section"]= "Fachschaft Chemie"
|
d["section"]= "Fachschaft Chemie"
|
||||||
|
d["url"]=url
|
||||||
return {"article": d}
|
return {"article": d}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from src import clogger # Logger for crawler
|
from src import clogger # Logger for crawler
|
||||||
from src.models import Article # Article model
|
from src.models import Article # Article model
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from src.database import db_session
|
from src.database import db_session_process as db_session
|
||||||
from mqueues import fetch_queue, compile_queue, put_fetch_queue
|
from mqueues import fetch_queue, compile_queue, put_fetch_queue
|
||||||
from fetching import fetch_page, downloadfile, announce_articleid
|
from fetching import fetch_page, downloadfile, announce_articleid
|
||||||
from fixing import fix_html, fix_file
|
from fixing import fix_html, fix_file
|
||||||
@@ -23,7 +23,8 @@ def process_article(art):
|
|||||||
art["text"] = fix_html(art["text"], art["url"])
|
art["text"] = fix_html(art["text"], art["url"])
|
||||||
if "image" in art:
|
if "image" in art:
|
||||||
art["image"]=fix_file(art["url"], art["image"])
|
art["image"]=fix_file(art["url"], art["image"])
|
||||||
clogger.info(art)
|
# clogger.info(art)
|
||||||
|
clogger.info(str(db_session.info))
|
||||||
aa = Article.from_hash(art)
|
aa = Article.from_hash(art)
|
||||||
aa.process_hash(art)
|
aa.process_hash(art)
|
||||||
aa.last_fetched = datetime.now()
|
aa.last_fetched = datetime.now()
|
||||||
@@ -31,11 +32,11 @@ def process_article(art):
|
|||||||
db_session.add(aa)
|
db_session.add(aa)
|
||||||
try:
|
try:
|
||||||
db_session.commit()
|
db_session.commit()
|
||||||
except InvalidRequestError,e:
|
except (InvalidRequestError, Exception),e:
|
||||||
db_session.rollback()
|
db_session.rollback()
|
||||||
clogger.error(e)
|
clogger.error(e)
|
||||||
clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
|
clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
|
||||||
return aa
|
return True
|
||||||
# app.logger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
|
# app.logger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
|
||||||
# db_session.close()
|
# db_session.close()
|
||||||
# announce_articleid(aa.id)
|
# announce_articleid(aa.id)
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ from mqueues import fetch_queue, compile_queue, process_queue
|
|||||||
from compiler import do_compile
|
from compiler import do_compile
|
||||||
from mprocess import do_process
|
from mprocess import do_process
|
||||||
from fetching import fetch_page
|
from fetching import fetch_page
|
||||||
from gevent import spawn
|
from gevent import spawn,sleep
|
||||||
from itertools import repeat
|
from itertools import repeat
|
||||||
from models import CrawlUrl
|
from models import CrawlUrl
|
||||||
from src import clogger
|
from src import clogger
|
||||||
@@ -22,6 +22,7 @@ def start_workers(f,c,p):
|
|||||||
def work_fetch():
|
def work_fetch():
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
|
sleep(2)
|
||||||
run_fetch()
|
run_fetch()
|
||||||
except Empty:
|
except Empty:
|
||||||
clogger.info("Fetch - Worker died")
|
clogger.info("Fetch - Worker died")
|
||||||
|
|||||||
@@ -25,7 +25,13 @@ class BaseController():
|
|||||||
return o, errors
|
return o, errors
|
||||||
|
|
||||||
def get_all(self):
|
def get_all(self):
|
||||||
return self.__myclass__.query.all()
|
# try:
|
||||||
|
ar = self.__myclass__.query.all()
|
||||||
|
# except Error,e:
|
||||||
|
# db_session.rollback()
|
||||||
|
# clogger.error(e)
|
||||||
|
# ar=[]
|
||||||
|
return ar
|
||||||
|
|
||||||
def delete(self,id):
|
def delete(self,id):
|
||||||
o=self.get(id)
|
o=self.get(id)
|
||||||
|
|||||||
@@ -30,6 +30,9 @@ else:
|
|||||||
|
|
||||||
|
|
||||||
db_session = scoped_session(sessionmaker(autocommit=False,
|
db_session = scoped_session(sessionmaker(autocommit=False,
|
||||||
|
# autoflush=False,
|
||||||
|
bind=engine))
|
||||||
|
db_session_process = scoped_session(sessionmaker(autocommit=False,
|
||||||
# autoflush=False,
|
# autoflush=False,
|
||||||
bind=engine))
|
bind=engine))
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text, ForeignKey
|
from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text, ForeignKey
|
||||||
from sqlalchemy.orm import relationship
|
from sqlalchemy.orm import relationship
|
||||||
|
from sqlalchemy.exc import IntegrityError, InvalidRequestError
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from src.database import Base,db_session
|
from src.database import Base,db_session
|
||||||
from marshmallow import Schema, fields, post_load
|
from marshmallow import Schema, fields, post_load
|
||||||
@@ -82,7 +82,11 @@ class Section(Base):
|
|||||||
if s is None:
|
if s is None:
|
||||||
s=Section(fname)
|
s=Section(fname)
|
||||||
db_session.add(s)
|
db_session.add(s)
|
||||||
db_session.commit()
|
try:
|
||||||
|
db_session.commit()
|
||||||
|
except (IntegrityError, InvalidRequestError),e:
|
||||||
|
db_session.rollback()
|
||||||
|
clogger.error(e)
|
||||||
s.foreign_name=fname
|
s.foreign_name=fname
|
||||||
db_session.add(s)
|
db_session.add(s)
|
||||||
db_session.commit()
|
db_session.commit()
|
||||||
|
|||||||
Reference in New Issue
Block a user