commit bdfa16728d
parent 49ac42b9a5
Author: uwsgi
Date:   2017-02-15 10:32:45 +01:00

    diverse fixes and mysql

11 changed files with 75 additions and 20 deletions

View File

@@ -3,8 +3,11 @@ from .model import ArticleSchema
 #import flask
 from datetime import datetime
 import json
+from src.sections.model import Section
+from src.organizations.model import Organization
 from src.database import db_session, read_json
 from src.controller import BaseController
+#from src.organizations.controller import controller as org_controller
 class ArtController(BaseController):
     __myclass__=Article
     __jsonid__='article'
@@ -14,12 +17,14 @@ class ArtController(BaseController):
                 pp=int(v["per_page"])
             else:
                 pp=20
+        except ValueError:
+            pp=20
+        try:
             if v.has_key("page"):
                 o=(int(v["page"])-1) *pp
             else:
                 o=0
         except ValueError:
-            pp=20
             o=0
         if not (isinstance(pp,int) and pp>0 and pp<10000):
             pp=20
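Note on this hunk: giving each parameter its own try block means a malformed page value no longer resets a valid per_page (previously both errors landed in the same except and clobbered pp). A standalone sketch of the resulting behavior; parse_paging is a hypothetical name, the repo inlines this logic in ArtController:

# Python 2 sketch, matching the repo's has_key style; not from the repo.
def parse_paging(v):
    try:
        pp = int(v["per_page"]) if v.has_key("per_page") else 20
    except ValueError:
        pp = 20
    try:
        o = (int(v["page"]) - 1) * pp if v.has_key("page") else 0
    except ValueError:
        o = 0
    if not (isinstance(pp, int) and pp > 0 and pp < 10000):
        pp = 20
    return pp, o

# A malformed page no longer clobbers a valid per_page:
print parse_paging({"per_page": "50", "page": "x"})   # -> (50, 0)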
@@ -44,12 +49,22 @@ class ArtController(BaseController):
         return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()

-    def get_section_page(self,section_id, lim, off):
-        return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
+    def get_organization_page(self,organization_id, lim, off):
+        sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
+        sid=map(lambda a:a[0], sid)
+        articles=Article.query.filter(Article.section_id.in_(sid)).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
+        return articles
+#
+#        return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()

     def section_count(self,section_id):
         return Article.query.filter(Article.section_id==section_id).count()

+    def organization_count(self,organization_id):
+        sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
+        sid=map(lambda a:a[0], sid)
+        return Article.query.filter(Article.section_id.in_(sid)).count()
+
     def count(self):
         return Article.query.count()
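Note: the two-step lookup (collect the section ids, then filter with in_) avoids a join; the same page could be fetched in one query. A sketch under the assumption that these are plain SQLAlchemy models with the columns used above; not from the repo and untested:

from src.articles.model import Article
from src.sections.model import Section

def get_organization_page_join(organization_id, lim, off):
    # Single-query variant: join articles to their sections and filter
    # on the section's organization_id instead of pre-fetching ids.
    return (Article.query
            .join(Section, Article.section_id == Section.id)
            .filter(Section.organization_id == int(organization_id))
            .order_by(Article.published_date.desc())
            .limit(lim).offset(off)
            .all())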

View File

@@ -23,6 +23,9 @@ def index():
     if v.has_key("section_id"):
         articles=controller.get_section_page(int(v["section_id"]),pp,o)
         c=controller.section_count(int(v["section_id"]))
+    elif v.has_key("organization_id"):
+        articles=controller.get_organization_page(int(v["organization_id"]), pp,o)
+        c=controller.organization_count(int(v["organization_id"]))
     else:
         articles=controller.get_all_page(pp,o)
         c=controller.count()
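Note: the index view can now be driven by organization. A hypothetical request; the /articles path and port are assumptions, only the query parameter names come from the code above:

import requests

r = requests.get("http://localhost:5000/articles",
                 params={"organization_id": 1, "page": 2, "per_page": 20})
# organization_id takes the new elif branch; page 2 means offset 20.
print r.json()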

View File

@@ -30,7 +30,9 @@ def process_article(art):
     db_session.add(aa)
     db_session.commit()
     clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
+    db_session.close()
 #    announce_articleid(aa.id)
+#
     return aa

 # process a single found url

View File

@@ -7,7 +7,7 @@ from gevent import spawn
 from itertools import repeat
 from models import CrawlUrl
 from src import clogger
-from src.database import db_session2
+from src.database import db_session2,db_session
 def start_workers(f,c,p):
     for _ in range(f):
         clogger.debug("spawn fetchworker")
@@ -24,6 +24,8 @@ def work_fetch():
 def work_process():
     while True:
         run_process()
+#        db_session.close()
+
 def work_compile():
     while True:
         run_compile()

View File

@@ -34,9 +34,11 @@ class BaseController():
         db_session.commit()

     def update(self,id,request):
-        a=read_json(request)
+        d=read_json(request)
         o=self.get(id)
-        success,errors=o.update(a[self.__jsonid__])
+        if d.has_key(self.__jsonid__):
+            d= d[self.__jsonid__]
+        success,errors=o.update(d)
         if success:
             try:
                 db_session.commit()
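Note: update() now tolerates both the wrapped and the bare JSON body; previously the bare form raised a KeyError. A minimal illustration of the new has_key branch; the helper name is mine:

def unwrap(d, jsonid='article'):
    # Mirrors the new BaseController.update logic.
    if d.has_key(jsonid):
        d = d[jsonid]
    return d

wrapped = {"article": {"title": "new title"}}
bare = {"title": "new title"}
assert unwrap(wrapped) == unwrap(bare) == {"title": "new title"}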

View File

@@ -23,11 +23,14 @@ db_urlfile=cfg.get("db_url_file")
 if db_urlfile == None or db_urlfile.strip()=="":
     db_urlfile="../srctest_cu.db"

-engine = create_engine('sqlite:///'+ path.join(db_path,db_mainfile), convert_unicode=True)
+if cfg.get("db_main_type") == "mysql":
+    engine = create_engine("mysql+pymysql://%s:%s@localhost/crawler_articles?charset=utf8" % (cfg.get("db_main_user"), cfg.get("db_main_pw")))
+else:
+    engine = create_engine('sqlite:///'+ path.join(db_path,db_mainfile), convert_unicode=True)

 db_session = scoped_session(sessionmaker(autocommit=False,
-                                         autoflush=False,
+#                                         autoflush=False,
                                          bind=engine))
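Note: the engine is now chosen from config. A minimal sketch of the two setups this branch supports; the key names come from the diff, the credentials and the simplified sqlite path are placeholders:

cfg = {
    "db_main_type": "mysql",    # anything else falls back to sqlite
    "db_main_user": "crawler",  # hypothetical credentials
    "db_main_pw": "secret",
}
if cfg.get("db_main_type") == "mysql":
    url = "mysql+pymysql://%s:%s@localhost/crawler_articles?charset=utf8" % (
        cfg.get("db_main_user"), cfg.get("db_main_pw"))
else:
    url = "sqlite:///../srctest.db"  # real code joins db_path and db_mainfile
print url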

View File

@@ -2,6 +2,7 @@ import sys
 import json
 from src.articles.model import Article, FullArticleSchema
 from src.sections.model import Section,FullSectionSchema
+from src.organizations.model import Organization, FullOrganizationSchema

 if len(sys.argv) <= 1:
     raise Error("Kein Zieldateiname angegeben")
@@ -13,11 +14,16 @@ def dump_section(s):
     return FullSectionSchema().dump(s)

+def dump_organization(o):
+    return FullOrganizationSchema().dump(o)
+
 file = open(sys.argv[1], "w+")

 data={}
 data["articles"] = map(dump_article,Article.query.all())
 data["sections"] = map(dump_section,Section.query.all())
+data["organizations"] = map(dump_organization,Organization.query.all())

 json.dump (data, file)
 file.close()
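Note: the dump file gains a third top-level key. Approximate shape of the output; the per-object fields depend on the Full*Schema classes and are assumptions here:

example_dump = {
    "articles": [{"id": 1, "title": "Example", "section_id": 2}],
    "sections": [{"id": 2, "foreign_name": "politics", "organization_id": 3}],
    "organizations": [{"id": 3, "name": "Example Org"}],
}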

View File

@@ -2,6 +2,7 @@ import sys
 import json
 from src.articles.model import Article, FullArticleSchema
 from src.sections.model import Section, FullSectionSchema
+from src.organizations.model import Organization, FullOrganizationSchema
 from src.database import db_session
 from sqlalchemy.exc import IntegrityError
@@ -15,6 +16,8 @@ def load_article(a):
 def load_section(s):
     return Section.deserialize(s[0])

+def load_organization(s):
+    return Organization.deserialize(s[0])
+
 file = open(sys.argv[1], "r")
@@ -29,6 +32,8 @@ if isinstance(data,dict):
         articles=data["articles"]
     if data.has_key("sections"):
         sections=data["sections"]
+    if data.has_key("organizations"):
+        organizations=data["organizations"]
 else:
     articles=data
@@ -37,15 +42,21 @@ else:
 articles= map (load_article, articles)
 if sections is not None:
     sections=map(load_section, sections)
+if organizations is not None:
+    organizations=map(load_organization,organizations)

-for a in articles:
-    try:
-        db_session.add(a)
-        db_session.commit()
-    except IntegrityError:
-        db_session.rollback()
+if organizations is not None:
+    for s in organizations:
+        if not isinstance(s,Organization):
+            print type(s)
+        try:
+            db_session.add(s)
+            db_session.commit()
+        except IntegrityError:
+            db_session.rollback()
+        finally:
+            db_session.rollback()

 if sections is not None:
     for s in sections:
         if not isinstance(s,Section):
@@ -57,4 +68,14 @@ if sections is not None:
             db_session.rollback()

+for a in articles:
+    try:
+        db_session.add(a)
+        db_session.commit()
+    except IntegrityError:
+        db_session.rollback()
+    finally:
+        db_session.rollback()
+
 file.close()

View File

@@ -7,7 +7,7 @@ from marshmallow import Schema, fields, post_load, ValidationError
 from src import clogger
 import json
 import flask
-from src.models import Section
+#from src.models import Section
 import re

 def validate_key(key):

View File

@@ -7,7 +7,7 @@ from marshmallow import Schema, fields, post_load, ValidationError
 from src import clogger
 import json
 import flask
-from src.models import Section
+#from src.sections.model import Section
 import re

 def validate_image(k, img):

View File

@@ -30,6 +30,7 @@ class FullSectionSchema(Schema):
     foreign_name=fields.String()
     group=fields.String(required=False,allow_none=True )
     organization=fields.Nested(OrganizationCompSchema)
+    organization_id = fields.Integer()
     updated_at = fields.DateTime()
     created_at = fields.DateTime()
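Note: a serialized section now carries the raw foreign key next to the nested organization. A hypothetical example; the nested fields come from OrganizationCompSchema and are assumed:

section_json = {
    "foreign_name": "politics",
    "group": None,
    "organization": {"id": 3, "name": "Example Org"},
    "organization_id": 3,
    "updated_at": "2017-02-15T10:32:45",
    "created_at": "2017-02-15T10:32:45",
}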