From bdfa16728d7c3a291a4e1b5ad064eb5a2389c134 Mon Sep 17 00:00:00 2001
From: uwsgi
Date: Wed, 15 Feb 2017 10:32:45 +0100
Subject: [PATCH] diverse fixes and mysql

---
 articles/controller.py     | 21 ++++++++++++++++++---
 articles/views.py          |  3 +++
 compiler/mprocess.py       |  4 +++-
 compiler/mworker.py        |  4 +++-
 controller.py              |  6 ++++--
 database.py                |  9 ++++++---
 dump_articles.py           |  6 ++++++
 load_articles.py           | 37 +++++++++++++++++++++++++++++--------
 organizations/model.py     |  2 +-
 organizationtypes/model.py |  2 +-
 sections/model.py          |  1 +
 11 files changed, 75 insertions(+), 20 deletions(-)

diff --git a/articles/controller.py b/articles/controller.py
index 2684ee1..6f5f37c 100644
--- a/articles/controller.py
+++ b/articles/controller.py
@@ -3,8 +3,11 @@ from .model import ArticleSchema
 #import flask
 from datetime import datetime
 import json
+from src.sections.model import Section
+from src.organizations.model import Organization
 from src.database import db_session, read_json
 from src.controller import BaseController
+#from src.organizations.controller import controller as org_controller
 class ArtController(BaseController):
     __myclass__=Article
     __jsonid__='article'
@@ -14,12 +17,14 @@ class ArtController(BaseController):
             if v.has_key("per_page"):
                 pp=int(v["per_page"])
             else:
                 pp=20
+        except ValueError:
+            pp=20
+        try:
             if v.has_key("page"):
                 o=(int(v["page"])-1) *pp
             else:
                 o=0
         except ValueError:
-            pp=20
             o=0
         if not (isinstance(pp,int) and pp>0 and pp<10000): pp=20
@@ -44,12 +49,22 @@ class ArtController(BaseController):
         return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
 
-    def get_section_page(self,section_id, lim, off):
-        return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
+    def get_organization_page(self,organization_id, lim, off):
+        sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
+        sid=map(lambda a:a[0], sid)
+        articles=Article.query.filter(Article.section_id.in_(sid)).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
+        return articles
+#
+#        return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
 
     def section_count(self,section_id):
         return Article.query.filter(Article.section_id==section_id).count()
 
+    def organization_count(self,organization_id):
+        sid=db_session.query(Section.id).filter(Section.organization_id==int(organization_id)).all()
+        sid=map(lambda a:a[0], sid)
+        return Article.query.filter(Article.section_id.in_(sid)).count()
+
     def count(self):
         return Article.query.count()
 
 
diff --git a/articles/views.py b/articles/views.py
index db9403f..b55d707 100644
--- a/articles/views.py
+++ b/articles/views.py
@@ -23,6 +23,9 @@ def index():
     if v.has_key("section_id"):
         articles=controller.get_section_page(int(v["section_id"]),pp,o)
         c=controller.section_count(int(v["section_id"]))
+    elif v.has_key("organization_id"):
+        articles=controller.get_organization_page(int(v["organization_id"]), pp,o)
+        c=controller.organization_count(int(v["organization_id"]))
     else:
         articles=controller.get_all_page(pp,o)
         c=controller.count()
diff --git a/compiler/mprocess.py b/compiler/mprocess.py
index c7b0dc7..8164a82 100644
--- a/compiler/mprocess.py
+++ b/compiler/mprocess.py
@@ -30,7 +30,9 @@ def process_article(art):
     db_session.add(aa)
     db_session.commit()
     clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
-#    announce_articleid(aa.id)
+    db_session.close()
+#    announce_articleid(aa.id)
+#    return aa
 
 
 # process a single found url
diff --git a/compiler/mworker.py b/compiler/mworker.py
index 0a80e28..61aea55 100644
--- a/compiler/mworker.py
+++ b/compiler/mworker.py
@@ -7,7 +7,7 @@ from gevent import spawn
 from itertools import repeat
 from models import CrawlUrl
 from src import clogger
-from src.database import db_session2
+from src.database import db_session2,db_session
 def start_workers(f,c,p):
     for _ in range(f):
         clogger.debug("spawn fetchworker")
@@ -24,6 +24,8 @@ def work_fetch():
 def work_process():
     while True:
         run_process()
+#        db_session.close()
+
 def work_compile():
     while True:
         run_compile()
diff --git a/controller.py b/controller.py
index de55d54..18e0222 100644
--- a/controller.py
+++ b/controller.py
@@ -34,9 +34,11 @@ class BaseController():
         db_session.commit()
 
     def update(self,id,request):
-        a=read_json(request)
+        d=read_json(request)
         o=self.get(id)
-        success,errors=o.update(a[self.__jsonid__])
+        if d.has_key(self.__jsonid__):
+            d= d[self.__jsonid__]
+        success,errors=o.update(d)
         if success:
             try:
                 db_session.commit()
diff --git a/database.py b/database.py
index f298fb9..86c8c9a 100644
--- a/database.py
+++ b/database.py
@@ -23,11 +23,14 @@ db_urlfile=cfg.get("db_url_file")
 if db_urlfile == None or db_urlfile.strip()=="":
     db_urlfile="../srctest_cu.db"
 
-
-engine = create_engine('sqlite:///'+ path.join(db_path,db_mainfile), convert_unicode=True)
+if cfg.get("db_main_type") == "mysql":
+    engine = create_engine("mysql+pymysql://%s:%s@localhost/crawler_articles?charset=utf8" % (cfg.get("db_main_user"), cfg.get("db_main_pw")))
+else:
+    engine = create_engine('sqlite:///'+ path.join(db_path,db_mainfile), convert_unicode=True)
+
 
 db_session = scoped_session(sessionmaker(autocommit=False,
-                                         autoflush=False,
+#                                         autoflush=False,
                                          bind=engine))
 
 
diff --git a/dump_articles.py b/dump_articles.py
index d3bfab1..4a779b1 100644
--- a/dump_articles.py
+++ b/dump_articles.py
@@ -2,6 +2,7 @@ import sys
 import json
 from src.articles.model import Article, FullArticleSchema
 from src.sections.model import Section,FullSectionSchema
+from src.organizations.model import Organization, FullOrganizationSchema
 
 if len(sys.argv) <= 1:
     raise Error("Kein Zieldateiname angegeben")
@@ -13,11 +14,16 @@ def dump_section(s):
     return FullSectionSchema().dump(s)
 
 
+def dump_organization(o):
+    return FullOrganizationSchema().dump(o)
+
 file = open(sys.argv[1], "w+")
 
 data={}
 data["articles"] = map(dump_article,Article.query.all())
 data["sections"] = map(dump_section,Section.query.all())
+data["organizations"] = map(dump_organization,Organization.query.all())
+
 json.dump (data, file)
 
 file.close()
diff --git a/load_articles.py b/load_articles.py
index 3da5a0e..aac541c 100644
--- a/load_articles.py
+++ b/load_articles.py
@@ -2,6 +2,7 @@ import sys
 import json
 from src.articles.model import Article, FullArticleSchema
 from src.sections.model import Section, FullSectionSchema
+from src.organizations.model import Organization, FullOrganizationSchema
 from src.database import db_session
 from sqlalchemy.exc import IntegrityError
 
@@ -15,6 +16,8 @@ def load_article(a):
 def load_section(s):
     return Section.deserialize(s[0])
 
+def load_organization(s):
+    return Organization.deserialize(s[0])
 
 file = open(sys.argv[1], "r")
 
@@ -29,6 +32,8 @@ if isinstance(data,dict):
         articles=data["articles"]
     if data.has_key("sections"):
         sections=data["sections"]
+    if data.has_key("organizations"):
+        organizations=data["organizations"]
 else:
     articles=data
 
@@ -37,15 +42,21 @@ else:
 articles= map(load_article, articles)
 if sections is not None:
     sections=map(load_section, sections)
+if organizations is not None:
+    organizations=map(load_organization,organizations)
+
+
+if organizations is not None:
+    for s in organizations:
+        if not isinstance(s,Organization):
+            print type(s)
+        try:
+            db_session.add(s)
+            db_session.commit()
+        except IntegrityError:
+            db_session.rollback()
+
 
-for a in articles:
-    try:
-        db_session.add(a)
-        db_session.commit()
-    except IntegrityError:
-        db_session.rollback()
-    finally:
-        db_session.rollback()
 if sections is not None:
     for s in sections:
         if not isinstance(s,Section):
@@ -56,5 +67,15 @@ if sections is not None:
             print type(s)
         try:
             db_session.add(s)
             db_session.commit()
         except IntegrityError:
             db_session.rollback()
+
+for a in articles:
+    try:
+        db_session.add(a)
+        db_session.commit()
+    except IntegrityError:
+        db_session.rollback()
+    finally:
+        db_session.rollback()
+
 file.close()
diff --git a/organizations/model.py b/organizations/model.py
index c1c10dc..58b162b 100644
--- a/organizations/model.py
+++ b/organizations/model.py
@@ -7,7 +7,7 @@ from marshmallow import Schema, fields, post_load, ValidationError
 from src import clogger
 import json
 import flask
-from src.models import Section
+#from src.models import Section
 import re
 
 def validate_key(key):
diff --git a/organizationtypes/model.py b/organizationtypes/model.py
index e7c95e1..538600b 100644
--- a/organizationtypes/model.py
+++ b/organizationtypes/model.py
@@ -7,7 +7,7 @@ from marshmallow import Schema, fields, post_load, ValidationError
 from src import clogger
 import json
 import flask
-from src.models import Section
+#from src.sections.model import Section
 import re
 
 def validate_image(k, img):
diff --git a/sections/model.py b/sections/model.py
index 25ba603..cea6066 100644
--- a/sections/model.py
+++ b/sections/model.py
@@ -30,6 +30,7 @@ class FullSectionSchema(Schema):
     foreign_name=fields.String()
     group=fields.String(required=False,allow_none=True )
     organization=fields.Nested(OrganizationCompSchema)
+    organization_id = fields.Integer()
     updated_at = fields.DateTime()
     created_at = fields.DateTime()
 