diverse fixes und mysql
This commit is contained in:
@@ -3,8 +3,11 @@ from .model import ArticleSchema
|
|||||||
#import flask
|
#import flask
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
|
from src.sections.model import Section
|
||||||
|
from src.organizations.model import Organization
|
||||||
from src.database import db_session, read_json
|
from src.database import db_session, read_json
|
||||||
from src.controller import BaseController
|
from src.controller import BaseController
|
||||||
|
#from src.organizations.controller import controller as org_controller
|
||||||
class ArtController(BaseController):
|
class ArtController(BaseController):
|
||||||
__myclass__=Article
|
__myclass__=Article
|
||||||
__jsonid__='article'
|
__jsonid__='article'
|
||||||
@@ -14,12 +17,14 @@ class ArtController(BaseController):
|
|||||||
pp=int(v["per_page"])
|
pp=int(v["per_page"])
|
||||||
else:
|
else:
|
||||||
pp=20
|
pp=20
|
||||||
|
except ValueError:
|
||||||
|
pp=20
|
||||||
|
try:
|
||||||
if v.has_key("page"):
|
if v.has_key("page"):
|
||||||
o=(int(v["page"])-1) *pp
|
o=(int(v["page"])-1) *pp
|
||||||
else:
|
else:
|
||||||
o=0
|
o=0
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pp=20
|
|
||||||
o=0
|
o=0
|
||||||
if not (isinstance(pp,int) and pp>0 and pp<10000):
|
if not (isinstance(pp,int) and pp>0 and pp<10000):
|
||||||
pp=20
|
pp=20
|
||||||
@@ -44,12 +49,22 @@ class ArtController(BaseController):
|
|||||||
return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
|
return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
|
||||||
|
|
||||||
|
|
||||||
def get_section_page(self,section_id, lim, off):
|
def get_organization_page(self, organization_id, lim, off):
    """Return one page of articles from all sections of an organization.

    First collects the ids of the sections whose ``organization_id``
    matches, then selects the articles in those sections ordered by
    ``published_date`` descending.

    :param organization_id: organization key; passed through ``int()``,
        so a non-numeric value raises ``ValueError``.
    :param lim: maximum number of articles to return.
    :param off: number of articles to skip before the page starts.
    :returns: list of ``Article`` objects; empty when the organization
        has no sections.
    """
    rows = db_session.query(Section.id).filter(
        Section.organization_id == int(organization_id)).all()
    # Every row carries just the selected id at index 0.
    section_ids = [row[0] for row in rows]
    if not section_ids:
        # Without sections there can be no articles: skip the second
        # query instead of sending an empty IN (...) list to the database.
        return []
    return (Article.query
            .filter(Article.section_id.in_(section_ids))
            .order_by(Article.published_date.desc())
            .limit(lim)
            .offset(off)
            .all())
|
||||||
|
#
|
||||||
|
# return Article.query.filter(Article.section_id==section_id).order_by(Article.published_date.desc()).limit(lim).offset(off).all()
|
||||||
|
|
||||||
def section_count(self, section_id):
    """Return the number of articles whose ``section_id`` equals *section_id*."""
    in_section = Article.query.filter(Article.section_id == section_id)
    return in_section.count()
|
||||||
|
|
||||||
|
def organization_count(self, organization_id):
    """Return the number of articles in all sections of an organization.

    :param organization_id: organization key; passed through ``int()``,
        so a non-numeric value raises ``ValueError``.
    :returns: article count as an integer; ``0`` when the organization
        has no sections.
    """
    rows = db_session.query(Section.id).filter(
        Section.organization_id == int(organization_id)).all()
    # Every row carries just the selected id at index 0.
    section_ids = [row[0] for row in rows]
    if not section_ids:
        # Nothing to count: avoid issuing a query with an empty IN (...) list.
        return 0
    return Article.query.filter(Article.section_id.in_(section_ids)).count()
|
||||||
|
|
||||||
def count(self):
    """Return the total number of articles."""
    all_articles = Article.query
    return all_articles.count()
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,9 @@ def index():
|
|||||||
if v.has_key("section_id"):
|
if v.has_key("section_id"):
|
||||||
articles=controller.get_section_page(int(v["section_id"]),pp,o)
|
articles=controller.get_section_page(int(v["section_id"]),pp,o)
|
||||||
c=controller.section_count(int(v["section_id"]))
|
c=controller.section_count(int(v["section_id"]))
|
||||||
|
elif v.has_key("organization_id"):
|
||||||
|
articles=controller.get_organization_page(int(v["organization_id"]), pp,o)
|
||||||
|
c=controller.organization_count(int(v["organization_id"]))
|
||||||
else:
|
else:
|
||||||
articles=controller.get_all_page(pp,o)
|
articles=controller.get_all_page(pp,o)
|
||||||
c=controller.count()
|
c=controller.count()
|
||||||
|
|||||||
@@ -30,7 +30,9 @@ def process_article(art):
|
|||||||
db_session.add(aa)
|
db_session.add(aa)
|
||||||
db_session.commit()
|
db_session.commit()
|
||||||
clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
|
clogger.info("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
|
||||||
|
db_session.close()
|
||||||
# announce_articleid(aa.id)
|
# announce_articleid(aa.id)
|
||||||
|
#
|
||||||
return aa
|
return aa
|
||||||
|
|
||||||
# process a single found url
|
# process a single found url
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from gevent import spawn
|
|||||||
from itertools import repeat
|
from itertools import repeat
|
||||||
from models import CrawlUrl
|
from models import CrawlUrl
|
||||||
from src import clogger
|
from src import clogger
|
||||||
from src.database import db_session2
|
from src.database import db_session2,db_session
|
||||||
def start_workers(f,c,p):
|
def start_workers(f,c,p):
|
||||||
for _ in range(f):
|
for _ in range(f):
|
||||||
clogger.debug("spawn fetchworker")
|
clogger.debug("spawn fetchworker")
|
||||||
@@ -24,6 +24,8 @@ def work_fetch():
|
|||||||
def work_process():
|
def work_process():
|
||||||
while True:
|
while True:
|
||||||
run_process()
|
run_process()
|
||||||
|
# db_session.close()
|
||||||
|
|
||||||
def work_compile():
|
def work_compile():
|
||||||
while True:
|
while True:
|
||||||
run_compile()
|
run_compile()
|
||||||
|
|||||||
@@ -34,9 +34,11 @@ class BaseController():
|
|||||||
db_session.commit()
|
db_session.commit()
|
||||||
|
|
||||||
def update(self,id,request):
|
def update(self,id,request):
|
||||||
a=read_json(request)
|
d=read_json(request)
|
||||||
o=self.get(id)
|
o=self.get(id)
|
||||||
success,errors=o.update(a[self.__jsonid__])
|
if d.has_key(self.__jsonid__):
|
||||||
|
d= d[self.__jsonid__]
|
||||||
|
success,errors=o.update(d)
|
||||||
if success:
|
if success:
|
||||||
try:
|
try:
|
||||||
db_session.commit()
|
db_session.commit()
|
||||||
|
|||||||
@@ -23,11 +23,14 @@ db_urlfile=cfg.get("db_url_file")
|
|||||||
if db_urlfile == None or db_urlfile.strip()=="":
|
if db_urlfile == None or db_urlfile.strip()=="":
|
||||||
db_urlfile="../srctest_cu.db"
|
db_urlfile="../srctest_cu.db"
|
||||||
|
|
||||||
|
if cfg.get("db_main_type") == "mysql":
|
||||||
|
engine = create_engine("mysql+pymysql://%s:%s@localhost/crawler_articles?charset=utf8" % (cfg.get("db_main_user"), cfg.get("db_main_pw")))
|
||||||
|
else:
|
||||||
engine = create_engine('sqlite:///'+ path.join(db_path,db_mainfile), convert_unicode=True)
|
engine = create_engine('sqlite:///'+ path.join(db_path,db_mainfile), convert_unicode=True)
|
||||||
|
|
||||||
|
|
||||||
db_session = scoped_session(sessionmaker(autocommit=False,
|
db_session = scoped_session(sessionmaker(autocommit=False,
|
||||||
autoflush=False,
|
# autoflush=False,
|
||||||
bind=engine))
|
bind=engine))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import sys
|
|||||||
import json
|
import json
|
||||||
from src.articles.model import Article, FullArticleSchema
|
from src.articles.model import Article, FullArticleSchema
|
||||||
from src.sections.model import Section,FullSectionSchema
|
from src.sections.model import Section,FullSectionSchema
|
||||||
|
from src.organizations.model import Organization, FullOrganizationSchema
|
||||||
# Abort early when no target file name was given on the command line.
# ``Error`` is neither defined nor imported in the visible head of this
# script, so ``raise Error(...)`` would fail with a NameError instead of
# showing the message; SystemExit prints it and exits with status 1.
if len(sys.argv) <= 1:
    raise SystemExit("Kein Zieldateiname angegeben")
|
||||||
|
|
||||||
@@ -13,11 +14,16 @@ def dump_section(s):
|
|||||||
return FullSectionSchema().dump(s)
|
return FullSectionSchema().dump(s)
|
||||||
|
|
||||||
|
|
||||||
|
def dump_organization(o):
    """Serialize organization *o* with ``FullOrganizationSchema`` and return the dump result."""
    schema = FullOrganizationSchema()
    return schema.dump(o)
|
||||||
|
|
||||||
|
|
||||||
# Collect every article, section and organization and write them as a single
# JSON document to the file named by the first command-line argument.
# list(...) keeps the values JSON-serializable on interpreters where map()
# returns an iterator instead of a list.
data = {}
data["articles"] = list(map(dump_article, Article.query.all()))
data["sections"] = list(map(dump_section, Section.query.all()))
data["organizations"] = list(map(dump_organization, Organization.query.all()))

# The context manager closes the file even when serialization fails, and the
# handle no longer shadows the ``file`` builtin. The target is opened only
# after the queries succeeded, so a failed query does not truncate it.
with open(sys.argv[1], "w+") as out_file:
    json.dump(data, out_file)
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import sys
|
|||||||
import json
|
import json
|
||||||
from src.articles.model import Article, FullArticleSchema
|
from src.articles.model import Article, FullArticleSchema
|
||||||
from src.sections.model import Section, FullSectionSchema
|
from src.sections.model import Section, FullSectionSchema
|
||||||
|
from src.organizations.model import Organization, FullOrganizationSchema
|
||||||
from src.database import db_session
|
from src.database import db_session
|
||||||
from sqlalchemy.exc import IntegrityError
|
from sqlalchemy.exc import IntegrityError
|
||||||
|
|
||||||
@@ -15,6 +16,8 @@ def load_article(a):
|
|||||||
def load_section(s):
    """Build a ``Section`` by deserializing the first element of *s*.

    NOTE(review): *s* is presumably the ``[data, errors]`` pair that the
    dump script wrote for each section - confirm against the dump format.
    """
    payload = s[0]
    return Section.deserialize(payload)
|
||||||
|
|
||||||
|
def load_organization(s):
    """Build an ``Organization`` by deserializing the first element of *s*.

    NOTE(review): *s* is presumably the ``[data, errors]`` pair that the
    dump script wrote for each organization - confirm against the dump format.
    """
    payload = s[0]
    return Organization.deserialize(payload)
|
||||||
|
|
||||||
|
|
||||||
file = open(sys.argv[1], "r")
|
file = open(sys.argv[1], "r")
|
||||||
@@ -29,6 +32,8 @@ if isinstance(data,dict):
|
|||||||
articles=data["articles"]
|
articles=data["articles"]
|
||||||
if data.has_key("sections"):
|
if data.has_key("sections"):
|
||||||
sections=data["sections"]
|
sections=data["sections"]
|
||||||
|
if data.has_key("organizations"):
|
||||||
|
organizations=data["organizations"]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
articles=data
|
articles=data
|
||||||
@@ -37,15 +42,21 @@ else:
|
|||||||
articles= map (load_article, articles)
|
articles= map (load_article, articles)
|
||||||
if sections is not None:
|
if sections is not None:
|
||||||
sections=map(load_section, sections)
|
sections=map(load_section, sections)
|
||||||
|
if organizations is not None:
|
||||||
|
organizations=map(load_organization,organizations)
|
||||||
|
|
||||||
for a in articles:
|
|
||||||
|
if organizations is not None:
|
||||||
|
for s in organizations:
|
||||||
|
if not isinstance(s,Organization):
|
||||||
|
print type(s)
|
||||||
try:
|
try:
|
||||||
db_session.add(a)
|
db_session.add(s)
|
||||||
db_session.commit()
|
db_session.commit()
|
||||||
except IntegrityError:
|
except IntegrityError:
|
||||||
db_session.rollback()
|
db_session.rollback()
|
||||||
finally:
|
|
||||||
db_session.rollback()
|
|
||||||
if sections is not None:
|
if sections is not None:
|
||||||
for s in sections:
|
for s in sections:
|
||||||
if not isinstance(s,Section):
|
if not isinstance(s,Section):
|
||||||
@@ -57,4 +68,14 @@ if sections is not None:
|
|||||||
db_session.rollback()
|
db_session.rollback()
|
||||||
|
|
||||||
|
|
||||||
|
for a in articles:
|
||||||
|
try:
|
||||||
|
db_session.add(a)
|
||||||
|
db_session.commit()
|
||||||
|
except IntegrityError:
|
||||||
|
db_session.rollback()
|
||||||
|
finally:
|
||||||
|
db_session.rollback()
|
||||||
|
|
||||||
|
|
||||||
file.close()
|
file.close()
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from marshmallow import Schema, fields, post_load, ValidationError
|
|||||||
from src import clogger
|
from src import clogger
|
||||||
import json
|
import json
|
||||||
import flask
|
import flask
|
||||||
from src.models import Section
|
#from src.models import Section
|
||||||
import re
|
import re
|
||||||
|
|
||||||
def validate_key(key):
|
def validate_key(key):
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from marshmallow import Schema, fields, post_load, ValidationError
|
|||||||
from src import clogger
|
from src import clogger
|
||||||
import json
|
import json
|
||||||
import flask
|
import flask
|
||||||
from src.models import Section
|
#from src.sections.model import Section
|
||||||
import re
|
import re
|
||||||
|
|
||||||
def validate_image(k, img):
|
def validate_image(k, img):
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ class FullSectionSchema(Schema):
|
|||||||
foreign_name=fields.String()
|
foreign_name=fields.String()
|
||||||
group=fields.String(required=False,allow_none=True )
|
group=fields.String(required=False,allow_none=True )
|
||||||
organization=fields.Nested(OrganizationCompSchema)
|
organization=fields.Nested(OrganizationCompSchema)
|
||||||
|
organization_id = fields.Integer()
|
||||||
updated_at = fields.DateTime()
|
updated_at = fields.DateTime()
|
||||||
created_at = fields.DateTime()
|
created_at = fields.DateTime()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user