From 0c1b586962779062df82882a11c759af6fc91f9d Mon Sep 17 00:00:00 2001 From: Andreas Stephanides Date: Wed, 8 Feb 2017 07:14:36 +0100 Subject: [PATCH] loaddump_Articles --- articles/model.py | 34 ++++++++++++++++++++++++++-- compiler/views.py | 25 ++++++++++----------- database.py | 9 ++++++-- dump_articles.py | 23 +++++++++++++++++++ load_articles.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++ sections/model.py | 25 +++++++++++++++++++-- 6 files changed, 154 insertions(+), 19 deletions(-) create mode 100644 dump_articles.py create mode 100644 load_articles.py diff --git a/articles/model.py b/articles/model.py index 26af34f..6f7f21a 100644 --- a/articles/model.py +++ b/articles/model.py @@ -4,7 +4,7 @@ from sqlalchemy.orm import relationship from datetime import datetime from src.database import Base from src.database import db_session -from marshmallow import Schema, fields +from marshmallow import Schema, fields, post_load from src.sections.model import Section #import json @@ -72,7 +72,14 @@ class Article(Base): def dict(self): return {"id": str(int(self.id)), "title": self.title, "text": self.text, "author": self.author, "section":self.section, "sourcetype": self.sourcetype, "last_fetched": self.last_fetched, "first_fetched": self.first_fetched, "published_date": self.published_date, "date": self.date,"image": self.image, "url": self.url} - + @classmethod + def deserialize(cls,data): + a=Article() + for c in Article.__table__.columns: + if data.has_key(c.key): + setattr(a, c.key,data[c.key]) + return a + # @classmethod # def sections(self): # sects=db_session.query(Article.section).distinct().all() @@ -124,6 +131,29 @@ class Article(Base): #json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None) +class FullArticleSchema(Schema): + id=fields.Integer() + parent_id=fields.Integer(allow_none=True) + url =fields.String() + is_primary=fields.Boolean(allow_none=True) + fingerprint=fields.String() + 
hash=fields.String(allow_none=True) + last_fetched=fields.DateTime(allow_none=True) + first_fetched=fields.DateTime(allow_none=True) + published_date=fields.DateTime() + date=fields.DateTime(allow_none=True) + text=fields.String() + title=fields.String() + author=fields.String(allow_none=True) + section_id=fields.Integer() + sourcetype =fields.String() + image =fields.String(allow_none=True) + @post_load + def make_article(self, data): + return Article.deserialize(data) + + + class ArticleSchema(Schema): id=fields.Integer() text=fields.String() diff --git a/compiler/views.py b/compiler/views.py index b9770df..ad3eaac 100644 --- a/compiler/views.py +++ b/compiler/views.py @@ -35,19 +35,19 @@ def doc(): # -@compiler_pages.route("/initdb") -@compiler_pages.route("/initdb.json") -def initdb_json(): - init_db() # initialisiere Datenbank - status="Datenbank Neu initialisiert" - return jsonify(status=status) +#@compiler_pages.route("/initdb") +#@compiler_pages.route("/initdb.json") +#def initdb_json(): +# init_db() # initialisiere Datenbank +# status="Datenbank Neu initialisiert" +# return jsonify(status=status) -@compiler_pages.route("/initdb2") -@compiler_pages.route("/initdb2.json") -def initdb_json2(): - init_db2() # initialisiere Datenbank - status="Datenbank Neu initialisiert" - return jsonify(status=status) +#@compiler_pages.route("/initdb2") +#@compiler_pages.route("/initdb2.json") +#def initdb_json2(): +# init_db2() # initialisiere Datenbank +# status="Datenbank Neu initialisiert" +# return jsonify(status=status) @compiler_pages.route("/start") @compiler_pages.route("/start.json") @@ -118,7 +118,6 @@ def urls_que_lst(): @compiler_pages.route("/urls//test") @compiler_pages.route("/urls//test.json") def urls_test_json(id): - # Lade Alle Urls return jsonify(result=urls_test(id)) diff --git a/database.py b/database.py index bae434d..c9b0ecc 100644 --- a/database.py +++ b/database.py @@ -11,11 +11,15 @@ if cfg.get("db_path")==None or cfg.get("db_path").strip()=="": 
 else:
     db_path=cfg.get("db_path")

-db_mainfile=cfg.get("db_mainfile")
+db_main_type = cfg.get("db_main_type")
+if db_main_type == None or db_main_type.strip()=="":
+    db_main_type="sqlite"
+
+db_mainfile=cfg.get("db_main_file")
 if db_mainfile == None or db_mainfile.strip()=="":
     db_mainfile="../srctest.db"

-db_urlfile=cfg.get("db_mainfile")
+db_urlfile=cfg.get("db_url_file")
 if db_urlfile == None or db_urlfile.strip()=="":
     db_urlfile="../srctest_cu.db"

@@ -26,6 +30,7 @@
 db_session = scoped_session(sessionmaker(autocommit=False,
                                          autoflush=False,
                                          bind=engine))

+
 engine2 = create_engine('sqlite:///'+ path.join(db_path,db_urlfile), convert_unicode=True)
 db_session2 = scoped_session(sessionmaker(autocommit=False,
diff --git a/dump_articles.py b/dump_articles.py
new file mode 100644
index 0000000..d3bfab1
--- /dev/null
+++ b/dump_articles.py
@@ -0,0 +1,23 @@
+import sys
+import json
+from src.articles.model import Article, FullArticleSchema
+from src.sections.model import Section,FullSectionSchema
+if len(sys.argv) <= 1:
+    raise SystemExit("Kein Zieldateiname angegeben")
+
+
+def dump_article(a):
+    return FullArticleSchema().dump(a)
+
+def dump_section(s):
+    return FullSectionSchema().dump(s)
+
+
+
+file = open(sys.argv[1], "w+")
+data={}
+data["articles"] = map(dump_article,Article.query.all())
+data["sections"] = map(dump_section,Section.query.all())
+json.dump (data, file)
+
+file.close()
diff --git a/load_articles.py b/load_articles.py
new file mode 100644
index 0000000..9b9977c
--- /dev/null
+++ b/load_articles.py
@@ -0,0 +1,57 @@
+import sys
+import json
+from src.articles.model import Article, FullArticleSchema
+from src.sections.model import Section, FullSectionSchema
+from src.database import db_session
+from sqlalchemy.exc import IntegrityError
+if len(sys.argv) <= 1:
+    raise SystemExit("Kein Zieldateiname angegeben")
+
+
+def load_article(a):
+    return FullArticleSchema().load(a[0]).data
+
+def load_section(s):
+    return FullSectionSchema().load(s[0]).data
+
+
+
+file = open(sys.argv[1], "r")
+
+data=json.load(file)
+articles=None
+sections=None
+organizations=None
+
+if isinstance(data,dict):
+    if data.has_key("articles"):
+        articles=data["articles"]
+    if data.has_key("sections"):
+        sections=data["sections"]
+
+else:
+    articles=data
+
+
+articles= map (load_article, articles) if articles != None else []
+sections=map(load_section,sections) if sections != None else []
+
+for a in articles:
+    try:
+        db_session.add(a)
+        db_session.commit()
+    except IntegrityError:
+        db_session.rollback()
+    finally:
+        db_session.rollback()
+
+for s in sections:
+    try:
+        db_session.add(s)
+        db_session.commit()
+    except IntegrityError:
+        db_session.rollback()
+    finally:
+        db_session.rollback()
+
+file.close()
diff --git a/sections/model.py b/sections/model.py
index 1f977d5..1f3b7a3 100644
--- a/sections/model.py
+++ b/sections/model.py
@@ -3,7 +3,7 @@
 from sqlalchemy.orm import relationship
 from datetime import datetime
 from src.database import Base,db_session
-from marshmallow import Schema, fields
+from marshmallow import Schema, fields, post_load
 import json
 import flask
@@ -31,7 +31,14 @@ class Section(Base):
         if t == None or t.strip()=="":
             t=self.foreign_name
         return t
-
+    @classmethod
+    def deserialize(cls,data):
+        a=Section()
+        for c in Section.__table__.columns:
+            if data.has_key(c.key):
+                setattr(a, c.key,data[c.key])
+        return a
+
     @classmethod
     def find_or_create(cls, fname):
         s=Section.query.filter(Section.foreign_name==fname).first()
@@ -44,6 +51,20 @@
         db_session.commit()
         return s

+class FullSectionSchema(Schema):
+    id=fields.Integer()
+    url =fields.String()
+    crawlurl =fields.Integer()
+    #published_date=fields.DateTime()
+    #date=fields.DateTime(allow_none=True)
+    name=fields.String()
+    foreign_name=fields.String()
+    group=fields.String()
+    @post_load
+    def make_section(self, data):
+        return Section.deserialize(data)
+
+
 class ArticleCompSchema(Schema):
     id=fields.Integer()