loaddump_Articles

This commit is contained in:
Andreas Stephanides
2017-02-08 07:14:36 +01:00
parent e1c86cdab1
commit 0c1b586962
6 changed files with 154 additions and 19 deletions

View File

@@ -4,7 +4,7 @@ from sqlalchemy.orm import relationship
from datetime import datetime from datetime import datetime
from src.database import Base from src.database import Base
from src.database import db_session from src.database import db_session
from marshmallow import Schema, fields from marshmallow import Schema, fields, post_load
from src.sections.model import Section from src.sections.model import Section
#import json #import json
@@ -72,7 +72,14 @@ class Article(Base):
def dict(self): def dict(self):
return {"id": str(int(self.id)), "title": self.title, "text": self.text, "author": self.author, "section":self.section, "sourcetype": self.sourcetype, "last_fetched": self.last_fetched, "first_fetched": self.first_fetched, "published_date": self.published_date, "date": self.date,"image": self.image, "url": self.url} return {"id": str(int(self.id)), "title": self.title, "text": self.text, "author": self.author, "section":self.section, "sourcetype": self.sourcetype, "last_fetched": self.last_fetched, "first_fetched": self.first_fetched, "published_date": self.published_date, "date": self.date,"image": self.image, "url": self.url}
@classmethod
def deserialize(cls, data):
    """Build a new instance from a plain mapping of column values.

    Only keys that match a column of the mapped table are copied onto the
    new object; any other key in ``data`` is ignored, and columns that are
    missing from ``data`` are not assigned.

    Args:
        data: Mapping of column name to value.

    Returns:
        The newly constructed instance. This method does not add it to a
        session.
    """
    # Instantiate through ``cls`` so the classmethod also works for subclasses.
    a = cls()
    for c in cls.__table__.columns:
        # ``dict.has_key`` is gone on Python 3; ``in`` works on 2 and 3.
        if c.key in data:
            setattr(a, c.key, data[c.key])
    return a
# @classmethod # @classmethod
# def sections(self): # def sections(self):
# sects=db_session.query(Article.section).distinct().all() # sects=db_session.query(Article.section).distinct().all()
@@ -124,6 +131,29 @@ class Article(Base):
#json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None) #json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None)
class FullArticleSchema(Schema):
    """Marshmallow schema declaring the Article fields used for dump/load.

    Loading through this schema returns an ``Article`` instance (built by
    ``make_article``) instead of a plain dict.
    """

    id = fields.Integer()
    parent_id = fields.Integer(allow_none=True)
    url = fields.String()
    is_primary = fields.Boolean(allow_none=True)
    fingerprint = fields.String()
    hash = fields.String(allow_none=True)
    last_fetched = fields.DateTime(allow_none=True)
    first_fetched = fields.DateTime(allow_none=True)
    published_date = fields.DateTime()
    date = fields.DateTime(allow_none=True)
    text = fields.String()
    title = fields.String()
    author = fields.String(allow_none=True)
    section_id = fields.Integer()
    sourcetype = fields.String()
    image = fields.String(allow_none=True)

    @post_load
    def make_article(self, data, **kwargs):
        """Turn the deserialized field dict into an ``Article`` instance.

        ``**kwargs`` absorbs the extra keyword arguments (``many``,
        ``partial``) that marshmallow 3 passes to ``post_load`` hooks;
        marshmallow 2 calls the hook with ``data`` only.
        """
        return Article.deserialize(data)
class ArticleSchema(Schema): class ArticleSchema(Schema):
id=fields.Integer() id=fields.Integer()
text=fields.String() text=fields.String()

View File

@@ -35,19 +35,19 @@ def doc():
# #
@compiler_pages.route("/initdb") #@compiler_pages.route("/initdb")
@compiler_pages.route("/initdb.json") #@compiler_pages.route("/initdb.json")
def initdb_json(): #def initdb_json():
init_db() # initialisiere Datenbank # init_db() # initialisiere Datenbank
status="Datenbank Neu initialisiert" # status="Datenbank Neu initialisiert"
return jsonify(status=status) # return jsonify(status=status)
@compiler_pages.route("/initdb2") #@compiler_pages.route("/initdb2")
@compiler_pages.route("/initdb2.json") #@compiler_pages.route("/initdb2.json")
def initdb_json2(): #def initdb_json2():
init_db2() # initialisiere Datenbank # init_db2() # initialisiere Datenbank
status="Datenbank Neu initialisiert" # status="Datenbank Neu initialisiert"
return jsonify(status=status) # return jsonify(status=status)
@compiler_pages.route("/start") @compiler_pages.route("/start")
@compiler_pages.route("/start.json") @compiler_pages.route("/start.json")
@@ -118,7 +118,6 @@ def urls_que_lst():
@compiler_pages.route("/urls/<int:id>/test") @compiler_pages.route("/urls/<int:id>/test")
@compiler_pages.route("/urls/<int:id>/test.json") @compiler_pages.route("/urls/<int:id>/test.json")
def urls_test_json(id): def urls_test_json(id):
# Lade Alle Urls
return jsonify(result=urls_test(id)) return jsonify(result=urls_test(id))

View File

@@ -11,11 +11,15 @@ if cfg.get("db_path")==None or cfg.get("db_path").strip()=="":
else: else:
db_path=cfg.get("db_path") db_path=cfg.get("db_path")
db_mainfile=cfg.get("db_mainfile") db_main_type = cfg.get("db_main_type")
if db_main_type == None or db_main_type.strip()=="":
db_main_type="sqlite"
db_mainfile=cfg.get("db_main_file")
if db_mainfile == None or db_mainfile.strip()=="": if db_mainfile == None or db_mainfile.strip()=="":
db_mainfile="../srctest.db" db_mainfile="../srctest.db"
db_urlfile=cfg.get("db_mainfile") db_urlfile=cfg.get("db_url_file")
if db_urlfile == None or db_urlfile.strip()=="": if db_urlfile == None or db_urlfile.strip()=="":
db_urlfile="../srctest_cu.db" db_urlfile="../srctest_cu.db"
@@ -26,6 +30,7 @@ db_session = scoped_session(sessionmaker(autocommit=False,
autoflush=False, autoflush=False,
bind=engine)) bind=engine))
engine2 = create_engine('sqlite:///'+ path.join(db_path,db_urlfile), convert_unicode=True) engine2 = create_engine('sqlite:///'+ path.join(db_path,db_urlfile), convert_unicode=True)
db_session2 = scoped_session(sessionmaker(autocommit=False, db_session2 = scoped_session(sessionmaker(autocommit=False,

23
dump_articles.py Normal file
View File

@@ -0,0 +1,23 @@
import sys
import json
from src.articles.model import Article, FullArticleSchema
from src.sections.model import Section,FullSectionSchema
if len(sys.argv) <= 1:
    # The original raised the undefined name ``Error`` (a NameError);
    # exit with the intended message instead.
    sys.exit("Kein Zieldateiname angegeben")


def dump_article(a):
    """Serialize one Article with ``FullArticleSchema`` and return the dump result."""
    return FullArticleSchema().dump(a)


def dump_section(s):
    """Serialize one Section with ``FullSectionSchema`` and return the dump result."""
    return FullSectionSchema().dump(s)


# Build real lists: on Python 3 ``map`` returns an iterator, which
# ``json.dump`` cannot serialize.
data = {
    "articles": [dump_article(a) for a in Article.query.all()],
    "sections": [dump_section(s) for s in Section.query.all()],
}

# ``with`` closes the target file even if serialization fails.
with open(sys.argv[1], "w+") as out_file:
    json.dump(data, out_file)

57
load_articles.py Normal file
View File

@@ -0,0 +1,57 @@
import json
import sys

from sqlalchemy.exc import IntegrityError

from src.articles.model import Article, FullArticleSchema
from src.database import db_session
# ``FullSectionSchema`` is used by ``load_section`` but was never imported.
from src.sections.model import FullSectionSchema

if len(sys.argv) <= 1:
    # The original raised the undefined name ``Error`` (a NameError);
    # exit with the intended message instead.
    sys.exit("Kein Zieldateiname angegeben")


def load_article(a):
    """Deserialize one dumped article entry into an Article.

    ``a[0]`` is passed to the schema: each entry is expected to be a
    sequence whose first element is the field dict (presumably the
    ``[data, errors]`` pair written by dump_articles.py; verify against
    the dump file).
    """
    return FullArticleSchema().load(a[0]).data


def load_section(s):
    """Deserialize one dumped section entry; same entry layout as ``load_article``."""
    return FullSectionSchema().load(s[0]).data


def _add_or_skip(obj):
    """Add ``obj`` to the session and commit, skipping it on IntegrityError.

    The session is rolled back after any failure so the next insert starts
    from a clean state; errors other than IntegrityError are re-raised.
    """
    try:
        db_session.add(obj)
        db_session.commit()
    except IntegrityError:
        # Presumably the row already exists; drop this insert and go on.
        db_session.rollback()
    except Exception:
        db_session.rollback()
        raise


# ``with`` closes the input file even if parsing fails.
with open(sys.argv[1], "r") as in_file:
    data = json.load(in_file)

# Default to empty lists so a dump without one of the keys (or a bare
# list of articles) no longer crashes on ``None``.
articles = []
sections = []
if isinstance(data, dict):
    articles = data.get("articles") or []
    sections = data.get("sections") or []
else:
    # A top-level list is treated as a list of article entries.
    articles = data

articles = [load_article(a) for a in articles]
sections = [load_section(s) for s in sections]

for a in articles:
    _add_or_skip(a)

for s in sections:
    _add_or_skip(s)

View File

@@ -3,7 +3,7 @@ from sqlalchemy.orm import relationship
from datetime import datetime from datetime import datetime
from src.database import Base,db_session from src.database import Base,db_session
from marshmallow import Schema, fields from marshmallow import Schema, fields, post_load
import json import json
import flask import flask
@@ -31,7 +31,14 @@ class Section(Base):
if t == None or t.strip()=="": if t == None or t.strip()=="":
t=self.foreign_name t=self.foreign_name
return t return t
@classmethod
def deserialize(cls, data):
    """Build a new instance from a plain mapping of column values.

    Only keys that match a column of the mapped table are copied onto the
    new object; any other key in ``data`` is ignored, and columns that are
    missing from ``data`` are not assigned.

    Args:
        data: Mapping of column name to value.

    Returns:
        The newly constructed instance. This method does not add it to a
        session.
    """
    # Instantiate through ``cls`` so the classmethod also works for subclasses.
    a = cls()
    for c in cls.__table__.columns:
        # ``dict.has_key`` is gone on Python 3; ``in`` works on 2 and 3.
        if c.key in data:
            setattr(a, c.key, data[c.key])
    return a
@classmethod @classmethod
def find_or_create(cls, fname): def find_or_create(cls, fname):
s=Section.query.filter(Section.foreign_name==fname).first() s=Section.query.filter(Section.foreign_name==fname).first()
@@ -44,6 +51,20 @@ class Section(Base):
db_session.commit() db_session.commit()
return s return s
class FullSectionSchema(Schema):
    """Marshmallow schema declaring the Section fields used for dump/load.

    Loading through this schema returns a ``Section`` instance (built by
    ``make_section``) instead of a plain dict.
    """

    id = fields.Integer()
    url = fields.String()
    # NOTE(review): declared as Integer although the name suggests a URL;
    # confirm against the Section column type.
    crawlurl = fields.Integer()
    #published_date=fields.DateTime()
    #date=fields.DateTime(allow_none=True)
    name = fields.String()
    foreign_name = fields.String()
    group = fields.String()

    @post_load
    def make_section(self, data, **kwargs):
        """Turn the deserialized field dict into a ``Section`` instance.

        The original returned ``Article.deserialize(data)``, which is the
        wrong model for this schema. ``**kwargs`` absorbs the extra
        keyword arguments (``many``, ``partial``) that marshmallow 3
        passes to ``post_load`` hooks; marshmallow 2 calls the hook with
        ``data`` only.
        """
        return Section.deserialize(data)
class ArticleCompSchema(Schema): class ArticleCompSchema(Schema):
id=fields.Integer() id=fields.Integer()