loaddump_Articles
This commit is contained in:
@@ -4,7 +4,7 @@ from sqlalchemy.orm import relationship
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from src.database import Base
|
from src.database import Base
|
||||||
from src.database import db_session
|
from src.database import db_session
|
||||||
from marshmallow import Schema, fields
|
from marshmallow import Schema, fields, post_load
|
||||||
from src.sections.model import Section
|
from src.sections.model import Section
|
||||||
|
|
||||||
#import json
|
#import json
|
||||||
@@ -72,7 +72,14 @@ class Article(Base):
|
|||||||
def dict(self):
|
def dict(self):
|
||||||
return {"id": str(int(self.id)), "title": self.title, "text": self.text, "author": self.author, "section":self.section, "sourcetype": self.sourcetype, "last_fetched": self.last_fetched, "first_fetched": self.first_fetched, "published_date": self.published_date, "date": self.date,"image": self.image, "url": self.url}
|
return {"id": str(int(self.id)), "title": self.title, "text": self.text, "author": self.author, "section":self.section, "sourcetype": self.sourcetype, "last_fetched": self.last_fetched, "first_fetched": self.first_fetched, "published_date": self.published_date, "date": self.date,"image": self.image, "url": self.url}
|
||||||
|
|
||||||
|
@classmethod
def deserialize(cls, data):
    """Build a new instance from a mapping of column values.

    For every column of the mapped table, the value stored in ``data``
    under that column's key is copied onto the new instance. Columns
    that are missing from ``data`` are left untouched, and keys of
    ``data`` that do not name a column are ignored.

    :param data: mapping from column key to the value to assign.
    :returns: the newly created, populated instance. Nothing is added
        to a database session here.
    """
    # Instantiate ``cls`` instead of a hard-coded class so that a
    # subclass calling this gets an instance of its own type.
    obj = cls()
    for column in cls.__table__.columns:
        # ``dict.has_key`` only exists on Python 2; ``in`` works on
        # Python 2 and 3 alike and on any mapping type.
        if column.key in data:
            setattr(obj, column.key, data[column.key])
    return obj
|
||||||
|
|
||||||
# @classmethod
|
# @classmethod
|
||||||
# def sections(self):
|
# def sections(self):
|
||||||
# sects=db_session.query(Article.section).distinct().all()
|
# sects=db_session.query(Article.section).distinct().all()
|
||||||
@@ -124,6 +131,29 @@ class Article(Base):
|
|||||||
|
|
||||||
#json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None)
|
#json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None)
|
||||||
|
|
||||||
|
class FullArticleSchema(Schema):
    """Marshmallow schema listing the article fields used for dump/load.

    Loading through this schema ends in ``make_article``, which hands the
    loaded values to ``Article.deserialize``.
    """

    # Identity and relations
    id = fields.Integer()
    parent_id = fields.Integer(allow_none=True)
    url = fields.String()
    is_primary = fields.Boolean(allow_none=True)

    # Content fingerprints
    fingerprint = fields.String()
    hash = fields.String(allow_none=True)

    # Timestamps
    last_fetched = fields.DateTime(allow_none=True)
    first_fetched = fields.DateTime(allow_none=True)
    published_date = fields.DateTime()
    date = fields.DateTime(allow_none=True)

    # Content and metadata
    text = fields.String()
    title = fields.String()
    author = fields.String(allow_none=True)
    section_id = fields.Integer()
    sourcetype = fields.String()
    image = fields.String(allow_none=True)

    @post_load
    def make_article(self, data):
        """Turn the loaded field values into an ``Article`` instance."""
        article = Article.deserialize(data)
        return article
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ArticleSchema(Schema):
|
class ArticleSchema(Schema):
|
||||||
id=fields.Integer()
|
id=fields.Integer()
|
||||||
text=fields.String()
|
text=fields.String()
|
||||||
|
|||||||
@@ -35,19 +35,19 @@ def doc():
|
|||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
@compiler_pages.route("/initdb")
|
#@compiler_pages.route("/initdb")
|
||||||
@compiler_pages.route("/initdb.json")
|
#@compiler_pages.route("/initdb.json")
|
||||||
def initdb_json():
|
#def initdb_json():
|
||||||
init_db() # initialisiere Datenbank
|
# init_db() # initialisiere Datenbank
|
||||||
status="Datenbank Neu initialisiert"
|
# status="Datenbank Neu initialisiert"
|
||||||
return jsonify(status=status)
|
# return jsonify(status=status)
|
||||||
|
|
||||||
@compiler_pages.route("/initdb2")
|
#@compiler_pages.route("/initdb2")
|
||||||
@compiler_pages.route("/initdb2.json")
|
#@compiler_pages.route("/initdb2.json")
|
||||||
def initdb_json2():
|
#def initdb_json2():
|
||||||
init_db2() # initialisiere Datenbank
|
# init_db2() # initialisiere Datenbank
|
||||||
status="Datenbank Neu initialisiert"
|
# status="Datenbank Neu initialisiert"
|
||||||
return jsonify(status=status)
|
# return jsonify(status=status)
|
||||||
|
|
||||||
@compiler_pages.route("/start")
|
@compiler_pages.route("/start")
|
||||||
@compiler_pages.route("/start.json")
|
@compiler_pages.route("/start.json")
|
||||||
@@ -118,7 +118,6 @@ def urls_que_lst():
|
|||||||
@compiler_pages.route("/urls/<int:id>/test")
|
@compiler_pages.route("/urls/<int:id>/test")
|
||||||
@compiler_pages.route("/urls/<int:id>/test.json")
|
@compiler_pages.route("/urls/<int:id>/test.json")
|
||||||
def urls_test_json(id):
|
def urls_test_json(id):
|
||||||
# Lade Alle Urls
|
|
||||||
return jsonify(result=urls_test(id))
|
return jsonify(result=urls_test(id))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -11,11 +11,15 @@ if cfg.get("db_path")==None or cfg.get("db_path").strip()=="":
|
|||||||
else:
|
else:
|
||||||
db_path=cfg.get("db_path")
|
db_path=cfg.get("db_path")
|
||||||
|
|
||||||
db_mainfile=cfg.get("db_mainfile")
|
db_main_type = cfg.get("db_main_type")
|
||||||
|
if db_main_type == None or db_main_type.strip()=="":
|
||||||
|
db_main_type="sqlite"
|
||||||
|
|
||||||
|
db_mainfile=cfg.get("db_main_file")
|
||||||
if db_mainfile == None or db_mainfile.strip()=="":
|
if db_mainfile == None or db_mainfile.strip()=="":
|
||||||
db_mainfile="../srctest.db"
|
db_mainfile="../srctest.db"
|
||||||
|
|
||||||
db_urlfile=cfg.get("db_mainfile")
|
db_urlfile=cfg.get("db_url_file")
|
||||||
if db_urlfile == None or db_urlfile.strip()=="":
|
if db_urlfile == None or db_urlfile.strip()=="":
|
||||||
db_urlfile="../srctest_cu.db"
|
db_urlfile="../srctest_cu.db"
|
||||||
|
|
||||||
@@ -26,6 +30,7 @@ db_session = scoped_session(sessionmaker(autocommit=False,
|
|||||||
autoflush=False,
|
autoflush=False,
|
||||||
bind=engine))
|
bind=engine))
|
||||||
|
|
||||||
|
|
||||||
engine2 = create_engine('sqlite:///'+ path.join(db_path,db_urlfile), convert_unicode=True)
|
engine2 = create_engine('sqlite:///'+ path.join(db_path,db_urlfile), convert_unicode=True)
|
||||||
|
|
||||||
db_session2 = scoped_session(sessionmaker(autocommit=False,
|
db_session2 = scoped_session(sessionmaker(autocommit=False,
|
||||||
|
|||||||
23
dump_articles.py
Normal file
23
dump_articles.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
import sys
|
||||||
|
import json
|
||||||
|
from src.articles.model import Article, FullArticleSchema
|
||||||
|
from src.sections.model import Section,FullSectionSchema
|
||||||
|
# A target file name is required. The original raised the undefined name
# ``Error`` here, which surfaced as a NameError; exit with the message
# instead.
if len(sys.argv) <= 1:
    sys.exit("Kein Zieldateiname angegeben")


def dump_article(a):
    """Return the result of dumping article ``a`` with FullArticleSchema."""
    return FullArticleSchema().dump(a)


def dump_section(s):
    """Return the result of dumping section ``s`` with FullSectionSchema."""
    return FullSectionSchema().dump(s)


# Build real lists: a lazy ``map`` object (Python 3) cannot be serialized
# by ``json.dump``.
data = {
    "articles": [dump_article(a) for a in Article.query.all()],
    "sections": [dump_section(s) for s in Section.query.all()],
}

# ``with`` closes the file even if serialization fails; the target is
# only opened (and truncated) once the data has been collected.
with open(sys.argv[1], "w") as outfile:
    json.dump(data, outfile)
|
||||||
57
load_articles.py
Normal file
57
load_articles.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
import sys
|
||||||
|
import json
|
||||||
|
from src.articles.model import Article, FullArticleSchema
|
||||||
|
from src.database import db_session
|
||||||
|
from sqlalchemy.exc import IntegrityError
|
||||||
|
|
||||||
|
# ``load_section`` needs this schema; the original script never imported it.
from src.sections.model import FullSectionSchema

# A source file name is required. The original raised the undefined name
# ``Error`` here, which surfaced as a NameError; exit with the message
# instead.
if len(sys.argv) <= 1:
    sys.exit("Kein Zieldateiname angegeben")


def load_article(a):
    """Load one dumped article entry through FullArticleSchema.

    Only ``a[0]`` is passed to the schema.
    NOTE(review): presumably each entry is the (data, errors) pair written
    by the dump script -- confirm against the dump format.
    """
    return FullArticleSchema().load(a[0]).data


def load_section(s):
    """Load one dumped section entry through FullSectionSchema.

    Only ``s[0]`` is passed to the schema (see ``load_article``).
    """
    return FullSectionSchema().load(s[0]).data


def _add_and_commit(obj):
    """Add ``obj`` to the session and commit it on its own.

    An IntegrityError is swallowed after a rollback so that one rejected
    row does not abort the whole import. Any other error also rolls the
    session back, but is re-raised.
    """
    try:
        db_session.add(obj)
        db_session.commit()
    except IntegrityError:
        db_session.rollback()
    except Exception:
        db_session.rollback()
        raise


# ``with`` closes the input file even if parsing fails.
with open(sys.argv[1], "r") as infile:
    data = json.load(infile)

# Accepted layouts: a dict with optional "articles" / "sections" lists, or
# a bare list of articles. Missing parts default to empty lists so the
# loops below never receive ``None``.
if isinstance(data, dict):
    articles = data.get("articles") or []
    sections = data.get("sections") or []
else:
    articles = data
    sections = []

# Deserialize everything first, then insert row by row.
articles = [load_article(a) for a in articles]
sections = [load_section(s) for s in sections]

for a in articles:
    _add_and_commit(a)

for s in sections:
    _add_and_commit(s)
|
||||||
@@ -3,7 +3,7 @@ from sqlalchemy.orm import relationship
|
|||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from src.database import Base,db_session
|
from src.database import Base,db_session
|
||||||
from marshmallow import Schema, fields
|
from marshmallow import Schema, fields, post_load
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import flask
|
import flask
|
||||||
@@ -31,7 +31,14 @@ class Section(Base):
|
|||||||
if t == None or t.strip()=="":
|
if t == None or t.strip()=="":
|
||||||
t=self.foreign_name
|
t=self.foreign_name
|
||||||
return t
|
return t
|
||||||
|
@classmethod
def deserialize(cls, data):
    """Build a new instance from a mapping of column values.

    For every column of the mapped table, the value stored in ``data``
    under that column's key is copied onto the new instance. Columns
    that are missing from ``data`` are left untouched, and keys of
    ``data`` that do not name a column are ignored.

    :param data: mapping from column key to the value to assign.
    :returns: the newly created, populated instance. Nothing is added
        to a database session here.
    """
    # Instantiate ``cls`` instead of a hard-coded class so that a
    # subclass calling this gets an instance of its own type.
    obj = cls()
    for column in cls.__table__.columns:
        # ``dict.has_key`` only exists on Python 2; ``in`` works on
        # Python 2 and 3 alike and on any mapping type.
        if column.key in data:
            setattr(obj, column.key, data[column.key])
    return obj
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def find_or_create(cls, fname):
|
def find_or_create(cls, fname):
|
||||||
s=Section.query.filter(Section.foreign_name==fname).first()
|
s=Section.query.filter(Section.foreign_name==fname).first()
|
||||||
@@ -44,6 +51,20 @@ class Section(Base):
|
|||||||
db_session.commit()
|
db_session.commit()
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
class FullSectionSchema(Schema):
    """Marshmallow schema listing the section fields used for dump/load.

    Loading through this schema ends in ``make_section``, which hands the
    loaded values to ``Section.deserialize``.
    """

    id = fields.Integer()
    url = fields.String()
    crawlurl = fields.Integer()
    #published_date=fields.DateTime()
    #date=fields.DateTime(allow_none=True)
    name = fields.String()
    foreign_name = fields.String()
    group = fields.String()

    @post_load
    def make_section(self, data):
        """Turn the loaded field values into a ``Section`` instance."""
        # The original called ``Article.deserialize`` here, which would
        # build the wrong model for section data.
        return Section.deserialize(data)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ArticleCompSchema(Schema):
|
class ArticleCompSchema(Schema):
|
||||||
id=fields.Integer()
|
id=fields.Integer()
|
||||||
|
|||||||
Reference in New Issue
Block a user