added parent files
29 _parent_files/dump_articles.py Normal file
@@ -0,0 +1,29 @@
import sys
import json
from src.articles.model import Article, FullArticleSchema
from src.sections.model import Section, FullSectionSchema
from src.organizations.model import Organization, FullOrganizationSchema

if len(sys.argv) <= 1:
    raise ValueError("No target filename given")


def dump_article(a):
    return FullArticleSchema().dump(a)


def dump_section(s):
    return FullSectionSchema().dump(s)


def dump_organization(o):
    return FullOrganizationSchema().dump(o)


# Serialize every article, section and organization into a single JSON file
# named by the first command-line argument.
file = open(sys.argv[1], "w+")
data = {}
data["articles"] = map(dump_article, Article.query.all())
data["sections"] = map(dump_section, Section.query.all())
data["organizations"] = map(dump_organization, Organization.query.all())

json.dump(data, file)

file.close()
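A side note that matters for the load scripts further down, which index each dumped entry with [0]: under marshmallow 2.x, Schema().dump(obj) returns a (data, errors) tuple rather than a plain dict. A minimal, self-contained sketch of that assumed behavior (Toy and ToySchema are hypothetical stand-ins; marshmallow 2.x on Python 2 is assumed):

from marshmallow import Schema, fields

class ToySchema(Schema):        # hypothetical stand-in for FullArticleSchema
    title = fields.Str()

class Toy(object):              # hypothetical stand-in for Article
    def __init__(self, title):
        self.title = title

result = ToySchema().dump(Toy("example"))
print result[0]                 # {'title': 'example'}, the data half of the tuple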
20 _parent_files/dump_urls.py Normal file
@@ -0,0 +1,20 @@
from src.compiler.models import CrawlCache, CrawlCacheSchema
from src.compiler.models import CrawlUrl, CrawlUrlSchema
import sys
import json

if len(sys.argv) <= 1:
    raise ValueError("No target filename given")


def dump_crawlurl(a):
    return CrawlUrlSchema().dump(a)


def dump_crawlcache(a):
    return CrawlCacheSchema().dump(a)


# Write all crawl URLs to the JSON file given as first argument; the crawl
# cache dump is currently disabled.
file = open(sys.argv[1], "w+")
data = {}
data["crawlurls"] = map(dump_crawlurl, CrawlUrl.query.all())
#data["crawlcache"] = map(dump_crawlcache, CrawlCache.query.all())
json.dump(data, file)
file.close()
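One portability caveat, since json.dump receives the raw map(...) results: this relies on Python 2, where map() returns a plain list. A small self-contained sketch of that assumption:

import json

values = map(str, [1, 2, 3])          # a list on Python 2
print json.dumps({"values": values})  # {"values": ["1", "2", "3"]}
# On Python 3, map() returns an iterator and json.dumps would raise a
# TypeError; the call would then need list(map(str, [1, 2, 3])) instead.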
81 _parent_files/load_articles.py Normal file
@@ -0,0 +1,81 @@
import sys
import json
from src.articles.model import Article, FullArticleSchema
from src.sections.model import Section, FullSectionSchema
from src.organizations.model import Organization, FullOrganizationSchema
from src.database import db_session
from sqlalchemy.exc import IntegrityError

if len(sys.argv) <= 1:
    raise ValueError("No target filename given")


def load_article(a):
    return Article.deserialize(a[0])


def load_section(s):
    return Section.deserialize(s[0])


def load_organization(s):
    return Organization.deserialize(s[0])


file = open(sys.argv[1], "r")

data = json.load(file)
articles = None
sections = None
organizations = None

# The dump may be either a dict with per-type lists or a bare list of articles.
if isinstance(data, dict):
    if "articles" in data:
        articles = data["articles"]
    if "sections" in data:
        sections = data["sections"]
    if "organizations" in data:
        organizations = data["organizations"]
else:
    articles = data


articles = map(load_article, articles)
if sections is not None:
    sections = map(load_section, sections)
if organizations is not None:
    organizations = map(load_organization, organizations)


# Insert object by object, committing each one, so that a single
# IntegrityError (e.g. a duplicate key) only skips that object
# instead of aborting the whole import.
if organizations is not None:
    for s in organizations:
        if not isinstance(s, Organization):
            print type(s)
        try:
            db_session.add(s)
            db_session.commit()
        except IntegrityError:
            db_session.rollback()


if sections is not None:
    for s in sections:
        if not isinstance(s, Section):
            print type(s)
        try:
            db_session.add(s)
            db_session.commit()
        except IntegrityError:
            db_session.rollback()


for a in articles:
    try:
        db_session.add(a)
        db_session.commit()
    except IntegrityError:
        db_session.rollback()
    finally:
        # also runs after a successful commit, leaving the session clean
        db_session.rollback()


file.close()
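The per-object commit/rollback pattern above trades speed for resilience: a duplicate key only skips that one row. A self-contained sketch of the same pattern against an in-memory SQLite table (Row, engine and session here are hypothetical stand-ins for the real models and db_session):

from sqlalchemy import create_engine, Column, Integer
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class Row(Base):                      # hypothetical stand-in for Article etc.
    __tablename__ = "rows"
    id = Column(Integer, primary_key=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

for rid in [1, 2, 2, 3]:              # the duplicate 2 raises IntegrityError
    try:
        session.add(Row(id=rid))
        session.commit()
    except IntegrityError:
        session.rollback()            # skip the duplicate, keep importing

print session.query(Row).count()      # 3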
44 _parent_files/load_urls.py Normal file
@@ -0,0 +1,44 @@
from src.compiler.models import CrawlCache, CrawlCacheSchema
from src.compiler.models import CrawlUrl, CrawlUrlSchema
import sys
import json
from src.database import db_session2
from sqlalchemy.exc import IntegrityError

if len(sys.argv) <= 1:
    raise ValueError("No target filename given")


def insert_array(array, cls, session):
    # Insert each element on its own commit; report anything that is not an
    # instance of cls, and roll back on duplicates instead of aborting.
    for s in array:
        if not isinstance(s, cls):
            print type(s)
        else:
            try:
                session.add(s)
                session.commit()
            except IntegrityError:
                session.rollback()


def load_crawlurl(a):
    print a
    return CrawlUrl.deserialize(a[0])


def load_crawlcache(a):
    return CrawlCache.deserialize(a[0])


file = open(sys.argv[1], "r")
data = json.load(file)
file.close()

if "crawlurls" in data:
    crawlurls = data["crawlurls"]
    crawlurls = map(load_crawlurl, crawlurls)
    insert_array(crawlurls, CrawlUrl, db_session2)

if "crawlcache" in data:
    crawlcache = data["crawlcache"]
    crawlcache = map(load_crawlcache, crawlcache)
    insert_array(crawlcache, CrawlCache, db_session2)
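For reference, the intended round trip between the two URL scripts, read as a hedged usage sketch (the file name is an example only): python dump_urls.py crawl_urls.json on the source side, then python load_urls.py crawl_urls.json against the target database. load_crawlurl indexes each entry with a[0] because the dump side stored marshmallow 2.x (data, errors) pairs, which JSON round-trips as two-element lists.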
3 _parent_files/run Executable file
@@ -0,0 +1,3 @@
#!/bin/bash
source bin/activate
python run.py
6 _parent_files/run.py Normal file
@@ -0,0 +1,6 @@
from src import app

import os

#bot.setWebhook("http://localhost:5000/bot")
app.run(debug=True, use_reloader=False)
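A note on use_reloader=False: with debug=True, Flask's reloader would otherwise spawn a child process that re-runs all module-level code on every change, doubling side effects such as starting a bot. A self-contained sketch of the same pattern with a throwaway app (demo is a hypothetical stand-in for src.app):

from flask import Flask

demo = Flask(__name__)          # hypothetical stand-in for src.app

@demo.route("/")
def index():
    return "ok"

# debug=True gives tracebacks in the browser; use_reloader=False keeps Flask
# from re-importing this module in a second process.
demo.run(debug=True, use_reloader=False)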
10 _parent_files/run_bot.py Normal file
@@ -0,0 +1,10 @@
from src.bot import bot
import gevent
import time

#bot.message_loop(run_forever="Listening to Telegram")

bot.message_loop()

#while 1:
# time.sleep(10)
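One behavioral caveat, assuming bot is a telepot bot: message_loop() typically spawns a background polling thread and returns immediately, so as written the script would exit right away. The commented-out lines hint at the two usual remedies (run_forever, or a sleep loop); a minimal sketch of the keep-alive variant (handle is a hypothetical handler):

import time

def handle(msg):                # hypothetical message handler
    print msg

# bot.message_loop(handle)      # would spawn the polling thread, then return
while True:                     # keep the main thread alive so polling continues
    time.sleep(10)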