diff --git a/_parent_files/dump_articles.py b/_parent_files/dump_articles.py
new file mode 100644
index 0000000..4a779b1
--- /dev/null
+++ b/_parent_files/dump_articles.py
@@ -0,0 +1,32 @@
+import sys
+import json
+from src.articles.model import Article, FullArticleSchema
+from src.sections.model import Section, FullSectionSchema
+from src.organizations.model import Organization, FullOrganizationSchema
+
+if len(sys.argv) <= 1:
+    raise ValueError("No target file name given")
+
+
+def dump_article(a):
+    return FullArticleSchema().dump(a)
+
+
+def dump_section(s):
+    return FullSectionSchema().dump(s)
+
+
+def dump_organization(o):
+    return FullOrganizationSchema().dump(o)
+
+
+# Serialize all articles, sections and organizations into one JSON file.
+file = open(sys.argv[1], "w+")
+data = {}
+data["articles"] = map(dump_article, Article.query.all())
+data["sections"] = map(dump_section, Section.query.all())
+data["organizations"] = map(dump_organization, Organization.query.all())
+
+json.dump(data, file)
+
+file.close()
diff --git a/_parent_files/dump_urls.py b/_parent_files/dump_urls.py
new file mode 100644
index 0000000..2dbc3da
--- /dev/null
+++ b/_parent_files/dump_urls.py
@@ -0,0 +1,24 @@
+import sys
+import json
+from src.compiler.models import CrawlCache, CrawlCacheSchema
+from src.compiler.models import CrawlUrl, CrawlUrlSchema
+
+if len(sys.argv) <= 1:
+    raise ValueError("No target file name given")
+
+
+def dump_crawlurl(a):
+    return CrawlUrlSchema().dump(a)
+
+
+def dump_crawlcache(a):
+    return CrawlCacheSchema().dump(a)
+
+
+# Serialize all crawl URLs into one JSON file (the cache dump is disabled).
+file = open(sys.argv[1], "w+")
+data = {}
+data["crawlurls"] = map(dump_crawlurl, CrawlUrl.query.all())
+#data["crawlcache"] = map(dump_crawlcache, CrawlCache.query.all())
+json.dump(data, file)
+file.close()
diff --git a/_parent_files/load_articles.py b/_parent_files/load_articles.py
new file mode 100644
index 0000000..aac541c
--- /dev/null
+++ b/_parent_files/load_articles.py
@@ -0,0 +1,77 @@
+import sys
+import json
+from src.articles.model import Article, FullArticleSchema
+from src.sections.model import Section, FullSectionSchema
+from src.organizations.model import Organization, FullOrganizationSchema
+from src.database import db_session
+from sqlalchemy.exc import IntegrityError
+
+if len(sys.argv) <= 1:
+    raise ValueError("No input file name given")
+
+
+def load_article(a):
+    return Article.deserialize(a[0])
+
+
+def load_section(s):
+    return Section.deserialize(s[0])
+
+
+def load_organization(s):
+    return Organization.deserialize(s[0])
+
+
+file = open(sys.argv[1], "r")
+data = json.load(file)
+file.close()
+
+articles = None
+sections = None
+organizations = None
+
+# The dump is either a dict with separate keys or a bare list of articles.
+if isinstance(data, dict):
+    if data.has_key("articles"):
+        articles = data["articles"]
+    if data.has_key("sections"):
+        sections = data["sections"]
+    if data.has_key("organizations"):
+        organizations = data["organizations"]
+else:
+    articles = data
+
+articles = map(load_article, articles)
+if sections is not None:
+    sections = map(load_section, sections)
+if organizations is not None:
+    organizations = map(load_organization, organizations)
+
+# Insert organizations and sections before the articles that may
+# reference them; duplicate rows are skipped via IntegrityError.
+if organizations is not None:
+    for s in organizations:
+        if not isinstance(s, Organization):
+            print type(s)
+        try:
+            db_session.add(s)
+            db_session.commit()
+        except IntegrityError:
+            db_session.rollback()
+
+if sections is not None:
+    for s in sections:
+        if not isinstance(s, Section):
+            print type(s)
+        try:
+            db_session.add(s)
+            db_session.commit()
+        except IntegrityError:
+            db_session.rollback()
+
+for a in articles:
+    try:
+        db_session.add(a)
+        db_session.commit()
+    except IntegrityError:
+        db_session.rollback()
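Note: load_articles.py repeats the same add/commit/rollback loop for organizations, sections and articles. load_urls.py below already factors this pattern out as insert_array; here is a sketch of reusing that helper, assuming the db_session and model classes imported above (unlike the loops above, insert_array also skips objects of the wrong type instead of only printing them):

    from sqlalchemy.exc import IntegrityError

    def insert_array(array, cls, session):
        # Insert each deserialized object; skip wrong types and roll
        # back on IntegrityError so duplicate rows are ignored.
        for obj in array:
            if not isinstance(obj, cls):
                print type(obj)
            else:
                try:
                    session.add(obj)
                    session.commit()
                except IntegrityError:
                    session.rollback()

    insert_array(organizations or [], Organization, db_session)
    insert_array(sections or [], Section, db_session)
    insert_array(articles, Article, db_session)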
diff --git a/_parent_files/load_urls.py b/_parent_files/load_urls.py
new file mode 100644
index 0000000..1d90dc6
--- /dev/null
+++ b/_parent_files/load_urls.py
@@ -0,0 +1,47 @@
+import sys
+import json
+from src.compiler.models import CrawlCache, CrawlCacheSchema
+from src.compiler.models import CrawlUrl, CrawlUrlSchema
+from src.database import db_session2
+from sqlalchemy.exc import IntegrityError
+
+if len(sys.argv) <= 1:
+    raise ValueError("No input file name given")
+
+
+def insert_array(array, cls, session):
+    # Insert each deserialized object; skip anything of the wrong type
+    # and roll back on IntegrityError so duplicates are ignored.
+    for s in array:
+        if not isinstance(s, cls):
+            print type(s)
+        else:
+            try:
+                session.add(s)
+                session.commit()
+            except IntegrityError:
+                session.rollback()
+
+
+def load_crawlurl(a):
+    print a  # debug output
+    return CrawlUrl.deserialize(a[0])
+
+
+def load_crawlcache(a):
+    return CrawlCache.deserialize(a[0])
+
+
+file = open(sys.argv[1], "r")
+data = json.load(file)
+file.close()
+
+if data.has_key("crawlurls"):
+    crawlurls = data["crawlurls"]
+    crawlurls = map(load_crawlurl, crawlurls)
+    insert_array(crawlurls, CrawlUrl, db_session2)
+
+if data.has_key("crawlcache"):
+    crawlcache = data["crawlcache"]
+    crawlcache = map(load_crawlcache, crawlcache)
+    insert_array(crawlcache, CrawlCache, db_session2)
diff --git a/_parent_files/run b/_parent_files/run
new file mode 100755
index 0000000..10247cb
--- /dev/null
+++ b/_parent_files/run
@@ -0,0 +1,3 @@
+#!/bin/bash
+source bin/activate
+python run.py
diff --git a/_parent_files/run.py b/_parent_files/run.py
new file mode 100644
index 0000000..576753e
--- /dev/null
+++ b/_parent_files/run.py
@@ -0,0 +1,6 @@
+from src import app
+
+#bot.setWebhook("http://localhost:5000/bot")
+# Start the development server; the reloader is disabled so the app
+# runs in a single process.
+app.run(debug=True, use_reloader=False)
diff --git a/_parent_files/run_bot.py b/_parent_files/run_bot.py
new file mode 100644
index 0000000..1ee2167
--- /dev/null
+++ b/_parent_files/run_bot.py
@@ -0,0 +1,10 @@
+from src.bot import bot
+import time
+
+#bot.message_loop(run_forever="Listening to Telegram")
+
+# Start the Telegram message listener.
+bot.message_loop()
+
+#while 1:
+#    time.sleep(10)
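Note: the API in run_bot.py looks like telepot's, where bot.message_loop() only spawns a listener thread and returns immediately; without the run_forever argument (commented out above) or a keep-alive loop, the script would exit right away. A minimal sketch of the keep-alive variant, assuming that telepot-style behaviour:

    from src.bot import bot
    import time

    bot.message_loop()  # spawns the listener thread, then returns
    print "Listening to Telegram"

    while True:
        time.sleep(10)  # keep the main thread alive for the listener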