Files
fachschaften/load_urls.py
2017-02-17 10:02:20 +01:00

45 lines
1.1 KiB
Python

from src.compiler.models import CrawlCache, CrawlCacheSchema
from src.compiler.models import CrawlUrl, CrawlUrlSchema
import sys
import json
from src.database import db_session2
from sqlalchemy.exc import IntegrityError
# Abort early when no input file name was passed on the command line.
# The previous code raised the undefined name ``Error``, which surfaced as a
# NameError instead of the intended message ("no target file name given").
# sys.exit() with a string prints it to stderr and exits with status 1.
if len(sys.argv) <= 1:
    sys.exit("Kein Zieldateiname angegeben")
def insert_array(array, cls, session):
    """Add every ``cls`` instance in ``array`` to ``session``, one commit each.

    Items that are not instances of ``cls`` are not inserted; their type is
    printed instead. Each accepted item is added and committed on its own, so
    an ``IntegrityError`` only rolls back that single item and the loop
    continues with the next one.

    Args:
        array: Iterable of candidate objects.
        cls: Class that every inserted object must be an instance of.
        session: Object providing ``add``, ``commit`` and ``rollback``.

    Returns:
        None.
    """
    for item in array:
        if not isinstance(item, cls):
            # Report what was skipped. The call form with a single argument
            # prints the same text on Python 2 and Python 3.
            print(type(item))
            continue
        try:
            session.add(item)
            session.commit()
        except IntegrityError:
            # Best effort: drop the item that violated a constraint and keep
            # going with the remaining ones.
            session.rollback()
def load_crawlurl(a):
    """Deserialize one entry of the dump's "crawlurls" list.

    Args:
        a: Indexable entry; only its first element is handed to
            ``CrawlUrl.deserialize``.

    Returns:
        The result of ``CrawlUrl.deserialize(a[0])``.
    """
    # Echo the raw entry before deserializing it. The call form with a single
    # argument prints the same text on Python 2 and Python 3.
    print(a)
    return CrawlUrl.deserialize(a[0])
def load_crawlcache(a):
    """Deserialize one entry of the dump's "crawlcache" list.

    Args:
        a: Indexable entry; only its first element is handed to
            ``CrawlCache.deserialize``.

    Returns:
        The result of ``CrawlCache.deserialize(a[0])``.
    """
    deserialize = CrawlCache.deserialize
    return deserialize(a[0])
# Read the JSON dump named by the first command-line argument. The ``with``
# block closes the file even when json.load() raises, and the handle no longer
# shadows the builtin ``file``.
with open(sys.argv[1], "r") as dump_file:
    data = json.load(dump_file)

# Each section is optional. Entries are deserialized eagerly (all of them
# before the first insert) and then written through db_session2.
# ``in`` replaces dict.has_key(), which no longer exists on Python 3.
if "crawlurls" in data:
    crawlurls = [load_crawlurl(entry) for entry in data["crawlurls"]]
    insert_array(crawlurls, CrawlUrl, db_session2)

if "crawlcache" in data:
    crawlcache = [load_crawlcache(entry) for entry in data["crawlcache"]]
    insert_array(crawlcache, CrawlCache, db_session2)