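"""Import a JSON dump of crawl data (CrawlUrl and CrawlCache records) into
the database, skipping rows that already exist.

Usage: pass the path of the JSON dump file as the first argument.
"""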

import sys
import json

from sqlalchemy.exc import IntegrityError

from src.compiler.models import CrawlCache, CrawlCacheSchema
from src.compiler.models import CrawlUrl, CrawlUrlSchema
from src.database import db_session2

if len(sys.argv) <= 1:
    sys.exit("No target file name given")


def insert_array(array, cls, session):
    """Add each instance of cls to the session, committing row by row."""
    for s in array:
        if not isinstance(s, cls):
            # Unexpected type in the dump: report it and skip the entry.
            print(type(s))
        else:
            try:
                session.add(s)
                session.commit()
            except IntegrityError:
                # Row already exists (unique constraint violated); skip it.
                session.rollback()


def load_crawlurl(a):
    print(a)
    return CrawlUrl.deserialize(a[0])


def load_crawlcache(a):
    return CrawlCache.deserialize(a[0])

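
# Expected input layout (inferred from the loaders above, not documented in
# the original): each top-level key maps to a list of entries whose first
# element is the serialized record, e.g.
# {"crawlurls": [[<serialized CrawlUrl>], ...], "crawlcache": [...]}.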
with open(sys.argv[1], "r") as f:
    data = json.load(f)

if "crawlurls" in data:
    crawlurls = [load_crawlurl(a) for a in data["crawlurls"]]
    insert_array(crawlurls, CrawlUrl, db_session2)

if "crawlcache" in data:
    crawlcache = [load_crawlcache(a) for a in data["crawlcache"]]
    insert_array(crawlcache, CrawlCache, db_session2)