Files
fachschaften/compiler/models.py
Andreas Stephanides 8955bf17f5 init commit
2017-01-14 12:23:04 +01:00

76 lines
2.0 KiB
Python

from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text
from datetime import datetime
from src.database import Base2
from src.database import db_session2
from mqueues import put_fetch_queue
from marshmallow import Schema,fields,ValidationError
import json
import flask
def add_url(tpe, url):
    """Register a crawl URL and queue it for fetching.

    Looks up the ``CrawlUrl`` matching the given type/URL pair (building a
    new one when none exists), persists it through ``db_session2`` and then
    schedules it.

    Args:
        tpe: Type tag stored on the ``CrawlUrl`` row.
        url: URL to register.
    """
    crawl_url = CrawlUrl.find_or_create(tpe, url)

    session = db_session2
    session.add(crawl_url)
    session.commit()

    # Queue the fetch only once the row has been committed.
    crawl_url.schedule()
class CrawlUrlSchema(Schema):
    """Marshmallow schema describing the serialized form of a ``CrawlUrl``."""

    # Declaration order is kept as-is; it may influence the key order of
    # the dumped dictionary.
    id = fields.Integer()
    tpe = fields.String()
    url = fields.String()
    last_fetched = fields.DateTime()
    fetched = fields.DateTime()
class CrawlUrl(Base2):
    """A URL registered for crawling, tagged with a type.

    Rows are stored in the ``crawlurls`` table.
    """

    __tablename__ = 'crawlurls'

    id = Column(Integer, primary_key=True)
    tpe = Column(String(250))
    url = Column(String(250))
    last_fetched = Column(DateTime)

    def __init__(self, tpe, url):
        """Create an unsaved row for ``url`` with type tag ``tpe``."""
        self.url = url
        self.tpe = tpe

    def fetched(self):
        """Return when this URL was most recently cached.

        Looks for ``CrawlCache`` rows with the same ``url`` and picks the
        one with the latest ``fetched`` timestamp.

        Returns:
            The ``fetched`` datetime of the newest matching cache entry, or
            ``None`` when the URL has never been cached.
        """
        latest = (
            CrawlCache.query
            .filter(CrawlCache.url == self.url)
            # Without an explicit ordering ``first()`` would return an
            # arbitrary entry when a URL has been cached more than once.
            .order_by(CrawlCache.fetched.desc())
            .first()
        )
        return latest.fetched if latest is not None else None

    @classmethod
    def find_or_create(cls, tpe, url):
        """Return the row matching ``tpe`` and ``url``, or a new instance.

        A newly built instance is NOT added to the session here; the caller
        is responsible for persisting it.

        Args:
            tpe: Type tag to match.
            url: URL to match.

        Returns:
            The existing row if one matches both values, otherwise a fresh,
            unsaved instance.
        """
        existing = (
            cls.query
            .filter(cls.url == url)
            .filter(cls.tpe == tpe)
            .first()
        )
        if existing is None:
            existing = cls(tpe, url)
        return existing

    def schedule(self):
        """Put this URL on the fetch queue."""
        # NOTE(review): the leading 0 is presumably a priority/retry value
        # understood by the queue consumer -- confirm against mqueues.
        put_fetch_queue((0, self.tpe, self.url))

    def __json__(self):
        """Return the ``CrawlUrlSchema`` dump of this row."""
        # Element 0 of the dump result is the serialized data
        # (marshmallow 2.x returns a ``(data, errors)`` pair).
        return CrawlUrlSchema().dump(self)[0]
class CrawlCacheSchema(Schema):
    """Marshmallow schema describing the serialized form of a ``CrawlCache``."""

    # Declaration order is kept as-is; it may influence the key order of
    # the dumped dictionary.
    id = fields.Integer()
    raw = fields.String()
    url = fields.String()
    fetched = fields.DateTime()
class CrawlCache(Base2):
    """Raw content fetched for a URL, together with the fetch time.

    Rows are stored in the ``crawlcache`` table.
    """

    __tablename__ = 'crawlcache'

    id = Column(Integer, primary_key=True)
    url = Column(String(250))
    fetched = Column(DateTime)
    raw = Column(Text)

    def __init__(self, url, rw):
        """Create an unsaved cache entry.

        Args:
            url: URL the content was fetched from.
            rw: Raw content to store in the ``raw`` column.
        """
        self.url = url
        self.raw = rw
        # Naive datetime holding the current UTC time.
        self.fetched = datetime.utcnow()

    def __json__(self):
        """Return the ``CrawlCacheSchema`` dump of this row."""
        # Return only the serialized data (element 0 of the dump result),
        # matching what ``CrawlUrl.__json__`` returns, instead of the whole
        # ``(data, errors)`` pair.
        return CrawlCacheSchema().dump(self)[0]

    @classmethod
    def store(cls, url, rw):
        """Create a cache entry for ``url`` and commit it.

        Args:
            url: URL the content was fetched from.
            rw: Raw content to store.

        Returns:
            The newly committed cache entry.
        """
        entry = cls(url, rw)
        db_session2.add(entry)
        db_session2.commit()
        return entry
#flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, CrawlUrl) else None)