init commit
This commit is contained in:
75
compiler/models.py
Normal file
75
compiler/models.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text
|
||||
from datetime import datetime
|
||||
from src.database import Base2
|
||||
from src.database import db_session2
|
||||
from mqueues import put_fetch_queue
|
||||
from marshmallow import Schema,fields,ValidationError
|
||||
import json
|
||||
import flask
|
||||
|
||||
def add_url(tpe, url):
    """Register a URL of type ``tpe`` and queue it for fetching.

    Looks up the matching ``CrawlUrl`` (or builds a new one), adds it to
    ``db_session2``, commits, and then calls ``schedule()`` on it.
    """
    crawl_url = CrawlUrl.find_or_create(tpe, url)

    # Commit before scheduling, so the session is flushed by the time the
    # entry is put on the fetch queue.
    db_session2.add(crawl_url)
    db_session2.commit()

    crawl_url.schedule()
|
||||
|
||||
|
||||
class CrawlUrlSchema(Schema):
    """Marshmallow schema used by ``CrawlUrl.__json__`` to serialize a row."""

    id = fields.Integer()
    tpe = fields.String()
    url = fields.String()
    last_fetched = fields.DateTime()
    # NOTE(review): on CrawlUrl, ``fetched`` is a method rather than a column;
    # confirm the installed marshmallow version resolves callables here.
    fetched = fields.DateTime()
|
||||
|
||||
class CrawlUrl(Base2):
    """A typed URL that can be put on the fetch queue.

    Rows are stored in the ``crawlurls`` table. ``tpe`` and ``url`` together
    are what ``find_or_create`` uses to look up an existing row.
    """

    __tablename__ = 'crawlurls'

    id = Column(Integer, primary_key=True)
    tpe = Column(String(250))
    url = Column(String(250))
    last_fetched = Column(DateTime)

    def fetched(self):
        """Return when this URL was most recently cached, or ``None``.

        Looks up the newest ``CrawlCache`` row whose ``url`` equals this
        object's ``url`` and returns that row's ``fetched`` timestamp.
        ``CrawlUrlSchema`` declares ``fetched`` as a DateTime field, so the
        timestamp (not the cache row itself) is returned.
        """
        # ``Query`` has no ``find`` method; ``filter`` is the SQLAlchemy call.
        # ``CrawlCache.store`` adds a new row on every call, so order by the
        # timestamp to pick the latest one deterministically.
        latest = (
            CrawlCache.query
            .filter(CrawlCache.url == self.url)
            .order_by(CrawlCache.fetched.desc())
            .first()
        )
        if latest is None:
            return None
        return latest.fetched

    @classmethod
    def find_or_create(cls, tpe, url):
        """Return the row matching ``tpe`` and ``url``, or a new instance.

        A newly created instance is NOT added to the session here; the
        caller is responsible for adding and committing it.
        """
        # presumably ``query`` is a session-bound query property supplied by
        # Base2 -- confirm in src.database.
        existing = (
            cls.query
            .filter(cls.url == url)
            .filter(cls.tpe == tpe)
            .first()
        )
        if existing is None:
            existing = cls(tpe, url)
        return existing

    def schedule(self):
        """Put this URL on the fetch queue as ``(0, tpe, url)``."""
        # NOTE(review): the leading 0 looks like a priority -- confirm
        # against mqueues.put_fetch_queue.
        put_fetch_queue((0, self.tpe, self.url))

    def __init__(self, tpe, url):
        """Create an unsaved row for ``url`` with type ``tpe``."""
        self.url = url
        self.tpe = tpe

    def __json__(self):
        """Return the serialized form produced by ``CrawlUrlSchema``."""
        # Index 0 selects the data part of the dump result (marshmallow 2.x
        # returns a ``(data, errors)`` pair).
        return CrawlUrlSchema().dump(self)[0]
|
||||
|
||||
class CrawlCacheSchema(Schema):
    """Marshmallow schema used by ``CrawlCache.__json__`` to serialize a row."""

    id = fields.Integer()
    raw = fields.String()
    url = fields.String()
    fetched = fields.DateTime()
|
||||
|
||||
class CrawlCache(Base2):
    """Raw content fetched for a URL, stamped with the time it was stored.

    Rows are stored in the ``crawlcache`` table.
    """

    __tablename__ = 'crawlcache'

    id = Column(Integer, primary_key=True)
    url = Column(String(250))
    fetched = Column(DateTime)
    raw = Column(Text)

    def __init__(self, url, rw):
        """Create an unsaved cache row for ``url`` holding raw content ``rw``."""
        self.url = url
        self.raw = rw
        # ``utcnow`` yields a naive datetime expressed in UTC.
        self.fetched = datetime.utcnow()

    def __json__(self):
        """Return the serialized form produced by ``CrawlCacheSchema``."""
        # Return only the data part of the dump result, matching
        # ``CrawlUrl.__json__`` (marshmallow 2.x returns ``(data, errors)``).
        return CrawlCacheSchema().dump(self)[0]

    @classmethod
    def store(cls, url, rw):
        """Create a cache row for ``url`` with content ``rw`` and commit it."""
        entry = cls(url, rw)
        db_session2.add(entry)
        db_session2.commit()
|
||||
|
||||
|
||||
|
||||
|
||||
#flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, CrawlUrl) else None)
|
||||
Reference in New Issue
Block a user