div. updates
This commit is contained in:
@@ -27,19 +27,23 @@ class CrawlUrl(Base2):
|
||||
tpe=Column(String(250))
|
||||
url = Column(String(250))
|
||||
last_fetched = Column(DateTime)
|
||||
__schema__=CrawlUrlSchema
|
||||
__jsonid__='crawlurl'
|
||||
__whiteattrs__=["id","tpe","url"]
|
||||
__jsonattrs__=None
|
||||
def fetched(self):
    """Return the first ``CrawlCache`` entry stored for this URL.

    Looks up ``CrawlCache`` rows whose ``url`` equals ``self.url`` and
    returns the first match, or ``None`` when there is none.
    """
    # ``filter`` (not ``find``) is the query method the sibling lookup in
    # ``find_or_create`` uses, and the result must be returned for this
    # lookup to be of any use to the caller.
    return CrawlCache.query.filter(CrawlCache.url == self.url).first()
|
||||
@classmethod
def find_or_create(cls, tpe, url):
    """Return the entry matching ``tpe`` and ``url``, building one if absent.

    Args:
        tpe: Value compared against the ``tpe`` column.
        url: Value compared against the ``url`` column.

    Returns:
        The first matching row, or a newly constructed instance when the
        query finds nothing. The new instance is only constructed here;
        this method does not add it to a session or commit it.
    """
    found = cls.query.filter(cls.url == url).filter(cls.tpe == tpe).first()
    if found is not None:
        return found
    # Built from an attribute dict. NOTE(review): this relies on the
    # constructor accepting a single dict -- confirm against the class's
    # (or its base class's) ``__init__``.
    return cls({"tpe": tpe, "url": url})
|
||||
def schedule(self):
    """Pass a ``(0, tpe, url)`` tuple for this entry to ``put_fetch_queue``."""
    # NOTE(review): the leading 0 is presumably a priority -- confirm
    # against put_fetch_queue.
    job = (0, self.tpe, self.url)
    put_fetch_queue(job)
|
||||
def __init__(self, tpe, url):
    """Store the given type and URL on the new instance.

    Args:
        tpe: Value assigned to ``self.tpe``.
        url: Value assigned to ``self.url``.
    """
    # Targets are assigned left to right: url first, then tpe.
    self.url, self.tpe = url, tpe
|
||||
# def __init__(self, tpe, url):
|
||||
# self.url=url
|
||||
# self.tpe=tpe
|
||||
def __json__(self):
    """Return element 0 of ``CrawlUrlSchema().dump(self)``."""
    # NOTE(review): presumably dump() yields a (data, errors) pair and
    # element 0 is the serialized data -- confirm against the schema library.
    dumped = CrawlUrlSchema().dump(self)
    return dumped[0]
|
||||
|
||||
@@ -55,7 +59,10 @@ class CrawlCache(Base2):
|
||||
url=Column(String(250))
|
||||
fetched=Column(DateTime)
|
||||
raw=Column(Text)
|
||||
|
||||
__schema__=CrawlCacheSchema
|
||||
__jsonattrs__=None
|
||||
__jsonid__='crawlcache'
|
||||
__whiteattrs__= []
|
||||
def __init__(self, url, rw):
    """Store the URL and its raw content on the new instance.

    Args:
        url: Value assigned to ``self.url``.
        rw: Value assigned to ``self.raw``.
    """
    # Targets are assigned left to right: url first, then raw.
    self.url, self.raw = url, rw
|
||||
|
||||
Reference in New Issue
Block a user