uwsgi committed 2017-02-17 10:09:39 +01:00
3 changed files with 40 additions and 22 deletions


@@ -40,6 +40,7 @@ def cleanup_cache():
     ten_weeks_ago = current_time - timedelta(days=cfg.cache_days*2)
     CrawlCache.query.filter(CrawlCache.fetched<ten_weeks_ago).delete()
 
 def get_cached_page(furl):
     current_time = datetime.utcnow()
     ten_weeks_ago = current_time - timedelta(days=cfg.cache_days)
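
The two cutoffs here are easy to misread: cleanup_cache deletes rows older than cfg.cache_days*2, while get_cached_page treats anything older than cfg.cache_days as stale, so the variable name ten_weeks_ago only matches the first cutoff if cfg.cache_days is 35. A minimal sketch of the two windows, with 35 assumed purely for illustration:

    from datetime import datetime, timedelta

    cache_days = 35  # assumed value of cfg.cache_days (35*2 = 70 days = ten weeks)
    now = datetime.utcnow()

    fresh_cutoff = now - timedelta(days=cache_days)       # get_cached_page: newer rows are served from cache
    delete_cutoff = now - timedelta(days=cache_days * 2)  # cleanup_cache: older rows are purged

    # Rows fetched between the two cutoffs are stale (refetched on access)
    # but still kept in the CrawlCache table until the next cleanup pass.
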
@@ -51,7 +52,8 @@ def get_cached_page(furl):
     return cc
 
-def fetch_page(furl):
+def fetch_page(furl,p={}):
     u=urlparse.urlparse(furl)
     current_time = datetime.utcnow()
     cu=CrawlUrl.query.filter(CrawlUrl.url==furl).first()
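
The new p={} default is the classic mutable-default pattern; it is harmless here because fetch_page only reads p, but it would silently share state across calls if the dict were ever mutated. A sketch of the safer idiom (not what the commit uses):

    def fetch_page(furl, p=None):
        # A fresh dict per call; a shared p={} default would leak writes
        # from one call into the next.
        if p is None:
            p = {}
        # ... body unchanged ...
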
@@ -62,8 +64,13 @@ def fetch_page(furl):
     clogger.debug("fetching url: "+ str(furl))
     if u[0]=='fb':
         fb_time_since = str(int((current_time - timedelta(days=10)-datetime(1970,1,1)).total_seconds()))
-        furl=u[1]+u[2]+"?since="+fb_time_since+"&fields=story,created_time,id,message,attachments"
+        if p.has_key("nofollow") and p["nofollow"]==False:
+            furl=u[1]+u[2]+"?fields=story,created_time,id,message,attachments"
+        else:
+            furl=u[1]+u[2]+"?since="+fb_time_since+"&fields=story,created_time,id,message,attachments"
         cc=get_cached_page(furl) #CrawlCache.query.filter(CrawlCache.url==furl).filter(CrawlCache.fetched>ten_weeks_ago).first()
+# cc=CrawlCache.query.filter(CrawlCache.url==furl).filter(CrawlCache.fetched>ten_weeks_ago).first()
         if cc is None:
             tx = json.dumps(graph.get_object(id=furl))
         else:
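
Put together, the fb branch now builds two different Graph API queries: by default it keeps the ten-day since window, and passing p={"nofollow": False} drops the time filter so the whole feed is requested. A standalone Python 3 sketch of the same branching (field list copied from the diff; build_fb_query is a hypothetical helper name, and the Python 2 idioms has_key and urlparse.urlparse are replaced by dict.get and urllib.parse.urlparse):

    from datetime import datetime, timedelta
    from urllib.parse import urlparse

    FIELDS = "story,created_time,id,message,attachments"

    def build_fb_query(furl, p=None):
        # Mirrors the fb branch of fetch_page: rebuild an fb:// crawl URL
        # as a Graph API node path plus query string.
        p = p or {}
        u = urlparse(furl)              # furl looks like fb://<node>/<path>
        base = u.netloc + u.path        # u[1]+u[2] in the original code
        if p.get("nofollow") is False:  # explicit nofollow=False: no time window
            return base + "?fields=" + FIELDS
        # Default: seconds-since-epoch cutoff ten days back, as in the diff.
        since = int((datetime.utcnow() - timedelta(days=10)
                     - datetime(1970, 1, 1)).total_seconds())
        return base + "?since=" + str(since) + "&fields=" + FIELDS

    print(build_fb_query("fb://somepage/posts", {"nofollow": False}))
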