This commit is contained in:
Andreas Stephanides
2017-02-08 07:13:53 +01:00
parent 589807f5e2
commit 127bc9c557
6 changed files with 84 additions and 25 deletions

View File

@@ -34,24 +34,30 @@ def process_article(art):
return aa
# process a single found url
def process_url(url, tpe, parent_url, params=None):
    """Put a single discovered URL on the fetch queue.

    Args:
        url: The URL that was found. When ``parent_url`` is given it is
            first passed through ``fix_link(parent_url, url)``
            (presumably to resolve relative links -- confirm in fix_link).
        tpe: Type tag stored alongside the URL in the queue entry.
        parent_url: URL of the page the link was found on, or ``None`` to
            queue ``url`` unchanged.
        params: Optional dict of extra parameters to attach to the queue
            entry. It is copied, never modified.

    The queued entry is the tuple ``(0, tpe, url, entry_params)`` where
    ``entry_params`` always carries ``"nofollow": False``.
    """
    #clogger.debug("process URL of type "+ tpe + ": " + url)
    if parent_url is not None:
        url = fix_link(parent_url, url)
    # Work on a private copy: a mutable default (or a dict shared by the
    # caller across several URLs) must not be mutated here, otherwise
    # every queued entry would end up referencing the same dict object.
    entry_params = dict(params) if params else {}
    entry_params["nofollow"] = False
    put_fetch_queue((0, tpe, url, entry_params))
# process a url list
def process_urllist(urllist, tpe, parent_url, params=None):
    """Queue every URL of a list via ``process_url``.

    Args:
        urllist: Iterable whose items are either a string (the URL) or a
            tuple whose first element is the URL and whose second element
            is stored as ``"parent_item"`` in that URL's params.
        tpe: Type tag forwarded to ``process_url``.
        parent_url: URL of the page the links were found on; forwarded to
            ``process_url``.
        params: Optional dict of extra parameters applied to every URL.
            It is copied, never modified.

    Items of any other type are reported through ``clogger.error`` and
    skipped.
    """
    base_params = dict(params) if params else {}
    for u in urllist:
        if isinstance(u, basestring):
            # Fresh dict per URL so queue entries never share state.
            process_url(u, tpe, parent_url, dict(base_params))
        elif isinstance(u, tuple):
            # Attach parent_item to this entry only; updating a shared
            # dict would leak it into the following plain-string URLs.
            item_params = dict(base_params)
            item_params["parent_item"] = u[1]
            process_url(u[0], tpe, parent_url, item_params)
        else:
            # str() is required: concatenating a type object to a str
            # raises TypeError instead of logging the problem.
            clogger.error("url has wrong type: " + str(type(u)))
def do_process(tpe, cont,params={}):
urllist=[]
# clogger.debug("process :" + str(cont))
if "article_links" in cont:
process_urllist(cont["article_links"], article_types[tpe], cont["url"])
process_urllist(cont["article_links"], article_types[tpe], cont["url"], params)
if "index_links" in cont:
process_urllist(cont["index_links"], tpe , cont["url"])