fsch
This commit is contained in:
@@ -34,24 +34,30 @@ def process_article(art):
|
||||
return aa
|
||||
|
||||
# process a single found url
|
||||
def process_url(url, tpe, parent_url, params=None):
    """Put a single found URL on the fetch queue.

    Args:
        url: The URL to queue. When ``parent_url`` is not None it is first
            passed through ``fix_link(parent_url, url)`` (presumably to
            resolve it relative to the parent page -- confirm in fix_link).
        tpe: Type tag stored alongside the URL in the queue entry.
        parent_url: URL of the page the link was found on, or None to
            queue ``url`` unchanged.
        params: Optional dict of extra parameters attached to the queue
            entry. It is copied, never mutated.

    The queued entry is the tuple ``(0, tpe, url, entry_params)`` where
    ``entry_params`` always carries ``"nofollow": False``.
    """
    if parent_url is not None:
        url = fix_link(parent_url, url)

    # Build a private dict for this entry: mutating `params` in place would
    # alter the caller's dict (or a shared default) and every entry already
    # queued with the same dict object.
    entry_params = dict(params) if params else {}
    entry_params["nofollow"] = False
    put_fetch_queue((0, tpe, url, entry_params))
|
||||
|
||||
|
||||
# process a url list
|
||||
def process_urllist(urllist, tpe, parent_url, params=None):
    """Queue every URL of a list via ``process_url``.

    Args:
        urllist: Iterable whose elements are either a string URL or a
            tuple ``(url, parent_item)``.
        tpe: Type tag forwarded to ``process_url``.
        parent_url: URL of the page the links were found on; forwarded to
            ``process_url``.
        params: Optional dict of extra parameters forwarded with each URL.
            It is copied per element, never mutated.

    For tuple elements, the second item is forwarded under the
    ``"parent_item"`` key of that element's parameters only. Elements of
    any other type are reported through ``clogger.error`` and skipped.
    """
    base_params = dict(params) if params else {}
    for u in urllist:
        # NOTE: `basestring` is the Python 2 string base type, kept because
        # this module targets Python 2.
        if isinstance(u, basestring):
            # Fresh copy per URL so queue entries never share one dict.
            process_url(u, tpe, parent_url, dict(base_params))
        elif isinstance(u, tuple):
            # Per-item dict: "parent_item" must not leak into later URLs.
            item_params = dict(base_params)
            item_params["parent_item"] = u[1]
            process_url(u[0], tpe, parent_url, item_params)
        else:
            # str() is required: concatenating a type object to a str
            # raises TypeError instead of logging the problem.
            clogger.error("url has wrong type: " + str(type(u)))
|
||||
|
||||
def do_process(tpe, cont,params={}):
|
||||
urllist=[]
|
||||
# clogger.debug("process :" + str(cont))
|
||||
if "article_links" in cont:
|
||||
process_urllist(cont["article_links"], article_types[tpe], cont["url"])
|
||||
process_urllist(cont["article_links"], article_types[tpe], cont["url"], params)
|
||||
if "index_links" in cont:
|
||||
process_urllist(cont["index_links"], tpe , cont["url"])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user