diff --git a/compiler/mprocess.py b/compiler/mprocess.py
index fb87028..faf45a9 100644
--- a/compiler/mprocess.py
+++ b/compiler/mprocess.py
@@ -38,7 +38,7 @@ def process_url(url,tpe, parent_url):
     #clogger.debug("process URL of type "+ tpe + ": " + url)
     if parent_url is not None:
         url=fix_link(parent_url, url)
-    put_fetch_queue((0,tpe,url))
+    put_fetch_queue((False,tpe,url,{"nofollow":False}))
 
 
 # process a url list
diff --git a/compiler/mworker.py b/compiler/mworker.py
index f0d593e..2ff6626 100644
--- a/compiler/mworker.py
+++ b/compiler/mworker.py
@@ -33,32 +33,38 @@ def work_compile():
 
-def queue_url(tpe, url,params={"nofollow": False}):
-    fetch_queue.put((0,tpe,url,params))
+# params=None idiom: a mutable default dict would be shared across calls
+def queue_url(tpe, url, params=None):
+    fetch_queue.put((False, tpe, url, {"nofollow": False} if params is None else params))
 
 #param nofollow = True : Don't follow pagination recursivly to only fetch an update
-def queue_url_upd(tpe, url,params={"nofollow": True}):
-    fetch_queue.put((0,tpe,url,params))
+def queue_url_upd(tpe, url, params=None):
+    fetch_queue.put((True, tpe, url, {"nofollow": True} if params is None else params))
 
 # fetch a page from the url list
 def run_fetch():
-    tc, tpe, url,params = fetch_queue.get()
+    item = fetch_queue.get()
+    # unpack by index: retrying get() on an unpack ValueError would silently
+    # consume a second queue item, and would leave the params unbound
+    tc, tpe, url = item[0], item[1], item[2]
+    p = item[3] if len(item) > 3 else {"nofollow": False}
+
+    # use != : "is not" compares identity, not string equality
+    if tpe != "dummyarticle" and tpe != "dummyindex":
         rw=fetch_page(url)
     else:
         rw="dummytext"
-    compile_queue.put((0, tpe, {"url": url, "sourcetype": tpe, "raw": rw},params))
+    compile_queue.put((0, tpe, {"url": url, "sourcetype": tpe, "raw": rw},p))
     return rw
 #    fetch_queue.task_done()
 
 #comile something from the compile list
 def run_compile():
-    tc,tpe,h,params = compile_queue.get()
+    tc,tpe,h, p = compile_queue.get()
     h=do_compile(tpe,h)
-    process_queue.put((0,tpe, h,params))
+    process_queue.put((0,tpe, h,p))
     return h
 #    compile_queue.task_done()
 
 def run_process():
-    tc,tpe,h,params = process_queue.get()
-    do_process(tpe, h,params)
+    tc,tpe,h,p = process_queue.get()
+    do_process(tpe, h,p)
     return h
 #    process_queue.task_done()