merge
This commit is contained in:
@@ -8,6 +8,8 @@ from itertools import repeat
|
||||
from models import CrawlUrl
|
||||
from src import clogger
|
||||
from src.database import db_session2,db_session
|
||||
from Queue import Empty
|
||||
|
||||
def start_workers(f,c,p):
|
||||
for _ in range(f):
|
||||
clogger.debug("spawn fetchworker")
|
||||
@@ -18,17 +20,26 @@ def start_workers(f,c,p):
|
||||
spawn(work_process)
|
||||
|
||||
def work_fetch():
|
||||
while True:
|
||||
run_fetch()
|
||||
|
||||
try:
|
||||
while True:
|
||||
run_fetch()
|
||||
except Empty:
|
||||
clogger.info("Fetch - Worker died")
|
||||
|
||||
def work_process():
|
||||
while True:
|
||||
run_process()
|
||||
# db_session.close()
|
||||
try:
|
||||
while True:
|
||||
run_process()
|
||||
except Empty:
|
||||
clogger.info("Process - Worker died")
|
||||
|
||||
|
||||
def work_compile():
|
||||
while True:
|
||||
run_compile()
|
||||
try:
|
||||
while True:
|
||||
run_compile()
|
||||
except Empty:
|
||||
clogger.info("Compile - Worker died")
|
||||
|
||||
|
||||
def queue_url(tpe, url,params={"nofollow": False}):
|
||||
@@ -38,16 +49,15 @@ def queue_url(tpe, url,params={"nofollow": False}):
|
||||
def queue_url_upd(tpe, url,params={"nofollow": True}):
|
||||
fetch_queue.put((True,tpe,url,params))
|
||||
|
||||
|
||||
# fetch a page from the url list
|
||||
def run_fetch():
|
||||
try:
|
||||
tc, tpe, url, p= fetch_queue.get()
|
||||
tc, tpe, url, p= fetch_queue.get(True, 100)
|
||||
except ValueError:
|
||||
tc, tpe, url= fetch_queue.get()
|
||||
clogger.debug("fetched : "+url)
|
||||
tc, tpe, url= fetch_queue.get(True, 100)
|
||||
clogger.info("Fechted url:"+url)
|
||||
if tpe is not "dummyarticle" and tpe is not "dummyindex":
|
||||
rw=fetch_page(url)
|
||||
rw = fetch_page(url, p)
|
||||
else:
|
||||
rw="<p> dummytext</p>"
|
||||
compile_queue.put((0, tpe, {"url": url, "sourcetype": tpe, "raw": rw},p))
|
||||
@@ -56,7 +66,7 @@ def run_fetch():
|
||||
|
||||
# compile something from the compile list
|
||||
def run_compile():
|
||||
tc,tpe,h, p = compile_queue.get()
|
||||
tc,tpe,h, p = compile_queue.get(True, 100)
|
||||
if p.has_key('parent_item'):
|
||||
h["parent_item"]=p["parent_item"]
|
||||
h=do_compile(tpe,h,p)
|
||||
@@ -65,7 +75,7 @@ def run_compile():
|
||||
# compile_queue.task_done()
|
||||
|
||||
def run_process():
|
||||
tc,tpe,h,p = process_queue.get()
|
||||
tc,tpe,h,p = process_queue.get(True, 100)
|
||||
do_process(tpe, h,p)
|
||||
return h
|
||||
# process_queue.task_done()
|
||||
|
||||
Reference in New Issue
Block a user