Commit by uwsgi, 2017-02-17 10:09:39 +01:00
3 changed files with 40 additions and 22 deletions


@@ -8,6 +8,8 @@ from itertools import repeat
 from models import CrawlUrl
 from src import clogger
 from src.database import db_session2,db_session
+from Queue import Empty
 def start_workers(f,c,p):
     for _ in range(f):
         clogger.debug("spawn fetchworker")
@@ -18,17 +20,26 @@ def start_workers(f,c,p):
         spawn(work_process)
 def work_fetch():
-    while True:
-        run_fetch()
+    try:
+        while True:
+            run_fetch()
+    except Empty:
+        clogger.info("Fetch - Worker died")
 def work_process():
-    while True:
-        run_process()
-    # db_session.close()
+    try:
+        while True:
+            run_process()
+    except Empty:
+        clogger.info("Process - Worker died")
 def work_compile():
-    while True:
-        run_compile()
+    try:
+        while True:
+            run_compile()
+    except Empty:
+        clogger.info("Compile - Worker died")
 def queue_url(tpe, url,params={"nofollow": False}):
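
All three workers now share one shutdown pattern: keep draining the queue, and when a get finally times out let Empty escape the loop, log that the worker died, and return. A generic sketch of that pattern, assuming a handle callback and the 100-second timeout used later in this commit; in the diff itself the exception propagates out of run_fetch()/run_process()/run_compile() rather than being raised directly in the loop body:

import logging
import queue

log = logging.getLogger("worker")

def worker_loop(q, handle, timeout=100):
    # Drain q until it stays empty for `timeout` seconds, then exit.
    try:
        while True:
            item = q.get(True, timeout)   # raises queue.Empty on timeout
            handle(item)
    except queue.Empty:
        log.info("worker died after %s idle seconds", timeout)
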
@@ -38,16 +49,15 @@ def queue_url(tpe, url,params={"nofollow": False}):
 def queue_url_upd(tpe, url,params={"nofollow": True}):
     fetch_queue.put((True,tpe,url,params))
 # fetch a page from the url list
 def run_fetch():
     try:
-        tc, tpe, url, p= fetch_queue.get()
+        tc, tpe, url, p= fetch_queue.get(True, 100)
     except ValueError:
-        tc, tpe, url= fetch_queue.get()
-    clogger.debug("fetched : "+url)
+        tc, tpe, url= fetch_queue.get(True, 100)
+    clogger.info("Fechted url:"+url)
     if tpe is not "dummyarticle" and tpe is not "dummyindex":
-        rw=fetch_page(url)
+        rw = fetch_page(url, p)
     else:
         rw="<p> dummytext</p>"
     compile_queue.put((0, tpe, {"url": url, "sourcetype": tpe, "raw": rw},p))
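
Items on fetch_queue are 4-tuples (tc, tpe, url, params); the except ValueError branch exists for legacy 3-tuple items that carry no params dict, although as written it consumes a second item from the queue when unpacking fails. A hedged sketch of one way to accept both shapes from a single get, names copied from the diff, the length check is not in the original:

import queue                           # the Python 2 module is named "Queue"

fetch_queue = queue.Queue()
fetch_queue.put((False, "article", "http://example.com/a", {"nofollow": False}))

item = fetch_queue.get(True, 100)      # raises queue.Empty after 100 idle seconds
if len(item) == 4:                     # current 4-tuple shape
    tc, tpe, url, p = item
else:                                  # legacy 3-tuple without a params dict
    tc, tpe, url = item
    p = {"nofollow": False}

Also worth noting while here: tpe is not "dummyarticle" tests object identity and only works by accident of string interning; tpe != "dummyarticle" is the dependable comparison.
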
@@ -56,7 +66,7 @@ def run_fetch():
 #comile something from the compile list
 def run_compile():
-    tc,tpe,h, p = compile_queue.get()
+    tc,tpe,h, p = compile_queue.get(True, 100)
     if p.has_key('parent_item'):
         h["parent_item"]=p["parent_item"]
     h=do_compile(tpe,h,p)
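
p.has_key('parent_item') is Python 2 only; the in operator performs the same membership test and survives a Python 3 port. A one-line sketch of the equivalent check:

p = {"parent_item": 42}

# has_key() was removed in Python 3; "in" does the same membership test.
if "parent_item" in p:
    print(p["parent_item"])
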
@@ -65,7 +75,7 @@ def run_compile():
     # compile_queue.task_done()
 def run_process():
-    tc,tpe,h,p = process_queue.get()
+    tc,tpe,h,p = process_queue.get(True, 100)
     do_process(tpe, h,p)
     return h
     # process_queue.task_done()
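
Taken together, the commit makes an idle pipeline wind itself down: every get() now blocks for at most 100 seconds, and once Empty propagates out of run_fetch()/run_compile()/run_process() the surrounding work_* loop logs a "Worker died" message and returns. A rough end-to-end sketch of that lifecycle using plain threads and a short timeout; the project itself launches the workers via spawn(), and only the queue item shape is copied from the diff, everything else is illustrative:

import queue
import threading

fetch_queue = queue.Queue()

def work_fetch():
    try:
        while True:
            tc, tpe, url, p = fetch_queue.get(True, 5)   # 5 s here, 100 s in the diff
            print("Fetched url:" + url)
    except queue.Empty:
        print("Fetch - Worker died")

t = threading.Thread(target=work_fetch)
t.start()
fetch_queue.put((False, "article", "http://example.com", {"nofollow": False}))
t.join()   # returns once the queue has been idle for the timeout
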