fachschaften/compiler/mworker.py
Andreas Stephanides eb071d9f95 worker die
2017-02-15 13:53:37 +01:00

from Queue import Empty

from gevent import spawn

from compiler import do_compile
from fetching import fetch_page
from mprocess import do_process
from mqueues import fetch_queue, compile_queue, process_queue
from src import clogger

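# Three-stage gevent pipeline: fetch workers take URLs off fetch_queue and put
# the raw pages onto compile_queue, compile workers run do_compile and put the
# result onto process_queue, and process workers hand it to do_process.
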
def start_workers(f, c, p):
    # spawn f fetch, c compile and p process workers as gevent greenlets
    for _ in range(f):
        clogger.debug("spawn fetch worker")
        spawn(work_fetch)
    for _ in range(c):
        clogger.debug("spawn compile worker")
        spawn(work_compile)
    for _ in range(p):
        clogger.debug("spawn process worker")
        spawn(work_process)

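# Each worker loops until its queue.get(True, 100) times out after 100 s of
# inactivity and raises Empty; the worker then logs that it died and exits.
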
def work_fetch():
    try:
        while True:
            run_fetch()
    except Empty:
        clogger.info("Fetch - Worker died")

def work_process():
    try:
        while True:
            run_process()
    except Empty:
        clogger.info("Process - Worker died")

def work_compile():
    try:
        while True:
            run_compile()
    except Empty:
        clogger.info("Compile - Worker died")

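# Fetch jobs are tuples (tc, tpe, url, params); tc is False for a full crawl
# queued via queue_url and True for an update fetch queued via queue_url_upd.
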
def queue_url(tpe, url, params=None):
    if params is None:  # build the default per call, avoiding a shared mutable dict
        params = {"nofollow": False}
    fetch_queue.put((False, tpe, url, params))

# param nofollow = True: don't follow pagination recursively, only fetch an update
def queue_url_upd(tpe, url, params=None):
    if params is None:
        params = {"nofollow": True}
    fetch_queue.put((True, tpe, url, params))

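# Usage sketch (the sourcetype "index" is illustrative, not defined in this file):
#   queue_url("index", "http://example.org/news")      # full recursive crawl
#   queue_url_upd("index", "http://example.org/news")  # update only, nofollow
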
# fetch a page from the url list
def run_fetch():
    # unpack in place instead of calling get() again on a malformed
    # 3-tuple, which would silently drop a second queue item
    item = fetch_queue.get(True, 100)
    if len(item) == 4:
        tc, tpe, url, p = item
    else:
        tc, tpe, url = item
        p = {}
    clogger.info("Fetched url: " + url)
    # the dummy source types skip the real HTTP fetch
    if tpe not in ("dummyarticle", "dummyindex"):
        rw = fetch_page(url)
    else:
        rw = "<p> dummytext</p>"
    compile_queue.put((0, tpe, {"url": url, "sourcetype": tpe, "raw": rw}, p))
    return rw

# compile something from the compile list
def run_compile():
    tc, tpe, h, p = compile_queue.get(True, 100)
    # pass a parent item through to the compiler if one was queued with the job
    if 'parent_item' in p:
        h["parent_item"] = p["parent_item"]
    h = do_compile(tpe, h, p)
    process_queue.put((0, tpe, h, p))
    return h

# process a compiled item from the process list
def run_process():
    tc, tpe, h, p = process_queue.get(True, 100)
    do_process(tpe, h, p)
    return h

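# Minimal demo entry point (a sketch, not part of the original module): it
# assumes the mqueues queues are gevent-aware and simply sleeps long enough
# for the workers to drain them and die via the 100 s Empty timeout.
if __name__ == "__main__":
    import gevent
    start_workers(2, 1, 1)
    queue_url("dummyindex", "http://example.org/")  # handled by the dummy branch
    gevent.sleep(120)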