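"""Crawl URL helpers: test-compile, queue, and register CrawlUrl rows, and
start the background workers."""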
from .models import CrawlUrl, CrawlCache, CrawlCacheSchema
from src.database import db_session2, init_db, read_json, init_db2
from compiler import do_compile
from fetching import fetch_page
import mworker


def urls_test(url_id):
    """Fetch and compile a single CrawlUrl synchronously, without queueing."""
    cu = CrawlUrl.query.get(url_id)
    rw = fetch_page(cu.url)
    h = {"url": cu.url, "sourcetype": cu.tpe, "raw": rw}
    h2 = do_compile(cu.tpe, h)
    # Blank out the raw HTML so the returned payload stays small.
    h2["raw"] = "raw - html -blocked"
    return {"comp": h2}


def urls_que(url_id):
    """Queue the CrawlUrl with the given id for crawling."""
    cu = CrawlUrl.query.get(url_id)
    mworker.queue_url(cu.tpe, cu.url)
    return cu


def urls_que_upd(url_id):
    """Queue the CrawlUrl with the given id for an update crawl."""
    cu = CrawlUrl.query.get(url_id)
    mworker.queue_url_upd(cu.tpe, cu.url)
    return cu


def url_add(url, tpe):
    """Register a URL of source type `tpe`, reusing an existing row if one exists."""
    u = CrawlUrl.find_or_create(tpe, url)
    db_session2.add(u)
    db_session2.commit()
    return u


def start_workers():
    # The three counts are worker pool sizes passed through to mworker;
    # their exact roles are defined by mworker.start_workers.
    mworker.start_workers(1, 1, 1)
    return "started workers"
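

# Usage sketch (an assumption, not part of the module): these helpers expect
# an application context in which `CrawlUrl.query` is bound, e.g. a
# Flask-SQLAlchemy app. The URL and the "page" source type below are
# hypothetical example values.
#
#     u = url_add("https://example.com", "page")  # register the URL
#     start_workers()                             # spin up the worker pools
#     urls_que(u.id)                              # queue it for crawling
#     print(urls_test(u.id))                      # fetch and compile inline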