This commit is contained in:
Andreas Stephanides
2017-02-08 07:13:53 +01:00
parent 589807f5e2
commit 127bc9c557
6 changed files with 84 additions and 25 deletions

View File

@@ -6,12 +6,12 @@ from datetime import datetime
import re
import urlparse
from src import clogger, cfg
from fixing import fix_link
from src.compiler.fixing import fix_link
import feedparser
#from fetching import downloadfile
import json
def do_compile(tpe, cont):
def do_compile(tpe, cont, params={}):
if type(cont) != dict:
clogger.error("Type Error for do compile for :"+str(cont["url"]))
# Starting to compile an generic object
@@ -20,17 +20,20 @@ def do_compile(tpe, cont):
else:
clogger.debug("compile: type:"+str(tpe)+ "| "+ str(cont["url"]))
if tpe in compiler:
cont=compiler[tpe](cont["url"], cont["raw"])
cont=compiler[tpe](cont["url"], cont["raw"],params)
else:
clogger.error("Compiler for "+tpe+" not found.")
return cont
from comp import rssfeed
from comp import fbfeed
from comp.fsch import fscharticle
def dummyarticle(url, raw, params=None):
    """Build a placeholder article record for ``url``.

    ``raw`` is passed through unchanged as the article text; every other
    article field is a fixed dummy value.

    ``params`` is accepted and ignored. It exists so this function can be
    invoked with the same three positional arguments (url, raw, params)
    that ``do_compile`` hands to every registered compiler; with the former
    two-argument signature that call raised ``TypeError``.

    Returns a dict with the keys ``"url"`` and ``"article"``.
    """
    article = {
        "url": url,
        "section": "dummysection",
        "sourcetype": "dummy",
        "title": "dummytitle",
        "text": raw,
        "image": "fff",
        "author": "me",
        "published": None,
    }
    return {"url": url, "article": article}
def htufeed(url,raw):
def htufeed(url,raw,params={}):
al=[]
f=feedparser.parse(raw)
for e in f['entries']:
@@ -38,7 +41,7 @@ def htufeed(url,raw):
return {"url": url, "next_page": None, "article_links": al, "objecttype":"index"}
def htuarticle(url,raw):
def htuarticle(url,raw,params={}):
sp=BeautifulSoup(raw)
d={}
h=sp.find("meta", {"property": "og:image"})
@@ -71,7 +74,7 @@ def htuarticle(url,raw):
return {"article": d}
def fetarticle(url, raw):
def fetarticle(url, raw,params={}):
sp=BeautifulSoup(raw)
d={}
h=sp.find("h1", {"itemprop": "name"})
@@ -110,7 +113,7 @@ def fetarticle(url, raw):
return {"article": d}
def fsarcharticle(url, raw):
def fsarcharticle(url, raw,params={}):
sp=BeautifulSoup(raw)
d={}
h=sp.find("h1", {"class": "title"})
@@ -135,7 +138,7 @@ def fsarcharticle(url, raw):
d["author"]=None
return {"article": d}
def fsbizarticle(url, raw):
def fsbizarticle(url, raw,params={}):
sp=BeautifulSoup(raw)
d={}
h=sp.find("h1", {"class": "entry-title"})
@@ -159,7 +162,7 @@ def fsbizarticle(url, raw):
d["author"]=h.find("a").text.strip()
return {"article": d}
def fetindex(url, raw):
def fetindex(url, raw,params={}):
if raw is None:
raise Error
# clogger.debug("compile_fetindex: "+str(url))
@@ -178,7 +181,7 @@ def fetindex(url, raw):
al.append(t.attrs["href"])
return {"url": url, "next_page": nl, "article_links": al, "objecttype": "index" }
def fsarchindex(url, raw):
def fsarchindex(url, raw,params={}):
if raw is None:
raise Error
html=BeautifulSoup(raw)
@@ -197,7 +200,7 @@ def fsarchindex(url, raw):
return {"url": url, "next_page": None, "article_links": al, "facebook_links": fl,"objecttype":"index"}
def fsbizindex(url, raw):
def fsbizindex(url, raw,params={}):
if raw is None:
raise Error
print "compile_fsbizindex"
@@ -212,8 +215,7 @@ def fsbizindex(url, raw):
def fsmbindex(url, raw):
def fsmbindex(url, raw,params={}):
if raw is None:
raise Error
html=BeautifulSoup(raw)
@@ -246,7 +248,7 @@ def fsmbindex(url, raw):
articles.append(aa)
return {"url": url, "next_page": np, "articles": articles,"objecttype": "articles"}
compiler = {"fetindex": fetindex, "fetarticle": fetarticle, "fsarchindex": fsarchindex, "fsarcharticle": fsarcharticle, "fsmbindex": fsmbindex, "fsbizindex": fsbizindex, "dummyarticle": dummyarticle,"htuarticle": htuarticle, "htufeed": htufeed, "fbfeed": fbfeed, "fschfeed": rssfeed}
compiler = {"fetindex": fetindex, "fetarticle": fetarticle, "fsarchindex": fsarchindex, "fsarcharticle": fsarcharticle, "fsmbindex": fsmbindex, "fsbizindex": fsbizindex, "dummyarticle": dummyarticle,"htuarticle": htuarticle, "htufeed": htufeed, "fbfeed": fbfeed, "fschfeed": rssfeed, "fscharticle": fscharticle}
compiler = cfg.compiler
for i in compiler:
@@ -254,4 +256,4 @@ for i in compiler:
article_types={"fetindex" : "fetarticle", "fsarchindex": "fsarcharticle", "fsbizindex": "fsbizarticle", "dummyindex": "dummyarticle", "htufeed": "htuarticle"}
article_types={"fetindex" : "fetarticle", "fsarchindex": "fsarcharticle", "fsbizindex": "fsbizarticle", "dummyindex": "dummyarticle", "htufeed": "htuarticle", "fschfeed": "fscharticle"}