fbfeed + sectionreset
@@ -15,12 +15,12 @@ import flask
 
 #flask.json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None)
 flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, (Base, Article,CrawlUrl)) else None)
+import controller
 @article_pages.route("/")
 @article_pages.route("")
 @article_pages.route(".json")
 def index():
-    articles=Article.query.all()
+    articles=controller.get_all()
     return jsonify(articles=articles)
 
 @article_pages.route("/<int:id>",methods=['PUT'])
@@ -1 +1,2 @@
 from rss import rssfeed
+from fb import fbfeed
compiler/comp/fb.py (new file, 50 lines)
@@ -0,0 +1,50 @@
+from dateutil.parser import parse
+from datetime import datetime
+import re
+import urlparse
+from src.fb import graph
+from facebook import GraphAPIError
+import json
+
+
+def fbfeedelement(h):
+    art={}
+    art["text"]=""
+    if h.has_key("story"):
+        art["text"]=art["text"]+h["story"]+"<br>"
+    if h.has_key("attachments") and len(h["attachments"]["data"])>0:
+        for a in h["attachments"]["data"]:
+            if a.has_key("media") and a["media"].has_key("image") and a["media"]["image"].has_key("src") and not art.has_key("image"):
+                art["image"]=a["media"]["image"]["src"]
+            if a.has_key("title"):
+                art["title"]=a["title"]
+            if a.has_key("type") and a["type"] in ["event"]:
+                art["url"]=a["url"]
+            if a.has_key("description"):
+                art["text"]=art["text"]+a["description"]+"<br>"
+
+
+    if not art.has_key("title") and h.has_key("story"):
+        art["title"]=h["story"]
+    if h.has_key("message"):
+        art["text"]=art["text"]+h["message"]
+    art["published"] =parse(h["created_time"])
+    if not art.has_key("url"):
+        art["url"]=urlparse.urlunsplit(("http","www.facebook.at",h["id"],"",""))
+    return art
+
+
+def fbfeed(url, raw):
+    js = json.loads(raw)
+    arts=[]
+    u=urlparse.urlparse(url)
+    for m in js["data"]:
+        aa=fbfeedelement(m)
+        if not aa.has_key("title"):
+            aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M")
+        aa["section"]="Facebook: "+u[1]
+        arts.append(aa)
+    nx=None
+    if js.has_key("paging") and js["paging"].has_key("next"):
+        nx=js["paging"]["next"]
+    return {"url": url, "next_page": nx,"articles": arts}
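
A minimal usage sketch for the new parser, not part of the commit: it feeds a hand-built, Graph-API-shaped payload through fbfeed. The payload fields are only the ones fbfeedelement actually reads, the fb://examplepage/feed URL is a made-up example, and it assumes a Python 2 environment where comp.fb and its own imports (src.fb, facebook, dateutil) resolve.

    # Sketch only: exercise comp.fb.fbfeed with a hand-built payload.
    import json
    from comp.fb import fbfeed

    raw = json.dumps({
        "data": [{
            "id": "1234567890_987654321",               # post id, reused for the fallback article URL
            "created_time": "2016-05-01T12:00:00+0000",
            "story": "Example Page shared a link.",     # becomes the title (no attachment title here)
            "message": "Hello from the example page."
        }],
        "paging": {"next": "https://graph.facebook.com/v2.5/next-page-cursor"}
    })

    result = fbfeed("fb://examplepage/feed", raw)
    print result["articles"][0]["title"]     # Example Page shared a link.
    print result["articles"][0]["section"]   # Facebook: examplepage
    print result["next_page"]                # the paging "next" URL from the payload
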
@@ -6,9 +6,7 @@ from datetime import datetime
 import re
 import urlparse
 from src import clogger, cfg
-from src.fb import graph
 from fixing import fix_link
-from facebook import GraphAPIError
 import feedparser
 
 #from fetching import downloadfile
@@ -26,7 +24,7 @@ def do_compile(tpe, cont):
     return cont
 
 from comp import rssfeed
+from comp import fbfeed
 def dummyarticle(url, raw):
     return {"url": url, "article":{"url": url, "section": "dummysection", "sourcetype": "dummy", "title":"dummytitle", "text": raw, "image": "fff", "author": "me", "published": None}}
 
@@ -213,33 +211,6 @@ def fsbizindex(url, raw):
 
 
 
-def fbfeed(url, raw):
-    js = json.loads(raw)
-    arts=[]
-    u=urlparse.urlparse(url)
-    for m in js["data"]:
-        aa={}
-        aa["url"]=urlparse.urlunsplit(("http","www.facebook.at",m["id"],"",""))
-        aa["published"] =parse(m["created_time"])
-        if m.has_key("message")==True:
-            aa["text"] = m["message"]
-        else:
-            try:
-                h=graph.get_object(id=m["id"].split("_")[1])
-                if h.has_key("description"):
-                    aa["text"]=h["description"]
-                else:
-                    aa["text"]=json.dumps()
-            except GraphAPIError:
-                aa["text"]=""
-        if m.has_key("story")==True:
-            aa["title"] = m["story"]
-        else:
-            aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M")
-        aa["section"]="Facebook: "+u[1]
-        arts.append(aa)
-    return {"url": url, "next_page": js["paging"]["next"],"articles": arts}
-
 def fsmbindex(url, raw):
     if raw is None:
         raise Error
@@ -40,10 +40,10 @@ def fetch_page(furl):
     if u[0] == '':
         furl=urlparse.urlunsplit(("http",u[1],u[2],u[3],u[4]))
     cc=CrawlCache.query.filter(CrawlCache.url==furl).filter(CrawlCache.fetched>ten_weeks_ago).first()
-    if cc is None:
+    if cc is None or u[0]=='fb': # no caching for Facebook
         clogger.debug("fetching url: "+ str(furl))
         if u[0]=='fb':
-            tx = json.dumps(graph.get_object(id=u[1]+u[2]))
+            tx = json.dumps(graph.get_object(id=u[1]+u[2]+"?fields=story,created_time,id,message,attachments"))
         else:
             tx=s.get(furl).text
         CrawlCache.store(furl,tx)
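
A side note on the fb scheme, not part of the commit: for an fb:// CrawlUrl the id handed to graph.get_object is simply netloc plus path plus the new fields suffix. A small sketch of that split, assuming Python 2.7's urlparse (which fetch_page itself relies on) and a made-up fb://examplepage/feed URL:

    # Sketch only: what fetch_page builds for an fb:// CrawlUrl.
    import urlparse
    u = urlparse.urlparse("fb://examplepage/feed")
    print u[0]         # fb  -> takes the Graph API branch and now also skips the cache
    print u[1] + u[2] + "?fields=story,created_time,id,message,attachments"
                       # examplepage/feed?fields=story,created_time,id,message,attachments
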
@@ -1,13 +1,14 @@
-from flask import Blueprint, jsonify, render_template, abort, redirect, url_for, request
+from flask import Blueprint, jsonify, render_template, abort, redirect, url_for, request, Response
 compiler_pages = Blueprint('compiler', __name__,
                            template_folder='.')
 
-from src.database import db_session2,init_db,read_json,init_db2
+from src.database import db_session2,init_db,read_json,init_db2,db_session
 from .models import CrawlUrl
 from .models import CrawlCache, CrawlCacheSchema
 from .models import CrawlUrlSchema
 from src import clogger
 from src.articles import Article
+from src.sections import Section
 #import mworker
 import flask
 import json
@@ -63,6 +64,15 @@ def urls_index_json():
     status=CrawlUrl.query.all()
     return jsonify(urls=status)
 
+@compiler_pages.route("/urls.lst")
+def urls_lst():
+    cus=CrawlUrl.query.all()
+    urls=map((lambda cu: ("id %d %s " % (cu.id, cu.url))),cus)
+    urls=map((lambda u: u+"\n"),urls)
+    return Response(urls,mimetype='text/plain')
+
+
+
 # show an existing CrawlUrl
 @compiler_pages.route("/urls/<int:id>")
 @compiler_pages.route("/urls/<int:id>.json")
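
A quick sketch of the /urls.lst body, not part of the commit: the two map calls above turn every CrawlUrl row into one plain-text line. The FakeCrawlUrl class and the ids/urls below are made up purely to show the format.

    # Sketch only: reproduce the urls_lst line format for two made-up rows.
    class FakeCrawlUrl(object):
        def __init__(self, id, url):
            self.id, self.url = id, url

    cus = [FakeCrawlUrl(3, "http://example.org/feed"), FakeCrawlUrl(7, "fb://examplepage/feed")]
    urls = map((lambda cu: ("id %d %s " % (cu.id, cu.url))), cus)
    urls = map((lambda u: u + "\n"), urls)
    print "".join(urls)
    # id 3 http://example.org/feed
    # id 7 fb://examplepage/feed
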
@@ -82,6 +92,18 @@ def urls_que_json(id):
     return jsonify(urls=cu, cache=cc)
 
 
+@compiler_pages.route("/urls/que.lst")
+def urls_que_lst():
+    # load all URLs
+    # cu=urls_que(id)
+    #cc=CrawlCache.query.filter(CrawlCache.url==cu.url)
+    cus=CrawlUrl.query.all()
+    urls=map((lambda cu: url_for('.urls_que_json',id=cu.id)),cus)
+    if request.values.has_key('url'):
+        urls=map((lambda u: request.values["url"]+ u),urls)
+    urls=map((lambda u: u+"\n"),urls)
+    return Response(urls,mimetype='text/plain')
+
 
 # que an existing CrawlUrl for fetching
 @compiler_pages.route("/urls/<int:id>/test")
@@ -137,3 +159,15 @@ def delete(id):
     db_session2.delete(cu)
     db_session2.commit()
     return jsonify(url={})
+
+
+@compiler_pages.route("/section/<int:id>/reset",methods=['GET'])
+@compiler_pages.route("/section/<int:id>/reset.json",methods=['GET'])
+def reset(id):
+    section=Section.query.get(id)
+    clogger.info(section)
+    for a in section.articles:
+        db_session.delete(a)
+    db_session.commit()
+    section=Section.query.get(id)
+    return jsonify(section=section,articles=section.articles)
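
A sketch of hitting the new reset endpoint once the app is running, not part of the commit: the host, port, absent blueprint prefix and section id 3 are all assumptions about the deployment, not something this diff pins down.

    # Sketch only: hypothetical call against a locally running instance.
    import requests
    r = requests.get("http://localhost:5000/section/3/reset.json")
    print r.json()["articles"]   # expected to be [] once the section's articles are deleted
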
@@ -17,6 +17,7 @@ def index():
     sections=Section.query.all()
     return jsonify(sections=sections)
 
+
 @section_pages.route("/<int:id>",methods=['PUT'])
 @section_pages.route("/<int:id>.json",methods=['PUT'])
 def update(id):
@@ -35,3 +36,5 @@ def get(id):
     clogger.info(section)
     # section=SectionSchema().dump(section)[0]
     return jsonify(section=section,articles=section.articles)
+
+