init commit

Andreas Stephanides
2017-01-14 12:23:04 +01:00
commit 8955bf17f5
32 changed files with 1555 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
/__init__.py~
/__init__.pyc
*.pyc
*~
config.cfg

101
__init__.py Normal file
View File

@@ -0,0 +1,101 @@
import os
import sys
package_directory = os.path.dirname(os.path.abspath(__file__))
from config import Config
cfg = Config(file(os.path.join(package_directory, 'config.cfg')))
#--------------- Logging
import logging
download_path="./cdw"
file_handler=logging.FileHandler(cfg.logfile)
file_handler.setLevel(logging.DEBUG)
stream_handler=logging.StreamHandler(sys.stdout)
clt=logging.getLogger('mylogger')
clt.setLevel(logging.DEBUG)
clt.addHandler(file_handler)
clt.addHandler(stream_handler)
clogger=clt
#----------------
lg=clt
from gevent import spawn, monkey
monkey.patch_all()
from .compiler import start_workers
#start_workers(1,1,1)
# Framework
from flask import Flask, jsonify, render_template, redirect, request,send_from_directory
# Cross-Origin Resource Sharing (CORS)
from flask_cors import CORS, cross_origin
#Authentication
from flask_jwt import JWT, jwt_required, current_identity
from src.models import Article,Section
from src.users import authenticate, identity
from datetime import datetime
app = Flask(__name__)
CORS(app)
app.config['LOGGER_NAME']='mylogger'
app.logger.setLevel(logging.DEBUG)
app.logger.info("Server Started")
app.config['SECRET_KEY'] = 'super-secret'
import flask
import json
from database import Base
from models import Article, CrawlUrl, CrawlCache
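# Monkey-patch the flask and stdlib JSON encoders so model instances are serialized via their __json__() method.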
flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, (Base, Article,Section, CrawlUrl,CrawlCache)) else None)
json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, (Base, Article,CrawlUrl,CrawlCache)) else None)
#bot.dosmth()
#lg.debug(bot.bot)
# Allow cross-origin requests (CORS headers)
@app.after_request
def after_request(response):
response.headers.add('Access-Control-Allow-Origin', '*')
if request.method == 'OPTIONS':
response.headers['Access-Control-Allow-Methods'] = 'DELETE, GET, POST, PUT'
headers = request.headers.get('Access-Control-Request-Headers')
if headers:
response.headers['Access-Control-Allow-Headers'] = headers
return response
from .articles.views import article_pages
from .sections.views import section_pages
from .compiler.views import compiler_pages
@app.route("/")
@app.route("/index")
@app.route("/home")
def home():
text="It work's, please do something"
return jsonify(text=text)
app.register_blueprint(article_pages, url_prefix='/articles')
app.register_blueprint(section_pages, url_prefix='/sections')
app.register_blueprint(compiler_pages, url_prefix='/compiler')
from src.bot import bot
if not app.debug or os.environ.get("WERKZEUG_RUN_MAIN") == "true":
bot.message_loop()
# ------------ Telegram Bot
#from bot import bot_queue
#@app.route('/bot', methods=['GET', 'POST'])
#def pass_update():
# bot_queue.put(request.data) # pass update to bot
# return 'OK'

2
articles/__init__.py Normal file
View File

@@ -0,0 +1,2 @@
from .model import Article
from .views import article_pages

139
articles/model.py Normal file
View File

@@ -0,0 +1,139 @@
from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime
from src.database import Base
from src.database import db_session
from marshmallow import Schema, fields
from src.sections.model import Section
#import json
import json
import flask
#json.JSONEncoder.default = lambda self,obj: (obj.isoformat() if isinstance(obj, datetime) else None)
import hashlib
#import clogger
import logging
from dateutil.parser import parse
#from crawler.compiler.mqueues import put_fetch_queue
from src import clogger
#json.JSONEncoder.default = lambda self,obj: (obj.isoformat() if isinstance(obj, datetime) else None)
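# Articles are deduplicated via an md5 fingerprint computed over url, title and published date.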
def calc_fingerprint(a):
return calc_fingerprint_h({"url": a.url, "title":a.title, "published": str(a.published_date)})
def calc_fingerprint_h(a):
if a["published"] is not None and a["published"]!= "None":
# clogger.info( "published:"+str(a["published"]))
if a["published"] is str:
pp=parse(a["published"])
else:
pp=a["published"]
else:
pp=""
#clogger.info( unicode(a["url"])+ unicode(a["title"])+unicode(pp))
h=hashlib.md5()
h.update(unicode(a["url"]))
h.update(a["title"].encode("utf-8"))
h.update(unicode(pp))
return h.hexdigest()
class ArticleSchema(Schema):
id=fields.Integer()
text=fields.String()
title=fields.String()
author=fields.String()
sourcetype =fields.String()
image =fields.String()
url =fields.String()
published_date=fields.DateTime()
date=fields.DateTime()
first_fetched=fields.DateTime()
section_id=fields.Integer()
class Article(Base):
__tablename__ = 'articles'
id = Column(Integer, primary_key=True)
parent_id= Column(Integer)
url = Column(String(250))
is_primary = Column(Boolean)
fingerprint = Column(String(250),unique=True)
hash = Column(String(250))
last_fetched = Column(DateTime)
first_fetched=Column(DateTime)
published_date = Column(DateTime)
date = Column(DateTime)
text = Column(Text)
title = Column(String(250))
author = Column(String(250))
section = relationship("Section")
section_id=Column(Integer, ForeignKey('sections.id'))
sourcetype = Column(String(250))
image=Column(String(250))
def __init__(self, url=None,title=None, published_date=None):
self.url=url
self.title=title
self.published_date=published_date
self.first_fetched=datetime.now()
def __json__(self):
return ArticleSchema().dump(self)[0]
def dict(self):
return {"id": str(int(self.id)), "title": self.title, "text": self.text, "author": self.author, "section":self.section, "sourcetype": self.sourcetype, "last_fetched": self.last_fetched, "first_fetched": self.first_fetched, "published_date": self.published_date, "date": self.date,"image": self.image, "url": self.url}
# @classmethod
# def sections(self):
# sects=db_session.query(Article.section).distinct().all()
# for i in range(len(sects)):
# sects[i]=sects[i][0]
# return sects
@classmethod
def from_hash(cls, a):
fp = calc_fingerprint_h(a)
aa = Article.query.filter(Article.fingerprint==fp).first()
if aa is None:
clogger.debug( "new Article")
if a["published"] is not None:
if a["published"] is str:
pd= parse(a["published"])
else:
pd=a["published"]
else:
pd=None
aa=Article(a["url"], a["title"],pd)
aa.fingerprint = calc_fingerprint(aa)
db_session.add(aa)
db_session.commit()
return aa
def process_hash(self, a):
self.text=a["text"].decode('utf8')
if "image" in a:
self.image=a["image"]
if "author" in a:
self.author=a["author"]
if "title" in a:
self.title=a["title"]
if "author" in a:
self.author=a["author"]
if "sourcetype" in a:
self.sourcetype=a["sourcetype"]
if "section" in a:
self.section=Section.find_or_create(a["section"])
# if "last_fetched" in a:
# self.last_fetched=a["last_fetched"]
if "published_date" in a:
self.published_date=a["published_date"]
#flask.json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None)
#json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None)

65
articles/views.py Normal file
View File

@@ -0,0 +1,65 @@
from flask import Blueprint, jsonify, render_template, abort, redirect, url_for, request
article_pages = Blueprint('articles', __name__)
from .model import Article
from .model import ArticleSchema
#import flask
from datetime import datetime
import json
#flask.json.JSONEncoder.default = lambda self,obj: (obj.isoformat() if isinstance(obj, datetime) else None)
#flask.json.JSONEncoder.default = lambda self,obj: ((obj.dict()) if isinstance(obj, Article) else None)
from src import clogger
import json
from src.database import db_session, read_json, Base
from src.models import CrawlUrl
import flask
#flask.json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None)
flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, (Base, Article,CrawlUrl)) else None)
@article_pages.route("/")
@article_pages.route("")
@article_pages.route(".json")
def index():
articles=Article.query.all()
return jsonify(articles=articles)
@article_pages.route("/<int:id>",methods=['PUT'])
@article_pages.route("/<int:id>.json",methods=['PUT'])
def update(id):
article=Article.query.get(id)
clogger.info(request.data)
a=request.get_json()
article.text=a["text"]
db_session.commit()
return jsonify(article=article)
@article_pages.route("/<int:id>",methods=['GET'])
@article_pages.route("/<int:id>.json",methods=['GET'])
def get(id):
article=Article.query.get(id)
clogger.info(article)
# article=ArticleSchema().dump(article)[0]
return jsonify(article=article)
@article_pages.route("/<int:id>",methods=['DELETE'])
@article_pages.route("/<int:id>.json",methods=['DELETE'])
def delete(id):
article=Article.query.get(id)
clogger.info(id)
if article is not None:
db_session.delete(article)
db_session.commit()
return jsonify(article={})
@article_pages.route("/",methods=['POST'])
@article_pages.route("",methods=['POST'])
@article_pages.route(".json",methods=['POST'])
def create():
article=Article()
a=read_json(request)
article.text=a["article"]["text"]
db_session.add(article)
db_session.commit()
return jsonify(article=article)

1
bot/__init__.py Normal file
View File

@@ -0,0 +1 @@
from .bot import bot

140
bot/bot.py Normal file
View File

@@ -0,0 +1,140 @@
import telepot
import datetime
import time
import json
from Queue import Queue
#import os
from src import lg,cfg
#from gevent import spawn
from telepot.namedtuple import InlineKeyboardMarkup, InlineKeyboardButton
from telepot.delegate import (
per_chat_id, pave_event_space, include_callback_query_chat_id, create_open, per_inline_from_id )
from src.compiler import CrawlUrl
from gevent import spawn, monkey, Greenlet
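# Small helpers that build telepot inline keyboards from plain dicts of the form
# {"text": ..., "callback_data": ...}.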
def IKB(h):
return InlineKeyboardButton(text=h["text"], callback_data=h["callback_data"])
def IKB2(h):
return [IKB(h)]
def IKM(h):
return InlineKeyboardMarkup(inline_keyboard=[ map(IKB,h)])
def IKM2(h):
return InlineKeyboardMarkup(inline_keyboard= map(IKB2,h))
def query_que_url(url):
print(json.dumps(url))
return {"text": url.url, "callback_data":"/urls/"+str(url.id)+"/que"}
def handle_urls(handler, cmd):
curls=CrawlUrl.query.all()
#sent=handler.sender.sendMessage(json.dumps(curls))
kb= IKM2(map(query_que_url,curls))
print json.dumps(cmd)
if len(cmd) >= 4 and cmd[3]=="que":
sent=handler.sender.sendMessage("I qued url "+str(cmd[2]), reply_markup=None)
else:
sent=handler.sender.sendMessage("que?", reply_markup=kb)
handler._edit_msg_ident = telepot.message_identifier(sent)
handler._editor = telepot.helper.Editor(handler.bot, sent)
def execute_command(handler,cmd,msg=None):
if cmd[1]=='urls':
handle_urls(handler,cmd)
def handle(handler,msg):
content_type,chat_type,chat_id = telepot.glance(msg)
if msg.has_key('text'):
if msg['text'][0]=='/':
cmd = msg['text'].split("/")
execute_command(handler, cmd, msg)
if msg.has_key('data'):
lg.debug(msg['data'])
class InlineHandler(telepot.helper.InlineUserHandler, telepot.helper.AnswererMixin):
def __init__(self, *args, **kwargs):
super(InlineHandler, self).__init__(*args, **kwargs)
def on_inline_query(self, msg):
def compute_answer():
query_id, from_id, query_string = telepot.glance(msg, flavor='inline_query')
print(self.id, ':', 'Inline Query:', query_id, from_id, query_string)
articles = [{'type': 'article',
'id': 'abc', 'title': query_string, 'message_text': query_string}]
return articles
self.answerer.answer(msg, compute_answer)
def on_chosen_inline_result(self, msg):
from pprint import pprint
pprint(msg)
result_id, from_id, query_string = telepot.glance(msg, flavor='chosen_inline_result')
print(self.id, ':', 'Chosen Inline Result:', result_id, from_id, query_string)
class FetBot(telepot.helper.ChatHandler):
def __init__(self, *args, **kwargs):
# super(FetBot,self).__init__(*args,**kwargs)
super(FetBot,self).__init__( *args,**kwargs)
_editor=None
_edit_msg_ident=None
keyboard=IKM([{"text":"START","callback_data": "start"},
{"text":"Don't Start","callback_data":"notstart"}
])
keyboard =InlineKeyboardMarkup(
inline_keyboard=[[
InlineKeyboardButton(text='START', callback_data='start'),
InlineKeyboardButton(text='START', callback_data='start')
]]
)
def on_chat_message(self,msg):
handle(self,msg)
content_type,chat_type,chat_id = telepot.glance(msg)
lg.debug(content_type)
if content_type=="photo" or content_type=="sticker":
lg.debug("try to download %s" % msg[content_type][-1]["file_id"])
f=self.bot.getFile(msg[content_type][-1]['file_id'])
lg.debug(f)
self.bot.download_file(f['file_id'], "dwn/" + f['file_path'])
# self.bot.getFile(msg['photo'][-1]['file_id']), "dwn")
#self._cancel_last()
#sent=self.sender.sendMessage("Hello World", reply_markup=self.keyboard)
#self._editor = telepot.helper.Editor(self.bot, sent)
#self._edit_msg_ident = telepot.message_identifier(sent)
def on_callback_query(self, msg):
query_id, from_id, query_data = telepot.glance(msg, flavor='callback_query')
lg.debug(json.dumps(msg))
self._cancel_last()
if query_data[0]=='/':
cmd = query_data.split("/")
execute_command(self, cmd, msg)
# self.sender.sendMessage("Danke")
self.bot.answerCallbackQuery(query_id, text='Ok. But I am going to keep asking.')
#self.bot.answerCallbackQuery(query_id)
def _cancel_last(self):
if self._editor:
self._editor.editMessageReplyMarkup(reply_markup=None)
self._editor = None
self._edit_msg_ident = None
bot=None
bot = telepot.DelegatorBot(cfg.token, [include_callback_query_chat_id(pave_event_space())(per_chat_id(),create_open,FetBot,timeout=20),
pave_event_space()(
per_inline_from_id(), create_open, InlineHandler, timeout=10),
])

10
compiler/README Normal file
View File

@@ -0,0 +1,10 @@
This is the API for the compiler
The following commands are implemented:
GET doc: This documentation!
GET initdb: Initialize the database, WARNING: existing data will be deleted
POST urls:
Expects data in the format {"url": {"type": typ, "url": "someurl.html"}}
Adds this url to the list of monitored urls
IN PROCESS:
GET urls: All urls that are to be monitored
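For illustration only, a minimal client-side sketch (not part of this commit) of registering a url via POST urls, assuming the Flask app is reachable on localhost:5000 and using the field names from CrawlUrlSchema ("tpe", "url"); the example url and type are hypothetical:

import requests
# hypothetical payload; the blueprint is mounted under /compiler in src/__init__.py
payload = {"url": {"tpe": "fetindex", "url": "http://example.org/news.html"}}
r = requests.post("http://localhost:5000/compiler/urls", json=payload)
print(r.json())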

1
compiler/README.html Normal file
View File

@@ -0,0 +1 @@
sdf

15
compiler/__init__.py Normal file
View File

@@ -0,0 +1,15 @@
#from mprocess import do_process, process_urllist
#from compiler import do_compile
#from mworker import run_fetch, run_process, run_compile
# include models for final objects
from src.models import Article
# starting workers
from mworker import start_workers
from models import add_url, CrawlUrl
#start_workers(1,1,1)
from fetching import announce_articleid

1
compiler/comp/__init__.py Normal file
View File

@@ -0,0 +1 @@
from rss import rssfeed

1
compiler/comp/__init__py Normal file
View File

@@ -0,0 +1 @@
from rss import rssfeed

8
compiler/comp/rss.py Normal file
View File

@@ -0,0 +1,8 @@
import feedparser
def rssfeed(url,raw):
al=[]
f=feedparser.parse(raw)
for e in f['entries']:
al.append(e['link'])
return {"url": url, "next_page": None, "article_links": al, "objecttype":"index"}

153
compiler/compile.py Normal file
View File

@@ -0,0 +1,153 @@
from bs4 import BeautifulSoup
import crawler.objects.models
#from crawler.objects.models import Object
from dateutil.parser import parse
from datetime import datetime
import re
def hello():
return "hello"
def fetarticle(o):
sp=BeautifulSoup(o.raw_fixed)
d={}
h=sp.find("h1", {"itemprop": "name"})
d["title"]=unicode(h.text).strip()
h=sp.find("div", {"itemprop": "articleBody"})
if h is not None:
d["text"]=(h.encode_contents()).strip()
else:
d["text"]=""
d["url"]=o.url
h=sp.find("span", {"itemprop": "author"})
if h is not None:
d["author"]=h.text.strip()
h=sp.find("span", {"itemprop": "articleSection"})
if h is not None:
d["section"]= "FET - " + h.text.strip()
h=sp.find("span", {"itemprop": "datePublished"})
if h is not None:
d["published"]=parse(h.encode_contents().strip())
h=sp.find("meta", {"property": "og:image"})
if h is not None:
d["image"]=h.attrs["content"]
hh=sp.find_all("div", {"class":"media"})
for h in hh:
if h is not None:
h=h.find("div", {"class": "pull-left"})
if h is not None:
h=h.find("a")
if h is not None:
d["image2"]=crawler.objects.models.download_file(h.attrs["href"])
return {"article": d}
def fsarcharticle(o):
sp=BeautifulSoup(o.raw_fixed)
d={}
h=sp.find("h1", {"class": "title"})
if h is not None:
d["title"]=h.text.strip()
d["url"]=o.url
d["published"]=None
h=sp.find("article")
h=h.find("div", {"class": "content"})
d["text"]=h.encode_contents().strip()
h=sp.find("article").find("h1", {"class": "title"})
if h is not None:
d["title"]=h.text.strip()
else:
d["title"]=""
d["image"]=""
d["sourcetype"]="fsarcharticle"
d["section"]="fsarch"
d["author"]=None
return {"article": d}
def fetindex(o):
# if type(o) is not Object:
# raise TypeError
if o.raw is None:
raise ValueError("raw is None")
print "compile_fetindex"
html=BeautifulSoup(o.raw_fixed)
h = html.find("li", {"class": "next_page" })
if h is not None:
nl=h.find("a")
nl=crawler.objects.models.fix_link(o.url,nl.attrs["href"])
else:
nl=None
h= html.find("ul", {"id": "neuigkeiten"})
links=h.find_all("a")
al = []
for t in links:
al.append(t.attrs["href"])
return {"url": o.url, "next_page": nl, "article_links": al, "objecttype": "index" }
def fsarchindex(o):
if o.raw is None:
raise ValueError("raw is None")
html=BeautifulSoup(o.raw_fixed)
h= html.find("article")
print unicode(h)
links=h.find_all("a")
al = []
fl=[]
for t in links:
url=t.attrs["href"]
if re.search("fachschaftarchitektur\.at", url):
al.append(t.attrs["href"])
if re.search("facebook\.com/events", url):
fl.append(t.attrs["href"])
return {"url": o.url, "next_page": None, "article_links": al, "facebook_links": fl,"objecttype":"index"}
def fsbizindex(o):
if o.raw is None:
raise ValueError("raw is None")
print "compile_fsbizindex"
html=BeautifulSoup(o.raw_fixed)
h= html.find("section", {"id": "primary"})
links=h.find_all("h1", {"class": "entry-title"})
al = []
for t in links:
al.append(t.find("a").attrs["href"])
return {"url": o.url,"article_links": al,"objecttype": "index"}
def fsmbindex(o):
if o.raw is None:
raise ValueError("raw is None")
html=BeautifulSoup(o.raw_fixed)
h= html.find("a",{"class": "next"})
if h is not None:
np=h.attrs["href"]
else:
np=None
h=html.find("div", {"id": "main"}).find("div", {"class": "inside"}).find("div", {"class": "mod_newslist"})
if h is not None:
ats=h.find_all("div",{"class": "block"})
articles=[]
for a in ats:
aa={}
h=a.find("h3")
if h is not None:
aa["title"] = h.text.strip()
h=a.find("div", {"class": "ce_text"})
if h is not None:
aa["text"] = (h.encode_contents()).strip()
aa["info"]=[]
hh=a.find_all("p", {"class": "info"},recursive=False)
for h in hh:
aa["info"].append(unicode(h.text))
if re.search(r'von', str(h)):
h1= re.sub(r'[^\d]*(\d+)\.(\d+)\.(\d+)[^\d]*', r'\3/\2/\1',unicode(h.text))
aa["published"] =parse(h1.strip())
aa["author"]=re.sub(r'^.*von(.*)$', r'\1',unicode(h.text)).strip() #h.text + "--" #+ re.sub(r'[^\d]*(\d+)\.(\d+)\.(\d+)[^\d]*', r'\3/\2/\1',hh)
aa["section"]="FSMB"
articles.append(aa)
return {"url": o.url, "next_page": np, "articles": articles,"objecttype": "articles"}

258
compiler/compiler.py Normal file
View File

@@ -0,0 +1,258 @@
from bs4 import BeautifulSoup
#import crawler.objects.models
#from crawler.objects.models import Object
from dateutil.parser import parse
from datetime import datetime
import re
import urlparse
from src import clogger, cfg
from src.fb import graph
from fixing import fix_link
from facebook import GraphAPIError
#from fetching import downloadfile
import json
import feedparser
def do_compile(tpe, cont):
if type(cont) != dict:
clogger.error("Type Error for do compile for :"+str(cont["url"]))
# Start compiling a generic object
if "url" not in cont:
clogger.error("no url can't compile "+tpe)
else:
clogger.debug("compile: type:"+str(tpe)+ "| "+ str(cont["url"]))
if tpe in compiler:
cont=compiler[tpe](cont["url"], cont["raw"])
return cont
from comp import rssfeed
def dummyarticle(url, raw):
return {"url": url, "article":{"url": url, "section": "dummysection", "sourcetype": "dummy", "title":"dummytitle", "text": raw, "image": "fff", "author": "me", "published": None}}
def htufeed(url,raw):
al=[]
f=feedparser.parse(raw)
for e in f['entries']:
al.append(e['link'])
return {"url": url, "next_page": None, "article_links": al, "objecttype":"index"}
def htuarticle(url,raw):
sp=BeautifulSoup(raw)
d={}
h=sp.find("meta", {"property": "og:image"})
if h is not None:
d["image"]=h.attrs["content"]
d["image2"]=d["image"]
h=sp.find("div", {"class": "patternRevInfo"})
if h is not None:
# clogger.debug(h.text.strip())
h1= re.sub(r'.*- (\d+) ([a-zA-Z]+) (\d+) - ([:\d]+)[^\d]*', r'\3/\2/\1 \4',unicode(h.text.strip()))
# clogger.debug(h1)
d["published"]=parse(h1)
# clogger.debug(parse(h1))
# clogger.debug(d["published"])
h=h.find("a")
if h is not None:
d["author"]=h.text.strip()
h=sp.find("div", {"class": "foswikiTopic"})
h1=h.find("h4")
if h1 is not None:
d["title"]= h1.text.strip()
h1.extract() # remove head
else:
h1=sp.find("meta", {"name": "WEBTOPIC"})
d["title"]= h1.attrs["content"]
d["text"]=(h.encode_contents()).strip()
d["section"]="HTU"
d["url"]=url
# clogger.debug(d)
return {"article": d}
def fetarticle(url, raw):
sp=BeautifulSoup(raw)
d={}
h=sp.find("h1", {"itemprop": "name"})
d["title"]=unicode(h.text).strip()
h=sp.find("div", {"itemprop": "articleBody"})
if h is not None:
d["text"]=(h.encode_contents()).strip()
else:
d["text"]=""
d["url"]=url
h=sp.find("span", {"itemprop": "author"})
if h is not None:
d["author"]=h.text.strip()
h=sp.find("span", {"itemprop": "articleSection"})
if h is not None:
d["section"]= "FET - " + h.text.strip()
h=sp.find("span", {"itemprop": "datePublished"})
if h is not None:
d["published"]=parse(h.encode_contents().strip())
h=sp.find("meta", {"property": "og:image"})
if h is not None:
d["image"]=h.attrs["content"]
d["image2"]=d["image"]
# hh=sp.find_all("div", {"class":"media"})
# for h in hh:
# if h is not None:
# h=h.find("div", {"class": "pull-left"})
# if h is not None:
# h=h.find("a")
# if h is not None:
# d["image2"]=downloadfile(fix_link(url,h.attrs["href"]))
return {"article": d}
def fsarcharticle(url, raw):
sp=BeautifulSoup(raw)
d={}
h=sp.find("h1", {"class": "title"})
if h is not None:
d["title"]=h.text.strip()
d["url"]=url
d["published"]=None
h=sp.find("article")
if h is not None:
h=h.find("div", {"class": "content"})
d["text"]=h.encode_contents().strip()
h=sp.find("article")
if h is not None:
h=h.find("h1", {"class": "title"})
if h is not None:
d["title"]=h.text.strip()
else:
d["title"]=""
d["image"]=""
d["sourcetype"]="fsarcharticle"
d["section"]="fsarch"
d["author"]=None
return {"article": d}
def fetindex(url, raw):
if raw is None:
raise ValueError("raw is None")
# clogger.debug("compile_fetindex: "+str(url))
html=BeautifulSoup(raw)
h = html.find("li", {"class": "next_page" })
if h is not None:
nl=h.find("a")
nl=fix_link(url,nl.attrs["href"])
else:
nl=None
h= html.find("ul", {"id": "neuigkeiten"})
al = []
if h is not None:
links=h.find_all("a")
for t in links:
al.append(t.attrs["href"])
return {"url": url, "next_page": nl, "article_links": al, "objecttype": "index" }
def fsarchindex(url, raw):
if raw is None:
raise ValueError("raw is None")
html=BeautifulSoup(raw)
h= html.find("article")
print unicode(h)
links=h.find_all("a")
al = []
fl=[]
for t in links:
url=t.attrs["href"]
if re.search("fachschaftarchitektur\.at", url):
al.append(t.attrs["href"])
if re.search("facebook\.com/events", url):
fl.append(t.attrs["href"])
return {"url": url, "next_page": None, "article_links": al, "facebook_links": fl,"objecttype":"index"}
def fsbizindex(url, raw):
if raw is None:
raise ValueError("raw is None")
print "compile_fsbizindex"
html=BeautifulSoup(raw)
h= html.find("section", {"id": "primary"})
links=h.find_all("h1", {"class": "entry-title"})
al = []
for t in links:
al.append(t.find("a").attrs["href"])
return {"url": url,"article_links": al,"objecttype": "index"}
def fbfeed(url, raw):
js = json.loads(raw)
arts=[]
u=urlparse.urlparse(url)
for m in js["data"]:
aa={}
aa["url"]=urlparse.urlunsplit(("http","www.facebook.at",m["id"],"",""))
aa["published"] =parse(m["created_time"])
if m.has_key("message")==True:
aa["text"] = m["message"]
else:
try:
h=graph.get_object(id=m["id"].split("_")[1])
if h.has_key("description"):
aa["text"]=h["description"]
else:
aa["text"]=json.dumps()
except GraphAPIError:
aa["text"]=""
if m.has_key("story")==True:
aa["title"] = m["story"]
else:
aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M")
aa["section"]="Facebook: "+u[1]
arts.append(aa)
return {"url": url, "next_page": js["paging"]["next"],"articles": arts}
def fsmbindex(url, raw):
if raw is None:
raise ValueError("raw is None")
html=BeautifulSoup(raw)
h= html.find("a",{"class": "next"})
if h is not None:
np=h.attrs["href"]
else:
np=None
h=html.find("div", {"id": "main"}).find("div", {"class": "inside"}).find("div", {"class": "mod_newslist"})
if h is not None:
ats=h.find_all("div",{"class": "block"})
articles=[]
for a in ats:
aa={}
h=a.find("h3")
if h is not None:
aa["title"] = h.text.strip()
h=a.find("div", {"class": "ce_text"})
if h is not None:
aa["text"] = (h.encode_contents()).strip()
aa["info"]=[]
hh=a.find_all("p", {"class": "info"},recursive=False)
for h in hh:
aa["info"].append(unicode(h.text))
if re.search(r'von', str(h)):
h1= re.sub(r'[^\d]*(\d+)\.(\d+)\.(\d+)[^\d]*', r'\3/\2/\1',unicode(h.text))
aa["published"] =parse(h1.strip())
aa["author"]=re.sub(r'^.*von(.*)$', r'\1',unicode(h.text)).strip() #h.text + "--" #+ re.sub(r'[^\d]*(\d+)\.(\d+)\.(\d+)[^\d]*', r'\3/\2/\1',hh)
aa["section"]="FSMB"
articles.append(aa)
return {"url": url, "next_page": np, "articles": articles,"objecttype": "articles"}
compiler = {"fetindex": fetindex, "fetarticle": fetarticle, "fsarchindex": fsarchindex, "fsarcharticle": fsarcharticle, "fsmbindex": fsmbindex, "fsbizindex": fsbizindex, "dummyarticle": dummyarticle,"htuarticle": htuarticle, "htufeed": htufeed, "fbfeed": fbfeed, "fschfeed": rssfeed}
compiler = cfg.compiler
for i in compiler:
compiler[i]=eval(compiler[i])
article_types={"fetindex" : "fetarticle", "fsarchindex": "fsarcharticle", "fsbizindex": "fsbizarticle", "dummyindex": "dummyarticle", "htufeed": "htuarticle"}

67
compiler/fetching.py Normal file
View File

@@ -0,0 +1,67 @@
from requests import session
s=session()
from src import package_directory, download_path,cfg
from os import path, makedirs
import os
import json
from gevent import spawn
from src import clogger
from src.fb import graph
from hashlib import md5
import errno
import urlparse
def announce_articleid(id):
for u in cfg.announcearticle_url:
s.get( u % id)
def downloadfile(url):
relative_name=path.join("downloads",str(md5(url).hexdigest()),url.split('/')[-1])
local_filename = path.join(download_path,relative_name)
if not os.path.exists(os.path.dirname(local_filename)):
try:
os.makedirs(os.path.dirname(local_filename))
except OSError as exc: # Guard against race condition
if exc.errno != errno.EEXIST:
raise
if not path.exists(local_filename):
spawn(fetch_load_file, url, local_filename)
return relative_name
from models import CrawlCache
from datetime import datetime, timedelta
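# fetch_page returns the raw body for a url, serving it from CrawlCache when a copy
# younger than cfg.cache_days days exists; urls with the fb scheme go through the Graph API.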
def fetch_page(furl):
current_time = datetime.utcnow()
cache_cutoff = current_time - timedelta(days=cfg.cache_days)
u=urlparse.urlparse(furl)
if u[0] == '':
furl=urlparse.urlunsplit(("http",u[1],u[2],u[3],u[4]))
cc=CrawlCache.query.filter(CrawlCache.url==furl).filter(CrawlCache.fetched>cache_cutoff).first()
if cc is None:
clogger.debug("fetching url: "+ str(furl))
if u[0]=='fb':
tx = json.dumps(graph.get_object(id=u[1]+u[2]))
else:
tx=s.get(furl).text
CrawlCache.store(furl,tx)
else:
#if furl is not None:
# clogger.debug("cache hit")
tx=cc.raw
return tx
def fetch_load_file(furl, path):
try:
clogger.info("Downloading "+ str(furl))
r = s.get(furl, stream=True)
f = open(path, 'wb')
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
f.close()
except Exception, e:
#clogger.error("Error Occured during fetching:"+str(furl))
clogger.error(e,exc_info=True)

37
compiler/fixing.py Normal file
View File

@@ -0,0 +1,37 @@
from bs4 import BeautifulSoup
from urlparse import urlparse, urlunparse, urljoin
from fetching import downloadfile
import bleach
def fix_link(url, link):
r= urlparse(link)
if r.scheme is None or r.scheme == '':
return urljoin(url,link)
else:
return link
def fix_file(url, link):
u=fix_link(url,link)
return downloadfile(u)
def load_file(url, link):
return fix_file(url,link)
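# fix_html sanitizes article html with bleach, downloads referenced images via fix_file,
# rewrites relative links to absolute ones and strips script tags.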
def fix_html(html, baseurl):
html=bleach.clean(html, tags=['b','p','span','a','img','div','br','strong','ul','li'], strip=True)
sp=BeautifulSoup(html)
images=sp.find_all("img")
for t in images:
if "src" in t.attrs and t.attrs["src"] is not None:
t.attrs["src"]=fix_file(baseurl,t.attrs["src"])
links=sp.find_all("a")
for t in links:
if "href" in t.attrs:
t.attrs["href"]=fix_link(baseurl, t.attrs["href"])
for t in sp.find_all("script"):
t.extract()
b=sp.find("base")
if b is not None:
b.attrs["href"]=""
return sp

75
compiler/models.py Normal file
View File

@@ -0,0 +1,75 @@
from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text
from datetime import datetime
from src.database import Base2
from src.database import db_session2
from mqueues import put_fetch_queue
from marshmallow import Schema,fields,ValidationError
import json
import flask
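# add_url registers a CrawlUrl (creating it if necessary) and immediately queues it for fetching.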
def add_url(tpe, url):
cu=CrawlUrl.find_or_create(tpe,url)
db_session2.add(cu)
db_session2.commit()
cu.schedule()
class CrawlUrlSchema(Schema):
id=fields.Integer()
tpe=fields.String()
url=fields.String()
last_fetched=fields.DateTime()
fetched = fields.DateTime()
class CrawlUrl(Base2):
__tablename__='crawlurls'
id = Column(Integer, primary_key=True)
tpe=Column(String(250))
url = Column(String(250))
last_fetched = Column(DateTime)
def fetched(self):
return CrawlCache.query.filter(CrawlCache.url==self.url).first()
@classmethod
def find_or_create(self, tpe, url):
aa = CrawlUrl.query.filter(CrawlUrl.url==url).filter(CrawlUrl.tpe==tpe).first()
if aa is None:
aa=CrawlUrl(tpe,url)
return aa
def schedule(self):
put_fetch_queue((0, self.tpe, self.url))
def __init__(self, tpe, url):
self.url=url
self.tpe=tpe
def __json__(self):
return CrawlUrlSchema().dump(self)[0]
class CrawlCacheSchema(Schema):
id=fields.Integer()
raw=fields.String()
url=fields.String()
fetched=fields.DateTime()
class CrawlCache(Base2):
__tablename__='crawlcache'
id = Column(Integer, primary_key=True)
url=Column(String(250))
fetched=Column(DateTime)
raw=Column(Text)
def __init__(self, url,rw):
self.url=url
self.raw=rw
self.fetched=datetime.utcnow()
def __json__(self):
return CrawlCacheSchema().dump(self)[0]
@classmethod
def store(cls, url, rw):
cc=CrawlCache(url,rw)
db_session2.add(cc)
db_session2.commit()
#flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, CrawlUrl) else None)

74
compiler/mprocess.py Normal file
View File

@@ -0,0 +1,74 @@
from src import clogger # Logger for crawler
from src.models import Article # Article model
from datetime import datetime
from src.database import db_session
from mqueues import fetch_queue, compile_queue, put_fetch_queue
from fetching import fetch_page, downloadfile, announce_articleid
from fixing import fix_html, fix_file
from compiler import article_types
from fixing import fix_link
# process_article expects a hash with raw data for the article and stores it in an
# Article object in the database; the fingerprint check is intended to prevent duplicates.
def is_article_hash(h):
return "text" in h and "url" in h and "sourcetype" in h and "section" in h
def process_article(art):
if not is_article_hash(art):
clogger.error("Invalid article hash:" + str(art))
aa=None
else:
art["text"]=fix_html(art["text"],art["url"])
if "image" in art:
art["image"]=fix_file(art["url"], art["image"])
clogger.info(art)
aa = Article.from_hash(art)
aa.process_hash(art)
aa.last_fetched=datetime.now()
aa.sourcetype=art["sourcetype"]
db_session.add(aa)
db_session.commit()
clogger.debug("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8")))
# announce_articleid(aa.id)
return aa
# process a single found url
def process_url(url,tpe, parent_url):
#clogger.debug("process URL of type "+ tpe + ": " + url)
if parent_url is not None:
url=fix_link(parent_url, url)
put_fetch_queue((0,tpe,url))
# process a url list
def process_urllist(urllist, tpe, parent_url):
for u in urllist:
process_url(u,tpe, parent_url)
def do_process(tpe, cont):
urllist=[]
# clogger.debug("process :" + str(cont))
if "article_links" in cont:
process_urllist(cont["article_links"], article_types[tpe], cont["url"])
if "index_links" in cont:
process_urllist(cont["index_links"], tpe , cont["url"])
if "next_page" in cont and cont["next_page"] is not None:
process_url(cont["next_page"],tpe, cont["url"])
if "article" in cont:
art=cont["article"]
art["sourcetype"]=tpe
process_article(art)
if "articles" in cont:
clogger.debug("articles")
for a in cont["articles"]:
if "title" in a:
a["sourcetype"]=tpe
if a.has_key("url")==False:
a["url"]=cont["url"]
process_article(a)
return

8
compiler/mqueues.py Normal file
View File

@@ -0,0 +1,8 @@
from gevent.queue import Queue, JoinableQueue
fetch_queue = Queue()
compile_queue = Queue()
process_queue = Queue()
def put_fetch_queue(o):
fetch_queue.put(o)

58
compiler/mworker.py Normal file
View File

@@ -0,0 +1,58 @@
from mqueues import fetch_queue, compile_queue, process_queue
from compiler import do_compile
from mprocess import do_process
from fetching import fetch_page
from gevent import spawn
from itertools import repeat
from src import clogger
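# start_workers spawns f fetch, c compile and p process greenlets, each looping over its queue forever.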
def start_workers(f,c,p):
for _ in range(f):
clogger.debug("spawn fetchworker")
spawn(work_fetch)
for _ in range(c):
spawn(work_compile)
for _ in range(p):
spawn(work_process)
def work_fetch():
while True:
run_fetch()
def work_process():
while True:
run_process()
def work_compile():
while True:
run_compile()
def queue_url(tpe, url):
fetch_queue.put((0,tpe,url))
# fetch a page from the url list
def run_fetch():
tc, tpe, url = fetch_queue.get()
if tpe != "dummyarticle" and tpe != "dummyindex":
rw=fetch_page(url)
else:
rw="<p> dummytext</p>"
compile_queue.put((0, tpe, {"url": url, "sourcetype": tpe, "raw": rw}))
return rw
# fetch_queue.task_done()
# compile something from the compile queue
def run_compile():
tc,tpe,h = compile_queue.get()
h=do_compile(tpe,h)
process_queue.put((0,tpe, h))
return h
# compile_queue.task_done()
def run_process():
tc,tpe,h = process_queue.get()
do_process(tpe, h)
return h
# process_queue.task_done()

146
compiler/views.py Normal file
View File

@@ -0,0 +1,146 @@
from flask import Blueprint, jsonify, render_template, abort, redirect, url_for, request
compiler_pages = Blueprint('compiler', __name__,
template_folder='.')
from src.database import db_session2,init_db,read_json,init_db2
from .models import CrawlUrl
from .models import CrawlCache, CrawlCacheSchema
from .models import CrawlUrlSchema
from src import clogger
from src.articles import Article
#import mworker
import flask
import json
import mworker
from compiler import do_compile
from fetching import fetch_page
#flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, (Article,CrawlUrl)) else None)
@compiler_pages.route("/")
@compiler_pages.route("")
@compiler_pages.route(".json")
def index():
status="For documentation goto /doc"
return jsonify(status=status)
@compiler_pages.route("/doc")
@compiler_pages.route("/doc.json")
def doc():
return render_template("README")
# return jsonify(status=render_template("README"))
#
@compiler_pages.route("/initdb")
@compiler_pages.route("/initdb.json")
def initdb_json():
init_db() # initialize the database
status="Datenbank Neu initialisiert"
return jsonify(status=status)
@compiler_pages.route("/initdb2")
@compiler_pages.route("/initdb2.json")
def initdb_json2():
init_db2() # initialize the crawler database
status="Datenbank Neu initialisiert"
return jsonify(status=status)
@compiler_pages.route("/start")
@compiler_pages.route("/start.json")
def start_json():
mworker.start_workers(1,1,1) # start the workers
status="Worker gestartet"
return jsonify(status=status)
@compiler_pages.route("/urls")
@compiler_pages.route("/urls.json")
def urls_index_json():
# load all urls
status=CrawlUrl.query.all()
return jsonify(urls=status)
# show an existing CrawlUrl
@compiler_pages.route("/urls/<int:id>")
@compiler_pages.route("/urls/<int:id>.json")
def urls_json(id):
# load the url and its cached raw content
status=CrawlUrl.query.get(id)
cc=CrawlCache.query.filter(CrawlCache.url==status.url).first()
return jsonify(urls=status, cache=cc.__json__())
# queue an existing CrawlUrl for fetching
@compiler_pages.route("/urls/<int:id>/que")
@compiler_pages.route("/urls/<int:id>/que.json")
def urls_que_json(id):
# load the url and queue it
cu=CrawlUrl.query.get(id)
mworker.queue_url(cu.tpe, cu.url)
cc=CrawlCache.query.filter(CrawlCache.url==cu.url).first()
mworker.start_workers(1,1,1) # start the workers
status="Worker gestartet"
return jsonify(urls=cu, cache=cc)
# test-compile an existing CrawlUrl and return the result
@compiler_pages.route("/urls/<int:id>/test")
@compiler_pages.route("/urls/<int:id>/test.json")
def urls_test_json(id):
# load the url
cu=CrawlUrl.query.get(id)
rw=fetch_page(cu.url)
h= {"url": cu.url, "sourcetype": cu.tpe, "raw": rw}
h2=do_compile(cu.tpe, h)
return jsonify(urls=cu,hs=h2,rw=rw)
@compiler_pages.route("/debug",methods=['GET','PUT'])
def debug():
status="did nothing"
js=read_json(request)
clogger.info(request.get_json())
if js["cmd"] == "runfetch":
mworker.run_fetch()
status="fetched something"
if js["cmd"] == "que":
cu = CrawlUrl.query.get(js["id"])
mworker.queue_url(cu.tpe, cu.url)
status= mworker.run_fetch()
if js["cmd"] == "comp":
status=mworker.run_compile()
if js["cmd"]=="process":
status=mworker.run_process()
return jsonify(status=status)
@compiler_pages.route("/debugurl")
def debugurl():
s=CrawlUrlSchema()
status=CrawlUrl.query.all()
return jsonify(status=status)
@compiler_pages.route("/urls",methods=['POST'])
def add_urls():
# read the request data
js =read_json(request)
# clogger.info(js)
# find or create the url in the database
url=CrawlUrlSchema().load(js["url"])
clogger.info(url)
url=CrawlUrl.find_or_create(url.data["tpe"], url.data["url"])
db_session2.add(url)
db_session2.commit()
return jsonify(url=url, kk=js)
@compiler_pages.route("/urls/<int:id>",methods=['DELETE'])
@compiler_pages.route("/urls<int:id>.json",methods=['DELETE'])
def delete(id):
cu=CrawlUrl.query.get(id)
if cu is not None:
db_session2.delete(cu)
db_session2.commit()
return jsonify(url={})

4
crawler/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
def init():
return " "

55
database.py Normal file
View File

@@ -0,0 +1,55 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from src import package_directory,clogger, cfg
from os import path
import json
#engine = create_engine('sqlite:////home/andreas/www/crawler/test.db', convert_unicode=True)
if cfg.get("db_path")==None or cfg.get("db_path").strip()=="":
db_path=package_directory
else:
db_path=cfg.get("db_path")
db_mainfile=cfg.get("db_mainfile")
if db_mainfile is None or db_mainfile.strip()=="":
db_mainfile="../srctest.db"
db_urlfile=cfg.get("db_urlfile")
if db_urlfile is None or db_urlfile.strip()=="":
db_urlfile="../srctest_cu.db"
engine = create_engine('sqlite:///'+ path.join(db_path,db_mainfile), convert_unicode=True)
db_session = scoped_session(sessionmaker(autocommit=False,
autoflush=False,
bind=engine))
engine2 = create_engine('sqlite:///'+ path.join(db_path,db_urlfile), convert_unicode=True)
db_session2 = scoped_session(sessionmaker(autocommit=False,
autoflush=False,
bind=engine2))
Base = declarative_base()
Base.query = db_session.query_property()
Base2 = declarative_base()
Base2.query = db_session2.query_property()
def read_json(rq):
js=rq.get_json()
clogger.info(rq.data)
if js is None:
js=rq.form.to_dict()
if js=={} and rq.data != "":
js=json.loads(rq.data)
return js
def init_db():
import src.models
Base.metadata.create_all(bind=engine)
def init_db2():
from .compiler.models import CrawlUrl, CrawlCache
Base2.metadata.create_all(bind=engine2)

4
fb.py Normal file
View File

@@ -0,0 +1,4 @@
from src import cfg
import facebook
graph = facebook.GraphAPI(access_token=cfg.fb_token, version='2.3')

21
meta.py Normal file
View File

@@ -0,0 +1,21 @@
import os
package_directory = os.path.dirname(os.path.abspath(__file__))
from config import Config
import logging
import sys
cfg = Config(file(os.path.join(package_directory, 'config.cfg')))
#--------------- Logging
file_handler=logging.FileHandler(cfg.logfile)
file_handler.setLevel(logging.INFO)
std_handler=logging.StreamHandler(stream=sys.stdout)
std_handler.setLevel(logging.DEBUG)
lg=logging.getLogger('mylogger')
lg.setLevel(logging.DEBUG)
lg.addHandler(file_handler)
lg.addHandler(std_handler)
#----------------

4
models.py Normal file
View File

@@ -0,0 +1,4 @@
from .articles.model import Article
from .sections.model import Section
from .compiler.models import CrawlUrl, CrawlCache

1
sections/__init__.py Normal file
View File

@@ -0,0 +1 @@
from .model import Section

44
sections/model.py Normal file
View File

@@ -0,0 +1,44 @@
from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime
from src.database import Base,db_session
from marshmallow import Schema, fields
import json
import flask
#from src.articles import Article
class SectionSchema(Schema):
id=fields.Integer()
foreign_name=fields.String()
name=fields.String()
class Section(Base):
__tablename__ = 'sections'
id = Column(Integer, primary_key=True)
url = Column(String(250))
crawlurl = Column(Integer)
foreign_name = Column(String(250),unique=True)
name=Column(String(250))
group = Column(String(250))
articles=relationship("Article", back_populates="section")
def __json__(self):
return SectionSchema().dump(self)[0]
def __init__(self, url=None,fname=None):
self.url=url
self.foreign_name=fname
@classmethod
def find_or_create(cls, fname):
s=Section.query.filter(Section.foreign_name==fname).first()
if s is None:
s=Section(fname=fname)
db_session.add(s)
db_session.commit()
s.foreign_name=fname
db_session.add(s)
db_session.commit()
return s

37
sections/views.py Normal file
View File

@@ -0,0 +1,37 @@
from flask import Blueprint, jsonify, render_template, abort, redirect, url_for, request
section_pages = Blueprint('sections', __name__)
from .model import Section
from .model import SectionSchema
#import flask
from datetime import datetime
import json
from src import clogger
from src.database import db_session, read_json
import flask
@section_pages.route("/")
@section_pages.route("")
@section_pages.route(".json")
def index():
sections=Section.query.all()
return jsonify(sections=sections)
@section_pages.route("/<int:id>",methods=['PUT'])
@section_pages.route("/<int:id>.json",methods=['PUT'])
def update(id):
section=Section.query.get(id)
clogger.info(request.data)
a=request.get_json()
section.text=a["text"]
db_session.commit()
return jsonify(section=section)
@section_pages.route("/<int:id>",methods=['GET'])
@section_pages.route("/<int:id>.json",methods=['GET'])
def get(id):
section=Section.query.get(id)
clogger.info(section)
# section=SectionSchema().dump(section)[0]
return jsonify(section=section,articles=section.articles)

1
templates/home.html Normal file
View File

@@ -0,0 +1 @@
<h1>Hello World</h1>

19
users/users.py Normal file
View File

@@ -0,0 +1,19 @@
class User(object):
def __init__(self, id, username, password):
self.id = id
self.username = username
self.password = password
def __str__(self):
return "User(id='%s')" % self.id
user = User(1, 'user', 'password')
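# Single hard-coded demo user; authenticate() and identity() are the callbacks expected by flask_jwt.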
def authenticate(username, password):
if username == user.username and password == user.password:
return user
def identity(payload):
return user