From 8955bf17f552c7e2bcbca211633329de84f566dd Mon Sep 17 00:00:00 2001 From: Andreas Stephanides Date: Sat, 14 Jan 2017 12:23:04 +0100 Subject: [PATCH] init commit --- .gitignore | 5 + __init__.py | 101 +++++++++++++++ articles/__init__.py | 2 + articles/model.py | 139 ++++++++++++++++++++ articles/views.py | 65 ++++++++++ bot/__init__.py | 1 + bot/bot.py | 140 +++++++++++++++++++++ compiler/README | 10 ++ compiler/README.html | 1 + compiler/__init__.py | 15 +++ compiler/comp/__init__.py | 1 + compiler/comp/__init__py | 1 + compiler/comp/rss.py | 8 ++ compiler/compile.py | 153 ++++++++++++++++++++++ compiler/compiler.py | 258 ++++++++++++++++++++++++++++++++++++++ compiler/fetching.py | 67 ++++++++++ compiler/fixing.py | 37 ++++++ compiler/models.py | 75 +++++++++++ compiler/mprocess.py | 74 +++++++++++ compiler/mqueues.py | 8 ++ compiler/mworker.py | 58 +++++++++ compiler/views.py | 146 +++++++++++++++++++++ crawler/__init__.py | 4 + database.py | 55 ++++++++ fb.py | 4 + meta.py | 21 ++++ models.py | 4 + sections/__init__.py | 1 + sections/model.py | 44 +++++++ sections/views.py | 37 ++++++ templates/home.html | 1 + users/users.py | 19 +++ 32 files changed, 1555 insertions(+) create mode 100644 .gitignore create mode 100644 __init__.py create mode 100644 articles/__init__.py create mode 100644 articles/model.py create mode 100644 articles/views.py create mode 100644 bot/__init__.py create mode 100644 bot/bot.py create mode 100644 compiler/README create mode 100644 compiler/README.html create mode 100644 compiler/__init__.py create mode 100644 compiler/comp/__init__.py create mode 100644 compiler/comp/__init__py create mode 100644 compiler/comp/rss.py create mode 100644 compiler/compile.py create mode 100644 compiler/compiler.py create mode 100644 compiler/fetching.py create mode 100644 compiler/fixing.py create mode 100644 compiler/models.py create mode 100644 compiler/mprocess.py create mode 100644 compiler/mqueues.py create mode 100644 compiler/mworker.py create mode 100644 compiler/views.py create mode 100644 crawler/__init__.py create mode 100644 database.py create mode 100644 fb.py create mode 100644 meta.py create mode 100644 models.py create mode 100644 sections/__init__.py create mode 100644 sections/model.py create mode 100644 sections/views.py create mode 100644 templates/home.html create mode 100644 users/users.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e6e1129 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/__init__.py~ +/__init__.pyc +*.pyc +*~ +config.cfg \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..2198b8c --- /dev/null +++ b/__init__.py @@ -0,0 +1,101 @@ +import os +import sys +package_directory = os.path.dirname(os.path.abspath(__file__)) +from config import Config + +cfg = Config(file(os.path.join(package_directory, 'config.cfg'))) +#--------------- Logging +import logging +download_path="./cdw" +file_handler=logging.FileHandler(cfg.logfile) +file_handler.setLevel(logging.DEBUG) +stream_handler=logging.StreamHandler(sys.stdout) + +clt=logging.getLogger('mylogger') +clt.setLevel(logging.DEBUG) +clt.addHandler(file_handler) +clt.addHandler(stream_handler) + +clogger=clt +#---------------- +lg=clt +from gevent import spawn, monkey +monkey.patch_all() +from .compiler import start_workers +#start_workers(1,1,1) + + + +# Framework +from flask import Flask, jsonify, render_template, redirect, request,send_from_directory +# Cross Site Scripting +from flask_cors import CORS, cross_origin 
+#Authentication +from flask_jwt import JWT, jwt_required, current_identity + +from src.models import Article,Section +from src.users import authenticate, identity +from datetime import datetime + +app = Flask(__name__) +CORS(app) +app.config['LOGGER_NAME']='mylogger' +app.logger.setLevel(logging.DEBUG) +app.logger.info("Server Started") + +app.config['SECRET_KEY'] = 'super-secret' +import flask +import json +from database import Base +from models import Article, CrawlUrl, CrawlCache + + +flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, (Base, Article,Section, CrawlUrl,CrawlCache)) else None) +json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, (Base, Article,CrawlUrl,CrawlCache)) else None) + + + +#bot.dosmth() +#lg.debug(bot.bot) + + + + +# Allow Cross Site Scripting +@app.after_request +def after_request(response): + response.headers.add('Access-Control-Allow-Origin', '*') + if request.method == 'OPTIONS': + response.headers['Access-Control-Allow-Methods'] = 'DELETE, GET, POST, PUT' + headers = request.headers.get('Access-Control-Request-Headers') + if headers: + response.headers['Access-Control-Allow-Headers'] = headers + return response +from .articles.views import article_pages +from .sections.views import section_pages +from .compiler.views import compiler_pages + + +@app.route("/") +@app.route("/index") +@app.route("/home") +def home(): + text="It work's, please do something" + return jsonify(text=text) + +app.register_blueprint(article_pages, url_prefix='/articles') +app.register_blueprint(section_pages, url_prefix='/sections') +app.register_blueprint(compiler_pages, url_prefix='/compiler') + + +from src.bot import bot +if not app.debug or os.environ.get("WERKZEUG_RUN_MAIN") == "true": + bot.message_loop() + + +# ------------ Telegram Bot +#from bot import bot_queue +#@app.route('/bot', methods=['GET', 'POST']) +#def pass_update(): +# bot_queue.put(request.data) # pass update to bot +# return 'OK' diff --git a/articles/__init__.py b/articles/__init__.py new file mode 100644 index 0000000..fdf13cf --- /dev/null +++ b/articles/__init__.py @@ -0,0 +1,2 @@ +from .model import Article +from .views import article_pages diff --git a/articles/model.py b/articles/model.py new file mode 100644 index 0000000..14b57cd --- /dev/null +++ b/articles/model.py @@ -0,0 +1,139 @@ + +from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text, ForeignKey +from sqlalchemy.orm import relationship +from datetime import datetime +from src.database import Base +from src.database import db_session +from marshmallow import Schema, fields +from src.sections.model import Section + +#import json +import json +import flask +#json.JSONEncoder.default = lambda self,obj: (obj.isoformat() if isinstance(obj, datetime) else None) +import hashlib + +#import clogger +import logging +#from crawler.compiler.mqueues import put_fetch_queue +from src import clogger +#json.JSONEncoder.default = lambda self,obj: (obj.isoformat() if isinstance(obj, datetime) else None) + + + + +def calc_fingerprint(a): + return calc_fingerprint_h({"url": a.url, "title":a.title, "published": str(a.published_date)}) + +def calc_fingerprint_h(a): + if a["published"] is not None and a["published"]!= "None": + # clogger.info( "published:"+str(a["published"])) + if a["published"] is str: + pp=parse(a["published"]) + else: + pp=a["published"] + else: + pp="" + #clogger.info( unicode(a["url"])+ unicode(a["title"])+unicode(pp)) + h=hashlib.md5() + h.update(unicode(a["url"])) 
+ h.update(a["title"].encode("utf-8")) + h.update(unicode(pp)) + return h.hexdigest() + + +class ArticleSchema(Schema): + id=fields.Integer() + text=fields.String() + title=fields.String() + author=fields.String() + sourcetype =fields.String() + image =fields.String() + url =fields.String() + published_date=fields.DateTime() + date=fields.DateTime() + first_fetched=fields.DateTime() + section_id=fields.Integer() + +class Article(Base): + __tablename__ = 'articles' + id = Column(Integer, primary_key=True) + parent_id= Column(Integer) + url = Column(String(250)) + is_primary = Column(Boolean) + fingerprint = Column(String(250),unique=True) + hash = Column(String(250)) + last_fetched = Column(DateTime) + first_fetched=Column(DateTime) + published_date = Column(DateTime) + date = Column(DateTime) + text = Column(Text) + title = Column(String(250)) + author = Column(String(250)) + section = relationship("Section") + section_id=Column(Integer, ForeignKey('sections.id')) + sourcetype = Column(String(250)) + image=Column(String(250)) + + def __init__(self, url=None,title=None, published_date=None): + self.url=url + self.title=title + self.published_date=published_date + self.first_fetched=datetime.now() + def __json__(self): + return ArticleSchema().dump(self)[0] + + def dict(self): + return {"id": str(int(self.id)), "title": self.title, "text": self.text, "author": self.author, "section":self.section, "sourcetype": self.sourcetype, "last_fetched": self.last_fetched, "first_fetched": self.first_fetched, "published_date": self.published_date, "date": self.date,"image": self.image, "url": self.url} + + +# @classmethod +# def sections(self): +# sects=db_session.query(Article.section).distinct().all() +# for i in range(len(sects)): +# sects[i]=sects[i][0] +# return sects + + @classmethod + def from_hash(cls, a): + fp = calc_fingerprint_h(a) + aa = Article.query.filter(Article.fingerprint==fp).first() + if aa is None: + clogger.debug( "new Article") + if a["published"] is not None: + if a["published"] is str: + pd= parse(a["published"]) + else: + pd=a["published"] + else: + pd=None + aa=Article(a["url"], a["title"],pd) + aa.fingerprint = calc_fingerprint(aa) + db_session.add(aa) + db_session.commit() + return aa + + def process_hash(self, a): + self.text=a["text"].decode('utf8') + if "image" in a: + self.image=a["image"] + if "author" in a: + self.author=a["author"] + if "title" in a: + self.title=a["title"] + if "author" in a: + self.author=a["author"] + if "sourcetype" in a: + self.sourcetype=a["sourcetype"] + if "section" in a: + self.section=Section.find_or_create(a["section"]) +# if "last_fetched" in a: +# self.last_fetched=a["last_fetched"] + if "published_date" in a: + self.published_date=a["published_date"] + + + +#flask.json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None) + +#json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None) diff --git a/articles/views.py b/articles/views.py new file mode 100644 index 0000000..7202b4b --- /dev/null +++ b/articles/views.py @@ -0,0 +1,65 @@ +from flask import Blueprint, jsonify, render_template, abort, redirect, url_for, request +article_pages = Blueprint('articles', __name__) +from .model import Article +from .model import ArticleSchema +#import flask +from datetime import datetime +import json + +#flask.json.JSONEncoder.default = lambda self,obj: (obj.isoformat() if isinstance(obj, datetime) else None) +#flask.json.JSONEncoder.default 
= lambda self,obj: ((obj.dict()) if isinstance(obj, Article) else None) +from src import clogger +import json +from src.database import db_session, read_json +import flask + +#flask.json.JSONEncoder.default = lambda self,obj: ((ArticleSchema().dump(obj)[0]) if isinstance(obj, Article) else None) +flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, (Base, Article,CrawlUrl)) else None) + +@article_pages.route("/") +@article_pages.route("") +@article_pages.route(".json") +def index(): + articles=Article.query.all() + return jsonify(articles=articles) + +@article_pages.route("/",methods=['PUT']) +@article_pages.route("/.json",methods=['PUT']) +def update(id): + article=Article.query.get(id) + clogger.info(request.data) + a=request.get_json() + article.text=a["text"] + db_session.commit() + return jsonify(article=article) + + +@article_pages.route("/",methods=['GET']) +@article_pages.route("/.json",methods=['GET']) +def get(id): + article=Article.query.get(id) + clogger.info(article) +# article=ArticleSchema().dump(article)[0] + return jsonify(article=article) + +@article_pages.route("/",methods=['DELETE']) +@article_pages.route("/.json",methods=['DELETE']) +def delete(id): + article=Article.query.get(id) + clogger.info(id) + if article != None: + db_session.delete(article) + db_session.commit() + return jsonify(article={}) + + +@article_pages.route("/",methods=['POST']) +@article_pages.route("",methods=['POST']) +@article_pages.route(".json",methods=['POST']) +def create(): + article=Article() + a=read_json(request) + article.text=a["article"]["text"] + db_session.add(article) + db_session.commit() + return jsonify(article=article) diff --git a/bot/__init__.py b/bot/__init__.py new file mode 100644 index 0000000..4cff7c7 --- /dev/null +++ b/bot/__init__.py @@ -0,0 +1 @@ +from .bot import bot diff --git a/bot/bot.py b/bot/bot.py new file mode 100644 index 0000000..7392b8f --- /dev/null +++ b/bot/bot.py @@ -0,0 +1,140 @@ +import telepot +import datetime +import time +import json +from Queue import Queue +#import os +from src import lg,cfg +#from gevent import spawn +from telepot.namedtuple import InlineKeyboardMarkup, InlineKeyboardButton +from telepot.delegate import ( + per_chat_id, pave_event_space, include_callback_query_chat_id, create_open, per_inline_from_id ) +from src.compiler import CrawlUrl +from gevent import spawn, monkey, Greenlet + +def IKB(h): + return InlineKeyboardButton(text=h["text"], callback_data=h["callback_data"]) + +def IKB2(h): + return [IKB(h)] +def IKM(h): + return InlineKeyboardMarkup(inline_keyboard=[ map(IKB,h)]) + +def IKM2(h): + return InlineKeyboardMarkup(inline_keyboard= map(IKB2,h)) + + +def query_que_url(url): + print(json.dumps(url)) + return {"text": url.url, "callback_data":"/urls/"+str(url.id)+"/que"} + +def handle_urls(handler, cmd): + curls=CrawlUrl.query.all() + #sent=handler.sender.sendMessage(json.dumps(curls)) + kb= IKM2(map(query_que_url,curls)) + print json.dumps(cmd) + if len(cmd) >= 4 and cmd[3]=="que": + sent=handler.sender.sendMessage("I qued url "+str(cmd[2]), reply_markup=None) + else: + sent=handler.sender.sendMessage("que?", reply_markup=kb) + handler._edit_msg_ident = telepot.message_identifier(sent) + handler._editor = telepot.helper.Editor(handler.bot, sent) + +def execute_command(handler,cmd,msg=None): + if cmd[1]=='urls': + handle_urls(handler,cmd) + + +def handle(handler,msg): + content_type,chat_type,chat_id = telepot.glance(msg) + if msg.has_key('text'): + if msg['text'][0]=='/': + cmd = 
msg['text'].split("/") + execute_command(handler, cmd, msg) + if msg.has_key('data'): + lg.debug(msg['data']) + + +class InlineHandler(telepot.helper.InlineUserHandler, telepot.helper.AnswererMixin): + def __init__(self, *args, **kwargs): + super(InlineHandler, self).__init__(*args, **kwargs) + + def on_inline_query(self, msg): + def compute_answer(): + query_id, from_id, query_string = telepot.glance(msg, flavor='inline_query') + print(self.id, ':', 'Inline Query:', query_id, from_id, query_string) + + articles = [{'type': 'article', + 'id': 'abc', 'title': query_string, 'message_text': query_string}] + + return articles + + self.answerer.answer(msg, compute_answer) + + def on_chosen_inline_result(self, msg): + from pprint import pprint + pprint(msg) + result_id, from_id, query_string = telepot.glance(msg, flavor='chosen_inline_result') + print(self.id, ':', 'Chosen Inline Result:', result_id, from_id, query_string) + + +class FetBot(telepot.helper.ChatHandler): + def __init__(self, *args, **kwargs): +# super(FetBot,self).__init__(*args,**kwargs) + super(FetBot,self).__init__( *args,**kwargs) + + _editor=None + _edit_msg_ident=None + keyboard=IKM([{"text":"START","callback_data": "start"}, + {"text":"Don't Start","callback_data":"notstart"} + ]) + keyboard =InlineKeyboardMarkup( + inline_keyboard=[[ + InlineKeyboardButton(text='START', callback_data='start'), + InlineKeyboardButton(text='START', callback_data='start') + ]] + ) + def on_chat_message(self,msg): + handle(self,msg) + content_type,chat_type,chat_id = telepot.glance(msg) + lg.debug(content_type) + if content_type=="photo" or content_type=="sticker": + lg.debug("try to download %s" % msg[content_type][-1]["file_id"]) + f=self.bot.getFile(msg[content_type][-1]['file_id']) + lg.debug(f) + self.bot.download_file(f['file_id'], "dwn/" + f['file_path']) + # self.bot.getFile(msg['photo'][-1]['file_id']), "dwn") + #self._cancel_last() + #sent=self.sender.sendMessage("Hello World", reply_markup=self.keyboard) + #self._editor = telepot.helper.Editor(self.bot, sent) + #self._edit_msg_ident = telepot.message_identifier(sent) + + def on_callback_query(self, msg): + query_id, from_id, query_data = telepot.glance(msg, flavor='callback_query') + lg.debug(json.dumps(msg)) + self._cancel_last() + if query_data[0]=='/': + cmd = query_data.split("/") + execute_command(self, cmd, msg) + +# self.sender.sendMessage("Danke") + self.bot.answerCallbackQuery(query_id, text='Ok. But I am going to keep asking.') + #self.bot.answerCallbackQuery(query_id) + def _cancel_last(self): + if self._editor: + self._editor.editMessageReplyMarkup(reply_markup=None) + self._editor = None + self._edit_msg_ident = None + + + + +bot=None +bot = telepot.DelegatorBot(cfg.token, [include_callback_query_chat_id(pave_event_space())(per_chat_id(),create_open,FetBot,timeout=20), + pave_event_space()( + per_inline_from_id(), create_open, InlineHandler, timeout=10), + ]) + + + + diff --git a/compiler/README b/compiler/README new file mode 100644 index 0000000..027667f --- /dev/null +++ b/compiler/README @@ -0,0 +1,10 @@ +Das ist die API für den Compiler +Folgende Befehle sind implementiert: +GET doc: Diese Dokumentation! 
+GET initdb: Initialisiere die Datenbank, ACHTUNG Daten werden gelöscht +POST urls: +Erwartet Daten im Format {"url": {"type": typ, "url": "someurl.html"}} +Fügt diese Url der Überwachung hinzu + +IN PROCESS: +GET urls: Alle Urls die überwacht werden sollen \ No newline at end of file diff --git a/compiler/README.html b/compiler/README.html new file mode 100644 index 0000000..2f4e349 --- /dev/null +++ b/compiler/README.html @@ -0,0 +1 @@ +sdf diff --git a/compiler/__init__.py b/compiler/__init__.py new file mode 100644 index 0000000..5284e55 --- /dev/null +++ b/compiler/__init__.py @@ -0,0 +1,15 @@ + + +#from mprocess import do_process, process_urllist +#from compiler import do_compile +#from mworker import run_fetch, run_process, run_compile + +# include models for final objects +from src.models import Article +# starting workers +from mworker import start_workers + +from models import add_url, CrawlUrl +#start_workers(1,1,1) + +from fetching import announce_articleid diff --git a/compiler/comp/__init__.py b/compiler/comp/__init__.py new file mode 100644 index 0000000..0a7c135 --- /dev/null +++ b/compiler/comp/__init__.py @@ -0,0 +1 @@ +from rss import rssfeed diff --git a/compiler/comp/__init__py b/compiler/comp/__init__py new file mode 100644 index 0000000..532849f --- /dev/null +++ b/compiler/comp/__init__py @@ -0,0 +1 @@ +from rss import rssfeed \ No newline at end of file diff --git a/compiler/comp/rss.py b/compiler/comp/rss.py new file mode 100644 index 0000000..cb5a95b --- /dev/null +++ b/compiler/comp/rss.py @@ -0,0 +1,8 @@ +import feedparser + +def rssfeed(url,raw): + al=[] + f=feedparser.parse(raw) + for e in f['entries']: + al.append(e['link']) + return {"url": url, "next_page": None, "article_links": al, "objecttype":"index"} diff --git a/compiler/compile.py b/compiler/compile.py new file mode 100644 index 0000000..9cfcf37 --- /dev/null +++ b/compiler/compile.py @@ -0,0 +1,153 @@ +from bs4 import BeautifulSoup +import crawler.objects.models +#from crawler.objects.models import Object +from dateutil.parser import parse +from datetime import datetime +import re +def hello(): + return "hello" + + +def fetarticle(o): + sp=BeautifulSoup(o.raw_fixed) + d={} + h=sp.find("h1", {"itemprop": "name"}) + d["title"]=unicode(h.text).strip() + h=sp.find("div", {"itemprop": "articleBody"}) + if h is not None: + d["text"]=(h.encode_contents()).strip() + else: + d["text"]="" + d["url"]=o.url + h=sp.find("span", {"itemprop": "author"}) + if h is not None: + d["author"]=h.text.strip() + h=sp.find("span", {"itemprop": "articleSection"}) + if h is not None: + d["section"]= "FET - " + h.text.strip() + + h=sp.find("span", {"itemprop": "datePublished"}) + if h is not None: + d["published"]=parse(h.encode_contents().strip()) + h=sp.find("meta", {"property": "og:image"}) + + if h is not None: + d["image"]=h.attrs["content"] + + hh=sp.find_all("div", {"class":"media"}) + for h in hh: + if h is not None: + h=h.find("div", {"class": "pull-left"}) + if h is not None: + h=h.find("a") + if h is not None: + d["image2"]=crawler.objects.models.download_file(h.attrs["href"]) + return {"article": d} + +def fsarcharticle(o): + sp=BeautifulSoup(o.raw_fixed) + d={} + h=sp.find("h1", {"class": "title"}) + if h is not None: + d["title"]=h.text.strip() + d["url"]=o.url + d["published"]=None + h=sp.find("article") + h=h.find("div", {"class": "content"}) + d["text"]=h.encode_contents().strip() + h=sp.find("article").find("h1", {"class": "title"}) + if h is not None: + d["title"]=h.text.strip() + else: + d["title"]="" + 
d["image"]="" + d["sourcetype"]="fsarcharticle" + d["section"]="fsarch" + d["author"]=None + return {"article": d} + +def fetindex(o): +# if type(o) is not Object: +# raise TypeError + if o.raw is None: + raise Error + print "compile_fetindex" + html=BeautifulSoup(o.raw_fixed) + h = html.find("li", {"class": "next_page" }) + if h is not None: + nl=h.find("a") + nl=crawler.objects.models.fix_link(o.url,nl.attrs["href"]) + else: + nl=None + h= html.find("ul", {"id": "neuigkeiten"}) + links=h.find_all("a") + al = [] + for t in links: + al.append(t.attrs["href"]) + return {"url": o.url, "next_page": nl, "article_links": al, "objecttype": "index" } + +def fsarchindex(o): + if o.raw is None: + raise Error + html=BeautifulSoup(o.raw_fixed) + h= html.find("article") + print unicode(h) + links=h.find_all("a") + al = [] + fl=[] + for t in links: + url=t.attrs["href"] + if re.search("fachschaftarchitektur\.at", url): + al.append(t.attrs["href"]) + if re.search("facebook\.com/events", url): + fl.append(t.attrs["href"]) + + return {"url": o.url, "next_page": None, "article_links": al, "facebook_links": fl,"objecttype":"index"} + + +def fsbizindex(o): + if o.raw is None: + raise Error + print "compile_fsbizindex" + html=BeautifulSoup(o.raw_fixed) + h= html.find("section", {"id": "primary"}) + links=h.find_all("h1", {"class": "entry-title"}) + al = [] + for t in links: + + al.append(t.find("a").attrs["href"]) + return {"url": o.url,"article_links": al,"objecttype": "index"} + + +def fsmbindex(o): + if o.raw is None: + raise Error + html=BeautifulSoup(o.raw_fixed) + h= html.find("a",{"class": "next"}) + if h is not None: + np=h.attrs["href"] + else: + np=None + h=html.find("div", {"id": "main"}).find("div", {"class": "inside"}).find("div", {"class": "mod_newslist"}) + if h is not None: + ats=h.find_all("div",{"class": "block"}) + articles=[] + for a in ats: + aa={} + h=a.find("h3") + if h is not None: + aa["title"] = h.text.strip() + h=a.find("div", {"class": "ce_text"}) + if h is not None: + aa["text"] = (h.encode_contents()).strip() + aa["info"]=[] + hh=a.find_all("p", {"class": "info"},recursive=False) + for h in hh: + aa["info"].append(unicode(h.text)) + if re.search(r'von', str(h)): + h1= re.sub(r'[^\d]*(\d+)\.(\d+)\.(\d+)[^\d]*', r'\3/\2/\1',unicode(h.text)) + aa["published"] =parse(h1.strip()) + aa["author"]=re.sub(r'^.*von(.*)$', r'\1',unicode(h.text)).strip() #h.text + "--" #+ re.sub(r'[^\d]*(\d+)\.(\d+)\.(\d+)[^\d]*', r'\3/\2/\1',hh) + aa["section"]="FSMB" + articles.append(aa) + return {"url": o.url, "next_page": np, "articles": articles,"objecttype": "articles"} diff --git a/compiler/compiler.py b/compiler/compiler.py new file mode 100644 index 0000000..de34084 --- /dev/null +++ b/compiler/compiler.py @@ -0,0 +1,258 @@ +from bs4 import BeautifulSoup +#import crawler.objects.models +#from crawler.objects.models import Object +from dateutil.parser import parse +from datetime import datetime +import re +import urlparse +from src import clogger, cfg +from src.fb import graph +from fixing import fix_link +from facebook import GraphAPIError +#from fetching import downloadfile +import json +def do_compile(tpe, cont): + if type(cont) != dict: + clogger.error("Type Error for do compile for :"+str(cont["url"])) + # Starting to compile an generic object + if "url" not in cont: + clogger.error("no url can't compile "+tpe) + else: + clogger.debug("compile: type:"+str(tpe)+ "| "+ str(cont["url"])) + if tpe in compiler: + cont=compiler[tpe](cont["url"], cont["raw"]) + return cont + +from comp import rssfeed 
+ +def dummyarticle(url, raw): + return {"url": url, "article":{"url": url, "section": "dummysection", "sourcetype": "dummy", "title":"dummytitle", "text": raw, "image": "fff", "author": "me", "published": None}} + + + +def htufeed(url,raw): + al=[] + f=feedparser.parse(raw) + for e in f['entries']: + al.append(e['link']) + return {"url": url, "next_page": None, "article_links": al, "objecttype":"index"} + + +def htuarticle(url,raw): + sp=BeautifulSoup(raw) + d={} + h=sp.find("meta", {"property": "og:image"}) + if h is not None: + d["image"]=h.attrs["content"] + d["image2"]=d["image"] + h=sp.find("div", {"class": "patternRevInfo"}) + if h is not None: +# clogger.debug(h.text.strip()) + h1= re.sub(r'.*- (\d+) ([a-zA-Z]+) (\d+) - ([:\d]+)[^\d]*', r'\3/\2/\1 \4',unicode(h.text.strip())) +# clogger.debug(h1) + d["published"]=parse(h1) + # clogger.debug(parse(h1)) + # clogger.debug(d["published"]) + h=h.find("a") + if h is not None: + d["author"]=h.text.strip() + h=sp.find("div", {"class": "foswikiTopic"}) + h1=h.find("h4") + if h1 is not None: + d["title"]= h1.text.strip() + h1.extract() # remove head + else: + h1=sp.find("meta", {"name": "WEBTOPIC"}) + d["title"]= h1.attrs["content"] + d["text"]=(h.encode_contents()).strip() + d["section"]="HTU" + d["url"]=url +# clogger.debug(d) + return {"article": d} + + +def fetarticle(url, raw): + sp=BeautifulSoup(raw) + d={} + h=sp.find("h1", {"itemprop": "name"}) + d["title"]=unicode(h.text).strip() + h=sp.find("div", {"itemprop": "articleBody"}) + if h is not None: + d["text"]=(h.encode_contents()).strip() + else: + d["text"]="" + d["url"]=url + h=sp.find("span", {"itemprop": "author"}) + if h is not None: + d["author"]=h.text.strip() + h=sp.find("span", {"itemprop": "articleSection"}) + if h is not None: + d["section"]= "FET - " + h.text.strip() + + h=sp.find("span", {"itemprop": "datePublished"}) + if h is not None: + d["published"]=parse(h.encode_contents().strip()) + + h=sp.find("meta", {"property": "og:image"}) + if h is not None: + d["image"]=h.attrs["content"] + d["image2"]=d["image"] +# hh=sp.find_all("div", {"class":"media"}) +# for h in hh: +# if h is not None: +# h=h.find("div", {"class": "pull-left"}) +# if h is not None: +# h=h.find("a") +# if h is not None: +# d["image2"]=downloadfile(fix_link(url,h.attrs["href"])) + return {"article": d} + + +def fsarcharticle(url, raw): + sp=BeautifulSoup(raw) + d={} + h=sp.find("h1", {"class": "title"}) + if h is not None: + d["title"]=h.text.strip() + d["url"]=url + d["published"]=None + h=sp.find("article") + if h is not None: + h=h.find("div", {"class": "content"}) + d["text"]=h.encode_contents().strip() + h=sp.find("article") + if h is not None: + h=h.find("h1", {"class": "title"}) + if h is not None: + d["title"]=h.text.strip() + else: + d["title"]="" + d["image"]="" + d["sourcetype"]="fsarcharticle" + d["section"]="fsarch" + d["author"]=None + return {"article": d} + +def fetindex(url, raw): + if raw is None: + raise Error +# clogger.debug("compile_fetindex: "+str(url)) + html=BeautifulSoup(raw) + h = html.find("li", {"class": "next_page" }) + if h is not None: + nl=h.find("a") + nl=fix_link(url,nl.attrs["href"]) + else: + nl=None + h= html.find("ul", {"id": "neuigkeiten"}) + al = [] + if h is not None: + links=h.find_all("a") + for t in links: + al.append(t.attrs["href"]) + return {"url": url, "next_page": nl, "article_links": al, "objecttype": "index" } + +def fsarchindex(url, raw): + if raw is None: + raise Error + html=BeautifulSoup(raw) + h= html.find("article") + print unicode(h) + 
links=h.find_all("a") + al = [] + fl=[] + for t in links: + url=t.attrs["href"] + if re.search("fachschaftarchitektur\.at", url): + al.append(t.attrs["href"]) + if re.search("facebook\.com/events", url): + fl.append(t.attrs["href"]) + + return {"url": url, "next_page": None, "article_links": al, "facebook_links": fl,"objecttype":"index"} + + +def fsbizindex(url, raw): + if raw is None: + raise Error + print "compile_fsbizindex" + html=BeautifulSoup(raw) + h= html.find("section", {"id": "primary"}) + links=h.find_all("h1", {"class": "entry-title"}) + al = [] + for t in links: + + al.append(t.find("a").attrs["href"]) + return {"url": url,"article_links": al,"objecttype": "index"} + + + + +def fbfeed(url, raw): + js = json.loads(raw) + arts=[] + u=urlparse.urlparse(url) + for m in js["data"]: + aa={} + aa["url"]=urlparse.urlunsplit(("http","www.facebook.at",m["id"],"","")) + aa["published"] =parse(m["created_time"]) + if m.has_key("message")==True: + aa["text"] = m["message"] + else: + try: + h=graph.get_object(id=m["id"].split("_")[1]) + if h.has_key("description"): + aa["text"]=h["description"] + else: + aa["text"]=json.dumps() + except GraphAPIError: + aa["text"]="" + if m.has_key("story")==True: + aa["title"] = m["story"] + else: + aa["title"] = u[1]+ " at " + aa["published"].strftime("%Y-%m-%d %H:%M") + aa["section"]="Facebook: "+u[1] + arts.append(aa) + return {"url": url, "next_page": js["paging"]["next"],"articles": arts} + +def fsmbindex(url, raw): + if raw is None: + raise Error + html=BeautifulSoup(raw) + h= html.find("a",{"class": "next"}) + if h is not None: + np=h.attrs["href"] + else: + np=None + h=html.find("div", {"id": "main"}).find("div", {"class": "inside"}).find("div", {"class": "mod_newslist"}) + if h is not None: + ats=h.find_all("div",{"class": "block"}) + articles=[] + for a in ats: + aa={} + h=a.find("h3") + if h is not None: + aa["title"] = h.text.strip() + h=a.find("div", {"class": "ce_text"}) + if h is not None: + aa["text"] = (h.encode_contents()).strip() + aa["info"]=[] + hh=a.find_all("p", {"class": "info"},recursive=False) + for h in hh: + aa["info"].append(unicode(h.text)) + if re.search(r'von', str(h)): + h1= re.sub(r'[^\d]*(\d+)\.(\d+)\.(\d+)[^\d]*', r'\3/\2/\1',unicode(h.text)) + aa["published"] =parse(h1.strip()) + aa["author"]=re.sub(r'^.*von(.*)$', r'\1',unicode(h.text)).strip() #h.text + "--" #+ re.sub(r'[^\d]*(\d+)\.(\d+)\.(\d+)[^\d]*', r'\3/\2/\1',hh) + aa["section"]="FSMB" + articles.append(aa) + return {"url": url, "next_page": np, "articles": articles,"objecttype": "articles"} + +compiler = {"fetindex": fetindex, "fetarticle": fetarticle, "fsarchindex": fsarchindex, "fsarcharticle": fsarcharticle, "fsmbindex": fsmbindex, "fsbizindex": fsbizindex, "dummyarticle": dummyarticle,"htuarticle": htuarticle, "htufeed": htufeed, "fbfeed": fbfeed, "fschfeed": rssfeed} + +compiler = cfg.compiler +for i in compiler: + compiler[i]=eval(compiler[i]) + + + +article_types={"fetindex" : "fetarticle", "fsarchindex": "fsarcharticle", "fsbizindex": "fsbizarticle", "dummyindex": "dummyarticle", "htufeed": "htuarticle"} diff --git a/compiler/fetching.py b/compiler/fetching.py new file mode 100644 index 0000000..522278f --- /dev/null +++ b/compiler/fetching.py @@ -0,0 +1,67 @@ +from requests import session +s=session() +from src import package_directory, download_path,cfg +from os import path, makedirs +import os +import json +from gevent import spawn +from src import clogger +from src.fb import graph +from hashlib import md5 +import errno +import urlparse +def 
announce_articleid(id): + for u in cfg.announcearticle_url: + s.get( u % id) + +def downloadfile(url): + relative_name=path.join("downloads",str(md5(url).hexdigest()),url.split('/')[-1]) + local_filename = path.join(download_path,relative_name) + if not os.path.exists(os.path.dirname(local_filename)): + try: + os.makedirs(os.path.dirname(local_filename)) + except OSError as exc: # Guard against race condition + if exc.errno != errno.EEXIST: + raise + if not path.exists(local_filename): + spawn(fetch_load_file, url, local_filename) + return relative_name + +from models import CrawlCache +from datetime import datetime, timedelta + + + + +def fetch_page(furl): + current_time = datetime.utcnow() + ten_weeks_ago = current_time - timedelta(days=cfg.cache_days) + u=urlparse.urlparse(furl) + if u[0] == '': + furl=urlparse.urlunsplit(("http",u[1],u[2],u[3],u[4])) + cc=CrawlCache.query.filter(CrawlCache.url==furl).filter(CrawlCache.fetched>ten_weeks_ago).first() + if cc is None: + clogger.debug("fetching url: "+ str(furl)) + if u[0]=='fb': + tx = json.dumps(graph.get_object(id=u[1]+u[2])) + else: + tx=s.get(furl).text + CrawlCache.store(furl,tx) + else: + #if furl is not None: +# clogger.debug("cache hit") + tx=cc.raw + return tx + +def fetch_load_file(furl, path): + try: + clogger.info("Downloading "+ str(furl)) + r = s.get(furl, stream=True) + f = open(path, 'wb') + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + f.write(chunk) + f.close() + except Exception, e: + #clogger.error("Error Occured during fetching:"+str(furl)) + clogger.error(e,exc_info=True) diff --git a/compiler/fixing.py b/compiler/fixing.py new file mode 100644 index 0000000..e835844 --- /dev/null +++ b/compiler/fixing.py @@ -0,0 +1,37 @@ +from bs4 import BeautifulSoup +from urlparse import urlparse, urlunparse, urljoin +from fetching import downloadfile +import bleach + +def fix_link(url, link): + r= urlparse(link) + if r.scheme is None or r.scheme == '': + return urljoin(url,link) + else: + return link + +def fix_file(url, link): + u=fix_link(url,link) + return downloadfile(u) + +def load_file(url, link): + return fix_file(url,link) + + +def fix_html(html, baseurl): + html=bleach.clean(html, tags=['b','p','span','a','img','div','br','strong','ul','li'], strip=True) + sp=BeautifulSoup(html) + images=sp.find_all("img") + for t in images: + if "src" in t.attrs and t.attrs["src"] is not None: + t.attrs["src"]=fix_file(baseurl,t.attrs["src"]) + links=sp.find_all("a") + for t in links: + if "href" in t.attrs: + t.attrs["href"]=fix_link(baseurl, t.attrs["href"]) + for t in sp.find_all("script"): + t.extract() + b=sp.find("base") + if b is not None: + b.attrs["href"]="" + return sp diff --git a/compiler/models.py b/compiler/models.py new file mode 100644 index 0000000..e774590 --- /dev/null +++ b/compiler/models.py @@ -0,0 +1,75 @@ +from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text +from datetime import datetime +from src.database import Base2 +from src.database import db_session2 +from mqueues import put_fetch_queue +from marshmallow import Schema,fields,ValidationError +import json +import flask + +def add_url(tpe, url): + cu=CrawlUrl.find_or_create(tpe,url) + db_session2.add(cu) + db_session2.commit() + cu.schedule() + + +class CrawlUrlSchema(Schema): + id=fields.Integer() + tpe=fields.String() + url=fields.String() + last_fetched=fields.DateTime() + fetched = fields.DateTime() + +class CrawlUrl(Base2): + __tablename__='crawlurls' + id = Column(Integer, 
primary_key=True) + tpe=Column(String(250)) + url = Column(String(250)) + last_fetched = Column(DateTime) + def fetched(self): + CrawlCache.query.find(CrawlCache.url==self.url).first() + @classmethod + def find_or_create(self, tpe, url): + aa = CrawlUrl.query.filter(CrawlUrl.url==url).filter(CrawlUrl.tpe==tpe).first() + if aa is None: + aa=CrawlUrl(tpe,url) + return aa + def schedule(self): + put_fetch_queue((0, self.tpe, self.url)) + def __init__(self, tpe, url): + self.url=url + self.tpe=tpe + def __json__(self): + return CrawlUrlSchema().dump(self)[0] + +class CrawlCacheSchema(Schema): + id=fields.Integer() + raw=fields.String() + url=fields.String() + fetched=fields.DateTime() + +class CrawlCache(Base2): + __tablename__='crawlcache' + id = Column(Integer, primary_key=True) + url=Column(String(250)) + fetched=Column(DateTime) + raw=Column(Text) + + def __init__(self, url,rw): + self.url=url + self.raw=rw + self.fetched=datetime.utcnow() + def __json__(self): + return CrawlCacheSchema().dump(self) + + @classmethod + def store(cls, url, rw): + cc=CrawlCache(url,rw) + db_session2.add(cc) + db_session2.commit() + + + + +#flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, CrawlUrl) else None) diff --git a/compiler/mprocess.py b/compiler/mprocess.py new file mode 100644 index 0000000..86062bb --- /dev/null +++ b/compiler/mprocess.py @@ -0,0 +1,74 @@ +from src import clogger # Logger for crawler +from src.models import Article # Article model +from datetime import datetime +from src.database import db_session +from mqueues import fetch_queue, compile_queue, put_fetch_queue +from fetching import fetch_page, downloadfile, announce_articleid +from fixing import fix_html, fix_file + +from compiler import article_types +from fixing import fix_link +# process article expects an hash with raw data for the article and puts it into an +# article object stored in the database it is intended to prevent dublicates + +def is_article_hash(h): + return "text" in h and "url" in h and "sourcetype" in h and "section" in h + +def process_article(art): + if not is_article_hash(art): + clogger.error("Invalid article hash:" + str(art)) + aa=None + else: + art["text"]=fix_html(art["text"],art["url"]) + if "image" in art: + art["image"]=fix_file(art["url"], art["image"]) + clogger.info(art) + aa = Article.from_hash(art) + aa.process_hash(art) + aa.last_fetched=datetime.now() + aa.sourcetype=art["sourcetype"] + db_session.add(aa) + db_session.commit() + clogger.debug("Updated/Added Article "+ str(aa.id) + ": " + (aa.title.encode("utf-8"))) +# announce_articleid(aa.id) + return aa + +# process a single found url +def process_url(url,tpe, parent_url): + #clogger.debug("process URL of type "+ tpe + ": " + url) + if parent_url is not None: + url=fix_link(parent_url, url) + put_fetch_queue((0,tpe,url)) + + +# process a url list +def process_urllist(urllist, tpe, parent_url): + for u in urllist: + process_url(u,tpe, parent_url) + + +def do_process(tpe, cont): + urllist=[] +# clogger.debug("process :" + str(cont)) + if "article_links" in cont: + process_urllist(cont["article_links"], article_types[tpe], cont["url"]) + if "index_links" in cont: + process_urllist(cont["index_links"], tpe , cont["url"]) + + if "next_page" in cont and cont["next_page"] is not None: + process_url(cont["next_page"],tpe, cont["url"]) + + if "article" in cont: + art=cont["article"] + art["sourcetype"]=tpe + process_article(art) + + if "articles" in cont: + clogger.debug("articles") + for a in cont["articles"]: + 
if "title" in a: + a["sourcetype"]=tpe + if a.has_key("url")==False: + a["url"]=cont["url"] + process_article(a) + return diff --git a/compiler/mqueues.py b/compiler/mqueues.py new file mode 100644 index 0000000..b87c4ef --- /dev/null +++ b/compiler/mqueues.py @@ -0,0 +1,8 @@ +from gevent.queue import Queue, JoinableQueue +fetch_queue = Queue() +compile_queue = Queue() +process_queue = Queue() + +def put_fetch_queue(o): + fetch_queue.put(o) + diff --git a/compiler/mworker.py b/compiler/mworker.py new file mode 100644 index 0000000..b623978 --- /dev/null +++ b/compiler/mworker.py @@ -0,0 +1,58 @@ + +from mqueues import fetch_queue, compile_queue, process_queue +from compiler import do_compile +from mprocess import do_process +from fetching import fetch_page +from gevent import spawn +from itertools import repeat +from src import clogger +def start_workers(f,c,p): + for _ in range(f): + clogger.debug("spawn fetchworker") + spawn(work_fetch) + for _ in range(c): + spawn(work_compile) + for _ in range(p): + spawn(work_process) + +def work_fetch(): + while True: + run_fetch() + +def work_process(): + while True: + run_process() +def work_compile(): + while True: + run_compile() + + +def queue_url(tpe, url): + fetch_queue.put((0,tpe,url)) + + +# fetch a page from the url list +def run_fetch(): + tc, tpe, url = fetch_queue.get() + if tpe is not "dummyarticle" and tpe is not "dummyindex": + rw=fetch_page(url) + else: + rw="
dummytext
" + compile_queue.put((0, tpe, {"url": url, "sourcetype": tpe, "raw": rw})) + return rw + # fetch_queue.task_done() + +#comile something from the compile list +def run_compile(): + tc,tpe,h = compile_queue.get() + h=do_compile(tpe,h) + process_queue.put((0,tpe, h)) + return h + # compile_queue.task_done() + +def run_process(): + tc,tpe,h = process_queue.get() + do_process(tpe, h) + return h +# process_queue.task_done() + diff --git a/compiler/views.py b/compiler/views.py new file mode 100644 index 0000000..0cfbbad --- /dev/null +++ b/compiler/views.py @@ -0,0 +1,146 @@ +from flask import Blueprint, jsonify, render_template, abort, redirect, url_for, request +compiler_pages = Blueprint('compiler', __name__, + template_folder='.') + +from src.database import db_session2,init_db,read_json,init_db2 +from .models import CrawlUrl +from .models import CrawlCache, CrawlCacheSchema +from .models import CrawlUrlSchema +from src import clogger +from src.articles import Article +#import mworker +import flask +import json +import mworker + +from compiler import do_compile +from fetching import fetch_page + +#flask.json.JSONEncoder.default = lambda self,obj: ((obj.__json__()) if isinstance(obj, (Article,CrawlUrl)) else None) + +@compiler_pages.route("/") +@compiler_pages.route("") +@compiler_pages.route(".json") +def index(): + status="For documentation goto /doc" + return jsonify(status=status) + +@compiler_pages.route("/doc") +@compiler_pages.route("/doc.json") +def doc(): + return render_template("README") +# return jsonify(status=render_template("README")) +# + + +@compiler_pages.route("/initdb") +@compiler_pages.route("/initdb.json") +def initdb_json(): + init_db() # initialisiere Datenbank + status="Datenbank Neu initialisiert" + return jsonify(status=status) + +@compiler_pages.route("/initdb2") +@compiler_pages.route("/initdb2.json") +def initdb_json2(): + init_db2() # initialisiere Datenbank + status="Datenbank Neu initialisiert" + return jsonify(status=status) + +@compiler_pages.route("/start") +@compiler_pages.route("/start.json") +def start_json(): + mworker.start_workers(1,1,1) # initialisiere Datenbank + status="Worker gestartet" + return jsonify(status=status) + + +@compiler_pages.route("/urls") +@compiler_pages.route("/urls.json") +def urls_index_json(): + # Lade Alle Urls + status=CrawlUrl.query.all() + return jsonify(urls=status) + +# show an existing CrawlUrl +@compiler_pages.route("/urls/") +@compiler_pages.route("/urls/.json") +def urls_json(id): + # Lade Alle Urls + status=CrawlUrl.query.get(id) + cc=CrawlCache.query.filter(CrawlCache.url==status.url).first() + return jsonify(urls=status, cache=cc.__json__()) + +# que an existing CrawlUrl for fetching +@compiler_pages.route("/urls//que") +@compiler_pages.route("/urls//que.json") +def urls_que_json(id): + # Lade Alle Urls + cu=CrawlUrl.query.get(id) + mworker.queue_url(cu.tpe, cu.url) + cc=CrawlCache.query.filter(CrawlCache.url==cu.url) + mworker.start_workers(1,1,1) # initialisiere Datenbank + status="Worker gestartet" + return jsonify(urls=cu, cache=cc) + + +# que an existing CrawlUrl for fetching +@compiler_pages.route("/urls//test") +@compiler_pages.route("/urls//test.json") +def urls_test_json(id): + # Lade Alle Urls + cu=CrawlUrl.query.get(id) + rw=fetch_page(cu.url) + h= {"url": cu.url, "sourcetype": cu.tpe, "raw": rw} + h2=do_compile(cu.tpe, h) + return jsonify(urls=cu,hs=h2,rw=rw) + + + + +@compiler_pages.route("/debug",methods=['GET','PUT']) +def debug(): + status="did nothing" + js=read_json(request) + 
clogger.info(request.get_json()) + if js["cmd"] == "runfetch": + mworker.run_fetch() + status="fetched something" + if js["cmd"] == "que": + cu = CrawlUrl.query.get(js["id"]) + mworker.queue_url(cu.tpe, cu.url) + status= mworker.run_fetch() + if js["cmd"] == "comp": + status=mworker.run_compile() + if js["cmd"]=="process": + status=mworker.run_process() + return jsonify(status=status) + +@compiler_pages.route("/debugurl") +def debugurl(): + s=CrawlUrlSchema() + status=CrawlUrl.query.all() + return jsonify(status=status) + + +@compiler_pages.route("/urls",methods=['POST']) +def add_urls(): + # Lese Daten + js =read_json(request) + # clogger.info(js) + # Finde oder Erzeuge Url in der Datenbank + url=CrawlUrlSchema().load(js["url"]) + clogger.info(url) + url=CrawlUrl.find_or_create(url.data["tpe"], url.data["url"]) + db_session2.add(url) + db_session2.commit() + return jsonify(url=url, kk=js) + +@compiler_pages.route("/urls/",methods=['DELETE']) +@compiler_pages.route("/urls.json",methods=['DELETE']) +def delete(id): + cu=CrawlUrl.query.get(id) + if cu != None: + db_session2.delete(cu) + db_session2.commit() + return jsonify(url={}) diff --git a/crawler/__init__.py b/crawler/__init__.py new file mode 100644 index 0000000..b31d450 --- /dev/null +++ b/crawler/__init__.py @@ -0,0 +1,4 @@ + + +def init(): + return " " diff --git a/database.py b/database.py new file mode 100644 index 0000000..bae434d --- /dev/null +++ b/database.py @@ -0,0 +1,55 @@ +from sqlalchemy import create_engine +from sqlalchemy.orm import scoped_session, sessionmaker +from sqlalchemy.ext.declarative import declarative_base +from src import package_directory,clogger, cfg +from os import path +import json +#engine = create_engine('sqlite:////home/andreas/www/crawler/test.db', convert_unicode=True) + +if cfg.get("db_path")==None or cfg.get("db_path").strip()=="": + db_path=package_directory +else: + db_path=cfg.get("db_path") + +db_mainfile=cfg.get("db_mainfile") +if db_mainfile == None or db_mainfile.strip()=="": + db_mainfile="../srctest.db" + +db_urlfile=cfg.get("db_mainfile") +if db_urlfile == None or db_urlfile.strip()=="": + db_urlfile="../srctest_cu.db" + + +engine = create_engine('sqlite:///'+ path.join(db_path,db_mainfile), convert_unicode=True) + +db_session = scoped_session(sessionmaker(autocommit=False, + autoflush=False, + bind=engine)) + +engine2 = create_engine('sqlite:///'+ path.join(db_path,db_urlfile), convert_unicode=True) + +db_session2 = scoped_session(sessionmaker(autocommit=False, + autoflush=False, + bind=engine2)) + +Base = declarative_base() +Base.query = db_session.query_property() +Base2 = declarative_base() +Base2.query = db_session2.query_property() + +def read_json(rq): + js=rq.get_json() + clogger.info(rq.data) + if js is None: + js=rq.form.to_dict() + if js=={} and rq.data != "": + js=json.loads(rq.data) + return js + +def init_db(): + import src.models + Base.metadata.create_all(bind=engine) + +def init_db2(): + from .compiler.models import CrawlUrl, CrawlCache + Base2.metadata.create_all(bind=engine2) diff --git a/fb.py b/fb.py new file mode 100644 index 0000000..30b1828 --- /dev/null +++ b/fb.py @@ -0,0 +1,4 @@ +from src import cfg +import facebook + +graph = facebook.GraphAPI(access_token=cfg.fb_token, version='2.3') diff --git a/meta.py b/meta.py new file mode 100644 index 0000000..53528c3 --- /dev/null +++ b/meta.py @@ -0,0 +1,21 @@ +import os +package_directory = os.path.dirname(os.path.abspath(__file__)) +from config import Config +import logging +import sys + +cfg = 
Config(file(os.path.join(package_directory, 'config.cfg'))) +#--------------- Logging + + +file_handler=logging.FileHandler(cfg.logfile) +file_handler.setLevel(logging.INFO) +std_handler=logging.StreamHandler(stream=sys.stdout) +std_handler.setLevel(logging.DEBUG) + +lg=logging.getLogger('mylogger') +lg.setLevel(logging.DEBUG) +lg.addHandler(file_handler) +lg.addHandler(std_handler) + +#---------------- diff --git a/models.py b/models.py new file mode 100644 index 0000000..2205562 --- /dev/null +++ b/models.py @@ -0,0 +1,4 @@ + +from .articles.model import Article +from .sections.model import Section +from .compiler.models import CrawlUrl, CrawlCache diff --git a/sections/__init__.py b/sections/__init__.py new file mode 100644 index 0000000..745c2cd --- /dev/null +++ b/sections/__init__.py @@ -0,0 +1 @@ +from .model import Section diff --git a/sections/model.py b/sections/model.py new file mode 100644 index 0000000..2d289f3 --- /dev/null +++ b/sections/model.py @@ -0,0 +1,44 @@ +from sqlalchemy import Column, Integer, String, Boolean, DateTime, Text, ForeignKey +from sqlalchemy.orm import relationship + +from datetime import datetime +from src.database import Base,db_session +from marshmallow import Schema, fields + +import json +import flask + +#from src.articles import Article + +class SectionSchema(Schema): + id=fields.Integer() + foreign_name=fields.String() + name=fields.String() + +class Section(Base): + __tablename__ = 'sections' + id = Column(Integer, primary_key=True) + url = Column(String(250)) + crawlurl = Column(Integer) + foreign_name = Column(String(250),unique=True) + name=Column(String(250)) + group = Column(String(250)) + articles=relationship("Article", back_populates="section") + + def __json__(self): + return SectionSchema().dump(self)[0] + def __init__(self, url=None,fname=None): + self.url=url + self.foreign_name=fname + + @classmethod + def find_or_create(cls, fname): + s=Section.query.filter(Section.foreign_name==fname).first() + if s is None: + s=Section(fname) + db_session.add(s) + db_session.commit() + s.foreign_name=fname + db_session.add(s) + db_session.commit() + return s diff --git a/sections/views.py b/sections/views.py new file mode 100644 index 0000000..f82929a --- /dev/null +++ b/sections/views.py @@ -0,0 +1,37 @@ +from flask import Blueprint, jsonify, render_template, abort, redirect, url_for, request +section_pages = Blueprint('sections', __name__) +from .model import Section +from .model import SectionSchema +#import flask +from datetime import datetime +import json +from src import clogger + +from src.database import db_session, read_json +import flask + +@section_pages.route("/") +@section_pages.route("") +@section_pages.route(".json") +def index(): + sections=Section.query.all() + return jsonify(sections=sections) + +@section_pages.route("/",methods=['PUT']) +@section_pages.route("/.json",methods=['PUT']) +def update(id): + section=Section.query.get(id) + clogger.info(request.data) + a=request.get_json() + section.text=a["text"] + db_session.commit() + return jsonify(section=section) + + +@section_pages.route("/",methods=['GET']) +@section_pages.route("/.json",methods=['GET']) +def get(id): + section=Section.query.get(id) + clogger.info(section) +# section=SectionSchema().dump(section)[0] + return jsonify(section=section,articles=section.articles) diff --git a/templates/home.html b/templates/home.html new file mode 100644 index 0000000..f3e333e --- /dev/null +++ b/templates/home.html @@ -0,0 +1 @@ +
Hello World
diff --git a/users/users.py b/users/users.py new file mode 100644 index 0000000..55896b3 --- /dev/null +++ b/users/users.py @@ -0,0 +1,19 @@ + +class User(object): + def __init__(self, id, username, password): + self.id = id + self.username = username + self.password = password + + def __str__(self): + return "User(id='%s')" % self.id + +user = User(1, 'user', 'password') +def authenticate(username, password): + if username == user.username and password == user.password: + return user + +def identity(payload): + return user + +
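
Usage sketch (not part of the patch): assuming the Flask app from __init__.py is served at http://localhost:5000, a crawl source could be registered and the workers started through the compiler blueprint roughly as below. The host, port and feed address are placeholders, and "fschfeed" is just one of the type names mentioned in compiler/compiler.py (the effective type-to-compiler mapping is loaded from config.cfg). Note that CrawlUrlSchema in compiler/models.py names the type field "tpe", so that key is used here rather than the "type" shown in compiler/README.

import requests

BASE = "http://localhost:5000/compiler"  # assumed host/port; blueprint prefix from __init__.py

# Register a source to monitor; the schema expects the keys "tpe" and "url".
requests.post(BASE + "/urls",
              json={"url": {"tpe": "fschfeed", "url": "http://example.org/feed.rss"}})

# Spawn one fetch, one compile and one process worker, then list the monitored urls.
requests.get(BASE + "/start")
print(requests.get(BASE + "/urls.json").json())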