This commit is contained in:
andis
2017-08-11 18:05:38 +02:00
22 changed files with 250 additions and 283 deletions

View File

@@ -1,184 +0,0 @@
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import yaml
from storage import MailThread,db_session
# Load persisted training labels; `train` maps thread ids to per-key label
# dicts, e.g. {27420: {"answered": True, "maintopic": "studium"}}.
with open("data.yml", 'r') as stream:
    try:
        # safe_load instead of load: data.yml is plain data, and the other
        # scripts in this project already use yaml.safe_load.
        train = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)
# Allowed label keys and the exact Python type each label value must have.
data_types = {"answered": bool, "maintopic": str}
def store_training_data(i, d, key="answered"):
    """Store label value `d` for thread id `i` under label `key`.

    The in-memory `train` dict is updated and immediately persisted to
    data.yml so labels survive restarts.

    Raises:
        ValueError: if `key` is not a declared key in `data_types`.
        TypeError: if `d` does not have the type declared for `key`.
    """
    global train
    # dict.has_key() is Python-2-only; `in` works in both 2 and 3.
    if key not in data_types:
        raise ValueError("Key " + str(key) + " unknown")
    if i not in train:
        train[i] = {}
    if key is not None and type(train[i]) is dict:
        if type(d) is not data_types[key]:
            raise TypeError("Data - %s - for key " % d + str(key) + " must be "
                            + str(data_types[key]) + " but it is " + str(type(d)))
        train[i][key] = d
    # `with` closes the file even on error; the old explicit close() inside
    # the with-block was redundant.
    with open("data.yml", "w") as out:
        out.write(yaml.dump(train, default_flow_style=True))
def get_training_threads(key="answered"):
    """Load training data for a given key (label/property).

    Returns a tuple (threads, encoded_labels, label_encoder): the MailThread
    DB rows whose training entry carries `key`, their labels numerically
    encoded, and the fitted LabelEncoder for decoding predictions later.
    """
    t_a = []
    d_a = []
    for i in train:
        # The thread must exist in the database ...
        t = db_session.query(MailThread).filter(MailThread.firstmail == i).first()
        if t is not None:
            # ... and its training entry must contain the requested key.
            if key in train[i]:
                t_a.append(t)
                d_a.append(train[i][key])
    le = LabelEncoder()
    d_a2 = le.fit_transform(d_a)
    return (t_a, d_a2, le)
def in_training(i, key="answered"):
    """Return True if thread id `i` already has a training label for `key`."""
    # `in` replaces Python-2-only dict.has_key().
    return i in train and key in train[i]
def print_answers(l):
cc=l.classes_
c_id=l.transform(cc)
for i,c in enumerate(cc):
print str(i) + ": " + str(c)
return None
class ThreadDictExtractor(BaseEstimator, TransformerMixin):
    """Sklearn-compatible transformer mapping MailThreads to flat mail dicts."""

    def fit(self, x, y=None):
        # Stateless: nothing to learn, return self for Pipeline chaining.
        return self

    def transform(self, X, y=None):
        extracted = []
        for thread in X:
            extracted.append(thread.mail_flat_dict())
        return extracted
class ThreadSubjectExtractor(BaseEstimator, TransformerMixin):
    """Sklearn-compatible transformer mapping MailThreads to subject strings."""

    def fit(self, x, y=None):
        # Stateless transformer.
        return self

    def transform(self, X, y=None):
        subjects = []
        for thread in X:
            subjects.append(thread.subject())
        return subjects
class ThreadTextExtractor(BaseEstimator, TransformerMixin):
    """Sklearn-compatible transformer mapping MailThreads to body text."""

    def fit(self, x, y=None):
        # Stateless transformer.
        return self

    def transform(self, X, y=None):
        texts = []
        for thread in X:
            texts.append(thread.text())
        return texts
def _build_union_pipe(weights, classifier_step):
    """Build the shared subject/text/envelope FeatureUnion pipeline.

    `weights` maps the transformer names ('subject', 'text', 'envelope') to
    FeatureUnion weights; `classifier_step` is the final (name, estimator)
    step. Factored out because pipe2/pipe2b/pipe2c only differ in these.
    """
    union = FeatureUnion(
        transformer_list=[
            ('subject', Pipeline([('tse', ThreadSubjectExtractor()),
                                  ('cv', CountVectorizer()),
                                  ('tfidf', TfidfTransformer())])),
            ('text', Pipeline([('tte', ThreadTextExtractor()),
                               ('cv', CountVectorizer()),
                               ('tfidf', TfidfTransformer())])),
            ('envelope', Pipeline([('tde', ThreadDictExtractor()),
                                   ('dv', DictVectorizer())])),
        ],
        transformer_weights=weights)
    return Pipeline([('union', union), classifier_step])


def build_pipe(p="pipe1"):
    """Return an unfitted sklearn Pipeline identified by name `p`.

    Known names: 'pipe1' (envelope dict + naive bayes), 'pipe2' (feature
    union + naive bayes), 'pipe2b' and 'pipe2c' (feature union + MLP with
    different transformer weights).

    Raises:
        ValueError: for unknown pipe names.
    """
    if p == "pipe1":
        return Pipeline([('tde', ThreadDictExtractor()),
                         ('dv', DictVectorizer()),
                         ('clf', MultinomialNB())])
    if p == "pipe2":
        return _build_union_pipe({'subject': 1, 'text': 0.7, 'envelope': 0.7},
                                 ('clf', MultinomialNB()))
    if p == "pipe2b":
        return _build_union_pipe({'subject': 1, 'text': 0.7, 'envelope': 0.7},
                                 ('mlc', MLPClassifier()))
    if p == "pipe2c":
        return _build_union_pipe({'subject': 1, 'text': 1, 'envelope': 0.4},
                                 ('mlc', MLPClassifier()))
    # Bug fix: the original message contained a bare "%s" placeholder that
    # was never interpolated with the offending pipe name.
    raise ValueError("The pipe %s is not a valid pipe" % p)
def get_pipe(p="pipe1", k="answered"):
    """Build pipe `p` and fit it on the training threads labeled with key `k`.

    Returns (fitted_pipeline, label_encoder); the encoder decodes the
    pipeline's numeric predictions back into label values.
    """
    pipe = build_pipe(p)
    threads, labels, le = get_training_threads(k)
    pipe.fit(threads, labels)
    return pipe, le
from sklearn.metrics import accuracy_score
def test_pipe(pp,k):
tt= get_training_threads(k)
X_train,X_test,y_train,y_test=train_test_split(tt[0],tt[1],test_size=0.2)
if type(pp) is list:
for p in pp:
print "pipe: %s" % p
p=build_pipe(p)
p.fit(X_train,y_train)
ypred=p.predict(X_test)
print accuracy_score(y_test,ypred)
#pipe1=get_pipe("pipe1", "answered")
#pipe2=get_pipe("pipe2", "maintopic")
#pipe2b=get_pipe("pipe2b", "maintopic")

View File

@@ -102,7 +102,7 @@ def get_pipe(p=b"pipe1",k=b"answered"):
def test_pipe(pp,k):
tt= get_training_threads(k)
X_train,X_test,y_train,y_test=train_test_split(tt[0],tt[1],test_size=0.2)
X_train,X_test,y_train,y_test=train_test_split(tt[0],tt[1],test_size=0.4)
if type(pp) is list:
for p in pp:
print "pipe: %s" % p

View File

@@ -1,25 +0,0 @@
from sklearn.feature_extraction.text import TfidfTransformer,CountVectorizer
from sklearn.feature_extraction import DictVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline, FeatureUnion
import sys
import yaml
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
# Experiment script: a few sklearn pipeline variants over the toy data set
# in example_1.yaml. Only text_clf is fitted below; the other objects are
# alternative feature setups kept for experimentation.
text_clf = Pipeline([('vect', CountVectorizer()),('tfidf', TfidfTransformer()),('clf', MultinomialNB())])
text_ohc = Pipeline([('ohc', OneHotEncoder()),('clf', MultinomialNB())])
combined_features = FeatureUnion([('vect1', CountVectorizer()),('vect2', CountVectorizer())])
enc=OneHotEncoder()
# Load the toy training data: expects keys "data" (texts) and "target" (labels).
with open("example_1.yaml", 'r') as stream:
    try:
        train=yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)
# Fit the bag-of-words + tf-idf + naive bayes pipeline on the toy data.
tc=text_clf.fit(train["data"],train["target"])

View File

@@ -1,42 +0,0 @@
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
text_clf = Pipeline([('vect', CountVectorizer()),('tfidf', TfidfTransformer()),('clf', MultinomialNB())])
import sys
import yaml
with open("example_1.yaml", 'r') as stream:
try:
train=yaml.safe_load(stream)
except yaml.YAMLError as exc:
print(exc)
tc=text_clf.fit(train["data"],train["target"])
print(sys.argv[1])
answ=(tc.predict([sys.argv[1]]))[0]
print train["target_names"][answ]
for i in range(0, (len(train["target_names"]))):
print (str(i)+" "+ train["target_names"][i])
ca=int(raw_input("Correct answer.."))
if ca == answ:
print ("Yes I got it right")
else:
print("should I remember this?")
a=raw_input("shoudIrememberthis?")
if a == "y":
train["data"].append(sys.argv[1])
train["target"].append(ca)
print yaml.dump(train,default_flow_style=False)
file=open("example_1.yaml","w")
file.write(yaml.dump(train,default_flow_style=False))
file.close()
else:
print ("Ok, I already forgot")

View File

@@ -7,9 +7,10 @@
27171: {maintopic: ausleihen}, 27178: {maintopic: studium}, 27182: {maintopic: studium},
27197: {maintopic: information}, 27201: {maintopic: information}, 27218: {maintopic: information},
27219: {maintopic: studium}, 27222: {maintopic: information}, 27226: {maintopic: ausleihen},
27420: {answered: true, maintopic: studium}, 27422: {answered: true, maintopic: studium},
27425: {answered: false, maintopic: studium}, 27431: {answered: false, maintopic: information},
27434: {answered: false, lang: de, maintopic: information}, 27435: {answered: false},
27263: {maintopic: ausleihen}, 27267: {maintopic: ausleihen}, 27420: {answered: true,
maintopic: studium}, 27422: {answered: true, maintopic: studium}, 27425: {answered: false,
maintopic: studium}, 27431: {answered: false, maintopic: information}, 27434: {
answered: false, lang: de, maintopic: information}, 27435: {answered: false},
27438: {answered: false, maintopic: information}, 27439: {answered: true, maintopic: studium},
27441: {answered: false, maintopic: studium}, 27444: {answered: true, maintopic: ausleihen},
27454: {answered: false, maintopic: information}, 27455: {answered: false, maintopic: information},

35
db_model_version_1.py Normal file
View File

@@ -0,0 +1,35 @@
## File autogenerated by genmodel.py
# Schema snapshot used by the migration tooling: reflects the 'mails' and
# 'threads' tables as of model version 1.
from sqlalchemy import *
meta = MetaData()
# 'mails': one row per fetched mail; envelope fields are flattened into
# columns, address fields split into mailbox/host parts.
mails = Table('mails', meta,
    Column('created_at', TIMESTAMP, nullable=False),
    Column('updated_at', TIMESTAMP, nullable=False),
    Column('id', INTEGER, primary_key=True, nullable=False),
    Column('date', DATETIME),
    Column('envelope', TEXT),
    Column('body', TEXT),
    Column('text', TEXT),
    Column('from_', TEXT),
    Column('from_mailbox', VARCHAR),
    Column('from_host', VARCHAR),
    Column('to_', TEXT),
    Column('to_mailbox', TEXT),
    Column('to_host', VARCHAR),
    Column('subject', TEXT),
)
# 'threads': one row per mail thread; 'firstmail' references the id of the
# thread's first mail (presumably mails.id — TODO confirm against callers).
threads = Table('threads', meta,
    Column('created_at', TIMESTAMP, nullable=False),
    Column('updated_at', TIMESTAMP, nullable=False),
    Column('id', INTEGER, primary_key=True, nullable=False),
    Column('firstmail', INTEGER),
    Column('islabeled', BOOLEAN),
    Column('opened', BOOLEAN),
    Column('body', TEXT),
)

4
db_repository/README Normal file
View File

@@ -0,0 +1,4 @@
This is a database migration repository.
More information at
http://code.google.com/p/sqlalchemy-migrate/

View File

6
db_repository/manage.py Normal file
View File

@@ -0,0 +1,6 @@
#!.env/bin/python
# sqlalchemy-migrate management entry point for the db_repository.
from migrate.versioning.shell import main
import storage
if __name__ == '__main__':
    # NOTE(review): debug is passed as the string 'False' (the standard
    # migrate template default); migrate's shell parses it — confirm before
    # changing it to a bool.
    main(debug='False')

25
db_repository/migrate.cfg Normal file
View File

@@ -0,0 +1,25 @@
[db_settings]
# Used to identify which repository this database is versioned under.
# You can use the name of your project.
repository_id=service mail db repository
# The name of the database table used to track the schema version.
# This name shouldn't already be used by your project.
# If this is changed once a database is under version control, you'll need to
# change the table name in each database too.
version_table=migrate_version
# When committing a change script, Migrate will attempt to generate the
# sql for all supported databases; normally, if one of them fails - probably
# because you don't have that database installed - it is ignored and the
# commit continues, perhaps ending successfully.
# Databases in this list MUST compile successfully during a commit, or the
# entire commit will fail. List the databases your application will actually
# be using to ensure your updates to that database work properly.
# This must be a list; example: ['postgres','sqlite']
required_dbs=[]
# When creating new change scripts, Migrate will stamp the new script with
# a version number. By default this is latest_version + 1. You can set this
# to 'true' to tell Migrate to use the UTC timestamp instead.
use_timestamp_numbering=False

1
db_repository/storage Symbolic link
View File

@@ -0,0 +1 @@
storage

View File

@@ -0,0 +1,33 @@
# sqlalchemy-migrate version script: add the 'date' column to 'threads'.
from sqlalchemy import *
from migrate import *
from migrate.changeset import schema
pre_meta = MetaData()
post_meta = MetaData()
# Target shape of the 'threads' table AFTER this migration.
threads = Table('threads', post_meta,
    Column('created_at', TIMESTAMP, nullable=False),
    Column('updated_at', TIMESTAMP, nullable=False),
    Column('id', Integer, primary_key=True, nullable=False),
    Column('firstmail', Integer),
    Column('date', DateTime),
    Column('islabeled', Boolean),
    Column('opened', Boolean),
    Column('body', Text),
)
def upgrade(migrate_engine):
    # Upgrade operations go here. Don't create your own engine; bind
    # migrate_engine to your metadata
    pre_meta.bind = migrate_engine
    post_meta.bind = migrate_engine
    # Add the new 'date' column on the live database.
    post_meta.tables['threads'].columns['date'].create()
def downgrade(migrate_engine):
    # Operations to reverse the above upgrade go here.
    pre_meta.bind = migrate_engine
    post_meta.bind = migrate_engine
    # Remove the 'date' column again.
    post_meta.tables['threads'].columns['date'].drop()

View File

@@ -0,0 +1,34 @@
# sqlalchemy-migrate version script: add the 'maintopic' column to 'threads'.
from sqlalchemy import *
from migrate import *
from migrate.changeset import schema
pre_meta = MetaData()
post_meta = MetaData()
# Target shape of the 'threads' table AFTER this migration.
threads = Table('threads', post_meta,
    Column('created_at', TIMESTAMP, nullable=False),
    Column('updated_at', TIMESTAMP, nullable=False),
    Column('id', Integer, primary_key=True, nullable=False),
    Column('firstmail', Integer),
    Column('date', DateTime),
    Column('islabeled', Boolean),
    Column('opened', Boolean),
    Column('body', Text),
    Column('maintopic', String),
)
def upgrade(migrate_engine):
    # Upgrade operations go here. Don't create your own engine; bind
    # migrate_engine to your metadata
    pre_meta.bind = migrate_engine
    post_meta.bind = migrate_engine
    # Add the new 'maintopic' column on the live database.
    post_meta.tables['threads'].columns['maintopic'].create()
def downgrade(migrate_engine):
    # Operations to reverse the above upgrade go here.
    pre_meta.bind = migrate_engine
    post_meta.bind = migrate_engine
    # Remove the 'maintopic' column again.
    post_meta.tables['threads'].columns['maintopic'].drop()

View File

View File

@@ -5,7 +5,7 @@ from flask import Flask,jsonify,send_from_directory, render_template
from config import Config
import yaml
import os
from sqlalchemy import desc
from storage import MailThread,db_session
app = Flask(__name__, template_folder="templates", static_folder="static")
@@ -20,20 +20,35 @@ def render_index(mths,code=200):
), code
from classifier import get_pipe
mail_threads=db_session.query(MailThread).all()
pipe1,le=get_pipe("pipe1",b"answered")
pipe2,le2=get_pipe("pipe2b", b"maintopic")
pipe3,le3=get_pipe("pipe2b", b"lang")
#pipe1,le=get_pipe("pipe1",b"answered")
#pipe2,le2=get_pipe("pipe2b", b"maintopic")
#pipe3,le3=get_pipe("pipe2b", b"lang")
answered=le.inverse_transform(pipe1.predict(mail_threads))
maintopic=le2.inverse_transform(pipe2.predict(mail_threads))
lang=le3.inverse_transform(pipe3.predict(mail_threads))
#answered=le.inverse_transform(pipe1.predict(mail_threads))
#maintopic=le2.inverse_transform(pipe2.predict(mail_threads))
#lang=le3.inverse_transform(pipe3.predict(mail_threads))
#for i, t in enumerate(mail_threads):
# t.answered=answered[i]
# t.maintopic=maintopic[i]
# t.lang=lang[i]
for i, t in enumerate(mail_threads):
t.answered=answered[i]
t.maintopic=maintopic[i]
t.lang=lang[i]
@app.route("/")
def hello():
mth=mail_threads
mth=db_session.query(MailThread).order_by(desc(MailThread.date)).all()
return render_index(mth)
@app.route("/answered/<id>/<value>")
def store_answered(id, value):
mth=db_session.query(MailThread).filter(MailThread.firstmail==int(id)).first()
value= value in ["true", "True", "1", "t"]
mth.answered=bool(value)
mth.opened=bool(value)
return render_index([mth])
@app.route("/studium")
def studium():
mth=db_session.query(MailThread).filter(MailThread.maintopic=="studium").order_by(desc(MailThread.date)).all()
return render_index(mth)

View File

@@ -6,6 +6,11 @@
<script src="static/bootstrap/js/bootstrap.min.js" ></script>
</head>
<body>
<style>
.card.opened-True {
background: lightgreen;
}
</style>
<div class="container">
<div class="row">
<div class="col-12">
@@ -14,7 +19,7 @@
<div id="accordion" role="tablist" aria-multiselectable="true">
{% for m in mths %}
<div class="card" style="padding-top: 2pt; padding-bottom:2pt; border-radius:0;margin-top:1pt; margin-bottom:1pt">
<div class="card opened-{{m.opened}}" style="padding-top: 2pt; padding-bottom:2pt; border-radius:0;margin-top:1pt; margin-bottom:1pt">
<div class="" role="tab" id="heading{{m.firstmail}}">
<b class="mb-0">
<a data-toggle="collapse" data-parent="#accordion" href="#collapse{{m.firstmail}}" aria-expanded="true" aria-controls="collapse1">
@@ -26,7 +31,7 @@
<div id="collapse{{m.firstmail}}" class="collapse" role="tabpanel" aria-labelledby="headingOne">
<div class="card-block">
<div style="white-space: pre-wrap;font:Courier, monospace; font-size:small; width:50em; border: thin blue solid;"> {{ m.text() }} </div>
<div style="white-space: pre-wrap;font:Courier, monospace; font-size:small; width:50em; border: thin blue solid;"> {{ m.print_text() }} </div>
</div>
</div>
</div>
@@ -36,7 +41,7 @@
{% for m in mths %}
ID: {{m.tstr()}}
{{ m.text() }}
{{ m.print_text() }}
{% endfor %}
</div>

6
migration.py Executable file
View File

@@ -0,0 +1,6 @@
#!.env/bin/python
# Command-line wrapper around sqlalchemy-migrate, pinned to this project's
# repository directory and SQLite database URL.
from migrate.versioning.shell import main
import storage
if __name__ == '__main__':
    # NOTE(review): debug is the string 'False' (migrate template default);
    # migrate's shell parses it — confirm before changing to a bool.
    main(debug='False',repository="db_repository", url="sqlite:///test.sqlite")

22
run.py
View File

@@ -16,7 +16,6 @@ from storage import Mail, MailThread, db_session
#import yaml
#import email
from classifier import get_training_threads, print_answers, in_training, store_training_data, get_pipe, test_pipe, train_single_thread # , pipe2, pipe2b
from flaskapp import app
def predict_thread(p,l,t):
@@ -27,10 +26,31 @@ def predict_thread(p,l,t):
#print "arg1:"+sys.argv[1]
if len(sys.argv)>1:
if sys.argv[1] == "fetch_threads":
print flatten_threads(fetch_threads())
if sys.argv[1] == "predict_threads":
pipe1,le=get_pipe("pipe1",b"answered")
pipe2,le2=get_pipe("pipe2b", b"maintopic")
pipe3,le3=get_pipe("pipe2b", b"lang")
mail_threads=db_session.query(MailThread).all()
answered=le.inverse_transform(pipe1.predict(mail_threads))
maintopic=le2.inverse_transform(pipe2.predict(mail_threads))
lang=le3.inverse_transform(pipe3.predict(mail_threads))
for i, t in enumerate(mail_threads):
t.answered=answered[i]
t.opened=answered[i]
t.maintopic=maintopic[i]
t.lang=lang[i]
db_session.add(t)
db_session.commit()
if sys.argv[1] == "run_server":
from flaskapp import app
app.run(port=3000,debug=True)
if sys.argv[1] == "print_threads":

View File

@@ -1,3 +1,5 @@
from database import db_session, init_db
from mail_model import Mail
from thread_model import MailThread
from database import Base
metadata=Base.metadata

View File

@@ -75,6 +75,8 @@ class Mail(Base):
from_array=[]
# print "Status"
# print env
if not env.date is None:
self.date=env.date
if not env.to is None:
for t in env.to:
a={"host": t.host, "mail": t.mailbox}

View File

@@ -23,15 +23,17 @@ class MailThread(Base):
__tablename__ = 'threads'
id = Column(Integer, primary_key=True)
firstmail = Column(Integer)
date = Column(DateTime)
islabeled = Column(Boolean)
opened = Column(Boolean)
body = Column(Text)
maintopic=Column(String)
__schema__=FullThreadSchema
__jsonid__='thread'
__whiteattrs__= ["body"]
__jsonattrs__=None
answered=False
maintopic="information"
# maintopic="information"
lang=""
def bdy(self):
return yaml.load(self.body)
@@ -51,7 +53,7 @@ class MailThread(Base):
def tstr(self):
fr=yaml.load(self.mails()[0].from_)
return "(" + str(self.answered)+ ", "+ str(self.maintopic)+ ", "+ str(self.lang) + ") " + str(self.firstmail)+": "+str(fr[0]["mail"])+"@"+str(fr[0]["host"]) + " | ".join(yaml.load(self.mails()[0].subject))
return "(" + str(self.opened)+ ", "+ str(self.maintopic)+ ", "+ str(self.lang) + ") " + str(self.firstmail)+": "+str(fr[0]["mail"])+"@"+str(fr[0]["host"]) + " | ".join(yaml.load(self.mails()[0].subject))
def mails(self):
a=[]
@@ -94,7 +96,34 @@ class MailThread(Base):
m.compile_text()
db_session.add(m)
db_session.commit()
self.date=self.mails()[0].date
def print_text(self,filter="all"):
a=u""
def mail_txt(m):
#txt ="Gesendet von: "+ str(m.from_mailbox)+"@"+str(m.from_host) +"\n"
txt=""
fr=yaml.load(m.from_)
txt= txt+ "Gesendet von: "+str(fr[0]["mail"])+"@"+str(fr[0]["host"])+" am "+ str(m.date) + "\n"
t=yaml.load(m.text)
if type(t) is unicode:
#txt=txt
txt=txt+t
else:
t=t.decode("ISO-8859-1")
txt=txt+t
return txt
if filter=="all":
mm=self.mails()
for m in mm:
a=a+mail_txt(m)+"\n****........................................***\n"
elif filter=="first":
a=mail_txt(m[0])
a=re.sub(r'\n\s*\n',r'\n',a)
a=re.sub(r'<!--.*-->',r'',a,flags=re.MULTILINE|re.DOTALL)
a=re.sub(r'\s*>+ .*\n',r'',a)
return a
def text(self,filter="all"):
a=u""
def mail_txt(m):
@@ -108,11 +137,11 @@ class MailThread(Base):
mm=self.mails()
if filter=="all":
for m in mm:
a=a+mail_txt(m)+"****........................................***\n"
a=a+mail_txt(m)+"\n****........................................***\n"
elif filter=="first":
a=mail_txt(m[0])
a=re.sub(r'\n\s*\n',r'\n',a)
# a=re.sub(r'<!--.*-->',r'',a,flags=re.MULTILINE|re.DOTALL)
a=re.sub(r'<!--.*-->',r'',a,flags=re.MULTILINE|re.DOTALL)
a=re.sub(r'\s*>+ .*\n',r'',a)

Binary file not shown.