rewrite classifier

2017-08-07 10:20:28 +02:00
parent ff0bdc6d3b
commit 94d8d26187
11 changed files with 411 additions and 98 deletions
--- a/classifier/init.py
+++ b/classifier/init.py
@@ -0,0 +1,4 @@
+from classifier import in_training, print_answers
+from classifier import get_pipe, test_pipe, get_training_threads
+from training import train_single_thread
+from classifier import store_training_data
--- a/classifier/classifier.py
+++ b/classifier/classifier.py
@@ -0,0 +1,191 @@
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.feature_extraction import DictVectorizer
+from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
+from sklearn.preprocessing import LabelEncoder
+from sklearn.pipeline import Pipeline, FeatureUnion
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.neural_network import MLPClassifier
+from sklearn.model_selection import train_test_split
+import numpy as np
+import yaml
+from storage import MailThread,db_session
+from sklearn.metrics import accuracy_score
+
+
+# Load the hand-labelled training data once at import time.  The result is
+# kept in the module-level name `train`, which the functions below read and
+# modify.
+with open("data.yml", 'r') as stream:
+    try:
+        # NOTE(review): yaml.load without an explicit Loader can construct
+        # arbitrary Python objects; yaml.safe_load would be the safer choice
+        # if data.yml only ever holds plain mappings -- confirm before changing.
+        train=yaml.load(stream)
+    except yaml.YAMLError as exc:
+        # NOTE(review): the error is only printed, so `train` stays undefined
+        # after a parse failure and later accesses raise NameError.
+        print(exc)
+
+# Allowed label keys and the exact Python type a stored value must have
+# (checked in set_train_data).
+data_types= { "answered": bool, "maintopic": str, "lang": str}
+
+def set_train_data(i,d,key=b"answered"):
+    global train
+    #------------------------------------    
+    if not data_types.has_key(key):
+        raise ValueError("Key "+str(key)+" unknown")
+    if not train.has_key(i) or train[i] is None:
+        train[i]={}
+    if not type(d)  is data_types[key]:
+        raise TypeError("Data - %s - for key "% d +str(key)+" must be " +str(data_types[key])+ " but it is "+ str(type(d)))
+    #------------------------------------
+    train[i][key]=d
+
+        
+def store_training_data(i, d,key=b"answered"):
+    set_train_data(i,d,key)
+    with open("data.yml","w") as file:
+        file.write(yaml.dump(train,default_flow_style=True))
+        file.close()
+
+
+# Lade Trainingsdaten fuer einen angegebenen key (Label/Eigenschaft) 
+def get_training_threads(key="answered", filter=[]):
+    if not data_types.has_key(key):
+        raise ValueError("Key "+str(key)+" unknown")
+    #------------------------------------
+    t_a=[]
+    d_a=[]
+    d_a2=[]
+    #------------------------------------
+    for i in train:
+        if train[i].has_key(key): # In den Trainingsdaten muss der relevante Key sein
+            t=db_session.query(MailThread).filter(MailThread.firstmail==i).first()
+            if not t is None:   # Thread muss in der Datenbank sein
+                t_a.append(t)
+                d_a.append(train[i][key])
+    le=LabelEncoder()
+    d_a2=le.fit_transform(d_a)
+    return (t_a,d_a2,le)
+
+
+def in_training(i, key="answered"):
+    return train.has_key(i) and train[i].has_key(key)
+
+
+def print_answers(l):
+    
+    cc=l.classes_
+    c_id=l.transform(cc)
+    for i,c in enumerate(cc):
+        print str(i) + ":  " + str(c)
+    return None
+
+
+class ThreadDictExtractor(BaseEstimator, TransformerMixin):
+    def fit(self, x, y=None):
+        return self
+    def transform(self, X,y=None):
+        return [t.mail_flat_dict() for t in X]
+
+class ThreadSubjectExtractor(BaseEstimator, TransformerMixin):
+    def fit(self, x, y=None):
+        return self
+    def transform(self, X,y=None):
+        return [t.subject() for t in X]
+
+class ThreadTextExtractor(BaseEstimator, TransformerMixin):
+    def fit(self, x, y=None):
+        return self
+    def transform(self, X,y=None):
+        return [t.text() for t in X]
+
+def get_pipe(p=b"pipe1",k=b"answered"):
+    p=build_pipe(p)
+    tt= get_training_threads(k)
+    if len(tt[0]) > 0:
+        p.fit(tt[0],tt[1])
+        return p,tt[2]
+    else:
+        return None, None
+
+def test_pipe(pp,k):
+    tt= get_training_threads(k)
+    X_train,X_test,y_train,y_test=train_test_split(tt[0],tt[1],test_size=0.2)
+    if type(pp) is list:
+        for p in pp:
+            print "pipe: %s" % p
+            p=build_pipe(p)
+            p.fit(X_train,y_train)
+            ypred=p.predict(X_test)
+            print accuracy_score(y_test,ypred)
+
+
+
+
+def build_pipe(p=b"pipe1"):
+    if p == "pipe1":
+        p=Pipeline([('tde', ThreadDictExtractor()),
+                    ('dv',DictVectorizer()),
+                    ('clf', MultinomialNB())
+        ])
+    elif p=="pipe2":
+        p = Pipeline([
+            ('union', FeatureUnion(transformer_list=[
+                ('subject', Pipeline([('tse', ThreadSubjectExtractor()),
+                                      ('cv',CountVectorizer()),
+                                      ('tfidf', TfidfTransformer())
+                ])),
+                ('text',    Pipeline([('tte',ThreadTextExtractor()),
+                                      ('cv',CountVectorizer()),
+                                      ('tfidf', TfidfTransformer())
+                ])),
+                ('envelope', Pipeline([('tde', ThreadDictExtractor()),
+                                       ('dv',DictVectorizer())
+                ]))
+            ], transformer_weights={
+                'subject': 1,
+                'text': 0.7,
+                'envelope': 0.7
+            } )),
+            ('clf', MultinomialNB())
+        ])
+    elif p=="pipe2b":
+        p = Pipeline([
+            ('union', FeatureUnion(transformer_list=[
+            ('subject', Pipeline([('tse', ThreadSubjectExtractor()),
+                                ('cv',CountVectorizer()),
+                                ('tfidf', TfidfTransformer())
+        ])),
+            ('text',    Pipeline([('tte',ThreadTextExtractor()),
+                                  ('cv',CountVectorizer()),
+                                ('tfidf', TfidfTransformer())
+            ])),
+            ('envelope', Pipeline([('tde', ThreadDictExtractor()),
+                                   ('dv',DictVectorizer())
+            ]))
+            ], transformer_weights={
+                'subject': 1,
+                'text': 0.7,
+                'envelope': 0.7
+            } )),
+            ('mlc', MLPClassifier())
+        ])
+    elif p=="pipe2c":
+        p = Pipeline([
+            ('union', FeatureUnion(transformer_list=[
+            ('subject', Pipeline([('tse', ThreadSubjectExtractor()),
+                                ('cv',CountVectorizer()),
+                                ('tfidf', TfidfTransformer())
+        ])),
+            ('text',    Pipeline([('tte',ThreadTextExtractor()),
+                                  ('cv',CountVectorizer()),
+                                ('tfidf', TfidfTransformer())
+            ])),
+            ('envelope', Pipeline([('tde', ThreadDictExtractor()),
+                                   ('dv',DictVectorizer())
+            ]))
+            ], transformer_weights={
+                'subject': 1,
+                'text': 1,
+                'envelope': 0.4
+            } )),
+            ('mlc', MLPClassifier())
+        ])
+    else:
+        raise ValueError("The pipe %s is not a valid pipe")
+    return p
+
+
+
--- a/classifier/classify_mail.py
+++ b/classifier/classify_mail.py
@@ -0,0 +1,25 @@
+from sklearn.feature_extraction.text import TfidfTransformer,CountVectorizer
+from sklearn.feature_extraction import DictVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.pipeline import Pipeline, FeatureUnion
+import sys
+import yaml
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import LabelEncoder
+
+
+text_clf = Pipeline([('vect', CountVectorizer()),('tfidf', TfidfTransformer()),('clf', MultinomialNB())])
+
+text_ohc = Pipeline([('ohc', OneHotEncoder()),('clf', MultinomialNB())])
+
+combined_features = FeatureUnion([('vect1', CountVectorizer()),('vect2', CountVectorizer())])
+
+
+enc=OneHotEncoder()
+with open("example_1.yaml", 'r') as stream:
+    try:
+        train=yaml.safe_load(stream)
+    except yaml.YAMLError as exc:
+        print(exc)
+
+tc=text_clf.fit(train["data"],train["target"])
--- a/classifier/classify_text.py
+++ b/classifier/classify_text.py
@@ -0,0 +1,42 @@
+from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.pipeline import Pipeline
+text_clf = Pipeline([('vect', CountVectorizer()),('tfidf', TfidfTransformer()),('clf', MultinomialNB())])
+import sys
+import yaml
+
+
+
+with open("example_1.yaml", 'r') as stream:
+    try:
+        train=yaml.safe_load(stream)
+    except yaml.YAMLError as exc:
+        print(exc)
+
+tc=text_clf.fit(train["data"],train["target"])
+print(sys.argv[1])
+
+answ=(tc.predict([sys.argv[1]]))[0]
+print train["target_names"][answ]
+
+for i in range(0,  (len(train["target_names"]))):
+    print (str(i)+"  "+ train["target_names"][i])
+
+ca=int(raw_input("Correct answer.."))
+
+
+if ca == answ:
+           print ("Yes I got it right")
+else:
+    print("should I remember this?")
+    a=raw_input("shoudIrememberthis?")
+    if a == "y":
+        train["data"].append(sys.argv[1])
+        train["target"].append(ca)
+        print yaml.dump(train,default_flow_style=False)
+        file=open("example_1.yaml","w")
+        file.write(yaml.dump(train,default_flow_style=False))
+        file.close()
+    else:
+        print ("Ok, I already forgot")
+    
--- a/classifier/training.py
+++ b/classifier/training.py
@@ -0,0 +1,70 @@
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import LabelEncoder
+import numpy
+from storage import Mail, MailThread, db_session
+from classifier import store_training_data, print_answers
+
+
+
+def train_fit_pipe():
+        tt= get_training_threads(b"answered")
+        pipe1.fit(tt[0],tt[1])
+        return pipe1,tt[2]
+def train_fit_pipe2():
+        tt= get_training_threads(b"maintopic")
+        pipe2.fit(tt[0],tt[1])
+        return pipe2,tt[2]
+
+def train_fit_pipe2b():
+        tt= get_training_threads(b"maintopic")
+        pipe2b.fit(tt[0],tt[1])
+        return pipe2b,tt[2]
+
+
+def predict_thread(mth,p,le,key):
+        #-------------------------------------------------------
+        if not type(p) is Pipeline: raise TypeError("Second Argument needs to be type Pipeline")
+        if not type(le) is LabelEncoder: raise TypeError("Second Argument needs to be type LabelEncoder")
+        #-------------------------------------------------------
+        pre=p.predict([mth])
+        answ=pre[0]
+        print "Status is answered is estimated to be: " + str(le.inverse_transform(pre)[0])
+        return answ
+
+def train_single_thread(tid,p,le,key="answered"):
+        if (not type(tid) is int): raise TypeError("ID must be of type int") 
+
+        mth=db_session.query(MailThread).filter(MailThread.firstmail==tid).first()
+        if mth is None: raise ValueError("Thread with firstmail %d not in Database" %tid)
+        print mth.firstmail
+        print mth.subject()
+        print mth.text()
+
+        if not p is None and not le is None:
+                answ=predict_thread(mth,p,le,key)
+        else: answ=None
+        if not le is None:
+                print_answers(le)
+        
+        ca=raw_input("Correct answer..")
+        try:
+                ca=int(ca)
+        except ValueError:
+                print "String Data"
+                
+        if type(ca)==int:
+                if ca == answ:
+                        print ("Yes I got it right")
+                else:
+                        print("Oh no...!")
+                l=le.inverse_transform([ca])[0]
+                if type(l) is numpy.bool_:
+                        l=bool(l)
+                if type(l) is numpy.string_:
+                        l=str(l)
+                store_training_data(tid,l, key)
+        elif not ca.strip() == "":
+                store_training_data(tid, ca, key)
+        else:
+                print "couldn't handle %s" % ca 
+    
--- a/data.yml
+++ b/data.yml
@@ -1,31 +1,32 @@
-{26808: {maintopic: jobausschreibung}, 27017: {maintopic: jobausschreibung}, 27070: {
-    maintopic: ausleihen}, 27083: {maintopic: ausleihen}, 27086: {maintopic: information},
-  27094: {maintopic: information}, 27096: {maintopic: jobausschreibung}, 27102: {
-    maintopic: studium}, 27118: {maintopic: information}, 27127: {maintopic: studium},
-  27130: {maintopic: information}, 27133: {maintopic: information}, 27141: {maintopic: information},
-  27146: {maintopic: information}, 27166: {maintopic: umfragen}, 27171: {maintopic: ausleihen},
-  27178: {maintopic: studium}, 27182: {maintopic: studium}, 27197: {maintopic: information},
-  27201: {maintopic: information}, 27218: {maintopic: information}, 27219: {maintopic: studium},
-  27222: {maintopic: information}, 27226: {maintopic: ausleihen}, 27420: {answered: true,
-    maintopic: studium}, 27422: {answered: true, maintopic: studium}, 27425: {answered: false,
-    maintopic: studium}, 27431: {answered: false, maintopic: information}, 27434: {
-    answered: false, maintopic: information}, 27435: {answered: false}, 27438: {answered: false,
-    maintopic: information}, 27439: {answered: true, maintopic: studium}, 27441: {
-    answered: false, maintopic: studium}, 27444: {answered: true, maintopic: ausleihen},
+{26808: {maintopic: jobausschreibung}, 27008: {lang: de}, 27017: {lang: de, maintopic: jobausschreibung},
+  27061: {lang: de}, 27070: {maintopic: ausleihen}, 27083: {maintopic: ausleihen},
+  27086: {maintopic: information}, 27094: {maintopic: information}, 27096: {maintopic: jobausschreibung},
+  27102: {lang: en, maintopic: studium}, 27118: {maintopic: information}, 27127: {
+    maintopic: studium}, 27130: {maintopic: information}, 27133: {maintopic: information},
+  27141: {maintopic: information}, 27146: {maintopic: information}, 27166: {maintopic: umfragen},
+  27171: {maintopic: ausleihen}, 27178: {maintopic: studium}, 27182: {maintopic: studium},
+  27197: {maintopic: information}, 27201: {maintopic: information}, 27218: {maintopic: information},
+  27219: {maintopic: studium}, 27222: {maintopic: information}, 27226: {maintopic: ausleihen},
+  27420: {answered: true, maintopic: studium}, 27422: {answered: true, maintopic: studium},
+  27425: {answered: false, maintopic: studium}, 27431: {answered: false, maintopic: information},
+  27434: {answered: false, lang: de, maintopic: information}, 27435: {answered: false},
+  27438: {answered: false, maintopic: information}, 27439: {answered: true, maintopic: studium},
+  27441: {answered: false, maintopic: studium}, 27444: {answered: true, maintopic: ausleihen},
  27454: {answered: false, maintopic: information}, 27455: {answered: false, maintopic: information},
-  27456: {answered: false, maintopic: studium}, 27457: {answered: false, maintopic: jobausschreibung},
-  27468: {answered: true, maintopic: studium}, 27489: {answered: false, maintopic: information},
-  27490: {answered: false, maintopic: fachschaftenzeugs}, 27491: {answered: false,
-    maintopic: jobausschreibung}, 27492: {answered: false, maintopic: information},
-  27495: {answered: false, maintopic: information}, 27496: {answered: true, maintopic: ausleihen},
-  27497: {answered: false, maintopic: information}, 27500: {answered: true, maintopic: studium},
-  27501: {answered: false, maintopic: information}, 27514: {answered: true, maintopic: studium},
-  27515: {answered: true, maintopic: studium}, 27518: {answered: true, maintopic: studium},
+  27456: {answered: false, lang: de, maintopic: studium}, 27457: {answered: false,
+    maintopic: jobausschreibung}, 27468: {answered: true, maintopic: studium}, 27489: {
+    answered: false, lang: en, maintopic: information}, 27490: {answered: false, maintopic: fachschaftenzeugs},
+  27491: {answered: false, maintopic: jobausschreibung}, 27492: {answered: false,
+    maintopic: information}, 27495: {answered: false, maintopic: information}, 27496: {
+    answered: true, maintopic: ausleihen}, 27497: {answered: false, maintopic: information},
+  27500: {answered: true, lang: en, maintopic: studium}, 27501: {answered: false,
+    lang: en, maintopic: information}, 27514: {answered: true, maintopic: studium},
+  27515: {answered: true, lang: en, maintopic: studium}, 27518: {answered: true, maintopic: studium},
  27523: {answered: false, maintopic: jobausschreibung}, 27526: {answered: false,
-    maintopic: studium}, 27536: {answered: true, maintopic: studium}, 27541: {answered: true,
-    maintopic: studium}, 27542: {answered: false, maintopic: studium}, 27543: {answered: false,
-    maintopic: information}, 27544: {answered: true, maintopic: studium}, 27545: {
-    answered: false, maintopic: umfragen}, 27546: {answered: false, maintopic: information},
+    maintopic: studium}, 27536: {answered: true, lang: de, maintopic: studium}, 27541: {
+    answered: true, maintopic: studium}, 27542: {answered: false, maintopic: studium},
+  27543: {answered: false, maintopic: information}, 27544: {answered: true, maintopic: studium},
+  27545: {answered: false, maintopic: umfragen}, 27546: {answered: false, maintopic: information},
  27547: {answered: false, maintopic: studium}, 27549: {answered: false}, 27550: {
    answered: false, maintopic: information}, 27553: {answered: false, maintopic: information},
  27558: {answered: false}, 27560: {answered: false, maintopic: ausleihen}, 27562: {
--- a/flaskapp/init.py
+++ b/flaskapp/init.py
@@ -22,11 +22,16 @@ from classifier import get_pipe
 mail_threads=db_session.query(MailThread).all()
 pipe1,le=get_pipe("pipe1",b"answered")
 pipe2,le2=get_pipe("pipe2b", b"maintopic")
+pipe3,le3=get_pipe("pipe2b", b"lang")
+
 answered=le.inverse_transform(pipe1.predict(mail_threads))
 maintopic=le2.inverse_transform(pipe2.predict(mail_threads))
+lang=le3.inverse_transform(pipe3.predict(mail_threads))
+
 for i, t in enumerate(mail_threads):
    t.answered=answered[i]
    t.maintopic=maintopic[i]
+    t.lang=lang[i]
@app.route("/")
 def hello():
    mth=mail_threads
--- a/run.py
+++ b/run.py
@@ -1,87 +1,30 @@
 from __future__ import unicode_literals
-import imapclient
+#import imapclient
 from config import Config
 import sys
-from email.header import decode_header
-import email
+#from email.header import decode_header
+#import email
 import codecs
-import sys
-import bs4
+
+#import sys
+#import bs4
+
 #sys.stdout = codecs.getwriter('utf8')(sys.stdout)
 from storage.fetch_mail import fetch_mail
 from storage.fetch_mail import fetch_threads, flatten_threads
-
 from storage import Mail, MailThread, db_session
-import yaml
-import email
-from classifier import get_training_threads, ThreadDictExtractor,  print_answers, in_training, store_training_data, get_pipe, test_pipe # , pipe2, pipe2b
-from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import LabelEncoder
-import numpy
+#import yaml
+#import email
+from classifier import get_training_threads,  print_answers, in_training, store_training_data, get_pipe, test_pipe, train_single_thread # , pipe2, pipe2b
+from flaskapp import app


-
-def train_fit_pipe():
-        tt= get_training_threads(b"answered")
-#        print tt[1]
-#        print tt[0]
-        pipe1.fit(tt[0],tt[1])
-        return pipe1,tt[2]
-def train_fit_pipe2():
-        tt= get_training_threads(b"maintopic")
-        pipe2.fit(tt[0],tt[1])
-        return pipe2,tt[2]
-
-def train_fit_pipe2b():
-        tt= get_training_threads(b"maintopic")
-        pipe2b.fit(tt[0],tt[1])
-        return pipe2b,tt[2]
-
 def predict_thread(p,l,t):
        pre=p.predict([t])
        print "Status is answered is estimated to be: " + str(l.inverse_transform(pre)[0])
        return pre
-
-
-def train_single_thread(tid,p,le,key="answered"):
-        if (not type(tid) is int): raise TypeError("ID must be of type int") 
-        if not type(p) is Pipeline: raise TypeError("Second Argument needs to be type Pipeline")
-        if not type(le) is LabelEncoder: raise TypeError("Second Argument needs to be type LabelEncoder")
-        mth=db_session.query(MailThread).filter(MailThread.firstmail==tid).first()
-        if mth is None: raise ValueError("Thread with firstmail %d not in Database" %tid)
-        # Predict the value 
-        pre=p.predict([mth])
-        answ=pre[0]
-        #
-#        print mth.to_text()
-#        print mth.text()
-        print "Status is answered is estimated to be: " + str(le.inverse_transform(pre)[0])
-        print_answers(le)
-
-        ca=raw_input("Correct answer..")
-        try:
-                ca=int(ca)
-     
-        except ValueError:
-                print "String Data"
-        if type(ca)==int:
-                if ca == answ:
-                        print ("Yes I got it right")
-                else:
-                        print("Oh no...!")
-                l=le.inverse_transform([ca])[0]
-                if type(l) is numpy.bool_:
-                        l=bool(l)
-                if type(l) is numpy.string_:
-                        l=str(l)
-                store_training_data(tid,l, key)
-        elif not ca.strip() == "":
-                store_training_data(tid, ca, key)
-        else:
-                print "couldn't handle %s" % ca 
-    
-from flaskapp import app
                
+
 #print "arg1:"+sys.argv[1]
 if len(sys.argv)>1:
    if sys.argv[1] == "fetch_threads":
@@ -89,6 +32,7 @@ if len(sys.argv)>1:

    if sys.argv[1] == "run_server":
            app.run(port=3000,debug=True)
+
    if sys.argv[1] == "print_threads":
        mth=db_session.query(MailThread).all()
        for t in mth:
@@ -122,7 +66,15 @@ if len(sys.argv)>1:
        p, le=get_pipe("pipe2", "maintopic")
        pb, lb =get_pipe("pipe2b", "maintopic")
        
-        train_single_thread(int(sys.argv[2]),p,le,b"maintopic")        
+        train_single_thread(int(sys.argv[2]),p,le,b"maintopic")
+
+    if sys.argv[1] == "train_thrd3":
+#        p, le=get_pipe("pipe2", "maintopic")
+        pb, lb =get_pipe("pipe2b", "lang")
+        
+        train_single_thread(int(sys.argv[2]),pb,lb,b"lang")
+
+        
    if sys.argv[1] == "train_all2":
        p, labelencoder=train_fit_pipe2()
        pb, lb=train_fit_pipe2b()
--- a/3
+++ b/3
@@ -0,0 +1,3 @@
+#!/bin/bash
+. .env/bin/activate
+python run.py run_server
--- a/storage/fetch_mail.py
+++ b/storage/fetch_mail.py
@@ -35,3 +35,22 @@ def flatten_threads(thrds, array=[], level=0):
        for t in thrds: 
            array.append(flatten_threads(t,[],1))
    return array
+
+def store_threads(thrds):
+    for t in thrds:
+        if type(t[0]) is int:
+            th=db_session.query(MailThread).filter(MailThread.firstmail==t[0]).first()
+            # Wenn nicht gefunden neuen anlegen
+            if th == None:
+                th=MailThread()
+                th.firstmail=t[0]
+            elif not th.body == yaml.dump(t): # Ansonsten body vergleichen
+                th.body=yaml.dump(t) # body zb (27422,27506), (27450,)
+                th.islabeled=False
+                th.opened=True
+            else:
+                th.body=yaml.dump(t)
+            db_session.add(th)
+            db_session.commit()
+        
+
--- a/storage/thread_model.py
+++ b/storage/thread_model.py
@@ -32,6 +32,7 @@ class MailThread(Base):
    __jsonattrs__=None
    answered=False
    maintopic="information"
+    lang=""
    def bdy(self):
        return yaml.load(self.body)

@@ -50,7 +51,7 @@ class MailThread(Base):

    def tstr(self):
        fr=yaml.load(self.mails()[0].from_)
-        return "(" + str(self.answered)+ ", "+ str(self.maintopic) + ") " + str(self.firstmail)+": "+str(fr[0]["mail"])+"@"+str(fr[0]["host"]) + " | ".join(yaml.load(self.mails()[0].subject))
+        return "(" + str(self.answered)+ ", "+ str(self.maintopic)+ ", "+ str(self.lang) + ") " + str(self.firstmail)+": "+str(fr[0]["mail"])+"@"+str(fr[0]["host"]) + " | ".join(yaml.load(self.mails()[0].subject))
    
    def mails(self):
        a=[]
@@ -111,7 +112,7 @@ class MailThread(Base):
        elif filter=="first":
            a=mail_txt(m[0])
        a=re.sub(r'\n\s*\n',r'\n',a)
-        a=re.sub(r'<!--.*-->',r'',a,flags=re.MULTILINE|re.DOTALL)
+#        a=re.sub(r'<!--.*-->',r'',a,flags=re.MULTILINE|re.DOTALL)
        a=re.sub(r'\s*>+ .*\n',r'',a)