Merge branch 'master' of https://git.triton.fet.at/git/service_mail
This commit is contained in:
184
classifier.py
184
classifier.py
@@ -1,184 +0,0 @@
|
|||||||
from sklearn.base import BaseEstimator, TransformerMixin
|
|
||||||
from sklearn.feature_extraction import DictVectorizer
|
|
||||||
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
|
|
||||||
from sklearn.preprocessing import LabelEncoder
|
|
||||||
from sklearn.pipeline import Pipeline, FeatureUnion
|
|
||||||
from sklearn.naive_bayes import MultinomialNB
|
|
||||||
from sklearn.neural_network import MLPClassifier
|
|
||||||
from sklearn.model_selection import train_test_split
|
|
||||||
import numpy as np
|
|
||||||
import yaml
|
|
||||||
from storage import MailThread,db_session
|
|
||||||
|
|
||||||
with open("data.yml", 'r') as stream:
|
|
||||||
try:
|
|
||||||
train=yaml.load(stream)
|
|
||||||
except yaml.YAMLError as exc:
|
|
||||||
print(exc)
|
|
||||||
|
|
||||||
data_types= { "answered": bool, "maintopic": str}
|
|
||||||
|
|
||||||
def store_training_data(i, d,key=b"answered"):
    """Record one labelled value for entry ``i`` and rewrite ``data.yml``.

    Args:
        i: Key into the module-level ``train`` mapping; a missing entry is
            created as an empty dict.
        d: The label value. Its type must be exactly ``data_types[key]``.
        key: Name of the label to set; must be a key of ``data_types``.

    Raises:
        ValueError: If ``key`` is not listed in ``data_types``.
        TypeError: If ``d`` does not have the type registered for ``key``.
    """
    global train
    if key not in data_types:
        raise ValueError("Key "+str(key)+" unknown")
    if i not in train:
        train[i] = {}
    # An existing entry that is not a dict is left untouched; the file is
    # still rewritten below.
    if key is not None and type(train[i]) is dict:
        expected = data_types[key]
        # Exact type comparison on purpose: subclasses are rejected as well.
        if type(d) is not expected:
            # (d,) keeps the formatting correct when d itself is a tuple.
            raise TypeError("Data - %s - for key " % (d,) + str(key)
                            + " must be " + str(expected)
                            + " but it is " + str(type(d)))
        train[i][key] = d

    # The with-statement closes the file; no explicit close() is needed.
    with open("data.yml", "w") as out:
        out.write(yaml.dump(train, default_flow_style=True))
|
|
||||||
|
|
||||||
|
|
||||||
# Load the training data for the given key (label/property).
def get_training_threads(key="answered"):
    """Collect the labelled threads for ``key`` from ``train`` and the database.

    An entry of ``train`` contributes only if a ``MailThread`` whose
    ``firstmail`` equals the entry's id exists and the entry contains ``key``.

    Args:
        key: Name of the label to collect.

    Returns:
        A tuple ``(threads, encoded_labels, encoder)``: the matching
        ``MailThread`` objects, their labels passed through
        ``LabelEncoder.fit_transform``, and the fitted ``LabelEncoder``.
    """
    threads = []
    labels = []
    for first_mail_id in train:
        thread = db_session.query(MailThread).filter(MailThread.firstmail==first_mail_id).first()
        # The thread must exist in the database ...
        if thread is None:
            continue
        # ... and the training entry must carry the requested key.
        if key in train[first_mail_id]:
            threads.append(thread)
            labels.append(train[first_mail_id][key])
    encoder = LabelEncoder()
    encoded_labels = encoder.fit_transform(labels)
    return (threads, encoded_labels, encoder)
|
|
||||||
|
|
||||||
|
|
||||||
def in_training(i, key="answered"):
    """Return True if ``train`` has an entry ``i`` that contains ``key``."""
    return i in train and key in train[i]
|
|
||||||
|
|
||||||
|
|
||||||
def print_answers(l):
    """Print each entry of ``l.classes_`` prefixed by its position.

    Args:
        l: Object exposing a ``classes_`` sequence (presumably the
            ``LabelEncoder`` returned by ``get_training_threads`` -
            verify against callers).

    Returns:
        None.
    """
    for index, label in enumerate(l.classes_):
        # Call form of print works on Python 2 and 3 for a single argument.
        print(str(index) + ": " + str(label))
    return None
|
|
||||||
|
|
||||||
|
|
||||||
class ThreadDictExtractor(BaseEstimator, TransformerMixin):
    """Transformer step that maps every thread to ``thread.mail_flat_dict()``."""

    def fit(self, x, y=None):
        # Nothing is learned from the data.
        return self

    def transform(self, X,y=None):
        flat_dicts = []
        for thread in X:
            flat_dicts.append(thread.mail_flat_dict())
        return flat_dicts
|
|
||||||
|
|
||||||
class ThreadSubjectExtractor(BaseEstimator, TransformerMixin):
    """Transformer step that maps every thread to ``thread.subject()``."""

    def fit(self, x, y=None):
        # Nothing is learned from the data.
        return self

    def transform(self, X,y=None):
        subjects = []
        for thread in X:
            subjects.append(thread.subject())
        return subjects
|
|
||||||
|
|
||||||
class ThreadTextExtractor(BaseEstimator, TransformerMixin):
    """Transformer step that maps every thread to ``thread.text()``."""

    def fit(self, x, y=None):
        # Nothing is learned from the data.
        return self

    def transform(self, X,y=None):
        texts = []
        for thread in X:
            texts.append(thread.text())
        return texts
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _thread_feature_union(weights):
    """Build the subject/text/envelope FeatureUnion shared by the pipe2* pipelines."""
    return FeatureUnion(transformer_list=[
        ('subject', Pipeline([('tse', ThreadSubjectExtractor()),
                              ('cv', CountVectorizer()),
                              ('tfidf', TfidfTransformer())
                              ])),
        ('text', Pipeline([('tte', ThreadTextExtractor()),
                           ('cv', CountVectorizer()),
                           ('tfidf', TfidfTransformer())
                           ])),
        ('envelope', Pipeline([('tde', ThreadDictExtractor()),
                               ('dv', DictVectorizer())
                               ]))
    ], transformer_weights=weights)


def build_pipe(p=b"pipe1"):
    """Return a new, unfitted pipeline for the given pipeline name.

    Recognised names:
        "pipe1":  ThreadDictExtractor -> DictVectorizer -> MultinomialNB.
        "pipe2":  weighted feature union (1 / 0.7 / 0.7) -> MultinomialNB.
        "pipe2b": weighted feature union (1 / 0.7 / 0.7) -> MLPClassifier.
        "pipe2c": weighted feature union (1 / 1 / 0.4) -> MLPClassifier.

    Args:
        p: Name of the pipeline to build.

    Returns:
        The constructed ``Pipeline``.

    Raises:
        ValueError: If ``p`` is not one of the recognised names.
    """
    # NOTE(review): the default is a bytes literal, which equals "pipe1" only
    # on Python 2 - confirm before relying on the default under Python 3.
    if p == "pipe1":
        return Pipeline([('tde', ThreadDictExtractor()),
                         ('dv', DictVectorizer()),
                         ('clf', MultinomialNB())
                         ])
    if p == "pipe2":
        weights = {'subject': 1, 'text': 0.7, 'envelope': 0.7}
        return Pipeline([
            ('union', _thread_feature_union(weights)),
            ('clf', MultinomialNB())
        ])
    if p == "pipe2b":
        weights = {'subject': 1, 'text': 0.7, 'envelope': 0.7}
        return Pipeline([
            ('union', _thread_feature_union(weights)),
            ('mlc', MLPClassifier())
        ])
    if p == "pipe2c":
        weights = {'subject': 1, 'text': 1, 'envelope': 0.4}
        return Pipeline([
            ('union', _thread_feature_union(weights)),
            ('mlc', MLPClassifier())
        ])
    # The original message was never formatted, so it showed a literal "%s".
    raise ValueError("The pipe %s is not a valid pipe" % (p,))
|
|
||||||
|
|
||||||
def get_pipe(p=b"pipe1",k=b"answered"):
    """Build pipeline ``p`` and fit it on the training threads for label ``k``.

    Returns:
        A tuple ``(fitted_pipeline, label_encoder)``; the encoder is the one
        returned by ``get_training_threads``.
    """
    pipeline = build_pipe(p)
    threads, encoded_labels, encoder = get_training_threads(k)
    pipeline.fit(threads, encoded_labels)
    return pipeline, encoder
|
|
||||||
|
|
||||||
from sklearn.metrics import accuracy_score
|
|
||||||
|
|
||||||
def test_pipe(pp,k):
    """Fit and score the named pipelines on one random split of the training data.

    The training threads for label ``k`` are split once (20% held out); every
    pipeline is built fresh, fitted on the training part, and its accuracy on
    the held-out part is printed.

    Args:
        pp: A list (or tuple) of pipeline names accepted by ``build_pipe``.
            A single name is also accepted; previously anything that was not
            a list was silently ignored.
        k: Label key passed to ``get_training_threads``.
    """
    tt = get_training_threads(k)
    X_train, X_test, y_train, y_test = train_test_split(tt[0], tt[1], test_size=0.2)
    names = pp if isinstance(pp, (list, tuple)) else [pp]
    for name in names:
        print("pipe: %s" % name)
        pipeline = build_pipe(name)
        pipeline.fit(X_train, y_train)
        ypred = pipeline.predict(X_test)
        print(accuracy_score(y_test, ypred))
|
|
||||||
|
|
||||||
#pipe1=get_pipe("pipe1", "answered")
|
|
||||||
#pipe2=get_pipe("pipe2", "maintopic")
|
|
||||||
#pipe2b=get_pipe("pipe2b", "maintopic")
|
|
||||||
@@ -102,7 +102,7 @@ def get_pipe(p=b"pipe1",k=b"answered"):
|
|||||||
|
|
||||||
def test_pipe(pp,k):
|
def test_pipe(pp,k):
|
||||||
tt= get_training_threads(k)
|
tt= get_training_threads(k)
|
||||||
X_train,X_test,y_train,y_test=train_test_split(tt[0],tt[1],test_size=0.2)
|
X_train,X_test,y_train,y_test=train_test_split(tt[0],tt[1],test_size=0.4)
|
||||||
if type(pp) is list:
|
if type(pp) is list:
|
||||||
for p in pp:
|
for p in pp:
|
||||||
print "pipe: %s" % p
|
print "pipe: %s" % p
|
||||||
|
|||||||
@@ -1,25 +0,0 @@
|
|||||||
from sklearn.feature_extraction.text import TfidfTransformer,CountVectorizer
|
|
||||||
from sklearn.feature_extraction import DictVectorizer
|
|
||||||
from sklearn.naive_bayes import MultinomialNB
|
|
||||||
from sklearn.pipeline import Pipeline, FeatureUnion
|
|
||||||
import sys
|
|
||||||
import yaml
|
|
||||||
from sklearn.preprocessing import OneHotEncoder
|
|
||||||
from sklearn.preprocessing import LabelEncoder
|
|
||||||
|
|
||||||
|
|
||||||
# Word counts -> tf-idf -> multinomial naive Bayes.
text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
])

# One-hot encoding -> multinomial naive Bayes.
text_ohc = Pipeline([
    ('ohc', OneHotEncoder()),
    ('clf', MultinomialNB()),
])

# Two independent count vectorizers side by side.
combined_features = FeatureUnion([
    ('vect1', CountVectorizer()),
    ('vect2', CountVectorizer()),
])

enc = OneHotEncoder()

with open("example_1.yaml", 'r') as stream:
    try:
        train = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # A parse error is only reported; `train` stays unset in that case.
        print(exc)

# Fit the text classifier on the "data"/"target" entries of the loaded file.
tc = text_clf.fit(train["data"], train["target"])
|
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
|
|
||||||
from sklearn.naive_bayes import MultinomialNB
|
|
||||||
from sklearn.pipeline import Pipeline
|
|
||||||
text_clf = Pipeline([('vect', CountVectorizer()),('tfidf', TfidfTransformer()),('clf', MultinomialNB())])
|
|
||||||
import sys
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
with open("example_1.yaml", 'r') as stream:
    try:
        train = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # A parse error is only reported; `train` stays unset in that case.
        print(exc)

# Fit on the stored examples, then classify the text given on the command line.
tc = text_clf.fit(train["data"], train["target"])
print(sys.argv[1])

answ = (tc.predict([sys.argv[1]]))[0]
print(train["target_names"][answ])

# List every known target with its index so the user can pick the right one.
for i, target_name in enumerate(train["target_names"]):
    print(str(i) + " " + target_name)

ca = int(raw_input("Correct answer.."))

if ca == answ:
    print("Yes I got it right")
else:
    print("should I remember this?")
    a = raw_input("shoudIrememberthis?")
    if a == "y":
        # Append the corrected example and persist the enlarged training set.
        train["data"].append(sys.argv[1])
        train["target"].append(ca)
        dumped = yaml.dump(train, default_flow_style=False)
        print(dumped)
        # with-statement closes the file even if the write fails.
        with open("example_1.yaml", "w") as out:
            out.write(dumped)
    else:
        print("Ok, I already forgot")
|
|
||||||
|
|
||||||
7
data.yml
7
data.yml
@@ -7,9 +7,10 @@
|
|||||||
27171: {maintopic: ausleihen}, 27178: {maintopic: studium}, 27182: {maintopic: studium},
|
27171: {maintopic: ausleihen}, 27178: {maintopic: studium}, 27182: {maintopic: studium},
|
||||||
27197: {maintopic: information}, 27201: {maintopic: information}, 27218: {maintopic: information},
|
27197: {maintopic: information}, 27201: {maintopic: information}, 27218: {maintopic: information},
|
||||||
27219: {maintopic: studium}, 27222: {maintopic: information}, 27226: {maintopic: ausleihen},
|
27219: {maintopic: studium}, 27222: {maintopic: information}, 27226: {maintopic: ausleihen},
|
||||||
27420: {answered: true, maintopic: studium}, 27422: {answered: true, maintopic: studium},
|
27263: {maintopic: ausleihen}, 27267: {maintopic: ausleihen}, 27420: {answered: true,
|
||||||
27425: {answered: false, maintopic: studium}, 27431: {answered: false, maintopic: information},
|
maintopic: studium}, 27422: {answered: true, maintopic: studium}, 27425: {answered: false,
|
||||||
27434: {answered: false, lang: de, maintopic: information}, 27435: {answered: false},
|
maintopic: studium}, 27431: {answered: false, maintopic: information}, 27434: {
|
||||||
|
answered: false, lang: de, maintopic: information}, 27435: {answered: false},
|
||||||
27438: {answered: false, maintopic: information}, 27439: {answered: true, maintopic: studium},
|
27438: {answered: false, maintopic: information}, 27439: {answered: true, maintopic: studium},
|
||||||
27441: {answered: false, maintopic: studium}, 27444: {answered: true, maintopic: ausleihen},
|
27441: {answered: false, maintopic: studium}, 27444: {answered: true, maintopic: ausleihen},
|
||||||
27454: {answered: false, maintopic: information}, 27455: {answered: false, maintopic: information},
|
27454: {answered: false, maintopic: information}, 27455: {answered: false, maintopic: information},
|
||||||
|
|||||||
35
db_model_version_1.py
Normal file
35
db_model_version_1.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
|
||||||
|
## File autogenerated by genmodel.py
|
||||||
|
|
||||||
|
from sqlalchemy import *
|
||||||
|
|
||||||
|
|
||||||
|
meta = MetaData()
|
||||||
|
|
||||||
|
mails = Table('mails', meta,
|
||||||
|
Column('created_at', TIMESTAMP, nullable=False),
|
||||||
|
Column('updated_at', TIMESTAMP, nullable=False),
|
||||||
|
Column('id', INTEGER, primary_key=True, nullable=False),
|
||||||
|
Column('date', DATETIME),
|
||||||
|
Column('envelope', TEXT),
|
||||||
|
Column('body', TEXT),
|
||||||
|
Column('text', TEXT),
|
||||||
|
Column('from_', TEXT),
|
||||||
|
Column('from_mailbox', VARCHAR),
|
||||||
|
Column('from_host', VARCHAR),
|
||||||
|
Column('to_', TEXT),
|
||||||
|
Column('to_mailbox', TEXT),
|
||||||
|
Column('to_host', VARCHAR),
|
||||||
|
Column('subject', TEXT),
|
||||||
|
)
|
||||||
|
|
||||||
|
threads = Table('threads', meta,
|
||||||
|
Column('created_at', TIMESTAMP, nullable=False),
|
||||||
|
Column('updated_at', TIMESTAMP, nullable=False),
|
||||||
|
Column('id', INTEGER, primary_key=True, nullable=False),
|
||||||
|
Column('firstmail', INTEGER),
|
||||||
|
Column('islabeled', BOOLEAN),
|
||||||
|
Column('opened', BOOLEAN),
|
||||||
|
Column('body', TEXT),
|
||||||
|
)
|
||||||
|
|
||||||
4
db_repository/README
Normal file
4
db_repository/README
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
This is a database migration repository.
|
||||||
|
|
||||||
|
More information at
|
||||||
|
http://code.google.com/p/sqlalchemy-migrate/
|
||||||
0
db_repository/__init__.py
Normal file
0
db_repository/__init__.py
Normal file
6
db_repository/manage.py
Normal file
6
db_repository/manage.py
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
#!.env/bin/python
|
||||||
|
from migrate.versioning.shell import main
|
||||||
|
import storage
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(debug='False')
|
||||||
25
db_repository/migrate.cfg
Normal file
25
db_repository/migrate.cfg
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
[db_settings]
|
||||||
|
# Used to identify which repository this database is versioned under.
|
||||||
|
# You can use the name of your project.
|
||||||
|
repository_id=service mail db repository
|
||||||
|
|
||||||
|
# The name of the database table used to track the schema version.
|
||||||
|
# This name shouldn't already be used by your project.
|
||||||
|
# If this is changed once a database is under version control, you'll need to
|
||||||
|
# change the table name in each database too.
|
||||||
|
version_table=migrate_version
|
||||||
|
|
||||||
|
# When committing a change script, Migrate will attempt to generate the
|
||||||
|
# sql for all supported databases; normally, if one of them fails - probably
|
||||||
|
# because you don't have that database installed - it is ignored and the
|
||||||
|
# commit continues, perhaps ending successfully.
|
||||||
|
# Databases in this list MUST compile successfully during a commit, or the
|
||||||
|
# entire commit will fail. List the databases your application will actually
|
||||||
|
# be using to ensure your updates to that database work properly.
|
||||||
|
# This must be a list; example: ['postgres','sqlite']
|
||||||
|
required_dbs=[]
|
||||||
|
|
||||||
|
# When creating new change scripts, Migrate will stamp the new script with
|
||||||
|
# a version number. By default this is latest_version + 1. You can set this
|
||||||
|
# to 'true' to tell Migrate to use the UTC timestamp instead.
|
||||||
|
use_timestamp_numbering=False
|
||||||
1
db_repository/storage
Symbolic link
1
db_repository/storage
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
storage
|
||||||
33
db_repository/versions/002_add_date_to_threads.py
Normal file
33
db_repository/versions/002_add_date_to_threads.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
from sqlalchemy import *
|
||||||
|
from migrate import *
|
||||||
|
|
||||||
|
|
||||||
|
from migrate.changeset import schema
|
||||||
|
pre_meta = MetaData()
|
||||||
|
post_meta = MetaData()
|
||||||
|
threads = Table('threads', post_meta,
|
||||||
|
Column('created_at', TIMESTAMP, nullable=False),
|
||||||
|
Column('updated_at', TIMESTAMP, nullable=False),
|
||||||
|
Column('id', Integer, primary_key=True, nullable=False),
|
||||||
|
Column('firstmail', Integer),
|
||||||
|
Column('date', DateTime),
|
||||||
|
Column('islabeled', Boolean),
|
||||||
|
Column('opened', Boolean),
|
||||||
|
Column('body', Text),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade(migrate_engine):
    """Create the ``date`` column on the ``threads`` table."""
    # Bind both metadata objects to the engine supplied by the migration
    # tool; do not create a separate engine here.
    for meta in (pre_meta, post_meta):
        meta.bind = migrate_engine
    date_column = post_meta.tables['threads'].columns['date']
    date_column.create()
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade(migrate_engine):
    """Reverse the upgrade: drop the ``date`` column from ``threads``."""
    for meta in (pre_meta, post_meta):
        meta.bind = migrate_engine
    date_column = post_meta.tables['threads'].columns['date']
    date_column.drop()
|
||||||
|
|
||||||
34
db_repository/versions/003_maintopic.py
Normal file
34
db_repository/versions/003_maintopic.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
from sqlalchemy import *
|
||||||
|
from migrate import *
|
||||||
|
|
||||||
|
|
||||||
|
from migrate.changeset import schema
|
||||||
|
pre_meta = MetaData()
|
||||||
|
post_meta = MetaData()
|
||||||
|
threads = Table('threads', post_meta,
|
||||||
|
Column('created_at', TIMESTAMP, nullable=False),
|
||||||
|
Column('updated_at', TIMESTAMP, nullable=False),
|
||||||
|
Column('id', Integer, primary_key=True, nullable=False),
|
||||||
|
Column('firstmail', Integer),
|
||||||
|
Column('date', DateTime),
|
||||||
|
Column('islabeled', Boolean),
|
||||||
|
Column('opened', Boolean),
|
||||||
|
Column('body', Text),
|
||||||
|
Column('maintopic', String),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade(migrate_engine):
    """Create the ``maintopic`` column on the ``threads`` table."""
    # Bind both metadata objects to the engine supplied by the migration
    # tool; do not create a separate engine here.
    for meta in (pre_meta, post_meta):
        meta.bind = migrate_engine
    maintopic_column = post_meta.tables['threads'].columns['maintopic']
    maintopic_column.create()
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade(migrate_engine):
    """Reverse the upgrade: drop the ``maintopic`` column from ``threads``."""
    for meta in (pre_meta, post_meta):
        meta.bind = migrate_engine
    maintopic_column = post_meta.tables['threads'].columns['maintopic']
    maintopic_column.drop()
|
||||||
|
|
||||||
0
db_repository/versions/__init__.py
Normal file
0
db_repository/versions/__init__.py
Normal file
@@ -5,7 +5,7 @@ from flask import Flask,jsonify,send_from_directory, render_template
|
|||||||
from config import Config
|
from config import Config
|
||||||
import yaml
|
import yaml
|
||||||
import os
|
import os
|
||||||
|
from sqlalchemy import desc
|
||||||
from storage import MailThread,db_session
|
from storage import MailThread,db_session
|
||||||
|
|
||||||
app = Flask(__name__, template_folder="templates", static_folder="static")
|
app = Flask(__name__, template_folder="templates", static_folder="static")
|
||||||
@@ -20,20 +20,35 @@ def render_index(mths,code=200):
|
|||||||
), code
|
), code
|
||||||
from classifier import get_pipe
|
from classifier import get_pipe
|
||||||
mail_threads=db_session.query(MailThread).all()
|
mail_threads=db_session.query(MailThread).all()
|
||||||
pipe1,le=get_pipe("pipe1",b"answered")
|
#pipe1,le=get_pipe("pipe1",b"answered")
|
||||||
pipe2,le2=get_pipe("pipe2b", b"maintopic")
|
#pipe2,le2=get_pipe("pipe2b", b"maintopic")
|
||||||
pipe3,le3=get_pipe("pipe2b", b"lang")
|
#pipe3,le3=get_pipe("pipe2b", b"lang")
|
||||||
|
|
||||||
answered=le.inverse_transform(pipe1.predict(mail_threads))
|
#answered=le.inverse_transform(pipe1.predict(mail_threads))
|
||||||
maintopic=le2.inverse_transform(pipe2.predict(mail_threads))
|
#maintopic=le2.inverse_transform(pipe2.predict(mail_threads))
|
||||||
lang=le3.inverse_transform(pipe3.predict(mail_threads))
|
#lang=le3.inverse_transform(pipe3.predict(mail_threads))
|
||||||
|
|
||||||
|
#for i, t in enumerate(mail_threads):
|
||||||
|
# t.answered=answered[i]
|
||||||
|
# t.maintopic=maintopic[i]
|
||||||
|
# t.lang=lang[i]
|
||||||
|
|
||||||
for i, t in enumerate(mail_threads):
|
|
||||||
t.answered=answered[i]
|
|
||||||
t.maintopic=maintopic[i]
|
|
||||||
t.lang=lang[i]
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
def hello():
|
def hello():
|
||||||
mth=mail_threads
|
mth=db_session.query(MailThread).order_by(desc(MailThread.date)).all()
|
||||||
return render_index(mth)
|
return render_index(mth)
|
||||||
|
|
||||||
|
@app.route("/answered/<id>/<value>")
def store_answered(id, value):
    """Set ``answered`` and ``opened`` on the thread whose first mail is ``id``.

    ``value`` counts as true only for "true", "True", "1" or "t"; anything
    else is treated as false.

    Returns:
        The index rendered for just that thread, or an empty index with
        status 404 when ``id`` is not a number or no thread matches.
    """
    try:
        first_mail_id = int(id)
    except ValueError:
        return render_index([], 404)
    mth = db_session.query(MailThread).filter(MailThread.firstmail==first_mail_id).first()
    if mth is None:
        # Previously this fell through to an AttributeError on None.
        return render_index([], 404)
    flag = value in ["true", "True", "1", "t"]
    mth.answered = flag
    mth.opened = flag
    # NOTE(review): nothing here commits db_session - confirm the change is
    # persisted elsewhere if it is meant to survive the request.
    return render_index([mth])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/studium")
def studium():
    """Render the index for all threads whose ``maintopic`` is "studium", newest first."""
    query = db_session.query(MailThread).filter(MailThread.maintopic=="studium")
    newest_first = query.order_by(desc(MailThread.date))
    return render_index(newest_first.all())
|
||||||
|
|||||||
@@ -6,6 +6,11 @@
|
|||||||
<script src="static/bootstrap/js/bootstrap.min.js" ></script>
|
<script src="static/bootstrap/js/bootstrap.min.js" ></script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
<style>
|
||||||
|
.card.opened-True {
|
||||||
|
background: lightgreen;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col-12">
|
<div class="col-12">
|
||||||
@@ -14,7 +19,7 @@
|
|||||||
<div id="accordion" role="tablist" aria-multiselectable="true">
|
<div id="accordion" role="tablist" aria-multiselectable="true">
|
||||||
|
|
||||||
{% for m in mths %}
|
{% for m in mths %}
|
||||||
<div class="card" style="padding-top: 2pt; padding-bottom:2pt; border-radius:0;margin-top:1pt; margin-bottom:1pt">
|
<div class="card opened-{{m.opened}}" style="padding-top: 2pt; padding-bottom:2pt; border-radius:0;margin-top:1pt; margin-bottom:1pt">
|
||||||
<div class="" role="tab" id="heading{{m.firstmail}}">
|
<div class="" role="tab" id="heading{{m.firstmail}}">
|
||||||
<b class="mb-0">
|
<b class="mb-0">
|
||||||
<a data-toggle="collapse" data-parent="#accordion" href="#collapse{{m.firstmail}}" aria-expanded="true" aria-controls="collapse1">
|
<a data-toggle="collapse" data-parent="#accordion" href="#collapse{{m.firstmail}}" aria-expanded="true" aria-controls="collapse1">
|
||||||
@@ -26,7 +31,7 @@
|
|||||||
<div id="collapse{{m.firstmail}}" class="collapse" role="tabpanel" aria-labelledby="headingOne">
|
<div id="collapse{{m.firstmail}}" class="collapse" role="tabpanel" aria-labelledby="headingOne">
|
||||||
|
|
||||||
<div class="card-block">
|
<div class="card-block">
|
||||||
<div style="white-space: pre-wrap;font:Courier, monospace; font-size:small; width:50em; border: thin blue solid;"> {{ m.text() }} </div>
|
<div style="white-space: pre-wrap;font:Courier, monospace; font-size:small; width:50em; border: thin blue solid;"> {{ m.print_text() }} </div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -36,7 +41,7 @@
|
|||||||
|
|
||||||
{% for m in mths %}
|
{% for m in mths %}
|
||||||
ID: {{m.tstr()}}
|
ID: {{m.tstr()}}
|
||||||
{{ m.text() }}
|
{{ m.print_text() }}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
6
migration.py
Executable file
6
migration.py
Executable file
@@ -0,0 +1,6 @@
|
|||||||
|
#!.env/bin/python
|
||||||
|
from migrate.versioning.shell import main
|
||||||
|
import storage
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(debug='False',repository="db_repository", url="sqlite:///test.sqlite")
|
||||||
22
run.py
22
run.py
@@ -16,7 +16,6 @@ from storage import Mail, MailThread, db_session
|
|||||||
#import yaml
|
#import yaml
|
||||||
#import email
|
#import email
|
||||||
from classifier import get_training_threads, print_answers, in_training, store_training_data, get_pipe, test_pipe, train_single_thread # , pipe2, pipe2b
|
from classifier import get_training_threads, print_answers, in_training, store_training_data, get_pipe, test_pipe, train_single_thread # , pipe2, pipe2b
|
||||||
from flaskapp import app
|
|
||||||
|
|
||||||
|
|
||||||
def predict_thread(p,l,t):
|
def predict_thread(p,l,t):
|
||||||
@@ -27,10 +26,31 @@ def predict_thread(p,l,t):
|
|||||||
|
|
||||||
#print "arg1:"+sys.argv[1]
|
#print "arg1:"+sys.argv[1]
|
||||||
if len(sys.argv)>1:
|
if len(sys.argv)>1:
|
||||||
|
|
||||||
if sys.argv[1] == "fetch_threads":
|
if sys.argv[1] == "fetch_threads":
|
||||||
print flatten_threads(fetch_threads())
|
print flatten_threads(fetch_threads())
|
||||||
|
|
||||||
|
if sys.argv[1] == "predict_threads":
|
||||||
|
pipe1,le=get_pipe("pipe1",b"answered")
|
||||||
|
pipe2,le2=get_pipe("pipe2b", b"maintopic")
|
||||||
|
pipe3,le3=get_pipe("pipe2b", b"lang")
|
||||||
|
mail_threads=db_session.query(MailThread).all()
|
||||||
|
|
||||||
|
answered=le.inverse_transform(pipe1.predict(mail_threads))
|
||||||
|
maintopic=le2.inverse_transform(pipe2.predict(mail_threads))
|
||||||
|
lang=le3.inverse_transform(pipe3.predict(mail_threads))
|
||||||
|
|
||||||
|
for i, t in enumerate(mail_threads):
|
||||||
|
t.answered=answered[i]
|
||||||
|
t.opened=answered[i]
|
||||||
|
|
||||||
|
t.maintopic=maintopic[i]
|
||||||
|
t.lang=lang[i]
|
||||||
|
db_session.add(t)
|
||||||
|
db_session.commit()
|
||||||
|
|
||||||
if sys.argv[1] == "run_server":
|
if sys.argv[1] == "run_server":
|
||||||
|
from flaskapp import app
|
||||||
app.run(port=3000,debug=True)
|
app.run(port=3000,debug=True)
|
||||||
|
|
||||||
if sys.argv[1] == "print_threads":
|
if sys.argv[1] == "print_threads":
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
from database import db_session, init_db
|
from database import db_session, init_db
|
||||||
from mail_model import Mail
|
from mail_model import Mail
|
||||||
from thread_model import MailThread
|
from thread_model import MailThread
|
||||||
|
from database import Base
|
||||||
|
metadata=Base.metadata
|
||||||
|
|||||||
@@ -75,6 +75,8 @@ class Mail(Base):
|
|||||||
from_array=[]
|
from_array=[]
|
||||||
# print "Status"
|
# print "Status"
|
||||||
# print env
|
# print env
|
||||||
|
if not env.date is None:
|
||||||
|
self.date=env.date
|
||||||
if not env.to is None:
|
if not env.to is None:
|
||||||
for t in env.to:
|
for t in env.to:
|
||||||
a={"host": t.host, "mail": t.mailbox}
|
a={"host": t.host, "mail": t.mailbox}
|
||||||
|
|||||||
@@ -23,15 +23,17 @@ class MailThread(Base):
|
|||||||
__tablename__ = 'threads'
|
__tablename__ = 'threads'
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
firstmail = Column(Integer)
|
firstmail = Column(Integer)
|
||||||
|
date = Column(DateTime)
|
||||||
islabeled = Column(Boolean)
|
islabeled = Column(Boolean)
|
||||||
opened = Column(Boolean)
|
opened = Column(Boolean)
|
||||||
body = Column(Text)
|
body = Column(Text)
|
||||||
|
maintopic=Column(String)
|
||||||
__schema__=FullThreadSchema
|
__schema__=FullThreadSchema
|
||||||
__jsonid__='thread'
|
__jsonid__='thread'
|
||||||
__whiteattrs__= ["body"]
|
__whiteattrs__= ["body"]
|
||||||
__jsonattrs__=None
|
__jsonattrs__=None
|
||||||
answered=False
|
answered=False
|
||||||
maintopic="information"
|
# maintopic="information"
|
||||||
lang=""
|
lang=""
|
||||||
def bdy(self):
|
def bdy(self):
|
||||||
return yaml.load(self.body)
|
return yaml.load(self.body)
|
||||||
@@ -51,7 +53,7 @@ class MailThread(Base):
|
|||||||
|
|
||||||
def tstr(self):
|
def tstr(self):
|
||||||
fr=yaml.load(self.mails()[0].from_)
|
fr=yaml.load(self.mails()[0].from_)
|
||||||
return "(" + str(self.answered)+ ", "+ str(self.maintopic)+ ", "+ str(self.lang) + ") " + str(self.firstmail)+": "+str(fr[0]["mail"])+"@"+str(fr[0]["host"]) + " | ".join(yaml.load(self.mails()[0].subject))
|
return "(" + str(self.opened)+ ", "+ str(self.maintopic)+ ", "+ str(self.lang) + ") " + str(self.firstmail)+": "+str(fr[0]["mail"])+"@"+str(fr[0]["host"]) + " | ".join(yaml.load(self.mails()[0].subject))
|
||||||
|
|
||||||
def mails(self):
|
def mails(self):
|
||||||
a=[]
|
a=[]
|
||||||
@@ -94,7 +96,34 @@ class MailThread(Base):
|
|||||||
m.compile_text()
|
m.compile_text()
|
||||||
db_session.add(m)
|
db_session.add(m)
|
||||||
db_session.commit()
|
db_session.commit()
|
||||||
|
self.date=self.mails()[0].date
|
||||||
|
|
||||||
|
def print_text(self,filter="all"):
    """Return the thread's mails as display text with a sender/date header each.

    Args:
        filter: "all" concatenates every mail of the thread, each followed by
            a separator line; "first" uses only the first mail. Any other
            value yields an empty string.

    Returns:
        The text after collapsing blank lines, removing ``<!-- ... -->``
        spans and dropping ``>``-quoted lines.
    """
    text_type = type(u"")  # `unicode` on Python 2, `str` on Python 3

    def mail_txt(m):
        # NOTE(review): yaml.load can construct arbitrary objects - confirm
        # the stored fields are trusted or switch to yaml.safe_load.
        fr = yaml.load(m.from_)
        header = "Gesendet von: "+str(fr[0]["mail"])+"@"+str(fr[0]["host"])+" am "+ str(m.date) + "\n"
        t = yaml.load(m.text)
        # Byte strings are decoded; text that is already unicode is used as is.
        if type(t) is not text_type:
            t = t.decode("ISO-8859-1")
        return header + t

    a = u""
    if filter == "all":
        separator = "\n****........................................***\n"
        a = a + u"".join(mail_txt(m) + separator for m in self.mails())
    elif filter == "first":
        # The original referenced an unbound name here (m[0]); fetch the
        # mails explicitly and tolerate a thread without any.
        mails = self.mails()
        if mails:
            a = mail_txt(mails[0])
    a = re.sub(r'\n\s*\n', r'\n', a)
    a = re.sub(r'<!--.*-->', r'', a, flags=re.MULTILINE|re.DOTALL)
    a = re.sub(r'\s*>+ .*\n', r'', a)
    return a
|
||||||
def text(self,filter="all"):
|
def text(self,filter="all"):
|
||||||
a=u""
|
a=u""
|
||||||
def mail_txt(m):
|
def mail_txt(m):
|
||||||
@@ -108,11 +137,11 @@ class MailThread(Base):
|
|||||||
mm=self.mails()
|
mm=self.mails()
|
||||||
if filter=="all":
|
if filter=="all":
|
||||||
for m in mm:
|
for m in mm:
|
||||||
a=a+mail_txt(m)+"****........................................***\n"
|
a=a+mail_txt(m)+"\n****........................................***\n"
|
||||||
elif filter=="first":
|
elif filter=="first":
|
||||||
a=mail_txt(m[0])
|
a=mail_txt(m[0])
|
||||||
a=re.sub(r'\n\s*\n',r'\n',a)
|
a=re.sub(r'\n\s*\n',r'\n',a)
|
||||||
# a=re.sub(r'<!--.*-->',r'',a,flags=re.MULTILINE|re.DOTALL)
|
a=re.sub(r'<!--.*-->',r'',a,flags=re.MULTILINE|re.DOTALL)
|
||||||
a=re.sub(r'\s*>+ .*\n',r'',a)
|
a=re.sub(r'\s*>+ .*\n',r'',a)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
BIN
test.sqlite
BIN
test.sqlite
Binary file not shown.
Reference in New Issue
Block a user