This commit is contained in:
andis
2017-08-11 20:34:17 +02:00
parent 51acbfbd38
commit 81fa9cc575
10 changed files with 202 additions and 79 deletions

View File

@@ -41,7 +41,7 @@ def store_training_data(i, d,key=b"answered"):
# Lade Trainingsdaten fuer einen angegebenen key (Label/Eigenschaft)
def get_training_threads(key="answered", filter=[]):
def get_training_threads(key="answered", filters=[]):
if not data_types.has_key(key):
raise ValueError("Key "+str(key)+" unknown")
#------------------------------------
@@ -49,6 +49,17 @@ def get_training_threads(key="answered", filter=[]):
d_a=[]
d_a2=[]
#------------------------------------
if "db" in filters:
tt=db_session.query(MailThread).filter(MailThread.istrained==True).all()
for t in tt:
t_a.append(t)
if key =="answered":
d_a.append(t.answered)
elif key=="maintopic":
d_a.append(t.maintopic)
else:
for i in train:
if train[i].has_key(key): # In den Trainingsdaten muss der relevante Key sein
t=db_session.query(MailThread).filter(MailThread.firstmail==i).first()
@@ -91,9 +102,9 @@ class ThreadTextExtractor(BaseEstimator, TransformerMixin):
def transform(self, X, y=None):
    """Map every thread in ``X`` to the value returned by its ``text()`` method.

    ``y`` is accepted but not used. Returns a list with one entry per
    element of ``X``, in the same order.
    """
    texts = []
    for thread in X:
        texts.append(thread.text())
    return texts
def get_pipe(p=b"pipe1",k=b"answered"):
def get_pipe(p=b"pipe1",k=b"answered",filters=[]):
p=build_pipe(p)
tt= get_training_threads(k)
tt= get_training_threads(k,filters)
if len(tt[0]) > 0:
p.fit(tt[0],tt[1])
return p,tt[2]

View File

@@ -60,7 +60,7 @@ def train_single_thread(tid,p,le,key="answered"):
l=le.inverse_transform([ca])[0]
if type(l) is numpy.bool_:
l=bool(l)
if type(l) is numpy.string_:
if type(l) is numpy.string_ or type(l) is numpy.unicode_:
l=str(l)
store_training_data(tid,l, key)
elif not ca.strip() == "":

27
create_migration Executable file
View File

@@ -0,0 +1,27 @@
#!/bin/bash
# Create a new migration script from the difference between the current
# database and the model in storage.metadata, then run `./migration.py test`.
#
# At least one argument must be supplied, otherwise the script aborts; the
# argument itself is not read anywhere below. The file name of the new
# migration is asked for interactively.
if [ $# -eq 0 ]
then
    echo "No Arguments supplied" >&2
    # Signal the usage error to the caller instead of exiting with status 0.
    exit 1
fi
echo "creating a new migration"
./migration.py compare_model_to_db storage.metadata
echo "Dump current database state to file"
./migration.py create_model > oldmodel.py
ls db_repository/versions
echo "Choose a filename for the new migration"
# -r keeps backslashes in the typed name literal.
read -r filename
if [ -z "$filename" ]
then
    # An empty name would otherwise create db_repository/versions/.py
    echo "No filename given" >&2
    rm -f oldmodel.py
    exit 1
fi
# Quote the target so a name with spaces or glob characters stays one path.
./migration.py make_update_script_for_model --oldmodel=oldmodel:meta --model=storage:metadata > "db_repository/versions/$filename.py"
# Back up test.sqlite before the test run and restore it afterwards. The
# test is skipped when no backup could be made, so the database is never
# removed without a copy to put back.
if cp test.sqlite test.sqlite.bak
then
    ./migration.py test
    rm test.sqlite
    mv test.sqlite.bak test.sqlite
else
    echo "Could not back up test.sqlite; skipping migration test" >&2
fi
rm oldmodel.py

View File

@@ -1,38 +1,38 @@
{26808: {maintopic: jobausschreibung}, 27008: {lang: de}, 27017: {lang: de, maintopic: jobausschreibung},
27061: {lang: de}, 27070: {maintopic: ausleihen}, 27083: {maintopic: ausleihen},
27086: {maintopic: information}, 27094: {maintopic: information}, 27096: {maintopic: jobausschreibung},
27102: {lang: en, maintopic: studium}, 27118: {maintopic: information}, 27127: {
maintopic: studium}, 27130: {maintopic: information}, 27133: {maintopic: information},
27141: {maintopic: information}, 27146: {maintopic: information}, 27166: {maintopic: umfragen},
27171: {maintopic: ausleihen}, 27178: {maintopic: studium}, 27182: {maintopic: studium},
27197: {maintopic: information}, 27201: {maintopic: information}, 27218: {maintopic: information},
27219: {maintopic: studium}, 27222: {maintopic: information}, 27226: {maintopic: ausleihen},
27263: {maintopic: ausleihen}, 27267: {maintopic: ausleihen}, 27420: {answered: true,
maintopic: studium}, 27422: {answered: true, maintopic: studium}, 27425: {answered: false,
maintopic: studium}, 27431: {answered: false, maintopic: information}, 27434: {
answered: false, lang: de, maintopic: information}, 27435: {answered: false},
27438: {answered: false, maintopic: information}, 27439: {answered: true, maintopic: studium},
27441: {answered: false, maintopic: studium}, 27444: {answered: true, maintopic: ausleihen},
27454: {answered: false, maintopic: information}, 27455: {answered: false, maintopic: information},
27456: {answered: false, lang: de, maintopic: studium}, 27457: {answered: false,
maintopic: jobausschreibung}, 27468: {answered: true, maintopic: studium}, 27489: {
answered: false, lang: en, maintopic: information}, 27490: {answered: false, maintopic: fachschaftenzeugs},
27491: {answered: false, maintopic: jobausschreibung}, 27492: {answered: false,
maintopic: information}, 27495: {answered: false, maintopic: information}, 27496: {
answered: true, maintopic: ausleihen}, 27497: {answered: false, maintopic: information},
27500: {answered: true, lang: en, maintopic: studium}, 27501: {answered: false,
lang: en, maintopic: information}, 27514: {answered: true, maintopic: studium},
27515: {answered: true, lang: en, maintopic: studium}, 27518: {answered: true, maintopic: studium},
27523: {answered: false, maintopic: jobausschreibung}, 27526: {answered: false,
maintopic: studium}, 27536: {answered: true, lang: de, maintopic: studium}, 27541: {
answered: true, maintopic: studium}, 27542: {answered: false, maintopic: studium},
27543: {answered: false, maintopic: information}, 27544: {answered: true, maintopic: studium},
27545: {answered: false, maintopic: umfragen}, 27546: {answered: false, maintopic: information},
27547: {answered: false, maintopic: studium}, 27549: {answered: false}, 27550: {
answered: false, maintopic: information}, 27553: {answered: false, maintopic: information},
27558: {answered: false}, 27560: {answered: false, maintopic: ausleihen}, 27562: {
answered: false}, 27564: {answered: false, maintopic: jobausschreibung}, 27565: {
answered: true, maintopic: ausleihen}, 27566: {answered: false, maintopic: information},
{26808: {maintopic: jobausschreibung}, 26992: {maintopic: jobausschreibung}, 27008: {
lang: de}, 27017: {lang: de, maintopic: jobausschreibung}, 27061: {lang: de},
27070: {maintopic: ausleihen}, 27083: {maintopic: ausleihen}, 27086: {maintopic: information},
27094: {maintopic: information}, 27096: {maintopic: jobausschreibung}, 27102: {
lang: en, maintopic: studium}, 27118: {maintopic: information}, 27127: {maintopic: studium},
27130: {maintopic: information}, 27133: {maintopic: information}, 27141: {maintopic: information},
27146: {maintopic: information}, 27166: {maintopic: umfragen}, 27171: {maintopic: ausleihen},
27178: {maintopic: studium}, 27182: {maintopic: studium}, 27197: {maintopic: information},
27201: {maintopic: information}, 27218: {maintopic: information}, 27219: {maintopic: studium},
27222: {maintopic: information}, 27226: {maintopic: ausleihen}, 27263: {maintopic: ausleihen},
27267: {maintopic: ausleihen}, 27420: {answered: true, maintopic: studium}, 27422: {
answered: true, maintopic: studium}, 27425: {answered: false, maintopic: studium},
27431: {answered: false, maintopic: information}, 27434: {answered: false, lang: de,
maintopic: information}, 27435: {answered: false}, 27438: {answered: false, maintopic: information},
27439: {answered: true, maintopic: studium}, 27441: {answered: false, maintopic: studium},
27444: {answered: true, maintopic: ausleihen}, 27454: {answered: false, maintopic: information},
27455: {answered: false, maintopic: information}, 27456: {answered: false, lang: de,
maintopic: studium}, 27457: {answered: false, maintopic: jobausschreibung}, 27468: {
answered: true, maintopic: studium}, 27489: {answered: false, lang: en, maintopic: information},
27490: {answered: false, maintopic: fachschaftenzeugs}, 27491: {answered: false,
maintopic: jobausschreibung}, 27492: {answered: false, maintopic: information},
27495: {answered: false, maintopic: information}, 27496: {answered: true, maintopic: ausleihen},
27497: {answered: false, maintopic: information}, 27500: {answered: true, lang: en,
maintopic: studium}, 27501: {answered: false, lang: en, maintopic: information},
27514: {answered: true, maintopic: studium}, 27515: {answered: true, lang: en, maintopic: studium},
27518: {answered: true, maintopic: studium}, 27523: {answered: false, maintopic: jobausschreibung},
27526: {answered: false, maintopic: studium}, 27536: {answered: true, lang: de,
maintopic: studium}, 27541: {answered: true, maintopic: studium}, 27542: {answered: false,
maintopic: studium}, 27543: {answered: false, maintopic: information}, 27544: {
answered: true, maintopic: studium}, 27545: {answered: false, maintopic: umfragen},
27546: {answered: false, maintopic: information}, 27547: {answered: false, maintopic: studium},
27549: {answered: false}, 27550: {answered: false, maintopic: information}, 27553: {
answered: false, maintopic: information}, 27558: {answered: false}, 27560: {answered: false,
maintopic: ausleihen}, 27562: {answered: false}, 27564: {answered: false, maintopic: jobausschreibung},
27565: {answered: true, maintopic: ausleihen}, 27566: {answered: false, maintopic: information},
27567: {answered: false, maintopic: information}, 27568: {answered: false}, 27575: {
answered: false, maintopic: information}, 27577: {answered: false, maintopic: information},
27579: {answered: true, maintopic: diplomarbeit}, 27582: {answered: false, maintopic: studium},

View File

@@ -0,0 +1,39 @@
from sqlalchemy import *
from migrate import *
from migrate.changeset import schema
# Metadata holders used by upgrade() and downgrade(); both are bound to the
# migration engine inside those functions.
pre_meta = MetaData()
post_meta = MetaData()
# The 'threads' table as registered on post_meta. upgrade() creates its
# 'answered' and 'lang' columns and downgrade() drops them again.
threads = Table('threads', post_meta,
Column('created_at', TIMESTAMP, nullable=False),
Column('updated_at', TIMESTAMP, nullable=False),
Column('id', Integer, primary_key=True, nullable=False),
Column('firstmail', Integer),
Column('date', DateTime),
Column('islabeled', Boolean),
Column('istrained', Boolean),
Column('opened', Boolean),
Column('body', Text),
Column('maintopic', String),
Column('lang', String),
Column('answered', String),
)
def upgrade(migrate_engine):
    """Add the 'answered' and 'lang' columns to the 'threads' table.

    Both module-level MetaData objects are bound to ``migrate_engine``; no
    separate engine is created here.
    """
    for meta in (pre_meta, post_meta):
        meta.bind = migrate_engine
    thread_columns = post_meta.tables['threads'].columns
    # Same order as before: 'answered' first, then 'lang'.
    for column_name in ('answered', 'lang'):
        thread_columns[column_name].create()
def downgrade(migrate_engine):
    """Drop the 'answered' and 'lang' columns from the 'threads' table.

    Both module-level MetaData objects are bound to ``migrate_engine``
    before the columns are dropped.
    """
    for meta in (pre_meta, post_meta):
        meta.bind = migrate_engine
    thread_columns = post_meta.tables['threads'].columns
    # Same order as before: 'answered' first, then 'lang'.
    for column_name in ('answered', 'lang'):
        thread_columns[column_name].drop()

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import flask
from flask import Flask,jsonify,send_from_directory, render_template
from flask import Flask,jsonify,send_from_directory, render_template, request,redirect,url_for
from config import Config
import yaml
import os
@@ -14,12 +14,12 @@ package_directory = os.path.dirname(os.path.abspath(__file__))
cfg = Config(file(os.path.join(package_directory, 'config.cfg')))
def render_index(mths,code=200):
def render_index(mths,opened=None,code=200):
return render_template("index.html",mths=mths,
title=cfg.title.decode("utf8"),
title=cfg.title.decode("utf8"),opened=opened
), code
from classifier import get_pipe
mail_threads=db_session.query(MailThread).all()
#mail_threads=db_session.query(MailThread).all()
#pipe1,le=get_pipe("pipe1",b"answered")
#pipe2,le2=get_pipe("pipe2b", b"maintopic")
#pipe3,le3=get_pipe("pipe2b", b"lang")
@@ -33,28 +33,62 @@ mail_threads=db_session.query(MailThread).all()
# t.maintopic=maintopic[i]
# t.lang=lang[i]
maintopic_values=["studium", "information","ausleihen"]
@app.route("/")
def hello():
    """Render the index page with all mail threads, ordered by date descending."""
    newest_first = desc(MailThread.date)
    threads = db_session.query(MailThread).order_by(newest_first).all()
    return render_index(threads)
@app.route("/answered/<id>/<value>")
def store_answered(id, value):
def store_value(id,key,value):
mth=db_session.query(MailThread).filter(MailThread.firstmail==int(id)).first()
if key =="answered":
value = value in ["true", "True", "1", "t"]
mth.answered=bool(value)
mth.opened=bool(value)
return render_index([mth])
if key=="maintopic" and value in maintopic_values:
mth.maintopic=str(value)
if key =="trained":
value = value in ["true", "True", "1", "t"]
mth.istrained=bool(value)
@app.route("/<int:id>")
def store_answered(id):
    """Show a single thread, optionally storing a value on it first.

    When both the ``key`` and ``value`` query parameters are present they
    are passed to ``store_value`` before the page is rendered. Responds with
    404 when no thread has ``id`` as its first mail.
    """
    key = request.args.get('key')
    value = request.args.get('value')
    if key is not None and value is not None:
        store_value(id, key, value)
    # Look the thread up here: nothing else in this function defines it, so
    # rendering without this lookup raised a NameError.
    mth = db_session.query(MailThread).filter(MailThread.firstmail == int(id)).first()
    if mth is None:
        flask.abort(404)
    return render_index([mth], opened=id)
@app.route("/studium")
@app.route("/studium/")
def studium():
mth=db_session.query(MailThread).filter(MailThread.maintopic=="studium").order_by(desc(MailThread.date)).all()
return render_index(mth)
@app.route("/<maintopic>")
@app.route("/<maintopic>/")
def maintopic(maintopic):
mth=db_session.query(MailThread).filter(MailThread.maintopic=="%s" % maintopic).order_by(desc(MailThread.date)).all()
return render_index(mth)
@app.route("/<maintopic>/<int:id>")
def maintopic_store(maintopic, id):
    """List the threads of one topic with thread ``id`` marked as opened.

    The topic name "trained" lists the threads whose ``istrained`` flag is
    set instead of filtering by ``maintopic``. When both the ``key`` and
    ``value`` query parameters are given, the value is stored through
    ``store_value`` and the client is redirected to this route.
    """
    key = request.args.get('key')
    value = request.args.get('value')
    if key is not None and value is not None:
        store_value(id, key, value)
        # The redirect target carries no key/value, so following it only
        # renders the listing.
        return redirect(url_for('maintopic_store', id=id, maintopic=maintopic), 302)
    # Build the listing only when it is rendered; the redirect branch never
    # used the query result.
    query = db_session.query(MailThread)
    if maintopic == "trained":
        # "== True" is kept on purpose: it builds the SQL comparison.
        query = query.filter(MailThread.istrained == True)
    else:
        query = query.filter(MailThread.maintopic == maintopic)
    mth = query.order_by(desc(MailThread.date)).all()
    return render_index(mth, opened=id)

View File

@@ -1,13 +1,13 @@
<html>
<head>
<title>{{title}}</title>
<script src="static/jquery-3.2.0.min.js" ></script>
<link rel="stylesheet" href="static/bootstrap/css/bootstrap.min.css"/>
<script src="static/bootstrap/js/bootstrap.min.js" ></script>
<script src="/static/jquery-3.2.0.min.js" ></script>
<link rel="stylesheet" href="/static/bootstrap/css/bootstrap.min.css"/>
<script src="/static/bootstrap/js/bootstrap.min.js" ></script>
</head>
<body>
<style>
.card.opened-True {
.card.answ-1,.card.answ-True {
background: lightgreen;
}
</style>
@@ -19,19 +19,20 @@
<div id="accordion" role="tablist" aria-multiselectable="true">
{% for m in mths %}
<div class="card opened-{{m.opened}}" style="padding-top: 2pt; padding-bottom:2pt; border-radius:0;margin-top:1pt; margin-bottom:1pt">
<div class="card answ-{{m.is_answered()}}" style="padding-top: 2pt; padding-bottom:2pt; border-radius:0;margin-top:1pt; margin-bottom:1pt">
<div class="" role="tab" id="heading{{m.firstmail}}">
<b class="mb-0">
<a data-toggle="collapse" data-parent="#accordion" href="#collapse{{m.firstmail}}" aria-expanded="true" aria-controls="collapse1">
{{m.tstr()}}
{% if m.istrained %} trained: {% endif %} {{m.tstr()}}
</a>
</b>
</div>
<div id="collapse{{m.firstmail}}" class="collapse" role="tabpanel" aria-labelledby="headingOne">
<div id="collapse{{m.firstmail}}" class="collapse {{'show' if m.firstmail==opened}}" role="tabpanel" aria-labelledby="headingOne">
<div class="card-block">
{{m.maintopic}}
<a href="{{m.firstmail}}?key=answered&value={{(not m.is_answered())}}">answered:{{(not m.is_answered())}}</a>
{{m.maintopic}}, {{ m.istrained }} <a href="{{m.firstmail}}?key=trained&value={{(not m.istrained)}}">trained:{{(not m.istrained)}}</a>
<div style="white-space: pre-wrap;font:Courier, monospace; font-size:small; width:50em; border: thin blue solid;">
{{ m.print_text() }}
</div>

24
run.py
View File

@@ -34,18 +34,18 @@ if len(sys.argv)>1:
pipe1,le=get_pipe("pipe1",b"answered")
pipe2,le2=get_pipe("pipe2b", b"maintopic")
pipe3,le3=get_pipe("pipe2b", b"lang")
mail_threads=db_session.query(MailThread).all()
mail_threads=db_session.query(MailThread).filter(MailThread.istrained==False).all()
answered=le.inverse_transform(pipe1.predict(mail_threads))
maintopic=le2.inverse_transform(pipe2.predict(mail_threads))
lang=le3.inverse_transform(pipe3.predict(mail_threads))
for i, t in enumerate(mail_threads):
t.answered=answered[i]
t.opened=answered[i]
t.answered=bool(answered[i])
t.opened=bool(answered[i])
t.maintopic=maintopic[i]
t.lang=lang[i]
t.maintopic=str(maintopic[i])
t.lang=str(lang[i])
db_session.add(t)
db_session.commit()
@@ -75,7 +75,14 @@ if len(sys.argv)>1:
for t in mth:
t.compile()
if sys.argv[1] == "trained_threads_from_yml":
from classifier.classifier import train
for k in train:
print k
t=db_session.query(MailThread).filter(MailThread.firstmail==k).first()
t.istrained=True
db_session.add(t)
db_session.commit()
if sys.argv[1] == "print_threads2":
mth=db_session.query(MailThread).all()
for t in mth:
@@ -83,8 +90,8 @@ if len(sys.argv)>1:
print "---------------\n"
if sys.argv[1] == "train_thrd2":
p, le=get_pipe("pipe2", "maintopic")
pb, lb =get_pipe("pipe2b", "maintopic")
p, le=get_pipe("pipe2", "maintopic",["db"])
pb, lb =get_pipe("pipe2b", "maintopic",["db"])
train_single_thread(int(sys.argv[2]),p,le,b"maintopic")
@@ -122,6 +129,7 @@ if len(sys.argv)>1:
print le.inverse_transform(pipe2.predict([t]))
if sys.argv[1] == "train_thrd":
pipe1, labelencoder=train_fit_pipe()
train_single_thread(int(sys.argv[2]),pipe1,labelencoder)

View File

@@ -29,16 +29,19 @@ class MailThread(Base):
opened = Column(Boolean)
body = Column(Text)
maintopic=Column(String)
lang=Column(String)
answered=Column(String)
__schema__=FullThreadSchema
__jsonid__='thread'
__whiteattrs__= ["body"]
__jsonattrs__=None
answered=False
# answered=False
# maintopic="information"
lang=""
# lang=""
def bdy(self):
    """Return the ``body`` attribute parsed with ``yaml.load``."""
    # NOTE(review): yaml.load without an explicit safe loader can construct
    # arbitrary objects -- confirm that body only ever holds trusted data.
    raw_body = self.body
    return yaml.load(raw_body)
def is_answered(self):
    """Return True when ``answered`` holds a truthy flag, else False.

    Booleans are returned unchanged, ``None`` counts as not answered, and
    any other value is compared as text, ignoring case and surrounding
    whitespace, against "1", "true" and "t".
    """
    value = self.answered
    # A bool assigned directly to the attribute stays a bool until the row
    # is loaded again, so it must not be tested against the strings below.
    if isinstance(value, bool):
        return value
    if value is None:
        return False
    text = value if hasattr(value, "lower") else str(value)
    return text.strip().lower() in ("1", "true", "t")
def to_text(self):
mmm=self.mails()
txt=""

Binary file not shown.