div tzg
This commit is contained in:
@@ -41,7 +41,7 @@ def store_training_data(i, d,key=b"answered"):
|
||||
|
||||
|
||||
# Lade Trainingsdaten fuer einen angegebenen key (Label/Eigenschaft)
|
||||
def get_training_threads(key="answered", filter=[]):
|
||||
def get_training_threads(key="answered", filters=[]):
|
||||
if not data_types.has_key(key):
|
||||
raise ValueError("Key "+str(key)+" unknown")
|
||||
#------------------------------------
|
||||
@@ -49,6 +49,17 @@ def get_training_threads(key="answered", filter=[]):
|
||||
d_a=[]
|
||||
d_a2=[]
|
||||
#------------------------------------
|
||||
if "db" in filters:
|
||||
tt=db_session.query(MailThread).filter(MailThread.istrained==True).all()
|
||||
for t in tt:
|
||||
t_a.append(t)
|
||||
if key =="answered":
|
||||
d_a.append(t.answered)
|
||||
elif key=="maintopic":
|
||||
d_a.append(t.maintopic)
|
||||
|
||||
|
||||
else:
|
||||
for i in train:
|
||||
if train[i].has_key(key): # In den Trainingsdaten muss der relevante Key sein
|
||||
t=db_session.query(MailThread).filter(MailThread.firstmail==i).first()
|
||||
@@ -91,9 +102,9 @@ class ThreadTextExtractor(BaseEstimator, TransformerMixin):
|
||||
def transform(self, X,y=None):
|
||||
return [t.text() for t in X]
|
||||
|
||||
def get_pipe(p=b"pipe1",k=b"answered"):
|
||||
def get_pipe(p=b"pipe1",k=b"answered",filters=[]):
|
||||
p=build_pipe(p)
|
||||
tt= get_training_threads(k)
|
||||
tt= get_training_threads(k,filters)
|
||||
if len(tt[0]) > 0:
|
||||
p.fit(tt[0],tt[1])
|
||||
return p,tt[2]
|
||||
|
||||
@@ -60,7 +60,7 @@ def train_single_thread(tid,p,le,key="answered"):
|
||||
l=le.inverse_transform([ca])[0]
|
||||
if type(l) is numpy.bool_:
|
||||
l=bool(l)
|
||||
if type(l) is numpy.string_:
|
||||
if type(l) is numpy.string_ or type(l) is numpy.unicode_:
|
||||
l=str(l)
|
||||
store_training_data(tid,l, key)
|
||||
elif not ca.strip() == "":
|
||||
|
||||
27
create_migration
Executable file
27
create_migration
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
if [ $# -eq 0 ]
|
||||
then
|
||||
echo "No Arguments supplied"
|
||||
exit
|
||||
fi
|
||||
|
||||
echo "creating a new migration"
|
||||
./migration.py compare_model_to_db storage.metadata
|
||||
|
||||
echo "Dump current database state to file"
|
||||
./migration.py create_model > oldmodel.py
|
||||
|
||||
ls db_repository/versions
|
||||
echo "Choose a filename for the new migration"
|
||||
read filename
|
||||
|
||||
./migration.py make_update_script_for_model --oldmodel=oldmodel:meta --model=storage:metadata > db_repository/versions/$filename.py
|
||||
|
||||
cp test.sqlite test.sqlite.bak
|
||||
./migration.py test
|
||||
rm test.sqlite
|
||||
mv test.sqlite.bak test.sqlite
|
||||
|
||||
|
||||
|
||||
rm oldmodel.py
|
||||
70
data.yml
70
data.yml
@@ -1,38 +1,38 @@
|
||||
{26808: {maintopic: jobausschreibung}, 27008: {lang: de}, 27017: {lang: de, maintopic: jobausschreibung},
|
||||
27061: {lang: de}, 27070: {maintopic: ausleihen}, 27083: {maintopic: ausleihen},
|
||||
27086: {maintopic: information}, 27094: {maintopic: information}, 27096: {maintopic: jobausschreibung},
|
||||
27102: {lang: en, maintopic: studium}, 27118: {maintopic: information}, 27127: {
|
||||
maintopic: studium}, 27130: {maintopic: information}, 27133: {maintopic: information},
|
||||
27141: {maintopic: information}, 27146: {maintopic: information}, 27166: {maintopic: umfragen},
|
||||
27171: {maintopic: ausleihen}, 27178: {maintopic: studium}, 27182: {maintopic: studium},
|
||||
27197: {maintopic: information}, 27201: {maintopic: information}, 27218: {maintopic: information},
|
||||
27219: {maintopic: studium}, 27222: {maintopic: information}, 27226: {maintopic: ausleihen},
|
||||
27263: {maintopic: ausleihen}, 27267: {maintopic: ausleihen}, 27420: {answered: true,
|
||||
maintopic: studium}, 27422: {answered: true, maintopic: studium}, 27425: {answered: false,
|
||||
maintopic: studium}, 27431: {answered: false, maintopic: information}, 27434: {
|
||||
answered: false, lang: de, maintopic: information}, 27435: {answered: false},
|
||||
27438: {answered: false, maintopic: information}, 27439: {answered: true, maintopic: studium},
|
||||
27441: {answered: false, maintopic: studium}, 27444: {answered: true, maintopic: ausleihen},
|
||||
27454: {answered: false, maintopic: information}, 27455: {answered: false, maintopic: information},
|
||||
27456: {answered: false, lang: de, maintopic: studium}, 27457: {answered: false,
|
||||
maintopic: jobausschreibung}, 27468: {answered: true, maintopic: studium}, 27489: {
|
||||
answered: false, lang: en, maintopic: information}, 27490: {answered: false, maintopic: fachschaftenzeugs},
|
||||
27491: {answered: false, maintopic: jobausschreibung}, 27492: {answered: false,
|
||||
maintopic: information}, 27495: {answered: false, maintopic: information}, 27496: {
|
||||
answered: true, maintopic: ausleihen}, 27497: {answered: false, maintopic: information},
|
||||
27500: {answered: true, lang: en, maintopic: studium}, 27501: {answered: false,
|
||||
lang: en, maintopic: information}, 27514: {answered: true, maintopic: studium},
|
||||
27515: {answered: true, lang: en, maintopic: studium}, 27518: {answered: true, maintopic: studium},
|
||||
27523: {answered: false, maintopic: jobausschreibung}, 27526: {answered: false,
|
||||
maintopic: studium}, 27536: {answered: true, lang: de, maintopic: studium}, 27541: {
|
||||
answered: true, maintopic: studium}, 27542: {answered: false, maintopic: studium},
|
||||
27543: {answered: false, maintopic: information}, 27544: {answered: true, maintopic: studium},
|
||||
27545: {answered: false, maintopic: umfragen}, 27546: {answered: false, maintopic: information},
|
||||
27547: {answered: false, maintopic: studium}, 27549: {answered: false}, 27550: {
|
||||
answered: false, maintopic: information}, 27553: {answered: false, maintopic: information},
|
||||
27558: {answered: false}, 27560: {answered: false, maintopic: ausleihen}, 27562: {
|
||||
answered: false}, 27564: {answered: false, maintopic: jobausschreibung}, 27565: {
|
||||
answered: true, maintopic: ausleihen}, 27566: {answered: false, maintopic: information},
|
||||
{26808: {maintopic: jobausschreibung}, 26992: {maintopic: jobausschreibung}, 27008: {
|
||||
lang: de}, 27017: {lang: de, maintopic: jobausschreibung}, 27061: {lang: de},
|
||||
27070: {maintopic: ausleihen}, 27083: {maintopic: ausleihen}, 27086: {maintopic: information},
|
||||
27094: {maintopic: information}, 27096: {maintopic: jobausschreibung}, 27102: {
|
||||
lang: en, maintopic: studium}, 27118: {maintopic: information}, 27127: {maintopic: studium},
|
||||
27130: {maintopic: information}, 27133: {maintopic: information}, 27141: {maintopic: information},
|
||||
27146: {maintopic: information}, 27166: {maintopic: umfragen}, 27171: {maintopic: ausleihen},
|
||||
27178: {maintopic: studium}, 27182: {maintopic: studium}, 27197: {maintopic: information},
|
||||
27201: {maintopic: information}, 27218: {maintopic: information}, 27219: {maintopic: studium},
|
||||
27222: {maintopic: information}, 27226: {maintopic: ausleihen}, 27263: {maintopic: ausleihen},
|
||||
27267: {maintopic: ausleihen}, 27420: {answered: true, maintopic: studium}, 27422: {
|
||||
answered: true, maintopic: studium}, 27425: {answered: false, maintopic: studium},
|
||||
27431: {answered: false, maintopic: information}, 27434: {answered: false, lang: de,
|
||||
maintopic: information}, 27435: {answered: false}, 27438: {answered: false, maintopic: information},
|
||||
27439: {answered: true, maintopic: studium}, 27441: {answered: false, maintopic: studium},
|
||||
27444: {answered: true, maintopic: ausleihen}, 27454: {answered: false, maintopic: information},
|
||||
27455: {answered: false, maintopic: information}, 27456: {answered: false, lang: de,
|
||||
maintopic: studium}, 27457: {answered: false, maintopic: jobausschreibung}, 27468: {
|
||||
answered: true, maintopic: studium}, 27489: {answered: false, lang: en, maintopic: information},
|
||||
27490: {answered: false, maintopic: fachschaftenzeugs}, 27491: {answered: false,
|
||||
maintopic: jobausschreibung}, 27492: {answered: false, maintopic: information},
|
||||
27495: {answered: false, maintopic: information}, 27496: {answered: true, maintopic: ausleihen},
|
||||
27497: {answered: false, maintopic: information}, 27500: {answered: true, lang: en,
|
||||
maintopic: studium}, 27501: {answered: false, lang: en, maintopic: information},
|
||||
27514: {answered: true, maintopic: studium}, 27515: {answered: true, lang: en, maintopic: studium},
|
||||
27518: {answered: true, maintopic: studium}, 27523: {answered: false, maintopic: jobausschreibung},
|
||||
27526: {answered: false, maintopic: studium}, 27536: {answered: true, lang: de,
|
||||
maintopic: studium}, 27541: {answered: true, maintopic: studium}, 27542: {answered: false,
|
||||
maintopic: studium}, 27543: {answered: false, maintopic: information}, 27544: {
|
||||
answered: true, maintopic: studium}, 27545: {answered: false, maintopic: umfragen},
|
||||
27546: {answered: false, maintopic: information}, 27547: {answered: false, maintopic: studium},
|
||||
27549: {answered: false}, 27550: {answered: false, maintopic: information}, 27553: {
|
||||
answered: false, maintopic: information}, 27558: {answered: false}, 27560: {answered: false,
|
||||
maintopic: ausleihen}, 27562: {answered: false}, 27564: {answered: false, maintopic: jobausschreibung},
|
||||
27565: {answered: true, maintopic: ausleihen}, 27566: {answered: false, maintopic: information},
|
||||
27567: {answered: false, maintopic: information}, 27568: {answered: false}, 27575: {
|
||||
answered: false, maintopic: information}, 27577: {answered: false, maintopic: information},
|
||||
27579: {answered: true, maintopic: diplomarbeit}, 27582: {answered: false, maintopic: studium},
|
||||
|
||||
39
db_repository/versions/005_answered_lang.py
Normal file
39
db_repository/versions/005_answered_lang.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from sqlalchemy import *
|
||||
from migrate import *
|
||||
|
||||
|
||||
from migrate.changeset import schema
|
||||
pre_meta = MetaData()
|
||||
post_meta = MetaData()
|
||||
threads = Table('threads', post_meta,
|
||||
Column('created_at', TIMESTAMP, nullable=False),
|
||||
Column('updated_at', TIMESTAMP, nullable=False),
|
||||
Column('id', Integer, primary_key=True, nullable=False),
|
||||
Column('firstmail', Integer),
|
||||
Column('date', DateTime),
|
||||
Column('islabeled', Boolean),
|
||||
Column('istrained', Boolean),
|
||||
Column('opened', Boolean),
|
||||
Column('body', Text),
|
||||
Column('maintopic', String),
|
||||
Column('lang', String),
|
||||
Column('answered', String),
|
||||
)
|
||||
|
||||
|
||||
def upgrade(migrate_engine):
|
||||
# Upgrade operations go here. Don't create your own engine; bind
|
||||
# migrate_engine to your metadata
|
||||
pre_meta.bind = migrate_engine
|
||||
post_meta.bind = migrate_engine
|
||||
post_meta.tables['threads'].columns['answered'].create()
|
||||
post_meta.tables['threads'].columns['lang'].create()
|
||||
|
||||
|
||||
def downgrade(migrate_engine):
|
||||
# Operations to reverse the above upgrade go here.
|
||||
pre_meta.bind = migrate_engine
|
||||
post_meta.bind = migrate_engine
|
||||
post_meta.tables['threads'].columns['answered'].drop()
|
||||
post_meta.tables['threads'].columns['lang'].drop()
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import flask
|
||||
|
||||
from flask import Flask,jsonify,send_from_directory, render_template
|
||||
from flask import Flask,jsonify,send_from_directory, render_template, request,redirect,url_for
|
||||
from config import Config
|
||||
import yaml
|
||||
import os
|
||||
@@ -14,12 +14,12 @@ package_directory = os.path.dirname(os.path.abspath(__file__))
|
||||
cfg = Config(file(os.path.join(package_directory, 'config.cfg')))
|
||||
|
||||
|
||||
def render_index(mths,code=200):
|
||||
def render_index(mths,opened=None,code=200):
|
||||
return render_template("index.html",mths=mths,
|
||||
title=cfg.title.decode("utf8"),
|
||||
title=cfg.title.decode("utf8"),opened=opened
|
||||
), code
|
||||
from classifier import get_pipe
|
||||
mail_threads=db_session.query(MailThread).all()
|
||||
#mail_threads=db_session.query(MailThread).all()
|
||||
#pipe1,le=get_pipe("pipe1",b"answered")
|
||||
#pipe2,le2=get_pipe("pipe2b", b"maintopic")
|
||||
#pipe3,le3=get_pipe("pipe2b", b"lang")
|
||||
@@ -33,28 +33,62 @@ mail_threads=db_session.query(MailThread).all()
|
||||
# t.maintopic=maintopic[i]
|
||||
# t.lang=lang[i]
|
||||
|
||||
maintopic_values=["studium", "information","ausleihen"]
|
||||
|
||||
@app.route("/")
|
||||
def hello():
|
||||
mth=db_session.query(MailThread).order_by(desc(MailThread.date)).all()
|
||||
return render_index(mth)
|
||||
|
||||
@app.route("/answered/<id>/<value>")
|
||||
def store_answered(id, value):
|
||||
def store_value(id,key,value):
|
||||
mth=db_session.query(MailThread).filter(MailThread.firstmail==int(id)).first()
|
||||
|
||||
if key =="answered":
|
||||
value = value in ["true", "True", "1", "t"]
|
||||
mth.answered=bool(value)
|
||||
mth.opened=bool(value)
|
||||
return render_index([mth])
|
||||
if key=="maintopic" and value in maintopic_values:
|
||||
mth.maintopic=str(value)
|
||||
if key =="trained":
|
||||
value = value in ["true", "True", "1", "t"]
|
||||
mth.istrained=bool(value)
|
||||
|
||||
@app.route("/<int:id>")
|
||||
def store_answered(id):
|
||||
|
||||
key = request.args.get('key')
|
||||
value = request.args.get('value')
|
||||
if not key is None and not value is None:
|
||||
store_value(id,key,value)
|
||||
|
||||
return render_index([mth], opened=id)
|
||||
|
||||
|
||||
|
||||
@app.route("/studium")
|
||||
@app.route("/studium/")
|
||||
def studium():
|
||||
mth=db_session.query(MailThread).filter(MailThread.maintopic=="studium").order_by(desc(MailThread.date)).all()
|
||||
return render_index(mth)
|
||||
|
||||
|
||||
@app.route("/<maintopic>")
|
||||
@app.route("/<maintopic>/")
|
||||
def maintopic(maintopic):
|
||||
mth=db_session.query(MailThread).filter(MailThread.maintopic=="%s" % maintopic).order_by(desc(MailThread.date)).all()
|
||||
return render_index(mth)
|
||||
|
||||
@app.route("/<maintopic>/<int:id>")
|
||||
def maintopic_store(maintopic,id):
|
||||
if maintopic == "trained":
|
||||
mth=db_session.query(MailThread).filter(MailThread.istrained==True).order_by(desc(MailThread.date)).all()
|
||||
else:
|
||||
mth=db_session.query(MailThread).filter(MailThread.maintopic=="%s" % maintopic).order_by(desc(MailThread.date)).all()
|
||||
|
||||
key = request.args.get('key')
|
||||
value = request.args.get('value')
|
||||
|
||||
|
||||
if not key is None and not value is None:
|
||||
store_value(id,key,value)
|
||||
return redirect(url_for('maintopic_store', id=id, maintopic=maintopic), 302)
|
||||
else:
|
||||
return render_index(mth,opened=id)
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>{{title}}</title>
|
||||
<script src="static/jquery-3.2.0.min.js" ></script>
|
||||
<link rel="stylesheet" href="static/bootstrap/css/bootstrap.min.css"/>
|
||||
<script src="static/bootstrap/js/bootstrap.min.js" ></script>
|
||||
<script src="/static/jquery-3.2.0.min.js" ></script>
|
||||
<link rel="stylesheet" href="/static/bootstrap/css/bootstrap.min.css"/>
|
||||
<script src="/static/bootstrap/js/bootstrap.min.js" ></script>
|
||||
</head>
|
||||
<body>
|
||||
<style>
|
||||
.card.opened-True {
|
||||
.card.answ-1,.card.answ-True {
|
||||
background: lightgreen;
|
||||
}
|
||||
</style>
|
||||
@@ -19,19 +19,20 @@
|
||||
<div id="accordion" role="tablist" aria-multiselectable="true">
|
||||
|
||||
{% for m in mths %}
|
||||
<div class="card opened-{{m.opened}}" style="padding-top: 2pt; padding-bottom:2pt; border-radius:0;margin-top:1pt; margin-bottom:1pt">
|
||||
<div class="card answ-{{m.is_answered()}}" style="padding-top: 2pt; padding-bottom:2pt; border-radius:0;margin-top:1pt; margin-bottom:1pt">
|
||||
<div class="" role="tab" id="heading{{m.firstmail}}">
|
||||
<b class="mb-0">
|
||||
<a data-toggle="collapse" data-parent="#accordion" href="#collapse{{m.firstmail}}" aria-expanded="true" aria-controls="collapse1">
|
||||
{{m.tstr()}}
|
||||
{% if m.istrained %} trained: {% endif %} {{m.tstr()}}
|
||||
</a>
|
||||
</b>
|
||||
</div>
|
||||
|
||||
<div id="collapse{{m.firstmail}}" class="collapse" role="tabpanel" aria-labelledby="headingOne">
|
||||
<div id="collapse{{m.firstmail}}" class="collapse {{'show' if m.firstmail==opened}}" role="tabpanel" aria-labelledby="headingOne">
|
||||
|
||||
<div class="card-block">
|
||||
{{m.maintopic}}
|
||||
<a href="{{m.firstmail}}?key=answered&value={{(not m.is_answered())}}">answered:{{(not m.is_answered())}}</a>
|
||||
{{m.maintopic}}, {{ m.istrained }} <a href="{{m.firstmail}}?key=trained&value={{(not m.istrained)}}">trained:{{(not m.istrained)}}</a>
|
||||
<div style="white-space: pre-wrap;font:Courier, monospace; font-size:small; width:50em; border: thin blue solid;">
|
||||
{{ m.print_text() }}
|
||||
</div>
|
||||
|
||||
24
run.py
24
run.py
@@ -34,18 +34,18 @@ if len(sys.argv)>1:
|
||||
pipe1,le=get_pipe("pipe1",b"answered")
|
||||
pipe2,le2=get_pipe("pipe2b", b"maintopic")
|
||||
pipe3,le3=get_pipe("pipe2b", b"lang")
|
||||
mail_threads=db_session.query(MailThread).all()
|
||||
mail_threads=db_session.query(MailThread).filter(MailThread.istrained==False).all()
|
||||
|
||||
answered=le.inverse_transform(pipe1.predict(mail_threads))
|
||||
maintopic=le2.inverse_transform(pipe2.predict(mail_threads))
|
||||
lang=le3.inverse_transform(pipe3.predict(mail_threads))
|
||||
|
||||
for i, t in enumerate(mail_threads):
|
||||
t.answered=answered[i]
|
||||
t.opened=answered[i]
|
||||
t.answered=bool(answered[i])
|
||||
t.opened=bool(answered[i])
|
||||
|
||||
t.maintopic=maintopic[i]
|
||||
t.lang=lang[i]
|
||||
t.maintopic=str(maintopic[i])
|
||||
t.lang=str(lang[i])
|
||||
db_session.add(t)
|
||||
db_session.commit()
|
||||
|
||||
@@ -75,7 +75,14 @@ if len(sys.argv)>1:
|
||||
for t in mth:
|
||||
t.compile()
|
||||
|
||||
|
||||
if sys.argv[1] == "trained_threads_from_yml":
|
||||
from classifier.classifier import train
|
||||
for k in train:
|
||||
print k
|
||||
t=db_session.query(MailThread).filter(MailThread.firstmail==k).first()
|
||||
t.istrained=True
|
||||
db_session.add(t)
|
||||
db_session.commit()
|
||||
if sys.argv[1] == "print_threads2":
|
||||
mth=db_session.query(MailThread).all()
|
||||
for t in mth:
|
||||
@@ -83,8 +90,8 @@ if len(sys.argv)>1:
|
||||
print "---------------\n"
|
||||
|
||||
if sys.argv[1] == "train_thrd2":
|
||||
p, le=get_pipe("pipe2", "maintopic")
|
||||
pb, lb =get_pipe("pipe2b", "maintopic")
|
||||
p, le=get_pipe("pipe2", "maintopic",["db"])
|
||||
pb, lb =get_pipe("pipe2b", "maintopic",["db"])
|
||||
|
||||
train_single_thread(int(sys.argv[2]),p,le,b"maintopic")
|
||||
|
||||
@@ -122,6 +129,7 @@ if len(sys.argv)>1:
|
||||
print le.inverse_transform(pipe2.predict([t]))
|
||||
|
||||
|
||||
|
||||
if sys.argv[1] == "train_thrd":
|
||||
pipe1, labelencoder=train_fit_pipe()
|
||||
train_single_thread(int(sys.argv[2]),pipe1,labelencoder)
|
||||
|
||||
@@ -29,16 +29,19 @@ class MailThread(Base):
|
||||
opened = Column(Boolean)
|
||||
body = Column(Text)
|
||||
maintopic=Column(String)
|
||||
lang=Column(String)
|
||||
answered=Column(String)
|
||||
__schema__=FullThreadSchema
|
||||
__jsonid__='thread'
|
||||
__whiteattrs__= ["body"]
|
||||
__jsonattrs__=None
|
||||
answered=False
|
||||
# answered=False
|
||||
# maintopic="information"
|
||||
lang=""
|
||||
# lang=""
|
||||
def bdy(self):
|
||||
return yaml.load(self.body)
|
||||
|
||||
def is_answered(self):
|
||||
return self.answered in ["1", "true", "True", "t","T"]
|
||||
def to_text(self):
|
||||
mmm=self.mails()
|
||||
txt=""
|
||||
|
||||
BIN
test.sqlite
BIN
test.sqlite
Binary file not shown.
Reference in New Issue
Block a user