"""Command-line entry point for the mail-thread storage and classifier tools.

Usage: ``python <this file> COMMAND [ARG]``

The first command-line argument selects one of the handlers registered in
``COMMANDS`` (fetching mails/threads, storing them through ``db_session``,
printing them, training or applying the classifier pipelines, running the
Flask app, initialising the database).  Unknown commands, and running
without a command, do nothing.

The dispatch runs at module level (there is no ``__main__`` guard), exactly
as in the original script.
"""
from __future__ import print_function, unicode_literals

#import imapclient
from config import Config
import sys
# Both imports are required by the "fetch_mail" command.
from email.header import decode_header
import email
import codecs
#import bs4
import yaml
#sys.stdout = codecs.getwriter('utf8')(sys.stdout)
from storage.fetch_mail import fetch_mail
from storage.fetch_mail import fetch_threads, flatten_threads
from storage import Mail, MailThread, db_session
from classifier import (get_training_threads, print_answers, in_training,
                        store_training_data, get_pipe, test_pipe,
                        train_single_thread)  # , pipe2, pipe2b

# Separator printed between threads during interactive training.
_TRAINING_SEPARATOR = "---------------------------------------------------"


def predict_thread(p, l, t):
    """Predict the label of a single thread and print the decoded result.

    p -- pipeline object exposing ``predict``
    l -- label encoder exposing ``inverse_transform``
    t -- the thread to classify

    Returns the raw (still encoded) prediction from ``p.predict([t])``.
    """
    pre = p.predict([t])
    print("Status is answered is estimated to be: "
          + str(l.inverse_transform(pre)[0]))
    return pre


# ---------------------------------------------------------------------------
# Small helpers shared by the command handlers
# ---------------------------------------------------------------------------

def _arg():
    """Return the command's argument (``sys.argv[2]``) or exit with a message."""
    if len(sys.argv) < 3:
        sys.exit("command '%s' requires an argument" % sys.argv[1])
    return sys.argv[2]


def _int_arg():
    """Return the command's argument converted with ``int``."""
    return int(_arg())


def _all_threads():
    """Return every stored ``MailThread``."""
    return db_session.query(MailThread).all()


def _thread_by_firstmail(firstmail):
    """Return the thread whose ``firstmail`` matches, or exit if none exists."""
    thread = (db_session.query(MailThread)
              .filter(MailThread.firstmail == firstmail)
              .first())
    if thread is None:
        sys.exit("no thread with firstmail %s" % firstmail)
    return thread


def _mail_by_id(mail_id):
    """Return the stored ``Mail`` with the given id, or exit if none exists."""
    mail = db_session.query(Mail).filter(Mail.id == mail_id).first()
    if mail is None:
        sys.exit("no mail with id %s" % mail_id)
    return mail


# ---------------------------------------------------------------------------
# Command handlers (one per value of sys.argv[1])
# ---------------------------------------------------------------------------

def _cmd_fetch_threads():
    """Fetch the threads and print them flattened."""
    print(flatten_threads(fetch_threads()))


def _cmd_predict_threads():
    """Predict answered/maintopic/lang for every stored thread and save them."""
    pipe1, le = get_pipe("pipe1", b"answered")
    pipe2, le2 = get_pipe("pipe2b", b"maintopic")
    pipe3, le3 = get_pipe("pipe2b", b"lang")
    mail_threads = _all_threads()
    answered = le.inverse_transform(pipe1.predict(mail_threads))
    maintopic = le2.inverse_transform(pipe2.predict(mail_threads))
    lang = le3.inverse_transform(pipe3.predict(mail_threads))
    for i, t in enumerate(mail_threads):
        t.answered = answered[i]
        # The original also stores the "answered" prediction in ``opened``.
        t.opened = answered[i]
        t.maintopic = maintopic[i]
        t.lang = lang[i]
        db_session.add(t)
    db_session.commit()


def _cmd_run_server():
    """Start the Flask application on port 3000 in debug mode."""
    from flaskapp import app
    app.run(port=3000, debug=True)


def _cmd_print_threads():
    """Print ``firstmail`` and the flat mail dict of every stored thread."""
    for t in _all_threads():
        print(t.firstmail)
        print(t.mail_flat_dict())


def _cmd_print_thrd():
    """Without argument list all ``firstmail`` values; with one, print that thread."""
    if len(sys.argv) < 3:
        for t in _all_threads():
            print(t.firstmail)
    else:
        # The argument is passed to the query unconverted, as in the original.
        t = _thread_by_firstmail(sys.argv[2])
        print(t.firstmail)
        print(t.subject())
        print(t.text())


def _cmd_compile_threads():
    """Call ``compile()`` on every stored thread."""
    for t in _all_threads():
        t.compile()


def _cmd_print_threads2():
    """Print the ``to_text()`` rendering of every stored thread."""
    for t in _all_threads():
        print(t.to_text())
        print("---------------\n")


def _cmd_train_thrd2():
    """Train the "maintopic" label of one thread with pipeline "pipe2"."""
    p, le = get_pipe("pipe2", "maintopic")
    train_single_thread(_int_arg(), p, le, b"maintopic")


def _cmd_train_thrd3():
    """Train the "lang" label of one thread with pipeline "pipe2b"."""
    pb, lb = get_pipe("pipe2b", "lang")
    train_single_thread(_int_arg(), pb, lb, b"lang")


def _cmd_train_all2():
    """Walk all threads lacking a "maintopic" training entry and train them."""
    # NOTE(review): the original called train_fit_pipe2()/train_fit_pipe2b(),
    # which are neither defined nor imported in this file (NameError).  They
    # are replaced by get_pipe(), mirroring "train_thrd2"/"predict_threads" --
    # confirm this is the intended equivalent.
    p, labelencoder = get_pipe("pipe2", b"maintopic")
    pb, lb = get_pipe("pipe2b", b"maintopic")
    mth = _all_threads()
    print(mth)
    for t in mth:
        if not in_training(t.firstmail, "maintopic"):
            print(_TRAINING_SEPARATOR)
            print(_TRAINING_SEPARATOR)
            print(t.firstmail)
            print(t.text())
            predict_thread(pb, lb, t)
            train_single_thread(t.firstmail, p, labelencoder, b"maintopic")


def _cmd_benchpipe2():
    """Run ``test_pipe`` over the three "maintopic" pipelines."""
    test_pipe(["pipe2", "pipe2b", "pipe2c"], "maintopic")


def _cmd_testpipe2():
    """Load pipeline "pipe2" and, if a thread is given, print its prediction."""
    # Import kept from the original; the names are not used here.
    from classifier import ThreadSubjectExtractor, ThreadTextExtractor
    # NOTE(review): replaces the undefined train_fit_pipe2(); see
    # _cmd_train_all2.
    pipe2, le = get_pipe("pipe2", b"maintopic")
    if len(sys.argv) > 2:
        t = _thread_by_firstmail(sys.argv[2])
        print(t.to_text())
        print(le.inverse_transform(pipe2.predict([t])))


def _cmd_train_thrd():
    """Train one thread with pipeline "pipe1"."""
    # NOTE(review): replaces the undefined train_fit_pipe(); "pipe1" with the
    # "answered" label is what "predict_threads" uses -- confirm.
    pipe1, labelencoder = get_pipe("pipe1", b"answered")
    train_single_thread(_int_arg(), pipe1, labelencoder)


def _cmd_train_all():
    """Walk all threads that are not yet in the training data and train them."""
    # NOTE(review): replaces the undefined train_fit_pipe(); see
    # _cmd_train_thrd.
    pipe1, labelencoder = get_pipe("pipe1", b"answered")
    mth = _all_threads()
    print(mth)
    for t in mth:
        if not in_training(t.firstmail):
            print(_TRAINING_SEPARATOR)
            print(_TRAINING_SEPARATOR)
            print(t.firstmail)
            train_single_thread(t.firstmail, pipe1, labelencoder)


def _cmd_print_thread():
    """Print the mail dicts and the flat mail dict of one thread."""
    mth = _thread_by_firstmail(_int_arg())
    print(mth.mail_dicts())
    print(mth.mail_flat_dict())


def _cmd_store_threads():
    """Fetch all threads and create or update their ``MailThread`` rows."""
    thrds = flatten_threads(fetch_threads())
    for t in thrds:
        # Only entries whose first element is a plain int are stored.
        if type(t[0]) is int:
            th = (db_session.query(MailThread)
                  .filter(MailThread.firstmail == t[0])
                  .first())
            if th is None:
                th = MailThread()
                th.firstmail = t[0]
            # NOTE(review): the nesting of the original if/else was lost in
            # the collapsed source; this reading is "create if missing, then
            # reset the flags whenever the stored body differs" -- confirm.
            dumped = yaml.dump(t)
            if th.body != dumped:
                th.body = dumped
                th.islabeled = False
                th.opened = True
            db_session.add(th)
            db_session.commit()
    print(thrds)


def _cmd_print_raw_mail():
    """Print the deserialised envelope of one stored mail."""
    mm = _mail_by_id(_int_arg())
    # SECURITY: yaml.load can construct arbitrary objects; it is only
    # acceptable here as long as the stored envelope is trusted.
    print(yaml.load(mm.envelope))


def _cmd_print_mail():
    """Print subject and text of one stored mail."""
    mm = _mail_by_id(_int_arg())
    mm.compile_text()
    mm.compile_envelope()
    print(mm.subject)
    print("----------")
    print(mm.text)


def _cmd_mail_dict_test():
    """Print the envelope dict of one stored mail."""
    mm = _mail_by_id(_int_arg())
    mm.compile_envelope()
    print(mm.dict_envelope())


def _cmd_load_mail():
    """Print the text, the envelope subject and the envelope of one mail."""
    mm = _mail_by_id(_int_arg())
    mm.compile_text()
    print(mm.text)
    # SECURITY: see _cmd_print_raw_mail about yaml.load on stored data.
    env = yaml.load(mm.envelope)
    print(env.subject)
    print(env)


def _cmd_store_mail():
    """Fetch one mail and store its envelope and body as YAML."""
    m = fetch_mail(_int_arg())
    mm = Mail()
    mm.envelope = yaml.dump(m['ENVELOPE'])
    mm.body = yaml.dump(m['RFC822'])
    mm.id = m['id']
    db_session.add(mm)
    db_session.commit()


def _cmd_fetch_mail():
    """Fetch one mail and print its subject, sender, recipients and parts."""
    mail_id = _int_arg()
    print("fetching mail %d " % mail_id)
    m = fetch_mail(mail_id)
    envelope = m['ENVELOPE']
    # decode_header yields (text, charset) pairs; decode only when a charset
    # is given.
    subject_parts = [
        text.decode(charset) if charset is not None else text
        for text, charset in decode_header(envelope.subject)
    ]
    print("\nBetreff:")
    for part in subject_parts:
        print(part)
    print("FROM:")
    for sender in envelope.from_:
        print(sender)
    print("TO:")
    for recipient in envelope.to:
        print(recipient)
    em = email.message_from_string(m['RFC822'])
    for p in em.walk():
        maintype = p.get_content_maintype()
        if maintype in ("text", "multipart"):
            print(p.get_payload())
        else:
            # For other parts only the main content type is shown.
            print(maintype)


def _cmd_initdb():
    """Create the database via ``storage.init_db``."""
    from storage import init_db
    init_db()


# Maps the command name given as sys.argv[1] to its handler.
COMMANDS = {
    "fetch_threads": _cmd_fetch_threads,
    "predict_threads": _cmd_predict_threads,
    "run_server": _cmd_run_server,
    "print_threads": _cmd_print_threads,
    "print_thrd": _cmd_print_thrd,
    "compile_threads": _cmd_compile_threads,
    "print_threads2": _cmd_print_threads2,
    "train_thrd2": _cmd_train_thrd2,
    "train_thrd3": _cmd_train_thrd3,
    "train_all2": _cmd_train_all2,
    "benchpipe2": _cmd_benchpipe2,
    "testpipe2": _cmd_testpipe2,
    "train_thrd": _cmd_train_thrd,
    "train_all": _cmd_train_all,
    "print_thread": _cmd_print_thread,
    "store_threads": _cmd_store_threads,
    "print_raw_mail": _cmd_print_raw_mail,
    "print_mail": _cmd_print_mail,
    "mail_dict_test": _cmd_mail_dict_test,
    "load_mail": _cmd_load_mail,
    "store_mail": _cmd_store_mail,
    "fetch_mail": _cmd_fetch_mail,
    "initdb": _cmd_initdb,
}

#print "arg1:"+sys.argv[1]
if len(sys.argv) > 1:
    _handler = COMMANDS.get(sys.argv[1])
    # Unknown commands are silently ignored, as in the original if-chain.
    if _handler is not None:
        _handler()