from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer from sklearn.naive_bayes import MultinomialNB from sklearn.pipeline import Pipeline text_clf = Pipeline([('vect', CountVectorizer()),('tfidf', TfidfTransformer()),('clf', MultinomialNB())]) import sys import yaml with open("example_1.yaml", 'r') as stream: try: train=yaml.safe_load(stream) except yaml.YAMLError as exc: print(exc) tc=text_clf.fit(train["data"],train["target"]) print(sys.argv[1]) answ=(tc.predict([sys.argv[1]]))[0] print train["target_names"][answ] for i in range(0, (len(train["target_names"]))): print (str(i)+" "+ train["target_names"][i]) ca=int(raw_input("Correct answer..")) if ca == answ: print ("Yes I got it right") else: print("should I remember this?") a=raw_input("shoudIrememberthis?") if a == "y": train["data"].append(sys.argv[1]) train["target"].append(ca) print yaml.dump(train,default_flow_style=False) file=open("example_1.yaml","w") file.write(yaml.dump(train,default_flow_style=False)) file.close() else: print ("Ok, I already forgot")