"""Fit a bag-of-words Naive Bayes text classifier on data read from a YAML file.

The script reads ``example_1.yaml``, which must be a mapping with a ``data``
entry and a ``target`` entry, and fits ``text_clf`` on them at import/run time.
"""

import sys

import yaml
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Main model: token counts -> TF-IDF weighting -> multinomial Naive Bayes.
text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
])

# Alternative pipeline that one-hot encodes its input before the classifier.
# NOTE(review): not used in the visible part of this script.
text_ohc = Pipeline([
    ('ohc', OneHotEncoder()),
    ('clf', MultinomialNB()),
])

# Concatenation of two independent count vectorizers.
# NOTE(review): not used in the visible part of this script.
combined_features = FeatureUnion([
    ('vect1', CountVectorizer()),
    ('vect2', CountVectorizer()),
])

# Stand-alone encoder instance.
# NOTE(review): not used in the visible part of this script.
enc = OneHotEncoder()

with open("example_1.yaml", 'r') as stream:
    try:
        train = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)
        # Without a parsed document `train` is unbound; stop here instead of
        # failing later with an unrelated NameError.
        sys.exit(1)

# safe_load returns None for an empty document and may return any YAML type,
# so confirm the expected mapping layout before subscripting it.
if not isinstance(train, dict) or "data" not in train or "target" not in train:
    sys.exit('example_1.yaml must be a mapping with "data" and "target" keys')

# Pipeline.fit returns the fitted pipeline itself, so `tc` is `text_clf`.
tc = text_clf.fit(train["data"], train["target"])