26 lines
837 B
Python
26 lines
837 B
Python
from sklearn.feature_extraction.text import TfidfTransformer,CountVectorizer
|
|
from sklearn.feature_extraction import DictVectorizer
|
|
from sklearn.naive_bayes import MultinomialNB
|
|
from sklearn.pipeline import Pipeline, FeatureUnion
|
|
import sys
|
|
import yaml
|
|
from sklearn.preprocessing import OneHotEncoder
|
|
from sklearn.preprocessing import LabelEncoder
|
|
|
|
|
|
# Text classifier: raw documents -> token counts -> TF-IDF weights ->
# multinomial naive Bayes. This is the pipeline fitted at the end of the script.
text_clf = Pipeline(
    steps=[
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', MultinomialNB()),
    ]
)

# Alternative pipeline: one-hot encoded features fed into multinomial naive
# Bayes. Constructed here but not referenced in the visible part of the script.
text_ohc = Pipeline(
    steps=[
        ('ohc', OneHotEncoder()),
        ('clf', MultinomialNB()),
    ]
)

# Union of two independently configured count vectorizers. Constructed here
# but not referenced in the visible part of the script.
combined_features = FeatureUnion(
    transformer_list=[
        ('vect1', CountVectorizer()),
        ('vect2', CountVectorizer()),
    ]
)

# Stand-alone one-hot encoder instance (not referenced in the visible part of
# the script).
enc = OneHotEncoder()
|
|
# Load the training set from YAML. The mapping is indexed below with the keys
# "data" and "target", which are passed to the pipeline as X and y.
with open("example_1.yaml", 'r') as stream:
    try:
        train = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Report the parse error, then stop. Previously execution continued
        # to the fit() call with `train` unbound, which surfaced as a
        # confusing NameError instead of the real YAML problem.
        print(exc)
        raise

# Fit the text classification pipeline on the loaded samples and labels.
tc = text_clf.fit(train["data"], train["target"])