自然言語処理データセットfetch_20newsgroupsから学習させたモデルをpickle保存した。pickle loadして文章を学習に基づいて分類させようとしたが、Vocabulary not fitted or providedというエラー発生。
以下エラーメッセージ全行です。
Traceback (most recent call last):
File "<ipython-input-2-694698f5959c>", line 1, in <module>
runfile('C:/pyfile3/AI_nlp_categories3.py', wdir='C:/pyfile3')
File "C:\Users\BDS001\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\BDS001\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/pyfile3/AI_nlp_categories3.py", line 28, in <module>
nlpcat.model_run(input_data)
File "C:/pyfile3/AI_nlp_categories3.py", line 13, in model_run
input_tc=count_vectorizer.transform(data)
File "C:\Users\BDS001\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py", line 1267, in transform
self._check_vocabulary()
File "C:\Users\BDS001\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py", line 490, in _check_vocabulary
raise NotFittedError("Vocabulary not fitted or provided")
NotFittedError: Vocabulary not fitted or provided
python3
1#学習プログラム 2from sklearn.datasets import fetch_20newsgroups 3from sklearn.naive_bayes import MultinomialNB 4from sklearn.feature_extraction.text import TfidfTransformer 5from sklearn.feature_extraction.text import CountVectorizer 6tfidf=TfidfTransformer() 7count_vectorizer=CountVectorizer() 8import pickle 9 10class Nlp_categories: 11 def get_train_data(self,categories_): 12 training_data=fetch_20newsgroups(subset="train", 13 categories=categories_.keys(), 14 shuffle=True,random_state=5) 15 self.categ=categories_ 16 self.training_data=training_data 17 def count_terms(self): 18 train_tc=count_vectorizer.fit_transform(self.training_data.data) 19 print("Dimensions of training data;",train_tc.shape) 20 self.train_tc=train_tc 21 def train_data(self): 22 train_tfidf=tfidf.fit_transform(self.train_tc) 23 classifier=MultinomialNB().fit(train_tfidf,self.training_data.target) 24 self.classifier=classifier 25 def file_train_data(self,model_name): 26 with open(model_name,mode='wb') as mfs: 27 pickle.dump(self.classifier,mfs) 28 29if __name__ == "__main__": 30 31 category_map={"talk.politics.misc":"Politics","rec.autos":"Autos", 32 "rec.sport.hockey":"Hockey","sci.electronics":"Electronics", 33 "sci.med":"Medicine"} コード 34 35 category_model="C:/pyfile3/model_nlp_categories.pkl" 36 37 nlp_cat=Nlp_categories() 38 nlp_cat.get_train_data(category_map) 39 nlp_cat.count_terms() 40 nlp_cat.train_data() 41 nlp_cat.file_train_data(category_model) 42 43#pickle loadプログラム 44from sklearn.feature_extraction.text import TfidfTransformer 45from sklearn.feature_extraction.text import CountVectorizer 46tfidf=TfidfTransformer() 47count_vectorizer=CountVectorizer() 48import pickle 49 50class NlpCategoriesModelRun: 51 def model_load(self,model_file): 52 with open(model_file,mode="rb") as mfl: 53 self.analyzer_model=pickle.load(mfl) 54 def model_run(self,data): 55 input_tc=count_vectorizer.transform(data) 56 input_tfidf=tfidf.transform(input_tc) 57 predictions=self.analyzer_model.predict(input_tfidf) 58 59if __name__ == "__main__": 60 61 model_name="C:/pyfile3/model_nlp_categories.pkl" 62 63 input_data=[ 64 "A lot of devices can operate wireless", 65 "Political debates help us understand the perspectives of both sides" 66 ] 67 68 nlpcat=NlpCategoriesModelRun() 69 nlpcat.model_load(model_name) 70 nlpcat.model_run(input_data)
回答1件
あなたの回答
tips
プレビュー