質問編集履歴
1
tf_idfs = vectorizer.fit_transform(training_docs) に修正しました。(変更前は words(df.ix[i,"titlebeginning"]) でした)
test
CHANGED
File without changes
|
test
CHANGED
@@ -110,7 +110,9 @@
|
|
110
110
|
|
111
111
|
vectorizer = TfidfVectorizer(use_idf=True, token_pattern=u'(?u)\b\w+\b')
|
112
112
|
|
113
|
-
tf_idfs = vectorizer.fit_transform(
|
113
|
+
tf_idfs = vectorizer.fit_transform(training_docs)
|
114
|
+
|
115
|
+
print(tf_idfs)
|
114
116
|
|
115
117
|
```
|
116
118
|
|
@@ -134,4 +136,98 @@
|
|
134
136
|
|
135
137
|
|
136
138
|
|
139
|
+
エラー内容
|
140
|
+
|
141
|
+
```python
|
142
|
+
|
143
|
+
AttributeError Traceback (most recent call last)
|
144
|
+
|
145
|
+
<ipython-input-32-a49b5702c1c0> in <module>()
|
146
|
+
|
147
|
+
4 for i in range(0,len(df)):
|
148
|
+
|
149
|
+
5 vectorizer = TfidfVectorizer(use_idf=True, token_pattern=u'(?u)\b\w+\b')
|
150
|
+
|
151
|
+
----> 6 tf_idfs = vectorizer.fit_transform(training_docs)
|
152
|
+
|
153
|
+
7 print(tf_idfs)
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
~/anaconda3/envs/kenkyuu/lib/python3.6/site-packages/sklearn/feature_extraction/text.py in fit_transform(self, raw_documents, y)
|
158
|
+
|
159
|
+
1379 Tf-idf-weighted document-term matrix.
|
160
|
+
|
161
|
+
1380 """
|
162
|
+
|
163
|
+
-> 1381 X = super(TfidfVectorizer, self).fit_transform(raw_documents)
|
164
|
+
|
165
|
+
1382 self._tfidf.fit(X)
|
166
|
+
|
167
|
+
1383 # X is already a transformed view of raw_documents so
|
168
|
+
|
169
|
+
|
170
|
+
|
171
|
+
~/anaconda3/envs/kenkyuu/lib/python3.6/site-packages/sklearn/feature_extraction/text.py in fit_transform(self, raw_documents, y)
|
172
|
+
|
173
|
+
867
|
174
|
+
|
175
|
+
868 vocabulary, X = self._count_vocab(raw_documents,
|
176
|
+
|
177
|
+
--> 869 self.fixed_vocabulary_)
|
178
|
+
|
179
|
+
870
|
180
|
+
|
181
|
+
871 if self.binary:
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
~/anaconda3/envs/kenkyuu/lib/python3.6/site-packages/sklearn/feature_extraction/text.py in _count_vocab(self, raw_documents, fixed_vocab)
|
186
|
+
|
187
|
+
790 for doc in raw_documents:
|
188
|
+
|
189
|
+
791 feature_counter = {}
|
190
|
+
|
191
|
+
--> 792 for feature in analyze(doc):
|
192
|
+
|
193
|
+
793 try:
|
194
|
+
|
195
|
+
794 feature_idx = vocabulary[feature]
|
196
|
+
|
197
|
+
|
198
|
+
|
199
|
+
~/anaconda3/envs/kenkyuu/lib/python3.6/site-packages/sklearn/feature_extraction/text.py in <lambda>(doc)
|
200
|
+
|
201
|
+
264
|
202
|
+
|
203
|
+
265 return lambda doc: self._word_ngrams(
|
204
|
+
|
205
|
+
--> 266 tokenize(preprocess(self.decode(doc))), stop_words)
|
206
|
+
|
207
|
+
267
|
208
|
+
|
209
|
+
268 else:
|
210
|
+
|
211
|
+
|
212
|
+
|
213
|
+
~/anaconda3/envs/kenkyuu/lib/python3.6/site-packages/sklearn/feature_extraction/text.py in <lambda>(x)
|
214
|
+
|
215
|
+
230
|
216
|
+
|
217
|
+
231 if self.lowercase:
|
218
|
+
|
219
|
+
--> 232 return lambda x: strip_accents(x.lower())
|
220
|
+
|
221
|
+
233 else:
|
222
|
+
|
223
|
+
234 return strip_accents
|
224
|
+
|
225
|
+
|
226
|
+
|
227
|
+
AttributeError: 'TaggedDocument' object has no attribute 'lower'
|
228
|
+
|
229
|
+
```
|
230
|
+
|
231
|
+
|
232
|
+
|
137
233
|
[こちらが問題のファイルになります](https://www.dropbox.com/s/auixihg8n344voz/%E3%82%BF%E3%82%A4%E3%83%88%E3%83%AB%E3%81%A8%E5%86%92%E9%A0%AD%E4%B8%80%E8%A6%A7.csv?dl=0)
|