質問編集履歴
4
試したこと①の修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -150,11 +150,7 @@
|
|
150
150
|
|
151
151
|
from jaconv import hira2kata
|
152
152
|
|
153
|
-
|
154
|
-
|
155
|
-
|
153
|
+
data = open('sample.csv', 'r').readlines()
|
156
|
-
|
157
|
-
data = file.read() #readですべて読み込む
|
158
154
|
|
159
155
|
data_with_syn = []
|
160
156
|
|
@@ -164,6 +160,10 @@
|
|
164
160
|
|
165
161
|
|
166
162
|
|
163
|
+
#結果 ['\ufeffりんご\n', 'いぬ\n', 'いちご\n', 'くま\n', 'みかん\n', 'ねこ']
|
164
|
+
|
165
|
+
|
166
|
+
|
167
167
|
for datum in data:
|
168
168
|
|
169
169
|
ret = wn.synsets(datum, lang='jpn')
|
@@ -202,57 +202,27 @@
|
|
202
202
|
|
203
203
|
```
|
204
204
|
|
205
|
-
Unknown word:
|
205
|
+
Unknown word: リンゴ
|
206
|
-
|
207
|
-
|
206
|
+
|
208
|
-
|
209
|
-
Unknown word: ン.
|
210
|
-
|
211
|
-
Unknown word: ゴ.
|
212
|
-
|
213
|
-
Unknown word:
|
214
|
-
|
215
|
-
.
|
207
|
+
.
|
216
|
-
|
208
|
+
|
217
|
-
Unknown word: イ
|
209
|
+
Unknown word: イヌ
|
218
|
-
|
219
|
-
|
210
|
+
|
220
|
-
|
221
|
-
Unknown word:
|
222
|
-
|
223
|
-
.
|
211
|
+
.
|
224
|
-
|
212
|
+
|
225
|
-
Unknown word: イ
|
213
|
+
Unknown word: イチゴ
|
226
|
-
|
227
|
-
|
214
|
+
|
228
|
-
|
229
|
-
Unknown word: ゴ.
|
230
|
-
|
231
|
-
Unknown word:
|
232
|
-
|
233
|
-
.
|
215
|
+
.
|
234
|
-
|
216
|
+
|
235
|
-
Unknown word: ク
|
217
|
+
Unknown word: クマ
|
236
|
-
|
237
|
-
|
218
|
+
|
238
|
-
|
239
|
-
Unknown word:
|
240
|
-
|
241
|
-
.
|
219
|
+
.
|
242
|
-
|
220
|
+
|
243
|
-
Unknown word: カ
|
221
|
+
Unknown word: ミカン
|
244
|
-
|
245
|
-
|
222
|
+
|
246
|
-
|
247
|
-
Unknown word:
|
248
|
-
|
249
|
-
.
|
223
|
+
.
|
250
|
-
|
251
|
-
|
224
|
+
|
252
|
-
|
253
|
-
Unknown word: コ.
|
254
|
-
|
255
|
-
('
|
225
|
+
('ネコ', Synset('cat.n.01'))
|
256
226
|
|
257
227
|
```
|
258
228
|
|
3
ご回答を受けて試したこと①の修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -142,24 +142,26 @@
|
|
142
142
|
|
143
143
|
###ご回答を受けて試したこと
|
144
144
|
|
145
|
-
①csvから読み込んだデータでもできるかどうか試しましたが、以下の
|
145
|
+
①csvから読み込んだデータでもできるかどうか試しましたが、以下の結果が出てしまいました。
|
146
146
|
|
147
147
|
```Python
|
148
148
|
|
149
|
-
import pandas as pd
|
150
|
-
|
151
|
-
import numpy as np
|
152
|
-
|
153
149
|
from nltk.corpus import wordnet as wn
|
154
150
|
|
155
151
|
from jaconv import hira2kata
|
156
152
|
|
157
153
|
|
158
154
|
|
159
|
-
|
155
|
+
file = open('sample.csv', 'r') #読み込みモードでオープン
|
156
|
+
|
157
|
+
data = file.read() #readですべて読み込む
|
160
158
|
|
161
159
|
data_with_syn = []
|
162
160
|
|
161
|
+
|
162
|
+
|
163
|
+
print(data)
|
164
|
+
|
163
165
|
|
164
166
|
|
165
167
|
for datum in data:
|
@@ -196,19 +198,127 @@
|
|
196
198
|
|
197
199
|
|
198
200
|
|
201
|
+
結果
|
202
|
+
|
203
|
+
```
|
204
|
+
|
205
|
+
Unknown word: .
|
206
|
+
|
207
|
+
Unknown word: リ.
|
208
|
+
|
209
|
+
Unknown word: ン.
|
210
|
+
|
211
|
+
Unknown word: ゴ.
|
212
|
+
|
213
|
+
Unknown word:
|
214
|
+
|
215
|
+
.
|
216
|
+
|
217
|
+
Unknown word: イ.
|
218
|
+
|
219
|
+
Unknown word: ヌ.
|
220
|
+
|
221
|
+
Unknown word:
|
222
|
+
|
223
|
+
.
|
224
|
+
|
225
|
+
Unknown word: イ.
|
226
|
+
|
227
|
+
Unknown word: チ.
|
228
|
+
|
229
|
+
Unknown word: ゴ.
|
230
|
+
|
231
|
+
Unknown word:
|
232
|
+
|
233
|
+
.
|
234
|
+
|
235
|
+
Unknown word: ク.
|
236
|
+
|
237
|
+
Unknown word: マ.
|
238
|
+
|
239
|
+
Unknown word:
|
240
|
+
|
241
|
+
.
|
242
|
+
|
243
|
+
Unknown word: カ.
|
244
|
+
|
245
|
+
Unknown word: ン.
|
246
|
+
|
247
|
+
Unknown word:
|
248
|
+
|
249
|
+
.
|
250
|
+
|
251
|
+
Unknown word: ネ.
|
252
|
+
|
253
|
+
Unknown word: コ.
|
254
|
+
|
255
|
+
('ミ', Synset('mi.n.08'))
|
256
|
+
|
257
|
+
```
|
258
|
+
|
259
|
+
|
260
|
+
|
261
|
+
②単語を英語にした場合、jaconvは使えないため、エラー処理に困る
|
262
|
+
|
263
|
+
```python
|
264
|
+
|
265
|
+
from nltk.corpus import wordnet as wn
|
266
|
+
|
267
|
+
#from jaconv import hira2kata
|
268
|
+
|
269
|
+
|
270
|
+
|
271
|
+
data = ['apple', 'dog', "strawberry", 'bear', 'orange', 'cats']
|
272
|
+
|
273
|
+
data_with_syn = []
|
274
|
+
|
275
|
+
|
276
|
+
|
277
|
+
for datum in data:
|
278
|
+
|
279
|
+
ret = wn.synsets(datum, lang='en')
|
280
|
+
|
281
|
+
if ret:
|
282
|
+
|
283
|
+
data_with_syn.append((datum, ret[0]))
|
284
|
+
|
285
|
+
continue
|
286
|
+
|
287
|
+
|
288
|
+
|
289
|
+
#datum = hira2kata(datum)
|
290
|
+
|
291
|
+
#ret = wn.synsets(datum, lang='en')
|
292
|
+
|
293
|
+
#if ret:
|
294
|
+
|
295
|
+
#data_with_syn.append((datum, ret[0]))
|
296
|
+
|
297
|
+
# continue
|
298
|
+
|
299
|
+
|
300
|
+
|
301
|
+
print(f'Unknown word: {datum}.')
|
302
|
+
|
303
|
+
|
304
|
+
|
305
|
+
print(*data_with_syn, sep='\n')
|
306
|
+
|
307
|
+
```
|
308
|
+
|
199
309
|
エラー文
|
200
310
|
|
201
311
|
```
|
202
312
|
|
203
313
|
---------------------------------------------------------------------------
|
204
314
|
|
205
|
-
|
315
|
+
WordNetError Traceback (most recent call last)
|
206
|
-
|
316
|
+
|
207
|
-
<ipython-input-
|
317
|
+
<ipython-input-29-f740c715a42a> in <module>()
|
208
318
|
|
209
319
|
1 for datum in data:
|
210
320
|
|
211
|
-
----> 2 ret = wn.synsets(datum, lang='
|
321
|
+
----> 2 ret = wn.synsets(datum, lang='en')
|
212
322
|
|
213
323
|
3 if ret:
|
214
324
|
|
@@ -220,96 +330,6 @@
|
|
220
330
|
|
221
331
|
~/.pyenv/versions/anaconda3-5.0.1/lib/python3.6/site-packages/nltk/corpus/reader/wordnet.py in synsets(self, lemma, pos, lang, check_exceptions)
|
222
332
|
|
223
|
-
1481 of that language will be returned.
|
224
|
-
|
225
|
-
1482 """
|
226
|
-
|
227
|
-
-> 1483 lemma = lemma.lower()
|
228
|
-
|
229
|
-
1484
|
230
|
-
|
231
|
-
1485 if lang == 'eng':
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
AttributeError: 'numpy.int64' object has no attribute 'lower'
|
236
|
-
|
237
|
-
```
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
②単語を英語にした場合、jaconvは使えないため、エラー処理に困る
|
242
|
-
|
243
|
-
```python
|
244
|
-
|
245
|
-
from nltk.corpus import wordnet as wn
|
246
|
-
|
247
|
-
#from jaconv import hira2kata
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
data = ['apple', 'dog', "strawberry", 'bear', 'orange', 'cats']
|
252
|
-
|
253
|
-
data_with_syn = []
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
for datum in data:
|
258
|
-
|
259
|
-
ret = wn.synsets(datum, lang='en')
|
260
|
-
|
261
|
-
if ret:
|
262
|
-
|
263
|
-
data_with_syn.append((datum, ret[0]))
|
264
|
-
|
265
|
-
continue
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
#datum = hira2kata(datum)
|
270
|
-
|
271
|
-
#ret = wn.synsets(datum, lang='en')
|
272
|
-
|
273
|
-
#if ret:
|
274
|
-
|
275
|
-
#data_with_syn.append((datum, ret[0]))
|
276
|
-
|
277
|
-
# continue
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
print(f'Unknown word: {datum}.')
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
print(*data_with_syn, sep='\n')
|
286
|
-
|
287
|
-
```
|
288
|
-
|
289
|
-
エラー文
|
290
|
-
|
291
|
-
```
|
292
|
-
|
293
|
-
---------------------------------------------------------------------------
|
294
|
-
|
295
|
-
WordNetError Traceback (most recent call last)
|
296
|
-
|
297
|
-
<ipython-input-29-f740c715a42a> in <module>()
|
298
|
-
|
299
|
-
1 for datum in data:
|
300
|
-
|
301
|
-
----> 2 ret = wn.synsets(datum, lang='en')
|
302
|
-
|
303
|
-
3 if ret:
|
304
|
-
|
305
|
-
4 data_with_syn.append((datum, ret[0]))
|
306
|
-
|
307
|
-
5 continue
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
~/.pyenv/versions/anaconda3-5.0.1/lib/python3.6/site-packages/nltk/corpus/reader/wordnet.py in synsets(self, lemma, pos, lang, check_exceptions)
|
312
|
-
|
313
333
|
1494
|
314
334
|
|
315
335
|
1495 else:
|
2
試したことの追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -142,7 +142,7 @@
|
|
142
142
|
|
143
143
|
###ご回答を受けて試したこと
|
144
144
|
|
145
|
-
csvから読み込んだデータでもできるかどうか試しましたが、以下のようにエラーが出てしまいました。
|
145
|
+
①csvから読み込んだデータでもできるかどうか試しましたが、以下のようにエラーが出てしまいました。
|
146
146
|
|
147
147
|
```Python
|
148
148
|
|
@@ -235,3 +235,107 @@
|
|
235
235
|
AttributeError: 'numpy.int64' object has no attribute 'lower'
|
236
236
|
|
237
237
|
```
|
238
|
+
|
239
|
+
|
240
|
+
|
241
|
+
②単語を英語にした場合、jaconvは使えないため、エラー処理に困る
|
242
|
+
|
243
|
+
```python
|
244
|
+
|
245
|
+
from nltk.corpus import wordnet as wn
|
246
|
+
|
247
|
+
#from jaconv import hira2kata
|
248
|
+
|
249
|
+
|
250
|
+
|
251
|
+
data = ['apple', 'dog', "strawberry", 'bear', 'orange', 'cats']
|
252
|
+
|
253
|
+
data_with_syn = []
|
254
|
+
|
255
|
+
|
256
|
+
|
257
|
+
for datum in data:
|
258
|
+
|
259
|
+
ret = wn.synsets(datum, lang='en')
|
260
|
+
|
261
|
+
if ret:
|
262
|
+
|
263
|
+
data_with_syn.append((datum, ret[0]))
|
264
|
+
|
265
|
+
continue
|
266
|
+
|
267
|
+
|
268
|
+
|
269
|
+
#datum = hira2kata(datum)
|
270
|
+
|
271
|
+
#ret = wn.synsets(datum, lang='en')
|
272
|
+
|
273
|
+
#if ret:
|
274
|
+
|
275
|
+
#data_with_syn.append((datum, ret[0]))
|
276
|
+
|
277
|
+
# continue
|
278
|
+
|
279
|
+
|
280
|
+
|
281
|
+
print(f'Unknown word: {datum}.')
|
282
|
+
|
283
|
+
|
284
|
+
|
285
|
+
print(*data_with_syn, sep='\n')
|
286
|
+
|
287
|
+
```
|
288
|
+
|
289
|
+
エラー文
|
290
|
+
|
291
|
+
```
|
292
|
+
|
293
|
+
---------------------------------------------------------------------------
|
294
|
+
|
295
|
+
WordNetError Traceback (most recent call last)
|
296
|
+
|
297
|
+
<ipython-input-29-f740c715a42a> in <module>()
|
298
|
+
|
299
|
+
1 for datum in data:
|
300
|
+
|
301
|
+
----> 2 ret = wn.synsets(datum, lang='en')
|
302
|
+
|
303
|
+
3 if ret:
|
304
|
+
|
305
|
+
4 data_with_syn.append((datum, ret[0]))
|
306
|
+
|
307
|
+
5 continue
|
308
|
+
|
309
|
+
|
310
|
+
|
311
|
+
~/.pyenv/versions/anaconda3-5.0.1/lib/python3.6/site-packages/nltk/corpus/reader/wordnet.py in synsets(self, lemma, pos, lang, check_exceptions)
|
312
|
+
|
313
|
+
1494
|
314
|
+
|
315
|
+
1495 else:
|
316
|
+
|
317
|
+
-> 1496 self._load_lang_data(lang)
|
318
|
+
|
319
|
+
1497 synset_list = []
|
320
|
+
|
321
|
+
1498 for l in self._lang_data[lang][1][lemma]:
|
322
|
+
|
323
|
+
|
324
|
+
|
325
|
+
~/.pyenv/versions/anaconda3-5.0.1/lib/python3.6/site-packages/nltk/corpus/reader/wordnet.py in _load_lang_data(self, lang)
|
326
|
+
|
327
|
+
1135
|
328
|
+
|
329
|
+
1136 if lang not in self.langs():
|
330
|
+
|
331
|
+
-> 1137 raise WordNetError("Language is not supported.")
|
332
|
+
|
333
|
+
1138
|
334
|
+
|
335
|
+
1139 f = self._omw_reader.open('{0:}/wn-data-{0:}.tab'.format(lang))
|
336
|
+
|
337
|
+
|
338
|
+
|
339
|
+
WordNetError: Language is not supported.
|
340
|
+
|
341
|
+
```
|
1
試したことの追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -137,3 +137,101 @@
|
|
137
137
|
Mac OS High Sierra
|
138
138
|
|
139
139
|
Jupyter notebook 5.0.
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
###ご回答を受けて試したこと
|
144
|
+
|
145
|
+
csvから読み込んだデータでもできるかどうか試しましたが、以下のようにエラーが出てしまいました。
|
146
|
+
|
147
|
+
```Python
|
148
|
+
|
149
|
+
import pandas as pd
|
150
|
+
|
151
|
+
import numpy as np
|
152
|
+
|
153
|
+
from nltk.corpus import wordnet as wn
|
154
|
+
|
155
|
+
from jaconv import hira2kata
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
data = pd.read_csv('sample.csv', header=None)
|
160
|
+
|
161
|
+
data_with_syn = []
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
for datum in data:
|
166
|
+
|
167
|
+
ret = wn.synsets(datum, lang='jpn')
|
168
|
+
|
169
|
+
if ret:
|
170
|
+
|
171
|
+
data_with_syn.append((datum, ret[0]))
|
172
|
+
|
173
|
+
continue
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
datum = hira2kata(datum)
|
178
|
+
|
179
|
+
ret = wn.synsets(datum, lang='jpn')
|
180
|
+
|
181
|
+
if ret:
|
182
|
+
|
183
|
+
data_with_syn.append((datum, ret[0]))
|
184
|
+
|
185
|
+
continue
|
186
|
+
|
187
|
+
|
188
|
+
|
189
|
+
print(f'Unknown word: {datum}.')
|
190
|
+
|
191
|
+
|
192
|
+
|
193
|
+
print(*data_with_syn, sep='\n')
|
194
|
+
|
195
|
+
```
|
196
|
+
|
197
|
+
|
198
|
+
|
199
|
+
エラー文
|
200
|
+
|
201
|
+
```
|
202
|
+
|
203
|
+
---------------------------------------------------------------------------
|
204
|
+
|
205
|
+
AttributeError Traceback (most recent call last)
|
206
|
+
|
207
|
+
<ipython-input-16-9da0225fd0d7> in <module>()
|
208
|
+
|
209
|
+
1 for datum in data:
|
210
|
+
|
211
|
+
----> 2 ret = wn.synsets(datum, lang='jpn')
|
212
|
+
|
213
|
+
3 if ret:
|
214
|
+
|
215
|
+
4 data_with_syn.append((datum, ret[0]))
|
216
|
+
|
217
|
+
5 continue
|
218
|
+
|
219
|
+
|
220
|
+
|
221
|
+
~/.pyenv/versions/anaconda3-5.0.1/lib/python3.6/site-packages/nltk/corpus/reader/wordnet.py in synsets(self, lemma, pos, lang, check_exceptions)
|
222
|
+
|
223
|
+
1481 of that language will be returned.
|
224
|
+
|
225
|
+
1482 """
|
226
|
+
|
227
|
+
-> 1483 lemma = lemma.lower()
|
228
|
+
|
229
|
+
1484
|
230
|
+
|
231
|
+
1485 if lang == 'eng':
|
232
|
+
|
233
|
+
|
234
|
+
|
235
|
+
AttributeError: 'numpy.int64' object has no attribute 'lower'
|
236
|
+
|
237
|
+
```
|