質問編集履歴

4

試したこと①の修正

2018/06/18 09:39

投稿

退会済みユーザー
test CHANGED
File without changes
test CHANGED
@@ -150,11 +150,7 @@
150
150
 
151
151
  from jaconv import hira2kata
152
152
 
153
-
154
-
155
- file = open('sample.csv', 'r') #読み込みモードでオープン
153
+ data = open('sample.csv', 'r').readlines()
156
-
157
- data = file.read() #readですべて読み込む
158
154
 
159
155
  data_with_syn = []
160
156
 
@@ -164,6 +160,10 @@
164
160
 
165
161
 
166
162
 
163
+ #結果 ['\ufeffりんご\n', 'いぬ\n', 'いちご\n', 'くま\n', 'みかん\n', 'ねこ']
164
+
165
+
166
+
167
167
  for datum in data:
168
168
 
169
169
  ret = wn.synsets(datum, lang='jpn')
@@ -202,57 +202,27 @@
202
202
 
203
203
  ```
204
204
 
205
- Unknown word: .
205
+ Unknown word: リンゴ
206
-
207
- Unknown word: リ.
206
+
208
-
209
- Unknown word: ン.
210
-
211
- Unknown word: ゴ.
212
-
213
- Unknown word:
214
-
215
- .
207
+ .
216
-
208
+
217
- Unknown word: イ.
209
+ Unknown word: イ
218
-
219
- Unknown word: ヌ.
210
+
220
-
221
- Unknown word:
222
-
223
- .
211
+ .
224
-
212
+
225
- Unknown word: イ.
213
+ Unknown word: イチゴ
226
-
227
- Unknown word: チ.
214
+
228
-
229
- Unknown word: ゴ.
230
-
231
- Unknown word:
232
-
233
- .
215
+ .
234
-
216
+
235
- Unknown word: ク.
217
+ Unknown word: ク
236
-
237
- Unknown word: マ.
218
+
238
-
239
- Unknown word:
240
-
241
- .
219
+ .
242
-
220
+
243
- Unknown word: カ.
221
+ Unknown word:
244
-
245
- Unknown word: ン.
222
+
246
-
247
- Unknown word:
248
-
249
- .
223
+ .
250
-
251
- Unknown word: ネ.
224
+
252
-
253
- Unknown word: コ.
254
-
255
- ('', Synset('mi.n.08'))
225
+ ('ネコ', Synset('cat.n.01'))
256
226
 
257
227
  ```
258
228
 

3

ご回答を受けて試したこと①の修正

2018/06/18 09:39

投稿

退会済みユーザー
test CHANGED
File without changes
test CHANGED
@@ -142,24 +142,26 @@
142
142
 
143
143
  ###ご回答を受けて試したこと
144
144
 
145
- ①csvから読み込んだデータでもできるかどうか試しましたが、以下のようにエラーが出てしまいました
145
+ ①csvから読み込んだデータでもできるかどうか試しましたが、以下の結果が出てしまいました
146
146
 
147
147
  ```Python
148
148
 
149
- import pandas as pd
150
-
151
- import numpy as np
152
-
153
149
  from nltk.corpus import wordnet as wn
154
150
 
155
151
  from jaconv import hira2kata
156
152
 
157
153
 
158
154
 
159
- data = pd.read_csv('sample.csv', header=None)
155
+ file = open('sample.csv', 'r') #読み込みモードでオープン
156
+
157
+ data = file.read() #readですべて読み込む
160
158
 
161
159
  data_with_syn = []
162
160
 
161
+
162
+
163
+ print(data)
164
+
163
165
 
164
166
 
165
167
  for datum in data:
@@ -196,19 +198,127 @@
196
198
 
197
199
 
198
200
 
201
+ 結果
202
+
203
+ ```
204
+
205
+ Unknown word: .
206
+
207
+ Unknown word: リ.
208
+
209
+ Unknown word: ン.
210
+
211
+ Unknown word: ゴ.
212
+
213
+ Unknown word:
214
+
215
+ .
216
+
217
+ Unknown word: イ.
218
+
219
+ Unknown word: ヌ.
220
+
221
+ Unknown word:
222
+
223
+ .
224
+
225
+ Unknown word: イ.
226
+
227
+ Unknown word: チ.
228
+
229
+ Unknown word: ゴ.
230
+
231
+ Unknown word:
232
+
233
+ .
234
+
235
+ Unknown word: ク.
236
+
237
+ Unknown word: マ.
238
+
239
+ Unknown word:
240
+
241
+ .
242
+
243
+ Unknown word: カ.
244
+
245
+ Unknown word: ン.
246
+
247
+ Unknown word:
248
+
249
+ .
250
+
251
+ Unknown word: ネ.
252
+
253
+ Unknown word: コ.
254
+
255
+ ('ミ', Synset('mi.n.08'))
256
+
257
+ ```
258
+
259
+
260
+
261
+ ②単語を英語にした場合、jaconvは使えないため、エラー処理に困っています
262
+
263
+ ```python
264
+
265
+ from nltk.corpus import wordnet as wn
266
+
267
+ #from jaconv import hira2kata
268
+
269
+
270
+
271
+ data = ['apple', 'dog', "strawberry", 'bear', 'orange', 'cats']
272
+
273
+ data_with_syn = []
274
+
275
+
276
+
277
+ for datum in data:
278
+
279
+ ret = wn.synsets(datum, lang='en')
280
+
281
+ if ret:
282
+
283
+ data_with_syn.append((datum, ret[0]))
284
+
285
+ continue
286
+
287
+
288
+
289
+ #datum = hira2kata(datum)
290
+
291
+ #ret = wn.synsets(datum, lang='en')
292
+
293
+ #if ret:
294
+
295
+ #data_with_syn.append((datum, ret[0]))
296
+
297
+ # continue
298
+
299
+
300
+
301
+ print(f'Unknown word: {datum}.')
302
+
303
+
304
+
305
+ print(*data_with_syn, sep='\n')
306
+
307
+ ```
308
+
199
309
  エラー文
200
310
 
201
311
  ```
202
312
 
203
313
  ---------------------------------------------------------------------------
204
314
 
205
- AttributeError Traceback (most recent call last)
315
+ WordNetError Traceback (most recent call last)
206
-
316
+
207
- <ipython-input-16-9da0225fd0d7> in <module>()
317
+ <ipython-input-29-f740c715a42a> in <module>()
208
318
 
209
319
  1 for datum in data:
210
320
 
211
- ----> 2 ret = wn.synsets(datum, lang='jpn')
321
+ ----> 2 ret = wn.synsets(datum, lang='en')
212
322
 
213
323
  3 if ret:
214
324
 
@@ -220,96 +330,6 @@
220
330
 
221
331
  ~/.pyenv/versions/anaconda3-5.0.1/lib/python3.6/site-packages/nltk/corpus/reader/wordnet.py in synsets(self, lemma, pos, lang, check_exceptions)
222
332
 
223
- 1481 of that language will be returned.
224
-
225
- 1482 """
226
-
227
- -> 1483 lemma = lemma.lower()
228
-
229
- 1484
230
-
231
- 1485 if lang == 'eng':
232
-
233
-
234
-
235
- AttributeError: 'numpy.int64' object has no attribute 'lower'
236
-
237
- ```
238
-
239
-
240
-
241
- ②単語を英語にした場合、jaconvは使えないため、エラー処理に困る
242
-
243
- ```python
244
-
245
- from nltk.corpus import wordnet as wn
246
-
247
- #from jaconv import hira2kata
248
-
249
-
250
-
251
- data = ['apple', 'dog', "strawberry", 'bear', 'orange', 'cats']
252
-
253
- data_with_syn = []
254
-
255
-
256
-
257
- for datum in data:
258
-
259
- ret = wn.synsets(datum, lang='en')
260
-
261
- if ret:
262
-
263
- data_with_syn.append((datum, ret[0]))
264
-
265
- continue
266
-
267
-
268
-
269
- #datum = hira2kata(datum)
270
-
271
- #ret = wn.synsets(datum, lang='en')
272
-
273
- #if ret:
274
-
275
- #data_with_syn.append((datum, ret[0]))
276
-
277
- # continue
278
-
279
-
280
-
281
- print(f'Unknown word: {datum}.')
282
-
283
-
284
-
285
- print(*data_with_syn, sep='\n')
286
-
287
- ```
288
-
289
- エラー文
290
-
291
- ```
292
-
293
- ---------------------------------------------------------------------------
294
-
295
- WordNetError Traceback (most recent call last)
296
-
297
- <ipython-input-29-f740c715a42a> in <module>()
298
-
299
- 1 for datum in data:
300
-
301
- ----> 2 ret = wn.synsets(datum, lang='en')
302
-
303
- 3 if ret:
304
-
305
- 4 data_with_syn.append((datum, ret[0]))
306
-
307
- 5 continue
308
-
309
-
310
-
311
- ~/.pyenv/versions/anaconda3-5.0.1/lib/python3.6/site-packages/nltk/corpus/reader/wordnet.py in synsets(self, lemma, pos, lang, check_exceptions)
312
-
313
333
  1494
314
334
 
315
335
  1495 else:

2

試したことの追記

2018/06/18 09:29

投稿

退会済みユーザー
test CHANGED
File without changes
test CHANGED
@@ -142,7 +142,7 @@
142
142
 
143
143
  ###ご回答を受けて試したこと
144
144
 
145
- csvから読み込んだデータでもできるかどうか試しましたが、以下のようにエラーが出てしまいました。
145
+ csvから読み込んだデータでもできるかどうか試しましたが、以下のようにエラーが出てしまいました。
146
146
 
147
147
  ```Python
148
148
 
@@ -235,3 +235,107 @@
235
235
  AttributeError: 'numpy.int64' object has no attribute 'lower'
236
236
 
237
237
  ```
238
+
239
+
240
+
241
+ ②単語を英語にした場合、jaconvは使えないため、エラー処理に困る
242
+
243
+ ```python
244
+
245
+ from nltk.corpus import wordnet as wn
246
+
247
+ #from jaconv import hira2kata
248
+
249
+
250
+
251
+ data = ['apple', 'dog', "strawberry", 'bear', 'orange', 'cats']
252
+
253
+ data_with_syn = []
254
+
255
+
256
+
257
+ for datum in data:
258
+
259
+ ret = wn.synsets(datum, lang='en')
260
+
261
+ if ret:
262
+
263
+ data_with_syn.append((datum, ret[0]))
264
+
265
+ continue
266
+
267
+
268
+
269
+ #datum = hira2kata(datum)
270
+
271
+ #ret = wn.synsets(datum, lang='en')
272
+
273
+ #if ret:
274
+
275
+ #data_with_syn.append((datum, ret[0]))
276
+
277
+ # continue
278
+
279
+
280
+
281
+ print(f'Unknown word: {datum}.')
282
+
283
+
284
+
285
+ print(*data_with_syn, sep='\n')
286
+
287
+ ```
288
+
289
+ エラー文
290
+
291
+ ```
292
+
293
+ ---------------------------------------------------------------------------
294
+
295
+ WordNetError Traceback (most recent call last)
296
+
297
+ <ipython-input-29-f740c715a42a> in <module>()
298
+
299
+ 1 for datum in data:
300
+
301
+ ----> 2 ret = wn.synsets(datum, lang='en')
302
+
303
+ 3 if ret:
304
+
305
+ 4 data_with_syn.append((datum, ret[0]))
306
+
307
+ 5 continue
308
+
309
+
310
+
311
+ ~/.pyenv/versions/anaconda3-5.0.1/lib/python3.6/site-packages/nltk/corpus/reader/wordnet.py in synsets(self, lemma, pos, lang, check_exceptions)
312
+
313
+ 1494
314
+
315
+ 1495 else:
316
+
317
+ -> 1496 self._load_lang_data(lang)
318
+
319
+ 1497 synset_list = []
320
+
321
+ 1498 for l in self._lang_data[lang][1][lemma]:
322
+
323
+
324
+
325
+ ~/.pyenv/versions/anaconda3-5.0.1/lib/python3.6/site-packages/nltk/corpus/reader/wordnet.py in _load_lang_data(self, lang)
326
+
327
+ 1135
328
+
329
+ 1136 if lang not in self.langs():
330
+
331
+ -> 1137 raise WordNetError("Language is not supported.")
332
+
333
+ 1138
334
+
335
+ 1139 f = self._omw_reader.open('{0:}/wn-data-{0:}.tab'.format(lang))
336
+
337
+
338
+
339
+ WordNetError: Language is not supported.
340
+
341
+ ```

1

試したことの追記

2018/06/18 06:34

投稿

退会済みユーザー
test CHANGED
File without changes
test CHANGED
@@ -137,3 +137,101 @@
137
137
  macOS High Sierra
138
138
 
139
139
  Jupyter Notebook 5.0
140
+
141
+
142
+
143
+ ###ご回答を受けて試したこと
144
+
145
+ csvから読み込んだデータでもできるかどうか試しましたが、以下のようにエラーが出てしまいました。
146
+
147
+ ```Python
148
+
149
+ import pandas as pd
150
+
151
+ import numpy as np
152
+
153
+ from nltk.corpus import wordnet as wn
154
+
155
+ from jaconv import hira2kata
156
+
157
+
158
+
159
+ data = pd.read_csv('sample.csv', header=None)
160
+
161
+ data_with_syn = []
162
+
163
+
164
+
165
+ for datum in data:
166
+
167
+ ret = wn.synsets(datum, lang='jpn')
168
+
169
+ if ret:
170
+
171
+ data_with_syn.append((datum, ret[0]))
172
+
173
+ continue
174
+
175
+
176
+
177
+ datum = hira2kata(datum)
178
+
179
+ ret = wn.synsets(datum, lang='jpn')
180
+
181
+ if ret:
182
+
183
+ data_with_syn.append((datum, ret[0]))
184
+
185
+ continue
186
+
187
+
188
+
189
+ print(f'Unknown word: {datum}.')
190
+
191
+
192
+
193
+ print(*data_with_syn, sep='\n')
194
+
195
+ ```
196
+
197
+
198
+
199
+ エラー文
200
+
201
+ ```
202
+
203
+ ---------------------------------------------------------------------------
204
+
205
+ AttributeError Traceback (most recent call last)
206
+
207
+ <ipython-input-16-9da0225fd0d7> in <module>()
208
+
209
+ 1 for datum in data:
210
+
211
+ ----> 2 ret = wn.synsets(datum, lang='jpn')
212
+
213
+ 3 if ret:
214
+
215
+ 4 data_with_syn.append((datum, ret[0]))
216
+
217
+ 5 continue
218
+
219
+
220
+
221
+ ~/.pyenv/versions/anaconda3-5.0.1/lib/python3.6/site-packages/nltk/corpus/reader/wordnet.py in synsets(self, lemma, pos, lang, check_exceptions)
222
+
223
+ 1481 of that language will be returned.
224
+
225
+ 1482 """
226
+
227
+ -> 1483 lemma = lemma.lower()
228
+
229
+ 1484
230
+
231
+ 1485 if lang == 'eng':
232
+
233
+
234
+
235
+ AttributeError: 'numpy.int64' object has no attribute 'lower'
236
+
237
+ ```