質問編集履歴
4
追記の修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -192,9 +192,9 @@
|
|
192
192
|
|
193
193
|
### 追記
|
194
194
|
|
195
|
-
以下のコードを実行すると
|
195
|
+
以下のコードを実行すると求めたい出力を得ることはできましたが、
|
196
|
-
|
196
|
+
|
197
|
-
|
197
|
+
少々冗長なので、どうすればより効率の良いコードになるか教えていただきたいです。
|
198
198
|
|
199
199
|
```python
|
200
200
|
|
@@ -214,7 +214,11 @@
|
|
214
214
|
|
215
215
|
result.extend(nltk.pos_tag(nltk.word_tokenize(word)))
|
216
216
|
|
217
|
+
print(word)
|
218
|
+
|
219
|
+
|
220
|
+
|
217
|
-
|
221
|
+
for word in result:
|
218
222
|
|
219
223
|
s = {'NN', 'NNP', 'NNPS', 'NNS'}
|
220
224
|
|
@@ -222,62 +226,24 @@
|
|
222
226
|
|
223
227
|
selected_wordsets.extend(selected_word)
|
224
228
|
|
229
|
+
break;
|
230
|
+
|
231
|
+
selected_wordsets
|
232
|
+
|
233
|
+
|
234
|
+
|
225
|
-
|
235
|
+
for word in selected_wordsets:
|
226
236
|
|
227
237
|
ans_word =[word for word, tag in selected_wordsets]
|
228
238
|
|
229
239
|
ans_wordsets.extend(ans_word)
|
230
240
|
|
241
|
+
break;
|
242
|
+
|
231
243
|
ans_wordsets
|
232
244
|
|
233
245
|
```
|
234
246
|
|
235
|
-
出力
|
236
|
-
|
237
|
-
```
|
238
|
-
|
239
|
-
['beef',
|
240
|
-
|
241
|
-
'beef',
|
242
|
-
|
243
|
-
'beef',
|
244
|
-
|
245
|
-
'chicken',
|
246
|
-
|
247
|
-
'beef',
|
248
|
-
|
249
|
-
'beef',
|
250
|
-
|
251
|
-
'chicken',
|
252
|
-
|
253
|
-
'beef',
|
254
|
-
|
255
|
-
'chicken',
|
256
|
-
|
257
|
-
'meat',
|
258
|
-
|
259
|
-
'beef',
|
260
|
-
|
261
|
-
'beef',
|
262
|
-
|
263
|
-
'chicken',
|
264
|
-
|
265
|
-
'beef',
|
266
|
-
|
267
|
-
'chicken',
|
268
|
-
|
269
|
-
'meat',
|
270
|
-
|
271
|
-
'beef',
|
272
|
-
|
273
|
-
'chicken',
|
274
|
-
|
275
|
-
'meat',
|
276
|
-
|
277
|
-
'pork']
|
278
|
-
|
279
|
-
```
|
280
|
-
|
281
247
|
|
282
248
|
|
283
249
|
|
3
追記のコードの編集
test
CHANGED
File without changes
|
test
CHANGED
@@ -198,6 +198,10 @@
|
|
198
198
|
|
199
199
|
```python
|
200
200
|
|
201
|
+
words = ['beef', 'boiled chicken', 'processed meat', 'pork']
|
202
|
+
|
203
|
+
|
204
|
+
|
201
205
|
result = []
|
202
206
|
|
203
207
|
selected_wordsets = []
|
2
追記を追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -190,6 +190,90 @@
|
|
190
190
|
|
191
191
|
```
|
192
192
|
|
193
|
+
### 追記
|
194
|
+
|
195
|
+
以下のコードを実行すると、名詞のみの単語リストは作れるのですが、
|
196
|
+
|
197
|
+
出力が繰り返されるため、あとどの点を修正すればいいか教えていただきたいです。
|
198
|
+
|
199
|
+
```python
|
200
|
+
|
201
|
+
result = []
|
202
|
+
|
203
|
+
selected_wordsets = []
|
204
|
+
|
205
|
+
ans_wordsets = []
|
206
|
+
|
207
|
+
import nltk
|
208
|
+
|
209
|
+
for word in words:
|
210
|
+
|
211
|
+
result.extend(nltk.pos_tag(nltk.word_tokenize(word)))
|
212
|
+
|
213
|
+
#for word in result:
|
214
|
+
|
215
|
+
s = {'NN', 'NNP', 'NNPS', 'NNS'}
|
216
|
+
|
217
|
+
selected_word = [(word, tag) for word, tag in result if tag in s]
|
218
|
+
|
219
|
+
selected_wordsets.extend(selected_word)
|
220
|
+
|
221
|
+
#for word in selected_wordsets:
|
222
|
+
|
223
|
+
ans_word =[word for word, tag in selected_wordsets]
|
224
|
+
|
225
|
+
ans_wordsets.extend(ans_word)
|
226
|
+
|
227
|
+
ans_wordsets
|
228
|
+
|
229
|
+
```
|
230
|
+
|
231
|
+
出力
|
232
|
+
|
233
|
+
```
|
234
|
+
|
235
|
+
['beef',
|
236
|
+
|
237
|
+
'beef',
|
238
|
+
|
239
|
+
'beef',
|
240
|
+
|
241
|
+
'chicken',
|
242
|
+
|
243
|
+
'beef',
|
244
|
+
|
245
|
+
'beef',
|
246
|
+
|
247
|
+
'chicken',
|
248
|
+
|
249
|
+
'beef',
|
250
|
+
|
251
|
+
'chicken',
|
252
|
+
|
253
|
+
'meat',
|
254
|
+
|
255
|
+
'beef',
|
256
|
+
|
257
|
+
'beef',
|
258
|
+
|
259
|
+
'chicken',
|
260
|
+
|
261
|
+
'beef',
|
262
|
+
|
263
|
+
'chicken',
|
264
|
+
|
265
|
+
'meat',
|
266
|
+
|
267
|
+
'beef',
|
268
|
+
|
269
|
+
'chicken',
|
270
|
+
|
271
|
+
'meat',
|
272
|
+
|
273
|
+
'pork']
|
274
|
+
|
275
|
+
```
|
276
|
+
|
193
277
|
|
194
278
|
|
195
279
|
|
1
エラー文の追記と現状のコードの修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -20,15 +20,61 @@
|
|
20
20
|
|
21
21
|
|
22
22
|
|
23
|
-
現状のコードだと
|
23
|
+
現状のコードだと以下のエラーが出てしまうため、
|
24
|
-
|
24
|
+
|
25
|
-
```
|
25
|
+
```
|
26
|
+
|
26
|
-
|
27
|
+
---------------------------------------------------------------------------
|
28
|
+
|
29
|
+
ValueError Traceback (most recent call last)
|
30
|
+
|
31
|
+
<ipython-input-38-45defa4ddf68> in <module>()
|
32
|
+
|
27
|
-
|
33
|
+
1 for word in result:
|
34
|
+
|
28
|
-
|
35
|
+
2 s = {'NN', 'NNP', 'NNPS', 'NNS'}
|
36
|
+
|
37
|
+
----> 3 selected_wordsets.append( [(word, tag) for word, tag in result if tag in s])
|
38
|
+
|
39
|
+
4 selected_wordsets
|
40
|
+
|
41
|
+
5
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
<ipython-input-38-45defa4ddf68> in <listcomp>(.0)
|
46
|
+
|
47
|
+
1 for word in result:
|
48
|
+
|
49
|
+
2 s = {'NN', 'NNP', 'NNPS', 'NNS'}
|
50
|
+
|
51
|
+
----> 3 selected_wordsets.append( [(word, tag) for word, tag in result if tag in s])
|
52
|
+
|
53
|
+
4 selected_wordsets
|
54
|
+
|
55
|
+
5
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
ValueError: not enough values to unpack (expected 2, got 1)
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
File "<ipython-input-39-74a608359c14>", line 2
|
66
|
+
|
67
|
+
ans.append(word for word, tag in selected_wordsets])
|
68
|
+
|
69
|
+
^
|
70
|
+
|
71
|
+
SyntaxError: invalid syntax
|
72
|
+
|
73
|
+
|
74
|
+
|
29
|
-
```
|
75
|
+
```
|
30
|
-
|
76
|
+
|
31
|
-
|
77
|
+
どのように修正すれば良いかアドバイスをいただきたいです。
|
32
78
|
|
33
79
|
|
34
80
|
|
@@ -46,19 +92,33 @@
|
|
46
92
|
|
47
93
|
result = []
|
48
94
|
|
95
|
+
selected_wordsets = []
|
96
|
+
|
97
|
+
ans = []
|
98
|
+
|
49
99
|
import nltk
|
50
100
|
|
51
101
|
for word in words:
|
52
102
|
|
53
|
-
result
|
103
|
+
result.append(nltk.pos_tag(nltk.word_tokenize(word)))
|
104
|
+
|
105
|
+
result
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
for word in result:
|
54
110
|
|
55
111
|
s = {'NN', 'NNP', 'NNPS', 'NNS'}
|
56
112
|
|
57
|
-
selected_wordsets
|
113
|
+
selected_wordsets.append( [(word, tag) for word, tag in result if tag in s])
|
114
|
+
|
58
|
-
|
115
|
+
selected_wordsets
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
for word in selected_wordsets:
|
120
|
+
|
59
|
-
ans
|
121
|
+
ans.append(word for word, tag in selected_wordsets])
|
60
|
-
|
61
|
-
result
|
62
122
|
|
63
123
|
ans
|
64
124
|
|
@@ -66,6 +126,24 @@
|
|
66
126
|
|
67
127
|
|
68
128
|
|
129
|
+
出力
|
130
|
+
|
131
|
+
```
|
132
|
+
|
133
|
+
#result
|
134
|
+
|
135
|
+
[[('beef', 'NN')],
|
136
|
+
|
137
|
+
[('boiled', 'VBN'), ('chicken', 'NN')],
|
138
|
+
|
139
|
+
[('processed', 'VBN'), ('meat', 'NN')],
|
140
|
+
|
141
|
+
[('pork', 'NN')]]
|
142
|
+
|
143
|
+
```
|
144
|
+
|
145
|
+
|
146
|
+
|
69
147
|
### 試したこと
|
70
148
|
|
71
149
|
形態素解析を行った後、名詞のみを取り出してリストに加えることはできています。
|
@@ -112,6 +190,16 @@
|
|
112
190
|
|
113
191
|
```
|
114
192
|
|
193
|
+
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
|
198
|
+
|
199
|
+
|
200
|
+
|
201
|
+
|
202
|
+
|
115
203
|
### 補足情報(FW/ツールのバージョンなど)
|
116
204
|
|
117
205
|
Python 3.6
|