質問編集履歴

2

もう一度考えます。

2019/08/15 06:23

投稿

jyon
jyon

スコア13

test CHANGED
File without changes
test CHANGED
@@ -1,36 +1,8 @@
1
- ```python
1
+ #!/usr/bin/env python3
2
2
 
3
- import text
3
+ # -*- coding: utf-8 -*-
4
-
5
- stopwords =["そう",","/"]
6
4
 
7
5
 
8
-
9
- kw_volume=[]
10
-
11
- mid=[]
12
-
13
- for i in range(len(test1)):
14
-
15
- mid=text.count_csv_noun(sentences[i])
16
-
17
- result=[]
18
-
19
- for x in mid:
20
-
21
- if not x[0] in stopwords:
22
-
23
- x[1]=int(x[1])
24
-
25
- x = tuple(x)
26
-
27
- result.append(x)
28
-
29
- kw_volume.append(result)
30
-
31
- ```
32
-
33
- ```python(test)
34
6
 
35
7
  import MeCab as mc
36
8
 
@@ -65,271 +37,3 @@
65
37
  if args.word_count:
66
38
 
67
39
  return '{}'.format(word_line(args.word_count))
68
-
69
- #文字数カウントの関数の追加
70
-
71
- def word_line(input_text):
72
-
73
- with open(input_text) as f:
74
-
75
- lines = f.readlines()
76
-
77
- return len(lines)
78
-
79
-
80
-
81
- #allバージョン
82
-
83
- def mecab_analysis_all(text):
84
-
85
- t = mc.Tagger("-Ochasen")
86
-
87
- t.parse('')
88
-
89
- node = t.parseToNode(text)
90
-
91
- output = []
92
-
93
- while node:
94
-
95
- if node.surface != "": # ヘッダとフッタを除外
96
-
97
- word_type = node.feature.split(",")[0]
98
-
99
- if word_type in [ "動詞","名詞","形容詞"]:
100
-
101
- output.append(node.surface)
102
-
103
- node = node.next
104
-
105
- if node is None:
106
-
107
- break
108
-
109
- return output
110
-
111
-
112
-
113
-
114
-
115
- #名詞バージョン
116
-
117
- def mecab_analysis_noun(text):
118
-
119
- t = mc.Tagger("-Ochasen")
120
-
121
- t.parse('')
122
-
123
- node = t.parseToNode(text)
124
-
125
- output = []
126
-
127
- while node:
128
-
129
- if node.surface != "": # ヘッダとフッタを除外
130
-
131
- word_type = node.feature.split(",")[0]
132
-
133
- if word_type in [ "名詞"]:
134
-
135
- output.append(node.surface)
136
-
137
- node = node.next
138
-
139
- if node is None:
140
-
141
- break
142
-
143
- return output
144
-
145
-
146
-
147
-
148
-
149
- #動詞バージョン
150
-
151
- def mecab_analysis_verb(text):
152
-
153
- t = mc.Tagger("-Ochasen")
154
-
155
- t.parse('')
156
-
157
- node = t.parseToNode(text)
158
-
159
- output = []
160
-
161
- while node:
162
-
163
- if node.surface != "": # ヘッダとフッタを除外
164
-
165
- word_type = node.feature.split(",")[0]
166
-
167
- if word_type in [ "動詞"]:
168
-
169
- output.append(node.surface)
170
-
171
- node = node.next
172
-
173
- if node is None:
174
-
175
- break
176
-
177
- return output
178
-
179
-
180
-
181
- #形容詞バージョン
182
-
183
- def mecab_analysis_adjective(text):
184
-
185
- t = mc.Tagger("-Ochasen")
186
-
187
- t.parse('')
188
-
189
- node = t.parseToNode(text)
190
-
191
- output = []
192
-
193
- while node:
194
-
195
- if node.surface != "": # ヘッダとフッタを除外
196
-
197
- word_type = node.feature.split(",")[0]
198
-
199
- if word_type in [ "形容詞"]:
200
-
201
- output.append(node.surface)
202
-
203
- node = node.next
204
-
205
- if node is None:
206
-
207
- break
208
-
209
- return output
210
-
211
-
212
-
213
-
214
-
215
- #inputはテキスト
216
-
217
-
218
-
219
- #all
220
-
221
- def count_csv_all(text_input):
222
-
223
- text= str(text_input)
224
-
225
- words = mecab_analysis_all(text)
226
-
227
- counter = Counter(words)
228
-
229
- output = []
230
-
231
- for word, count in counter.most_common():
232
-
233
- if len(word) > 0:
234
-
235
- middle = [word,count]
236
-
237
- output.append(middle)
238
-
239
- return output
240
-
241
-
242
-
243
- #noun
244
-
245
- def count_csv_noun(text_input):
246
-
247
- text= str(text_input)
248
-
249
- words = mecab_analysis_noun(text)
250
-
251
- counter = Counter(words)
252
-
253
- output = []
254
-
255
- for word, count in counter.most_common():
256
-
257
- if len(word) > 0:
258
-
259
- middle = [word,count]
260
-
261
- output.append(middle)
262
-
263
- return output
264
-
265
-
266
-
267
- #verb
268
-
269
- def count_csv_verb(text_input):
270
-
271
- text= str(text_input)
272
-
273
- words = mecab_analysis_verb(text)
274
-
275
- counter = Counter(words)
276
-
277
- output = []
278
-
279
- for word, count in counter.most_common():
280
-
281
- if len(word) > 0:
282
-
283
- middle = [word,count]
284
-
285
- output.append(middle)
286
-
287
- return output
288
-
289
-
290
-
291
- #adjective
292
-
293
- def count_csv_adjective(text_input):
294
-
295
- text= str(text_input)
296
-
297
- words = mecab_analysis_verb(text)
298
-
299
- counter = Counter(words)
300
-
301
- output = []
302
-
303
- for word, count in counter.most_common():
304
-
305
- if len(word) > 0:
306
-
307
- middle = [word,count]
308
-
309
- output.append(middle)
310
-
311
- return output
312
-
313
-
314
-
315
- def banner():
316
-
317
- print("単語,出現回数")
318
-
319
-
320
-
321
-
322
-
323
- if __name__ == '__main__':
324
-
325
- #banner()
326
-
327
- result = parser()
328
-
329
- print(result)
330
-
331
-
332
-
333
- ```
334
-
335
- kw_volumeとmidをprintで出力すると、何もデータが入ってないんですが、それはなぜでしょうか。

1

書式の変更

2019/08/15 06:23

投稿

jyon
jyon

スコア13

test CHANGED
File without changes
test CHANGED
@@ -1,3 +1,5 @@
1
+ ```python
2
+
1
3
  import text
2
4
 
3
5
  stopwords =["そう",","/"]
@@ -26,6 +28,308 @@
26
28
 
27
29
  kw_volume.append(result)
28
30
 
29
-
31
+ ```
32
+
33
+ ```python(test)
34
+
35
+ import MeCab as mc
36
+
37
+ from collections import Counter
38
+
39
+ from argparse import ArgumentParser
40
+
41
+
42
+
43
+
44
+
45
+ def parser():
46
+
47
+ usage = 'Usage:python3 count_word.py [-t <FILE.txt>] [--help]'\
48
+
49
+ .format(__file__)
50
+
51
+ parser = ArgumentParser(usage=usage)
52
+
53
+ parser.add_argument('-l','--word_count_line',dest='word_count',help='行数' )
54
+
55
+ parser.add_argument('-w','--number_of_words',dest='number_of_words',help='単語数' )
56
+
57
+ args = parser.parse_args()
58
+
59
+
60
+
61
+ if args.number_of_words:
62
+
63
+ return '{}'.format(count_csv(args.number_of_words))
64
+
65
+ if args.word_count:
66
+
67
+ return '{}'.format(word_line(args.word_count))
68
+
69
+ #文字数カウントの関数の追加
70
+
71
+ def word_line(input_text):
72
+
73
+ with open(input_text) as f:
74
+
75
+ lines = f.readlines()
76
+
77
+ return len(lines)
78
+
79
+
80
+
81
+ #allバージョン
82
+
83
+ def mecab_analysis_all(text):
84
+
85
+ t = mc.Tagger("-Ochasen")
86
+
87
+ t.parse('')
88
+
89
+ node = t.parseToNode(text)
90
+
91
+ output = []
92
+
93
+ while node:
94
+
95
+ if node.surface != "": # ヘッダとフッタを除外
96
+
97
+ word_type = node.feature.split(",")[0]
98
+
99
+ if word_type in [ "動詞","名詞","形容詞"]:
100
+
101
+ output.append(node.surface)
102
+
103
+ node = node.next
104
+
105
+ if node is None:
106
+
107
+ break
108
+
109
+ return output
110
+
111
+
112
+
113
+
114
+
115
+ #名詞バージョン
116
+
117
+ def mecab_analysis_noun(text):
118
+
119
+ t = mc.Tagger("-Ochasen")
120
+
121
+ t.parse('')
122
+
123
+ node = t.parseToNode(text)
124
+
125
+ output = []
126
+
127
+ while node:
128
+
129
+ if node.surface != "": # ヘッダとフッタを除外
130
+
131
+ word_type = node.feature.split(",")[0]
132
+
133
+ if word_type in [ "名詞"]:
134
+
135
+ output.append(node.surface)
136
+
137
+ node = node.next
138
+
139
+ if node is None:
140
+
141
+ break
142
+
143
+ return output
144
+
145
+
146
+
147
+
148
+
149
+ #動詞バージョン
150
+
151
+ def mecab_analysis_verb(text):
152
+
153
+ t = mc.Tagger("-Ochasen")
154
+
155
+ t.parse('')
156
+
157
+ node = t.parseToNode(text)
158
+
159
+ output = []
160
+
161
+ while node:
162
+
163
+ if node.surface != "": # ヘッダとフッタを除外
164
+
165
+ word_type = node.feature.split(",")[0]
166
+
167
+ if word_type in [ "動詞"]:
168
+
169
+ output.append(node.surface)
170
+
171
+ node = node.next
172
+
173
+ if node is None:
174
+
175
+ break
176
+
177
+ return output
178
+
179
+
180
+
181
+ #形容詞バージョン
182
+
183
+ def mecab_analysis_adjective(text):
184
+
185
+ t = mc.Tagger("-Ochasen")
186
+
187
+ t.parse('')
188
+
189
+ node = t.parseToNode(text)
190
+
191
+ output = []
192
+
193
+ while node:
194
+
195
+ if node.surface != "": # ヘッダとフッタを除外
196
+
197
+ word_type = node.feature.split(",")[0]
198
+
199
+ if word_type in [ "形容詞"]:
200
+
201
+ output.append(node.surface)
202
+
203
+ node = node.next
204
+
205
+ if node is None:
206
+
207
+ break
208
+
209
+ return output
210
+
211
+
212
+
213
+
214
+
215
+ #inputはテキスト
216
+
217
+
218
+
219
+ #all
220
+
221
+ def count_csv_all(text_input):
222
+
223
+ text= str(text_input)
224
+
225
+ words = mecab_analysis_all(text)
226
+
227
+ counter = Counter(words)
228
+
229
+ output = []
230
+
231
+ for word, count in counter.most_common():
232
+
233
+ if len(word) > 0:
234
+
235
+ middle = [word,count]
236
+
237
+ output.append(middle)
238
+
239
+ return output
240
+
241
+
242
+
243
+ #noun
244
+
245
+ def count_csv_noun(text_input):
246
+
247
+ text= str(text_input)
248
+
249
+ words = mecab_analysis_noun(text)
250
+
251
+ counter = Counter(words)
252
+
253
+ output = []
254
+
255
+ for word, count in counter.most_common():
256
+
257
+ if len(word) > 0:
258
+
259
+ middle = [word,count]
260
+
261
+ output.append(middle)
262
+
263
+ return output
264
+
265
+
266
+
267
+ #verb
268
+
269
+ def count_csv_verb(text_input):
270
+
271
+ text= str(text_input)
272
+
273
+ words = mecab_analysis_verb(text)
274
+
275
+ counter = Counter(words)
276
+
277
+ output = []
278
+
279
+ for word, count in counter.most_common():
280
+
281
+ if len(word) > 0:
282
+
283
+ middle = [word,count]
284
+
285
+ output.append(middle)
286
+
287
+ return output
288
+
289
+
290
+
291
+ #adjective
292
+
293
+ def count_csv_adjective(text_input):
294
+
295
+ text= str(text_input)
296
+
297
+ words = mecab_analysis_verb(text)
298
+
299
+ counter = Counter(words)
300
+
301
+ output = []
302
+
303
+ for word, count in counter.most_common():
304
+
305
+ if len(word) > 0:
306
+
307
+ middle = [word,count]
308
+
309
+ output.append(middle)
310
+
311
+ return output
312
+
313
+
314
+
315
+ def banner():
316
+
317
+ print("単語,出現回数")
318
+
319
+
320
+
321
+
322
+
323
+ if __name__ == '__main__':
324
+
325
+ #banner()
326
+
327
+ result = parser()
328
+
329
+ print(result)
330
+
331
+
332
+
333
+ ```
30
334
 
31
335
  kw_volumeとmidをprintで出力すると、何もデータが入ってないんですが、それはなぜでしょうか。