teratail header banner
teratail header banner
質問するログイン新規登録

質問編集履歴

2

もう一度考えます。

2019/08/15 06:23

投稿

jyon
jyon

スコア13

title CHANGED
File without changes
body CHANGED
@@ -1,20 +1,6 @@
1
- ```python
2
- import text
3
- stopwords =["そう",","/"]
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
4
3
 
5
- kw_volume=[]
6
- mid=[]
7
- for i in range(len(test1)):
8
- mid=text.count_csv_noun(sentences[i])
9
- result=[]
10
- for x in mid:
11
- if not x[0] in stopwords:
12
- x[1]=int(x[1])
13
- x = tuple(x)
14
- result.append(x)
15
- kw_volume.append(result)
16
- ```
17
- ```python(test)
18
4
  import MeCab as mc
19
5
  from collections import Counter
20
6
  from argparse import ArgumentParser
@@ -31,138 +17,4 @@
31
17
  if args.number_of_words:
32
18
  return '{}'.format(count_csv(args.number_of_words))
33
19
  if args.word_count:
34
- return '{}'.format(word_line(args.word_count))
20
+ return '{}'.format(word_line(args.word_count))
35
- #行数カウントの関数の追加
36
- def word_line(input_text):
37
- with open(input_text) as f:
38
- lines = f.readlines()
39
- return len(lines)
40
-
41
- #allバージョン
42
- def mecab_analysis_all(text):
43
- t = mc.Tagger("-Ochasen")
44
- t.parse('')
45
- node = t.parseToNode(text)
46
- output = []
47
- while node:
48
- if node.surface != "": # ヘッダとフッタを除外
49
- word_type = node.feature.split(",")[0]
50
- if word_type in [ "動詞","名詞","形容詞"]:
51
- output.append(node.surface)
52
- node = node.next
53
- if node is None:
54
- break
55
- return output
56
-
57
-
58
- #名詞バージョン
59
- def mecab_analysis_noun(text):
60
- t = mc.Tagger("-Ochasen")
61
- t.parse('')
62
- node = t.parseToNode(text)
63
- output = []
64
- while node:
65
- if node.surface != "": # ヘッダとフッタを除外
66
- word_type = node.feature.split(",")[0]
67
- if word_type in [ "名詞"]:
68
- output.append(node.surface)
69
- node = node.next
70
- if node is None:
71
- break
72
- return output
73
-
74
-
75
- #動詞バージョン
76
- def mecab_analysis_verb(text):
77
- t = mc.Tagger("-Ochasen")
78
- t.parse('')
79
- node = t.parseToNode(text)
80
- output = []
81
- while node:
82
- if node.surface != "": # ヘッダとフッタを除外
83
- word_type = node.feature.split(",")[0]
84
- if word_type in [ "動詞"]:
85
- output.append(node.surface)
86
- node = node.next
87
- if node is None:
88
- break
89
- return output
90
-
91
- #形容詞バージョン
92
- def mecab_analysis_adjective(text):
93
- t = mc.Tagger("-Ochasen")
94
- t.parse('')
95
- node = t.parseToNode(text)
96
- output = []
97
- while node:
98
- if node.surface != "": # ヘッダとフッタを除外
99
- word_type = node.feature.split(",")[0]
100
- if word_type in [ "形容詞"]:
101
- output.append(node.surface)
102
- node = node.next
103
- if node is None:
104
- break
105
- return output
106
-
107
-
108
- #inputはテキスト
109
-
110
- #all
111
- def count_csv_all(text_input):
112
- text= str(text_input)
113
- words = mecab_analysis_all(text)
114
- counter = Counter(words)
115
- output = []
116
- for word, count in counter.most_common():
117
- if len(word) > 0:
118
- middle = [word,count]
119
- output.append(middle)
120
- return output
121
-
122
- #noun
123
- def count_csv_noun(text_input):
124
- text= str(text_input)
125
- words = mecab_analysis_noun(text)
126
- counter = Counter(words)
127
- output = []
128
- for word, count in counter.most_common():
129
- if len(word) > 0:
130
- middle = [word,count]
131
- output.append(middle)
132
- return output
133
-
134
- #verb
135
- def count_csv_verb(text_input):
136
- text= str(text_input)
137
- words = mecab_analysis_verb(text)
138
- counter = Counter(words)
139
- output = []
140
- for word, count in counter.most_common():
141
- if len(word) > 0:
142
- middle = [word,count]
143
- output.append(middle)
144
- return output
145
-
146
- #adjective
147
- def count_csv_adjective(text_input):
148
- text= str(text_input)
149
- words = mecab_analysis_verb(text)
150
- counter = Counter(words)
151
- output = []
152
- for word, count in counter.most_common():
153
- if len(word) > 0:
154
- middle = [word,count]
155
- output.append(middle)
156
- return output
157
-
158
- def banner():
159
- print("単語,出現回数")
160
-
161
-
162
- if __name__ == '__main__':
163
- #banner()
164
- result = parser()
165
- print(result)
166
-
167
- ```
168
- kw_volumeとmidをprintで出力すると、何もデータが入ってないんですが、それはなぜでしょうか。

1

書式の変更

2019/08/15 06:23

投稿

jyon
jyon

スコア13

title CHANGED
File without changes
body CHANGED
@@ -1,3 +1,4 @@
1
+ ```python
1
2
  import text
2
3
  stopwords =["そう",","/"]
3
4
 
@@ -12,5 +13,156 @@
12
13
  x = tuple(x)
13
14
  result.append(x)
14
15
  kw_volume.append(result)
16
+ ```
17
+ ```python(test)
18
+ import MeCab as mc
19
+ from collections import Counter
20
+ from argparse import ArgumentParser
15
21
 
22
+
23
+ def parser():
24
+ usage = 'Usage:python3 count_word.py [-t <FILE.txt>] [--help]'\
25
+ .format(__file__)
26
+ parser = ArgumentParser(usage=usage)
27
+ parser.add_argument('-l','--word_count_line',dest='word_count',help='行数' )
28
+ parser.add_argument('-w','--number_of_words',dest='number_of_words',help='単語数' )
29
+ args = parser.parse_args()
30
+
31
+ if args.number_of_words:
32
+ return '{}'.format(count_csv(args.number_of_words))
33
+ if args.word_count:
34
+ return '{}'.format(word_line(args.word_count))
35
+ #行数カウントの関数の追加
36
+ def word_line(input_text):
37
+ with open(input_text) as f:
38
+ lines = f.readlines()
39
+ return len(lines)
40
+
41
+ #allバージョン
42
+ def mecab_analysis_all(text):
43
+ t = mc.Tagger("-Ochasen")
44
+ t.parse('')
45
+ node = t.parseToNode(text)
46
+ output = []
47
+ while node:
48
+ if node.surface != "": # ヘッダとフッタを除外
49
+ word_type = node.feature.split(",")[0]
50
+ if word_type in [ "動詞","名詞","形容詞"]:
51
+ output.append(node.surface)
52
+ node = node.next
53
+ if node is None:
54
+ break
55
+ return output
56
+
57
+
58
+ #名詞バージョン
59
+ def mecab_analysis_noun(text):
60
+ t = mc.Tagger("-Ochasen")
61
+ t.parse('')
62
+ node = t.parseToNode(text)
63
+ output = []
64
+ while node:
65
+ if node.surface != "": # ヘッダとフッタを除外
66
+ word_type = node.feature.split(",")[0]
67
+ if word_type in [ "名詞"]:
68
+ output.append(node.surface)
69
+ node = node.next
70
+ if node is None:
71
+ break
72
+ return output
73
+
74
+
75
+ #動詞バージョン
76
+ def mecab_analysis_verb(text):
77
+ t = mc.Tagger("-Ochasen")
78
+ t.parse('')
79
+ node = t.parseToNode(text)
80
+ output = []
81
+ while node:
82
+ if node.surface != "": # ヘッダとフッタを除外
83
+ word_type = node.feature.split(",")[0]
84
+ if word_type in [ "動詞"]:
85
+ output.append(node.surface)
86
+ node = node.next
87
+ if node is None:
88
+ break
89
+ return output
90
+
91
+ #形容詞バージョン
92
+ def mecab_analysis_adjective(text):
93
+ t = mc.Tagger("-Ochasen")
94
+ t.parse('')
95
+ node = t.parseToNode(text)
96
+ output = []
97
+ while node:
98
+ if node.surface != "": # ヘッダとフッタを除外
99
+ word_type = node.feature.split(",")[0]
100
+ if word_type in [ "形容詞"]:
101
+ output.append(node.surface)
102
+ node = node.next
103
+ if node is None:
104
+ break
105
+ return output
106
+
107
+
108
+ #inputはテキスト
109
+
110
+ #all
111
+ def count_csv_all(text_input):
112
+ text= str(text_input)
113
+ words = mecab_analysis_all(text)
114
+ counter = Counter(words)
115
+ output = []
116
+ for word, count in counter.most_common():
117
+ if len(word) > 0:
118
+ middle = [word,count]
119
+ output.append(middle)
120
+ return output
121
+
122
+ #noun
123
+ def count_csv_noun(text_input):
124
+ text= str(text_input)
125
+ words = mecab_analysis_noun(text)
126
+ counter = Counter(words)
127
+ output = []
128
+ for word, count in counter.most_common():
129
+ if len(word) > 0:
130
+ middle = [word,count]
131
+ output.append(middle)
132
+ return output
133
+
134
+ #verb
135
+ def count_csv_verb(text_input):
136
+ text= str(text_input)
137
+ words = mecab_analysis_verb(text)
138
+ counter = Counter(words)
139
+ output = []
140
+ for word, count in counter.most_common():
141
+ if len(word) > 0:
142
+ middle = [word,count]
143
+ output.append(middle)
144
+ return output
145
+
146
+ #adjective
147
+ def count_csv_adjective(text_input):
148
+ text= str(text_input)
149
+ words = mecab_analysis_verb(text)
150
+ counter = Counter(words)
151
+ output = []
152
+ for word, count in counter.most_common():
153
+ if len(word) > 0:
154
+ middle = [word,count]
155
+ output.append(middle)
156
+ return output
157
+
158
+ def banner():
159
+ print("単語,出現回数")
160
+
161
+
162
+ if __name__ == '__main__':
163
+ #banner()
164
+ result = parser()
165
+ print(result)
166
+
167
+ ```
16
168
  kw_volumeとmidをprintで出力すると、何もデータが入ってないんですが、それはなぜでしょうか。