質問編集履歴
1
ソースコードの位置
test
CHANGED
File without changes
|
test
CHANGED
@@ -58,193 +58,193 @@
|
|
58
58
|
|
59
59
|
```python
|
60
60
|
|
61
|
+
import gensim
|
62
|
+
|
63
|
+
import time
|
64
|
+
|
65
|
+
import MeCab
|
66
|
+
|
61
|
-
|
67
|
+
# 引数取得
|
68
|
+
|
69
|
+
import sys
|
70
|
+
|
71
|
+
from sys import argv
|
72
|
+
|
73
|
+
import jaconv
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
# Load the fastText vectors (word2vec text format) used for similarity lookups.
model = gensim.models.KeyedVectors.load_word2vec_format('model300.vec')

# Hand-picked seed words: "strongly positive" and "strongly negative" anchors.
posi_list = [
    '優れる',
    '良い',
    '神',
    '喜ぶ',
    '褒める',
    'めでたい',
    '賢い',
    '善い',
    '適す',
    '天晴',
    '祝う',
    '功績',
    '賞',
    '嬉しい',
    '喜び',
    '才知',
    '徳',
    '才能',
    '素晴らしい',
    '芳しい',
    '称える',
    '適切',
    '崇める',
    '助ける',
    '抜きんでる',
    '清水',
    '雄雄しい',
    '仕合せ',
    '幸い',
    '吉兆',
    '秀でる',
]

nega_list = [
    '悪い',
    'クソ',
    '死ぬ',
    '病気',
    '酷い',
    '罵る',
    '浸ける',
    '卑しい',
]
|
98
|
+
|
99
|
+
def posi_nega_score(x):
    """Score word ``x`` against the positive and negative seed word lists.

    For each seed list the score is the highest ``model.similarity`` between
    ``x`` and any seed word; seed words for which the lookup raises
    ``KeyError`` are skipped, and a list with no usable seed scores 0.

    Args:
        x: The word to score.

    Returns:
        A list whose length tells the caller how decisive the result is:

        * ``[posi, nega]`` when the two scores differ by less than 0.05,
        * ``[posi]`` when the positive score is larger,
        * ``[-nega]`` when the negative score is larger,
        * ``[0]`` when none of the comparisons hold.
    """

    def best_similarity(seed_words):
        # Highest similarity between x and any seed word, 0 if none resolve.
        scores = []
        for seed in seed_words:
            try:
                scores.append(model.similarity(seed, x))
            except KeyError:
                # Only a failed vocabulary lookup is skipped; any other
                # error is a real problem and must not be silently dropped.
                continue
        return max(scores, default=0)

    # Positive and negative strength (max is used instead of the mean).
    posi_mean = best_similarity(posi_list)
    nega_mean = best_similarity(nega_list)

    # Too close to call: hand both scores back so the caller can decide.
    if abs(posi_mean - nega_mean) < 0.05:
        return [posi_mean, nega_mean]
    if posi_mean > nega_mean:
        return [posi_mean]
    if nega_mean > posi_mean:
        return [-nega_mean]
    # Reached only when every comparison above is false (e.g. a NaN score).
    return [0]
|
172
|
+
|
173
|
+
|
174
|
+
|
175
|
+
row_no = 0

# Timestamp taken when the script starts; used in the output file name.
timestr = time.strftime('%Y%m%d-%H%M%S')

# Output file name
out_file_name = "zisyo_" + timestr + ".txt"

# Build the tagger once instead of once per input line.
tagger = MeCab.Tagger()

# Both files are managed by the with-statement so the input file is closed too.
with open(out_file_name, 'w') as f, \
        open("ochasen_20211109-171619.txt", "r", encoding="utf_8") as fileobj:
    for l in fileobj:
        a = l.split(' ')[0]
        parse = tagger.parse(a)

        fields = parse.split('\t')
        if len(fields) < 2:
            # No "surface<TAB>features" pair in the MeCab output
            # (e.g. only "EOS" for a blank line): nothing to record.
            continue
        b = fields[0]
        c = fields[1]
        features = c.split(',')

        # Score the word once; the result is reused for every output below.
        score = posi_nega_score(a)

        # NOTE(review): the two branches below write different layouts to
        # absdef15.txt (':'-separated vs ': p,n') and use different feature
        # columns ([1] vs [0]). Kept exactly as-is; confirm whether intended.
        if len(features) < 8:
            # Fewer than 8 feature columns: fall back to the surface form.
            henkan = b
            if len(score) == 2:
                # Two scores means "too close to call": log separately.
                with open('absdef15.txt', 'a') as f2:
                    s = b+':'+b+':'+features[1]+':'+ str(score[0])+':'+str(score[1])+'\n'
                    f2.write(s)
                continue
            s = b+':'+henkan+':'+features[1]+':'+str(score[0])+'\n'
        else:
            # Convert feature column 7 from katakana to hiragana.
            henkan = jaconv.kata2hira(features[7])
            if len(score) == 2:
                with open('absdef15.txt', 'a') as f2:
                    s = b+':'+henkan+':'+features[0]+':'+ ' '+ str(score[0])+','+str(score[1])+'\n'
                    f2.write(s)
                continue
            s = b+':'+henkan+':'+features[0]+':'+str(score[0])+'\n'

        f.write(s)
|
62
244
|
|
63
245
|
```
|
64
246
|
|
65
|
-
|
247
|
+
|
66
|
-
|
67
|
-
import time
|
68
|
-
|
69
|
-
import MeCab
|
70
|
-
|
71
|
-
# 引数取得
|
72
|
-
|
73
|
-
import sys
|
74
|
-
|
75
|
-
from sys import argv
|
76
|
-
|
77
|
-
import jaconv
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
#fasttextのモデルを読み込む
|
84
|
-
|
85
|
-
model =gensim.models.KeyedVectors.load_word2vec_format('model300.vec')
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
#「非常にポジティブな単語」と「非常にネガティブな単語」を任意で指定
|
92
|
-
|
93
|
-
posi_list = ['優れる', '良い','神','喜ぶ','褒める', 'めでたい','賢い','善い', '適す','天晴',
|
94
|
-
|
95
|
-
'祝う', '功績','賞','嬉しい','喜び','才知','徳', '才能','素晴らしい','芳しい','称える',
|
96
|
-
|
97
|
-
'適切','崇める','助ける','抜きんでる','清水','雄雄しい','仕合せ','幸い','吉兆','秀でる']
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
nega_list = ['悪い', 'クソ','死ぬ', '病気', '酷い', '罵る', '浸ける', '卑しい']
|
102
|
-
|
103
|
-
def posi_nega_score(x):
|
104
|
-
|
105
|
-
#ポジティブ度合いの判定
|
106
|
-
|
107
|
-
posi = []
|
108
|
-
|
109
|
-
for i in posi_list:
|
110
|
-
|
111
|
-
try:
|
112
|
-
|
113
|
-
n = model.similarity(i, x)
|
114
|
-
|
115
|
-
posi.append(n)
|
116
|
-
|
117
|
-
except:
|
118
|
-
|
119
|
-
continue
|
120
|
-
|
121
|
-
try:
|
122
|
-
|
123
|
-
#posi_mean = sum(posi)/len(posi)
|
124
|
-
|
125
|
-
posi_mean=max(posi)
|
126
|
-
|
127
|
-
except:
|
128
|
-
|
129
|
-
posi_mean = 0
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
#ネガティブ度合いの判定
|
134
|
-
|
135
|
-
nega = []
|
136
|
-
|
137
|
-
for i in nega_list:
|
138
|
-
|
139
|
-
try:
|
140
|
-
|
141
|
-
n = model.similarity(i, x)
|
142
|
-
|
143
|
-
nega.append(n)
|
144
|
-
|
145
|
-
except:
|
146
|
-
|
147
|
-
continue
|
148
|
-
|
149
|
-
try:
|
150
|
-
|
151
|
-
#nega_mean = sum(nega)/len(nega)
|
152
|
-
|
153
|
-
nega_mean=max(nega)
|
154
|
-
|
155
|
-
except:
|
156
|
-
|
157
|
-
nega_mean = 0
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
if abs(posi_mean-nega_mean)<0.05:
|
162
|
-
|
163
|
-
return [posi_mean,nega_mean]
|
164
|
-
|
165
|
-
if posi_mean > nega_mean:
|
166
|
-
|
167
|
-
return [posi_mean]
|
168
|
-
|
169
|
-
if nega_mean > posi_mean:
|
170
|
-
|
171
|
-
return [-nega_mean]
|
172
|
-
|
173
|
-
else:
|
174
|
-
|
175
|
-
return [0]
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
row_no = 0
|
180
|
-
|
181
|
-
#ファイル実行開始時刻を取得
|
182
|
-
|
183
|
-
timestr = time.strftime('%Y%m%d-%H%M%S')
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
#出力ファイル名
|
188
|
-
|
189
|
-
out_file_name = "zisyo_" + timestr + ".txt"
|
190
|
-
|
191
|
-
with open(out_file_name, 'w') as f:
|
192
|
-
|
193
|
-
fileobj = open("ochasen_20211109-171619.txt", "r", encoding="utf_8")
|
194
|
-
|
195
|
-
# line = fileobj.readlines()
|
196
|
-
|
197
|
-
for l in fileobj:
|
198
|
-
|
199
|
-
a = l.split(' ')[0]
|
200
|
-
|
201
|
-
tagger = MeCab.Tagger()
|
202
|
-
|
203
|
-
parse = tagger.parse(a)
|
204
|
-
|
205
|
-
b=parse.split('\t')[0]
|
206
|
-
|
207
|
-
c=parse.split('\t')[1]
|
208
|
-
|
209
|
-
if len(c.split(',')) < 8:
|
210
|
-
|
211
|
-
henkan = b
|
212
|
-
|
213
|
-
#print(b,henkan,c.split(',')[1],posi_nega_score(a))
|
214
|
-
|
215
|
-
if len(posi_nega_score(a)) ==2:
|
216
|
-
|
217
|
-
with open('absdef15.txt', 'a') as f2:
|
218
|
-
|
219
|
-
s=b+':'+b+':'+c.split(',')[1]+':'+ str(posi_nega_score(a)[0])+':'+str(posi_nega_score(a)[1])+'\n'
|
220
|
-
|
221
|
-
f2.write(s)
|
222
|
-
|
223
|
-
continue
|
224
|
-
|
225
|
-
s=b+':'+henkan+':'+c.split(',')[1]+':'+str(posi_nega_score(a)[0])+'\n'
|
226
|
-
|
227
|
-
else:
|
228
|
-
|
229
|
-
henkan = jaconv.kata2hira(c.split(',')[7])
|
230
|
-
|
231
|
-
if len(posi_nega_score(a)) ==2:
|
232
|
-
|
233
|
-
with open('absdef15.txt', 'a') as f2:
|
234
|
-
|
235
|
-
s=b+':'+henkan+':'+c.split(',')[0]+':'+ ' '+ str(posi_nega_score(a)[0])+','+str(posi_nega_score(a)[1])+'\n'
|
236
|
-
|
237
|
-
f2.write(s)
|
238
|
-
|
239
|
-
continue
|
240
|
-
|
241
|
-
#print(b,henkan,c.split(',')[0],posi_nega_score(a))
|
242
|
-
|
243
|
-
s=b+':'+henkan+':'+c.split(',')[0]+':'+str(posi_nega_score(a)[0])+'\n'
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
f.write(s)
|
248
248
|
|
249
249
|
### 試したこと
|
250
250
|
|