質問編集履歴
1
補足質問への回答を追加しました。
test
CHANGED
File without changes
|
test
CHANGED
@@ -129,3 +129,39 @@
|
|
129
129
|
1 UnicodeDecodeErrorが起きてしまっている原因箇所
|
130
130
|
|
131
131
|
2 encode問題の対処方法を教えていただきたいです。
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
補足質問への回答
|
136
|
+
|
137
|
+
1 エラー発生時のdata_train_sの中身について
|
138
|
+
|
139
|
+
["print(len([s for s in l if s.endswith('e')]))", 'onSuccess {Function}', "1' UNION ALL SELECT CONCAT(0x716b6b6a71,(CASE WHEN (EXISTS(SELECT creditcard_id FROM performance_schema.events_waits_summary_by_instance)) THEN 1 ELSE 0 END),0x716a717a71),NULL-- mwJp", 'onAfterRender {Function}', "1' UNION ALL SELECT CONCAT(0x716b6b6a71,(CASE WHEN (EXISTS(SELECT aTEC FROM zsTX)) THEN 1 ELSE 0 END),0x716a717a71),NULL-- utMa", 'select* from database where id = 1;']
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
2 Tracebackの中身
|
144
|
+
|
145
|
+
Traceback (most recent call last):
|
146
|
+
|
147
|
+
File "svm.py", line 59, in <module>
|
148
|
+
|
149
|
+
words = get_words(data_train_s)
|
150
|
+
|
151
|
+
File "svm.py", line 30, in get_words
|
152
|
+
|
153
|
+
ret.append(get_words_main(content))
|
154
|
+
|
155
|
+
File "svm.py", line 35, in get_words_main
|
156
|
+
|
157
|
+
return [token for token in tokenize(content)]
|
158
|
+
|
159
|
+
File "svm.py", line 35, in <listcomp>
|
160
|
+
|
161
|
+
return [token for token in tokenize(content)]
|
162
|
+
|
163
|
+
File "svm.py", line 22, in tokenize
|
164
|
+
|
165
|
+
yield node.surface.lower()
|
166
|
+
|
167
|
+
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb6 in position 0: invalid start byte
|