質問編集履歴
6
`clf = svm.SVC()`、`clf.fit(train_data, label_train)`、`pre = clf.predict()` を削除
test
CHANGED
File without changes
|
test
CHANGED
@@ -16,25 +16,27 @@
|
|
16
16
|
|
17
17
|
|
18
18
|
|
19
|
-
・
|
19
|
+
・NameError Traceback (most recent call last)
|
20
|
-
|
20
|
+
|
21
|
-
<ipython-input-2
|
21
|
+
<ipython-input-24-b95cedfc1745> in <module>()
|
22
|
-
|
23
|
-
|
22
|
+
|
24
|
-
|
25
|
-
|
23
|
+
28 print('k={0}: {1}'.format(k, ac_score))
|
24
|
+
|
26
|
-
|
25
|
+
29
|
26
|
+
|
27
|
-
---
|
27
|
+
---> 30 accuracy_scores.append(score)
|
28
|
-
|
28
|
+
|
29
|
-
|
29
|
+
31
|
30
|
-
|
30
|
+
|
31
|
-
|
31
|
+
32 # 各経過時間を表示
|
32
|
-
|
33
|
-
|
34
|
-
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
-
|
35
|
+
NameError: name 'score' is not defined
|
36
|
+
|
37
|
+
|
38
|
+
|
36
|
-
|
39
|
+
・Digitsデータではそのようなエラーはなかった
|
37
|
-
|
38
40
|
|
39
41
|
|
40
42
|
|
@@ -158,18 +160,14 @@
|
|
158
160
|
|
159
161
|
train_data, test_data, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
|
160
162
|
|
163
|
+
|
164
|
+
|
161
165
|
K = 10
|
162
166
|
|
163
167
|
ks = range(1, K + 1)
|
164
168
|
|
165
169
|
|
166
170
|
|
167
|
-
clf = svm.SVC()
|
168
|
-
|
169
|
-
clf.fit(train_data, label_train)
|
170
|
-
|
171
|
-
pre = clf.predict()
|
172
|
-
|
173
171
|
|
174
172
|
|
175
173
|
# 使う近傍数ごとに正解率&各経過時間を計算
|
@@ -208,7 +206,7 @@
|
|
208
206
|
|
209
207
|
# 一つだけ取り除いたテストデータを識別
|
210
208
|
|
211
|
-
predicted_label = model.predict(
|
209
|
+
predicted_label = model.predict(train_data[test])
|
212
210
|
|
213
211
|
predicted_labels.append(predicted_label)
|
214
212
|
|
5
修正しました。
test
CHANGED
File without changes
|
test
CHANGED
@@ -16,6 +16,28 @@
|
|
16
16
|
|
17
17
|
|
18
18
|
|
19
|
+
・TypeError Traceback (most recent call last)
|
20
|
+
|
21
|
+
<ipython-input-20-71497c22a695> in <module>()
|
22
|
+
|
23
|
+
4 clf = svm.SVC()
|
24
|
+
|
25
|
+
5 clf.fit(train_data, label_train)
|
26
|
+
|
27
|
+
----> 6 pre = clf.predict()
|
28
|
+
|
29
|
+
7
|
30
|
+
|
31
|
+
8 # 使う近傍数ごとに正解率&各経過時間を計算
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
TypeError: predict() missing 1 required positional argument: 'X'
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
|
19
41
|
|
20
42
|
|
21
43
|
###Digitsデータで最適なkを探すknnのプログラム
|
@@ -134,9 +156,7 @@
|
|
134
156
|
|
135
157
|
test_size = 100
|
136
158
|
|
137
|
-
train_data, test_data
|
159
|
+
train_data, test_data, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
|
138
|
-
|
139
|
-
|
140
160
|
|
141
161
|
K = 10
|
142
162
|
|
@@ -146,9 +166,9 @@
|
|
146
166
|
|
147
167
|
clf = svm.SVC()
|
148
168
|
|
149
|
-
clf.fit(train_data,
|
169
|
+
clf.fit(train_data, label_train)
|
150
|
-
|
170
|
+
|
151
|
-
pre = clf.predict(
|
171
|
+
pre = clf.predict()
|
152
172
|
|
153
173
|
|
154
174
|
|
@@ -164,11 +184,13 @@
|
|
164
184
|
|
165
185
|
loo = LeaveOneOut()
|
166
186
|
|
187
|
+
|
188
|
+
|
167
189
|
for train, test in loo.split(mnist.data):
|
168
190
|
|
169
191
|
train_data = mnist.data[train]
|
170
192
|
|
171
|
-
|
193
|
+
label_train = mnist.target[train]
|
172
194
|
|
173
195
|
|
174
196
|
|
@@ -180,9 +202,9 @@
|
|
180
202
|
|
181
203
|
model = KNeighborsClassifier(n_neighbors=k)
|
182
204
|
|
183
|
-
model.fit(train_data,
|
205
|
+
model.fit(train_data, label_train)
|
184
|
-
|
185
|
-
|
206
|
+
|
207
|
+
|
186
208
|
|
187
209
|
# 一つだけ取り除いたテストデータを識別
|
188
210
|
|
4
書式の改善
test
CHANGED
File without changes
|
test
CHANGED
@@ -124,7 +124,7 @@
|
|
124
124
|
|
125
125
|
```
|
126
126
|
|
127
|
-
###mnistデータで最適なkを探すknnのプログラム
|
127
|
+
###mnistデータで最適なkを探すknnのプログラム 改善版
|
128
128
|
|
129
129
|
```python
|
130
130
|
|
@@ -134,7 +134,7 @@
|
|
134
134
|
|
135
135
|
test_size = 100
|
136
136
|
|
137
|
-
|
137
|
+
train_data, test_data_, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
|
138
138
|
|
139
139
|
|
140
140
|
|
@@ -146,7 +146,7 @@
|
|
146
146
|
|
147
147
|
clf = svm.SVC()
|
148
148
|
|
149
|
-
clf.fit(
|
149
|
+
clf.fit(train_data, target_data)
|
150
150
|
|
151
151
|
pre = clf.predict(data_test)
|
152
152
|
|
@@ -166,9 +166,9 @@
|
|
166
166
|
|
167
167
|
for train, test in loo.split(mnist.data):
|
168
168
|
|
169
|
-
|
169
|
+
train_data = mnist.data[train]
|
170
|
-
|
170
|
+
|
171
|
-
data
|
171
|
+
target_data = mnist.target[train]
|
172
172
|
|
173
173
|
|
174
174
|
|
3
誤字
test
CHANGED
File without changes
|
test
CHANGED
@@ -122,12 +122,14 @@
|
|
122
122
|
|
123
123
|
main()
|
124
124
|
|
125
|
-
|
125
|
+
```
|
126
126
|
|
127
127
|
###mnistデータで最適なkを探すknnのプログラム現段階
|
128
128
|
|
129
129
|
```python
|
130
130
|
|
131
|
+
|
132
|
+
|
131
133
|
train_size = 500
|
132
134
|
|
133
135
|
test_size = 100
|
2
書式の改善
test
CHANGED
File without changes
|
test
CHANGED
@@ -14,71 +14,13 @@
|
|
14
14
|
|
15
15
|
・容量が大きくて実行できない。
|
16
16
|
|
17
|
-
・ValueError Traceback (most recent call last)
|
18
|
-
|
19
|
-
<ipython-input-10-c9ec06272b57> in <module>()
|
20
|
-
|
21
|
-
3
|
22
|
-
|
23
|
-
4 clf = svm.SVC()
|
24
|
-
|
25
|
-
----> 5 clf.fit(data_train, label_train)
|
26
|
-
|
27
|
-
6 pre = clf.predict(data_test)
|
28
|
-
|
29
|
-
7
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
~\Anaconda3\lib\site-packages\sklearn\svm\base.py in fit(self, X, y, sample_weight)
|
34
|
-
|
35
|
-
147 self._sparse = sparse and not callable(self.kernel)
|
36
|
-
|
37
|
-
148
|
38
|
-
|
39
|
-
--> 149 X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
|
40
|
-
|
41
|
-
150 y = self._validate_targets(y)
|
42
|
-
|
43
|
-
151
|
44
|
-
|
45
|
-
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
|
46
|
-
|
47
|
-
581 y = y.astype(np.float64)
|
48
|
-
|
49
|
-
582
|
50
|
-
|
51
|
-
--> 583 check_consistent_length(X, y)
|
52
|
-
|
53
|
-
584
|
54
|
-
|
55
|
-
585 return X, y
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
|
60
|
-
|
61
|
-
202 if len(uniques) > 1:
|
62
|
-
|
63
|
-
203 raise ValueError("Found input variables with inconsistent numbers of"
|
64
|
-
|
65
|
-
--> 204 " samples: %r" % [int(l) for l in lengths])
|
66
|
-
|
67
|
-
205
|
68
|
-
|
69
|
-
206
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
ValueError: Found input variables with inconsistent numbers of samples: [69999, 500]
|
74
|
-
|
75
17
|
|
76
18
|
|
77
19
|
|
78
20
|
|
79
21
|
###Digitsデータで最適なkを探すknnのプログラム
|
80
22
|
|
81
|
-
|
23
|
+
```python
|
82
24
|
|
83
25
|
from matplotlib import pyplot as plt
|
84
26
|
|
1
現段階とエラー
test
CHANGED
File without changes
|
test
CHANGED
@@ -12,7 +12,65 @@
|
|
12
12
|
|
13
13
|
|
14
14
|
|
15
|
-
容量が大きくて実行できない。
|
15
|
+
・容量が大きくて実行できない。
|
16
|
+
|
17
|
+
・ValueError Traceback (most recent call last)
|
18
|
+
|
19
|
+
<ipython-input-10-c9ec06272b57> in <module>()
|
20
|
+
|
21
|
+
3
|
22
|
+
|
23
|
+
4 clf = svm.SVC()
|
24
|
+
|
25
|
+
----> 5 clf.fit(data_train, label_train)
|
26
|
+
|
27
|
+
6 pre = clf.predict(data_test)
|
28
|
+
|
29
|
+
7
|
30
|
+
|
31
|
+
|
32
|
+
|
33
|
+
~\Anaconda3\lib\site-packages\sklearn\svm\base.py in fit(self, X, y, sample_weight)
|
34
|
+
|
35
|
+
147 self._sparse = sparse and not callable(self.kernel)
|
36
|
+
|
37
|
+
148
|
38
|
+
|
39
|
+
--> 149 X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
|
40
|
+
|
41
|
+
150 y = self._validate_targets(y)
|
42
|
+
|
43
|
+
151
|
44
|
+
|
45
|
+
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
|
46
|
+
|
47
|
+
581 y = y.astype(np.float64)
|
48
|
+
|
49
|
+
582
|
50
|
+
|
51
|
+
--> 583 check_consistent_length(X, y)
|
52
|
+
|
53
|
+
584
|
54
|
+
|
55
|
+
585 return X, y
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
|
60
|
+
|
61
|
+
202 if len(uniques) > 1:
|
62
|
+
|
63
|
+
203 raise ValueError("Found input variables with inconsistent numbers of"
|
64
|
+
|
65
|
+
--> 204 " samples: %r" % [int(l) for l in lengths])
|
66
|
+
|
67
|
+
205
|
68
|
+
|
69
|
+
206
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
ValueError: Found input variables with inconsistent numbers of samples: [69999, 500]
|
16
74
|
|
17
75
|
|
18
76
|
|
@@ -124,6 +182,108 @@
|
|
124
182
|
|
125
183
|
|
126
184
|
|
185
|
+
###mnistデータで最適なkを探すknnのプログラム現段階
|
186
|
+
|
187
|
+
```python
|
188
|
+
|
189
|
+
train_size = 500
|
190
|
+
|
191
|
+
test_size = 100
|
192
|
+
|
193
|
+
data_train, data_test, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
K = 10
|
198
|
+
|
199
|
+
ks = range(1, K + 1)
|
200
|
+
|
201
|
+
|
202
|
+
|
203
|
+
clf = svm.SVC()
|
204
|
+
|
205
|
+
clf.fit(data_train, label_train)
|
206
|
+
|
207
|
+
pre = clf.predict(data_test)
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
# 使う近傍数ごとに正解率&各経過時間を計算
|
212
|
+
|
213
|
+
accuracy_scores = []
|
214
|
+
|
215
|
+
start = time.time()
|
216
|
+
|
217
|
+
for k in ks:
|
218
|
+
|
219
|
+
predicted_labels = []
|
220
|
+
|
221
|
+
loo = LeaveOneOut()
|
222
|
+
|
223
|
+
for train, test in loo.split(mnist.data):
|
224
|
+
|
225
|
+
data_train = mnist.data[train]
|
226
|
+
|
227
|
+
data_test = mnist.target[train]
|
228
|
+
|
229
|
+
|
230
|
+
|
231
|
+
elapsed_time = time.time() - start
|
232
|
+
|
233
|
+
|
234
|
+
|
235
|
+
# モデルを学習させる
|
236
|
+
|
237
|
+
model = KNeighborsClassifier(n_neighbors=k)
|
238
|
+
|
239
|
+
model.fit(train_data, target_data)
|
240
|
+
|
241
|
+
|
242
|
+
|
243
|
+
# 一つだけ取り除いたテストデータを識別
|
244
|
+
|
245
|
+
predicted_label = model.predict(features[test])
|
246
|
+
|
247
|
+
predicted_labels.append(predicted_label)
|
248
|
+
|
249
|
+
|
250
|
+
|
251
|
+
# 正解率を計算
|
252
|
+
|
253
|
+
ac_score = metrics.accuracy_score(label_test, pre)
|
254
|
+
|
255
|
+
print('k={0}: {1}'.format(k, ac_score))
|
256
|
+
|
257
|
+
|
258
|
+
|
259
|
+
accuracy_scores.append(score)
|
260
|
+
|
261
|
+
|
262
|
+
|
263
|
+
# 各経過時間を表示
|
264
|
+
|
265
|
+
print("経過時間:{0}".format(elapsed_time))
|
266
|
+
|
267
|
+
|
268
|
+
|
269
|
+
# 使う近傍数ごとの正解率を折れ線グラフ
|
270
|
+
|
271
|
+
X = list(ks)
|
272
|
+
|
273
|
+
plt.plot(X, ac_score)
|
274
|
+
|
275
|
+
|
276
|
+
|
277
|
+
plt.xlabel('k')
|
278
|
+
|
279
|
+
plt.ylabel('accuracy rate')
|
280
|
+
|
281
|
+
plt.show()
|
282
|
+
|
283
|
+
```
|
284
|
+
|
285
|
+
|
286
|
+
|
127
287
|
|
128
288
|
|
129
289
|
###試したこと
|