質問編集履歴
6
`clf = svm.SVC()`、`clf.fit(train_data, label_train)`、`pre = clf.predict()` の3行を削除
title
CHANGED
File without changes
|
body
CHANGED
@@ -7,16 +7,17 @@
|
|
7
7
|
|
8
8
|
・容量が大きくて実行できない。
|
9
9
|
|
10
|
-
・
|
10
|
+
・NameError Traceback (most recent call last)
|
11
|
-
<ipython-input-
|
11
|
+
<ipython-input-24-b95cedfc1745> in <module>()
|
12
|
-
4 clf = svm.SVC()
|
13
|
-
|
12
|
+
28 print('k={0}: {1}'.format(k, ac_score))
|
13
|
+
29
|
14
|
-
---
|
14
|
+
---> 30 accuracy_scores.append(score)
|
15
|
-
|
15
|
+
31
|
16
|
-
|
16
|
+
32 # 各経過時間を表示
|
17
17
|
|
18
|
-
|
18
|
+
NameError: name 'score' is not defined
|
19
19
|
|
20
|
+
・Digitsデータではそのようなエラーはなかった
|
20
21
|
|
21
22
|
|
22
23
|
###Digitsデータで最適なkを探すknnのプログラム
|
@@ -78,12 +79,10 @@
|
|
78
79
|
train_size = 500
|
79
80
|
test_size = 100
|
80
81
|
train_data, test_data, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
|
82
|
+
|
81
83
|
K = 10
|
82
84
|
ks = range(1, K + 1)
|
83
85
|
|
84
|
-
clf = svm.SVC()
|
85
|
-
clf.fit(train_data, label_train)
|
86
|
-
pre = clf.predict()
|
87
86
|
|
88
87
|
# 使う近傍数ごとに正解率&各経過時間を計算
|
89
88
|
accuracy_scores = []
|
@@ -103,7 +102,7 @@
|
|
103
102
|
model.fit(train_data, label_train)
|
104
103
|
|
105
104
|
# 一つだけ取り除いたテストデータを識別
|
106
|
-
predicted_label = model.predict(
|
105
|
+
predicted_label = model.predict(train_data[test])
|
107
106
|
predicted_labels.append(predicted_label)
|
108
107
|
|
109
108
|
# 正解率を計算
|
5
修正しました。
title
CHANGED
File without changes
|
body
CHANGED
@@ -7,7 +7,18 @@
|
|
7
7
|
|
8
8
|
・容量が大きくて実行できない。
|
9
9
|
|
10
|
+
・TypeError Traceback (most recent call last)
|
11
|
+
<ipython-input-20-71497c22a695> in <module>()
|
12
|
+
4 clf = svm.SVC()
|
13
|
+
5 clf.fit(train_data, label_train)
|
14
|
+
----> 6 pre = clf.predict()
|
15
|
+
7
|
16
|
+
8 # 使う近傍数ごとに正解率&各経過時間を計算
|
10
17
|
|
18
|
+
TypeError: predict() missing 1 required positional argument: 'X'
|
19
|
+
|
20
|
+
|
21
|
+
|
11
22
|
###Digitsデータで最適なkを探すknnのプログラム
|
12
23
|
```python
|
13
24
|
from matplotlib import pyplot as plt
|
@@ -66,14 +77,13 @@
|
|
66
77
|
|
67
78
|
train_size = 500
|
68
79
|
test_size = 100
|
69
|
-
train_data,
|
80
|
+
train_data, test_data, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
|
70
|
-
|
71
81
|
K = 10
|
72
82
|
ks = range(1, K + 1)
|
73
83
|
|
74
84
|
clf = svm.SVC()
|
75
|
-
clf.fit(train_data,
|
85
|
+
clf.fit(train_data, label_train)
|
76
|
-
pre = clf.predict(
|
86
|
+
pre = clf.predict()
|
77
87
|
|
78
88
|
# 使う近傍数ごとに正解率&各経過時間を計算
|
79
89
|
accuracy_scores = []
|
@@ -81,16 +91,17 @@
|
|
81
91
|
for k in ks:
|
82
92
|
predicted_labels = []
|
83
93
|
loo = LeaveOneOut()
|
94
|
+
|
84
95
|
for train, test in loo.split(mnist.data):
|
85
96
|
train_data = mnist.data[train]
|
86
|
-
|
97
|
+
label_train = mnist.target[train]
|
87
98
|
|
88
99
|
elapsed_time = time.time() - start
|
89
100
|
|
90
101
|
# モデルを学習させる
|
91
102
|
model = KNeighborsClassifier(n_neighbors=k)
|
92
|
-
model.fit(train_data,
|
103
|
+
model.fit(train_data, label_train)
|
93
|
-
|
104
|
+
|
94
105
|
# 一つだけ取り除いたテストデータを識別
|
95
106
|
predicted_label = model.predict(features[test])
|
96
107
|
predicted_labels.append(predicted_label)
|
4
書式の改善
title
CHANGED
File without changes
|
body
CHANGED
@@ -61,18 +61,18 @@
|
|
61
61
|
if __name__ == '__main__':
|
62
62
|
main()
|
63
63
|
```
|
64
|
-
###mnistデータで最適なkを探すknnのプログラム
|
64
|
+
###mnistデータで最適なkを探すknnのプログラム 改善版
|
65
65
|
```python
|
66
66
|
|
67
67
|
train_size = 500
|
68
68
|
test_size = 100
|
69
|
-
|
69
|
+
train_data, test_data_, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
|
70
70
|
|
71
71
|
K = 10
|
72
72
|
ks = range(1, K + 1)
|
73
73
|
|
74
74
|
clf = svm.SVC()
|
75
|
-
clf.fit(
|
75
|
+
clf.fit(train_data, target_data)
|
76
76
|
pre = clf.predict(data_test)
|
77
77
|
|
78
78
|
# 使う近傍数ごとに正解率&各経過時間を計算
|
@@ -82,8 +82,8 @@
|
|
82
82
|
predicted_labels = []
|
83
83
|
loo = LeaveOneOut()
|
84
84
|
for train, test in loo.split(mnist.data):
|
85
|
-
|
85
|
+
train_data = mnist.data[train]
|
86
|
-
|
86
|
+
target_data = mnist.target[train]
|
87
87
|
|
88
88
|
elapsed_time = time.time() - start
|
89
89
|
|
3
誤字
title
CHANGED
File without changes
|
body
CHANGED
@@ -60,9 +60,10 @@
|
|
60
60
|
|
61
61
|
if __name__ == '__main__':
|
62
62
|
main()
|
63
|
-
|
63
|
+
```
|
64
64
|
###mnistデータで最適なkを探すknnのプログラム現段階
|
65
65
|
```python
|
66
|
+
|
66
67
|
train_size = 500
|
67
68
|
test_size = 100
|
68
69
|
data_train, data_test, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
|
2
書式の改善
title
CHANGED
File without changes
|
body
CHANGED
@@ -6,39 +6,10 @@
|
|
6
6
|
###発生している問題・エラーメッセージ
|
7
7
|
|
8
8
|
・容量が大きくて実行できない。
|
9
|
-
・ValueError Traceback (most recent call last)
|
10
|
-
<ipython-input-10-c9ec06272b57> in <module>()
|
11
|
-
3
|
12
|
-
4 clf = svm.SVC()
|
13
|
-
----> 5 clf.fit(data_train, label_train)
|
14
|
-
6 pre = clf.predict(data_test)
|
15
|
-
7
|
16
9
|
|
17
|
-
~\Anaconda3\lib\site-packages\sklearn\svm\base.py in fit(self, X, y, sample_weight)
|
18
|
-
147 self._sparse = sparse and not callable(self.kernel)
|
19
|
-
148
|
20
|
-
--> 149 X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
|
21
|
-
150 y = self._validate_targets(y)
|
22
|
-
151
|
23
|
-
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
|
24
|
-
581 y = y.astype(np.float64)
|
25
|
-
582
|
26
|
-
--> 583 check_consistent_length(X, y)
|
27
|
-
584
|
28
|
-
585 return X, y
|
29
10
|
|
30
|
-
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
|
31
|
-
202 if len(uniques) > 1:
|
32
|
-
203 raise ValueError("Found input variables with inconsistent numbers of"
|
33
|
-
--> 204 " samples: %r" % [int(l) for l in lengths])
|
34
|
-
205
|
35
|
-
206
|
36
|
-
|
37
|
-
ValueError: Found input variables with inconsistent numbers of samples: [69999, 500]
|
38
|
-
|
39
|
-
|
40
11
|
###Digitsデータで最適なkを探すknnのプログラム
|
41
|
-
|
12
|
+
```python
|
42
13
|
from matplotlib import pyplot as plt
|
43
14
|
from sklearn import datasets
|
44
15
|
from sklearn.model_selection import LeaveOneOut
|
1
現段階とエラー
title
CHANGED
File without changes
|
body
CHANGED
@@ -5,9 +5,38 @@
|
|
5
5
|
|
6
6
|
###発生している問題・エラーメッセージ
|
7
7
|
|
8
|
-
容量が大きくて実行できない。
|
8
|
+
・容量が大きくて実行できない。
|
9
|
+
・ValueError Traceback (most recent call last)
|
10
|
+
<ipython-input-10-c9ec06272b57> in <module>()
|
11
|
+
3
|
12
|
+
4 clf = svm.SVC()
|
13
|
+
----> 5 clf.fit(data_train, label_train)
|
14
|
+
6 pre = clf.predict(data_test)
|
15
|
+
7
|
9
16
|
|
17
|
+
~\Anaconda3\lib\site-packages\sklearn\svm\base.py in fit(self, X, y, sample_weight)
|
18
|
+
147 self._sparse = sparse and not callable(self.kernel)
|
19
|
+
148
|
20
|
+
--> 149 X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
|
21
|
+
150 y = self._validate_targets(y)
|
22
|
+
151
|
23
|
+
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
|
24
|
+
581 y = y.astype(np.float64)
|
25
|
+
582
|
26
|
+
--> 583 check_consistent_length(X, y)
|
27
|
+
584
|
28
|
+
585 return X, y
|
10
29
|
|
30
|
+
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
|
31
|
+
202 if len(uniques) > 1:
|
32
|
+
203 raise ValueError("Found input variables with inconsistent numbers of"
|
33
|
+
--> 204 " samples: %r" % [int(l) for l in lengths])
|
34
|
+
205
|
35
|
+
206
|
36
|
+
|
37
|
+
ValueError: Found input variables with inconsistent numbers of samples: [69999, 500]
|
38
|
+
|
39
|
+
|
11
40
|
###Digitsデータで最適なkを探すknnのプログラム
|
12
41
|
|
13
42
|
from matplotlib import pyplot as plt
|
@@ -61,7 +90,58 @@
|
|
61
90
|
if __name__ == '__main__':
|
62
91
|
main()
|
63
92
|
|
93
|
+
###mnistデータで最適なkを探すknnのプログラム現段階
|
94
|
+
```python
|
95
|
+
train_size = 500
|
96
|
+
test_size = 100
|
97
|
+
data_train, data_test, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
|
64
98
|
|
99
|
+
K = 10
|
100
|
+
ks = range(1, K + 1)
|
101
|
+
|
102
|
+
clf = svm.SVC()
|
103
|
+
clf.fit(data_train, label_train)
|
104
|
+
pre = clf.predict(data_test)
|
105
|
+
|
106
|
+
# 使う近傍数ごとに正解率&各経過時間を計算
|
107
|
+
accuracy_scores = []
|
108
|
+
start = time.time()
|
109
|
+
for k in ks:
|
110
|
+
predicted_labels = []
|
111
|
+
loo = LeaveOneOut()
|
112
|
+
for train, test in loo.split(mnist.data):
|
113
|
+
data_train = mnist.data[train]
|
114
|
+
data_test = mnist.target[train]
|
115
|
+
|
116
|
+
elapsed_time = time.time() - start
|
117
|
+
|
118
|
+
# モデルを学習させる
|
119
|
+
model = KNeighborsClassifier(n_neighbors=k)
|
120
|
+
model.fit(train_data, target_data)
|
121
|
+
|
122
|
+
# 一つだけ取り除いたテストデータを識別
|
123
|
+
predicted_label = model.predict(features[test])
|
124
|
+
predicted_labels.append(predicted_label)
|
125
|
+
|
126
|
+
# 正解率を計算
|
127
|
+
ac_score = metrics.accuracy_score(label_test, pre)
|
128
|
+
print('k={0}: {1}'.format(k, ac_score))
|
129
|
+
|
130
|
+
accuracy_scores.append(score)
|
131
|
+
|
132
|
+
# 各経過時間を表示
|
133
|
+
print("経過時間:{0}".format(elapsed_time))
|
134
|
+
|
135
|
+
# 使う近傍数ごとの正解率を折れ線グラフ
|
136
|
+
X = list(ks)
|
137
|
+
plt.plot(X, ac_score)
|
138
|
+
|
139
|
+
plt.xlabel('k')
|
140
|
+
plt.ylabel('accuracy rate')
|
141
|
+
plt.show()
|
142
|
+
```
|
143
|
+
|
144
|
+
|
65
145
|
###試したこと
|
66
146
|
mnistのデータ
|
67
147
|
mnist = datasets.fetch_mldata('MNIST original', data_home='data/src/download/')
|