Answer edit history

Revision 1 (edit)

2017/12/11 11:25, posted by mkgrei (score 8560)

test CHANGED
@@ -47,3 +47,159 @@
 Beyond that, it is also possible that MNIST simply contains a higher proportion of tricky samples, but until the possibilities above can be ruled out, it would be premature to draw that kind of conclusion.

---

Addendum:

I was curious, so I tried it.

MNIST is considerably harder.

digits is 8x8 whereas MNIST is 28x28, so the number of degrees of freedom is much higher.

For example, taking just 2000 samples from MNIST and resizing them to 8x8 gives an accuracy of roughly 98% for digits and roughly 92% for MNIST.

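The key transformation is the 28x28 to 8x8 downsampling before flattening each image into a 64-dimensional feature vector. The script below does this with scipy.misc.imresize; since that function was removed in SciPy 1.3, here is a minimal sketch of the same step using Pillow instead (Pillow and the helper name `resize_to_8x8` are assumptions for illustration, not part of the original answer):

```python
import numpy as np
from PIL import Image  # assumption: Pillow as a stand-in for scipy.misc.imresize


def resize_to_8x8(images):
    """Downsample a stack of 28x28 uint8 images to 8x8 and flatten each to 64 features."""
    small = np.array([np.asarray(Image.fromarray(img).resize((8, 8))) for img in images])
    return small.reshape(len(small), -1)


# Example with dummy data shaped like a 2000-sample MNIST subset:
dummy = np.random.randint(0, 256, size=(2000, 28, 28), dtype=np.uint8)
print(resize_to_8x8(dummy).shape)  # (2000, 64)
```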
```python
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

from sklearn import datasets
from keras.datasets import mnist

# Note: scipy.misc.imresize was removed in SciPy 1.3;
# this requires an older SciPy with Pillow installed.
from scipy.misc import imresize

import numpy as np
import matplotlib.pyplot as plt

try:
    from tqdm import tqdm
except ImportError:
    tqdm = lambda x: x  # fall back to a no-op progress wrapper


def main(key='digits', random_state=2017):
    if key == 'digits':
        # 8x8 digits bundled with scikit-learn.
        dataset = datasets.load_digits()
        X = dataset.data
        Y = dataset.target
    elif key == 'mnist':
        # Take a stratified ~2000-sample subset of the MNIST test set
        # and downsample each 28x28 image to 8x8.
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        kfold = StratifiedKFold(5, shuffle=True, random_state=0)
        tr, ts = next(kfold.split(X_test, y_test))
        X = X_test[ts]
        X = np.array([imresize(x, (8, 8)) for x in X])
        X = X.reshape(-1, np.prod(X.shape[1:]))
        Y = y_test[ts]
        Y = Y.reshape(-1)
    else:
        return [], []

    ks = np.linspace(1, 10, 5).astype('i')

    accuracy_scores = []
    for k in tqdm(ks):
        # Collect out-of-fold predictions over the whole dataset for this k.
        pY = np.zeros(Y.shape)
        kfold = StratifiedKFold(5, shuffle=True, random_state=random_state)
        for tr, ts in kfold.split(X, Y):
            x_tr = X[tr]
            y_tr = Y[tr]

            model = KNeighborsClassifier(n_neighbors=k, metric='manhattan')
            model.fit(x_tr, y_tr)

            py = model.predict(X[ts])
            pY[ts] = py

        score = accuracy_score(Y, pY)
        accuracy_scores.append(score)
    return ks, accuracy_scores


if __name__ == '__main__':
    colors = ['red', 'blue']
    for ic, key in enumerate(['digits', 'mnist']):
        # Repeat with different CV seeds to see the spread of each curve.
        for i in np.linspace(1, 1000, 10).astype('i'):
            ks, accuracy_scores = main(key=key, random_state=2017 + i)
            plt.plot(ks, accuracy_scores, marker='.', color=colors[ic])

    plt.xlabel('k')
    plt.ylabel('Accuracy')
    plt.grid()
    plt.xlim((0, np.max(ks)))
    plt.ylim((0.8, 1.0))
    plt.show()
```
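
To check the numbers without the plotting loop, `main()` can also be called directly. A minimal usage sketch, assuming it runs in the same session as the script above (the variable names are illustrative):

```python
# One cross-validated run per dataset; main() returns the tried k values and their accuracies.
ks, digit_scores = main(key='digits', random_state=2017)
ks, mnist_scores = main(key='mnist', random_state=2017)
for k, d, m in zip(ks, digit_scores, mnist_scores):
    print(f'k={k}: digits={d:.3f}, mnist={m:.3f}')
```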