MNISTのデータを分割し、ドロップアウトを使って訓練データとテストデータを比較しました。分割せずに60,000枚で学習した時は妥当な結果が出たのですが、訓練データを45,000枚に減らした時、訓練データの正解率よりテストデータの正解率のほうが高くなりました。これは一体どういう意味なのでしょうか?教えていただけると幸いです。
参考にコードを載せておきます。
"""Train a small CNN on a 3/4 stratified subset (45,000 images) of MNIST
and evaluate on the held-out MNIST test set.

NOTE(review): with Dropout active, the accuracy reported during `fit` is
computed WITH dropout enabled, while `evaluate` runs with dropout disabled —
so test accuracy legitimately exceeding training accuracy is expected here.
"""
import keras
from keras.datasets import mnist
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import StratifiedKFold
from keras.layers.core import Dropout

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Stratified 4-way split: each training fold keeps 3/4 = 45,000 samples.
# Only one fold is needed to "reduce the training data", so take the first;
# the original loop silently overwrote the arrays on every iteration.
skf = StratifiedKFold(n_splits=4)
train_index, test_index = next(skf.split(x_train, y_train))
x_train_X = x_train[train_index]
y_train_Y = y_train[train_index]

shape = (28, 28, 1)
x_train_X = x_train_X.reshape(-1, shape[0], shape[1], shape[2])
x_test = x_test.reshape(-1, shape[0], shape[1], shape[2])
print(x_train_X.shape)
print(y_train_Y.shape)
print(x_test.shape)
print(y_test.shape)

# Scale pixel values to [0, 1] exactly once (doing this inside a loop would
# shrink x_test further on every iteration).
x_train_X, x_test = x_train_X / 255.0, x_test / 255.0

model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# BUG FIX: the original evaluated `keras.layers.Dropout(0.5)` as a bare
# expression without model.add(), so dropout was never part of the network.
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))
# summary() moved here so it reports the complete model, not a partial one.
model.summary()

model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

epochs = 10
batch_size = 128
history = model.fit(x_train_X, y_train_Y, batch_size=batch_size, epochs=epochs)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
あなたの回答
tips
プレビュー