
What I want to achieve
I want to avoid overfitting in a CNN model (VGG16) built with Keras.
Environment
- model: VGG16 (BatchNormalization inserted after each conv layer)
- data: cifar10 (resized from 32x32 to 224x224 with bilinear interpolation to match VGG16's input layer)
- optimizer: SGD (momentum=0.9, learning rate=0.01); see the sketch after this list
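For reference, the resize and optimizer settings listed above map to the following Keras/TF 1.x calls. This is a minimal sketch; note that tf.image.resize_images defaults to bilinear interpolation in TF 1.x.

```python
import tensorflow as tf
from keras.layers import Input, Lambda
from keras import optimizers

# 32x32 CIFAR-10 input, upscaled to VGG16's expected 224x224.
# Bilinear is the default method of tf.image.resize_images in TF 1.x.
inputs = Input(shape=(32, 32, 3))
resized = Lambda(lambda image: tf.image.resize_images(image, (224, 224)))(inputs)

# SGD with learning rate 0.01 and momentum 0.9, as listed above.
sgd = optimizers.SGD(lr=0.01, momentum=0.9)
```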
Problem / error messages
Question
What is causing the overfitting? Also, please suggest countermeasures, expressed in Keras syntax.
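For context, one knob the script below does not use is per-layer L2 weight decay via kernel_regularizer. Note that the decay argument passed to SGD in the script decays the learning rate, not the weights. A minimal self-contained sketch follows; the coefficient 5e-4 mirrors the classic VGG setting but is an illustrative assumption here:

```python
from keras import regularizers
from keras.layers import Input, Conv2D

inputs = Input(shape=(224, 224, 3))
# L2 weight decay applied per layer; 5e-4 is an illustrative value,
# not something tuned for this model or dataset.
x = Conv2D(64, (3, 3), activation='relu', padding='same',
           kernel_regularizer=regularizers.l2(5e-4),
           name='block1_conv1')(inputs)
```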
Relevant source code
```python
import keras
from keras.layers import Conv2D, MaxPooling2D, Lambda, Input, Dense, Flatten, BatchNormalization
from keras.models import Model
from keras.layers.core import Dropout
from keras import optimizers
import tensorflow as tf
from keras.callbacks import ReduceLROnPlateau, TensorBoard

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn.preprocessing import OneHotEncoder
from keras.datasets import cifar10
# import gc
import numpy as np

import matplotlib.pyplot as plt

nb_epoch = 30

# Helper to plot the accuracy and loss curves per epoch.
def plot_graph(history):
    plt.plot(range(1, nb_epoch+1), history.history['acc'], label="training")
    plt.plot(range(1, nb_epoch+1), history.history['val_acc'], label="validation")
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

    plt.plot(range(1, nb_epoch+1), history.history['loss'], label="training")
    plt.plot(range(1, nb_epoch+1), history.history['val_loss'], label="validation")
    plt.xlabel('Epochs')
    plt.ylabel('loss')
    plt.legend()
    plt.show()

# Build the VGG16 model with the functional API.
def set_vgg_model():
    # inputs = Input(shape=(224, 224, 3))
    inputs = Input(shape=(32, 32, 3))
    # Due to memory limitations, images are resized on the fly.
    x = Lambda(lambda image: tf.image.resize_images(image, (224, 224)))(inputs)
    # The first conv layer must consume the resized tensor x, not inputs;
    # applying it to inputs would silently bypass the Lambda resize.
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(x)
    x = BatchNormalization()(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='block1_pool')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    x = BatchNormalization()(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='block2_pool')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    x = BatchNormalization()(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = BatchNormalization()(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='block3_pool')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    x = BatchNormalization()(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = BatchNormalization()(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='block4_pool')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    x = BatchNormalization()(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
    x = BatchNormalization()(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='block5_pool')(x)
    flattened = Flatten(name='flatten')(x)
    x = Dense(4096, activation='relu', name='fc1')(flattened)
    x = Dropout(0.5, name='dropout1')(x)
    x = Dense(4096, activation='relu', name='fc2')(x)
    x = Dropout(0.5, name='dropout2')(x)
    predictions = Dense(10, activation='softmax', name='predictions')(x)
    model = Model(inputs=inputs, outputs=predictions)
    return model

if __name__ == "__main__":
    print(tf.__version__)
    print(keras.__version__)

    # Prepare data (cifar10).
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    enc = OneHotEncoder()
    y_train = enc.fit_transform(y_train).toarray()
    # Reuse the encoder fitted on the training labels (transform, not fit_transform).
    y_test = enc.transform(y_test).toarray()

    model = set_vgg_model()
    BATCH_SIZE = 500
    # Note: the decay argument is learning-rate decay, not L2 weight decay.
    sgd = optimizers.SGD(lr=0.01,
                         momentum=0.9,
                         decay=5e-4)  # , nesterov=False
    # The loss function is categorical cross-entropy.
    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    rlop = ReduceLROnPlateau(monitor='val_acc',
                             factor=0.1,
                             patience=5,
                             verbose=1,
                             mode='auto',
                             min_delta=0.0001,
                             cooldown=0,
                             min_lr=0.00001)

    history = model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=nb_epoch, verbose=1,
                        callbacks=[rlop], validation_data=(x_test, y_test))
    plot_graph(history)
```
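One commonly suggested countermeasure for this kind of setup is real-time data augmentation with ImageDataGenerator. Below is a minimal sketch reusing the names from the script above (model, x_train, y_train, x_test, y_test, BATCH_SIZE, nb_epoch, rlop); the shift/flip ranges are illustrative assumptions, not tuned values. In Keras 2.2.x, fit_generator is the generator-based training entry point.

```python
from keras.preprocessing.image import ImageDataGenerator

# Randomly shift and flip training images on the fly; the exact ranges
# here are illustrative assumptions, not tuned for CIFAR-10.
datagen = ImageDataGenerator(width_shift_range=0.1,
                             height_shift_range=0.1,
                             horizontal_flip=True)

# Replace model.fit(...) in the script above with a generator-based call.
history = model.fit_generator(datagen.flow(x_train, y_train, batch_size=BATCH_SIZE),
                              steps_per_epoch=len(x_train) // BATCH_SIZE,
                              epochs=nb_epoch,
                              validation_data=(x_test, y_test),
                              callbacks=[rlop])
```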
Additional information (framework/tool versions, etc.)
tensorflow 1.10.0
keras 2.2.2
python 3.6
