経緯

訳あってTensorFlow 1.5でKerasを使わずにクラス分類のCNNを実装したのですが、このコードを実行すると5エポックあたりで過学習が始まってしまい、訓練データに対する精度が100%になった時点での検証データに対する精度は35%程度にとどまります。

学習に用いている画像データは32x32のRGB 3チャネルの画像です。クラスは10クラスあり、各クラス500枚ずつ画像が与えられています。

最初はシンプルでコンパクトなネットワークにドロップアウト層を加えて過学習の抑制をしようと試みるも汎化性能は上がらず。
次に、ResNetに実装されているshortcut connectionを実装してみるも精度は上がらずといったところです。

他にも、学習データを3倍に水増ししたり、最適化に用いる手法を変えたりしてはみましたが、一向に精度が上がりません。

精度向上のためのアイデアがございましたら、お教えいただけると幸いです。

以下がソースコードになります。拙いコードですがよろしくお願いします。

python
1import numpy as np
2import tensorflow as tf
3from PIL import Image
4import os
5
# Load every image found under each class directory as an array scaled by
# 1/255. Images are appended class by class, in the iteration order of
# `label`.
img_list = []
label = {"airplane":0, "cat":1, "frog":2, "automobile":3, "deer":4,
         "horse":5, "ship":6 ,"bird":7, "dog":8, "truck":9}

for key in label:
    class_dir = "ディレクトリのパス" + key
    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of img_list reproducible between runs.
    for file_name in sorted(os.listdir(class_dir)):
        # Skip the macOS Finder metadata file, which is not an image.
        if file_name == ".DS_Store":
            continue

        # The context manager closes the underlying file handle as soon as
        # the pixel data has been copied into the NumPy array.
        with Image.open(class_dir + "/" + file_name) as img:
            img_list.append(np.array(img) / 255)
19
# Split the per-class images 4:1 into training and validation sets.
# NOTE: the "test" names below actually hold the validation data.
train_num = 400
test_num = 100
x_train_list, t_train_list = [], []
x_test_list, t_test_list = [], []
target = 0
for i in range(10):
    class_offset = 500 * i
    # First `train_num` images of the class go to the training set ...
    x_train_list.extend(img_list[class_offset + j] for j in range(train_num))
    t_train_list.extend([target] * train_num)
    # ... and the following `test_num` images go to the validation set.
    x_test_list.extend(img_list[class_offset + 400 + j] for j in range(test_num))
    t_test_list.extend([target] * test_num)
    target += 1

# Stack the images and turn the integer targets into one-hot rows.
one_hot = np.identity(10)
x_train = np.array(x_train_list)
t_train = one_hot[np.array(t_train_list)]
x_test = np.array(x_test_list)
t_test = one_hot[np.array(t_test_list)]

# Shuffle inputs and targets with the same permutation so pairs stay aligned.
shuffle_train = np.random.permutation(4000)
shuffle_test = np.random.permutation(1000)
x_train, t_train = x_train[shuffle_train], t_train[shuffle_train]
x_test, t_test = x_test[shuffle_test], t_test[shuffle_test]
47
def _global_contrast_normalize(images, eps=1e-8):
    """Standardize each image by its own mean and standard deviation.

    Statistics are taken over axes (1, 2, 3), i.e. separately for every
    image along axis 0. `eps` is a lower bound on the standard deviation so
    a constant image does not cause a division by zero.
    """
    mean = np.mean(images, axis=(1, 2, 3), keepdims=True)
    std = np.std(images, axis=(1, 2, 3), keepdims=True)
    return (images - mean) / np.maximum(std, eps)


# Global contrast normalization.
# The validation data must go through exactly the same preprocessing as the
# training data; otherwise the network is evaluated on inputs drawn from a
# different distribution than the one it was trained on.
x_train = _global_contrast_normalize(x_train)
x_test = _global_contrast_normalize(x_test)

# Rescale to [0, 1] using the training-set range. The same range is applied
# to the validation data, so its values may fall slightly outside [0, 1].
x_min = np.min(x_train)
x_max = np.max(x_train)
x_train = (x_train - x_min)/(x_max - x_min)
x_test = (x_test - x_min)/(x_max - x_min)

# Fixed-seed generator used for the weight initialization of the layers.
rng = np.random.RandomState(1234)
# Convolution layer
class Conv:
    """2-D convolution followed by a bias add and an activation function.

    Args:
        filter_shape: 4-element sequence passed as the filter shape to
            tf.nn.conv2d; the last element is also the bias length.
        strides: strides argument passed to tf.nn.conv2d.
        function: callable applied to the biased convolution output
            (identity by default).
        padding: padding argument passed to tf.nn.conv2d.
    """

    def __init__(self, filter_shape, strides, function=lambda x: x, padding="SAME"):
        # Uniform initialization in [-limit, limit] with
        # limit = sqrt(6 / (fan_in + fan_out)).
        # The original code first created a truncated-normal variable and then
        # immediately overwrote self.W; that dead variable stayed in the graph
        # (and in checkpoints), so only the effective initializer is kept.
        fan_in = np.prod(filter_shape[:3])
        fan_out = np.prod(filter_shape[:2]) * filter_shape[3]
        limit = np.sqrt(6/(fan_in + fan_out))
        self.W = tf.Variable(rng.uniform(
                            low=-limit,
                            high=limit,
                            size=filter_shape
                            ).astype('float32'), name='W')
        self.b = tf.Variable(np.zeros((filter_shape[3]), dtype = "float32"), name = "b")
        self.function = function
        self.strides = strides
        self.padding = padding

    def f_prop(self, x):
        """Return function(conv2d(x, W) + b)."""
        u = tf.nn.conv2d(x, self.W, strides=self.strides, padding=self.padding) + self.b
        return self.function(u)
76    
# Pooling layer
class Pooling:
    """Max-pooling layer wrapping tf.nn.max_pool.

    Args:
        ksize: window size passed to tf.nn.max_pool; defaults to [1, 2, 2, 1].
        strides: strides passed to tf.nn.max_pool; defaults to [1, 2, 2, 1].
        padding: padding mode passed to tf.nn.max_pool.
    """

    def __init__(self, ksize=None, strides=None, padding="SAME"):
        # None sentinels instead of list defaults: a list default is created
        # once and shared by every instance, so mutating one instance's
        # ksize/strides would silently change all the others.
        self.ksize = [1, 2, 2, 1] if ksize is None else ksize
        self.strides = [1, 2, 2, 1] if strides is None else strides
        self.padding = padding

    def f_prop(self, x):
        """Apply max pooling to x."""
        return tf.nn.max_pool(x, ksize=self.ksize, strides=self.strides, padding=self.padding)
86    
class Avg_Pooling:
    """Average-pooling layer wrapping tf.nn.avg_pool.

    Args:
        ksize: window size passed to tf.nn.avg_pool; defaults to [1, 2, 2, 1].
        strides: strides passed to tf.nn.avg_pool; defaults to [1, 2, 2, 1].
        padding: padding mode passed to tf.nn.avg_pool.
    """

    def __init__(self, ksize=None, strides=None, padding="SAME"):
        # None sentinels instead of list defaults: a list default is created
        # once and shared by every instance, so mutating one instance's
        # ksize/strides would silently change all the others.
        self.ksize = [1, 2, 2, 1] if ksize is None else ksize
        self.strides = [1, 2, 2, 1] if strides is None else strides
        self.padding = padding

    def f_prop(self, x):
        """Apply average pooling to x."""
        return tf.nn.avg_pool(x, ksize=self.ksize, strides=self.strides, padding=self.padding)
95
96
# Flatten layer
class Flatten:
    """Reshape a tensor to 2-D, keeping the first axis and merging the rest."""

    def f_prop(self, x):
        """Return x reshaped to (-1, product of all dimensions after the first)."""
        trailing_dims = x.get_shape().as_list()[1:]
        flat_size = np.prod(trailing_dims)
        return tf.reshape(x, (-1, flat_size))
101
# Fully connected layer
class Dense:
    """Affine transform x @ W + b followed by an activation function.

    Args:
        in_dim: number of rows of the weight matrix.
        out_dim: number of columns of the weight matrix and length of the bias.
        function: callable applied to the affine output (identity by default).
    """

    def __init__(self, in_dim, out_dim, function=lambda x: x):
        # Weights are drawn uniformly from [-bound, bound] with
        # bound = sqrt(6 / (in_dim + out_dim)); the bias starts at zero.
        bound = np.sqrt(6/(in_dim + out_dim))
        initial_w = rng.uniform(low=-bound, high=bound, size=(in_dim, out_dim))
        self.W = tf.Variable(initial_w.astype('float32'), name='W')
        self.b = tf.Variable(np.zeros([out_dim]).astype('float32'), name = "b")
        self.function = function

    def f_prop(self, x):
        """Return function(x @ W + b)."""
        pre_activation = tf.matmul(x, self.W) + self.b
        return self.function(pre_activation)
115
# Batch normalization layer
class BatchNorm:
    """Normalize activations with the statistics of the current batch.

    The mean and variance are always computed from the tensor passed to
    f_prop; no moving averages are kept, so the same batch statistics are
    used whenever the layer is evaluated.

    Args:
        shape: shape of the learnable scale (gamma) and shift (beta).
        epsilon: small constant added to the variance before the square root.
    """

    def __init__(self, shape, epsilon=np.float32(1e-5)):
        self.gamma = tf.Variable(np.ones(shape, dtype='float32'), name='gamma')
        self.beta  = tf.Variable(np.zeros(shape, dtype='float32'), name='beta')
        self.epsilon = epsilon

    def f_prop(self, x):
        """Return gamma * (x - mean) / sqrt(var + epsilon) + beta.

        Rank-2 inputs are normalized over axis 0, rank-4 inputs over axes
        (0, 1, 2).

        Raises:
            ValueError: if x is neither rank 2 nor rank 4.
        """
        rank = len(x.get_shape())
        if rank == 2:
            axes = [0]
        elif rank == 4:
            axes = [0, 1, 2]
        else:
            # Previously this case fell through and failed later with a
            # NameError on the undefined mean/std.
            raise ValueError(
                "BatchNorm supports rank-2 or rank-4 inputs, got rank {}".format(rank))
        # Use the keep_dims spelling in both cases: the original mixed
        # keepdims= and keep_dims=, and the former is not accepted by
        # tf.nn.moments in early TensorFlow 1.x releases.
        mean, var = tf.nn.moments(x, axes=axes, keep_dims=True)
        std = tf.sqrt(var + self.epsilon)
        normalized_x = (x - mean) / std
        return self.gamma * normalized_x + self.beta
132    
# Activation layer
class Activation:
    """Layer that applies a single callable to its input.

    Args:
        function: callable applied in f_prop (identity by default).
    """

    def __init__(self, function=lambda x: x):
        self.function = function

    def f_prop(self, x):
        """Return function(x)."""
        activated = self.function(x)
        return activated
140
# Shortcut connection
class rescell:
    """Residual cell: conv-BN-ReLU-conv-BN plus a 1x1-conv shortcut, then ReLU.

    Args:
        filter_shape: 4-element filter shape of the first convolution. The
            second convolution uses the same kernel size with the output
            channel count (element 3) on both sides, and the shortcut is a
            1x1 convolution from element 2 to element 3 channels.
    """

    def __init__(self, filter_shape):
        self.filter_shape = filter_shape
        # Sub-layers are created on the first f_prop call (the BatchNorm
        # parameter shapes depend on the spatial size of the activations) and
        # reused afterwards. Previously every f_prop call built brand-new
        # layers, so calling the cell twice produced two unrelated sets of
        # weights.
        self._sublayers = {}

    def _sublayer(self, key, factory):
        """Return the cached sub-layer for key, building it with factory on first use."""
        if key not in self._sublayers:
            self._sublayers[key] = factory()
        return self._sublayers[key]

    def f_prop(self, input_data):
        """Apply the residual cell to input_data and return the activated sum."""
        strides=[1,1,1,1]
        k_h, k_w, in_ch, out_ch = self.filter_shape

        # Main path, first half: conv -> batch norm -> ReLU.
        x = self._sublayer(
            "conv1", lambda: Conv(self.filter_shape, strides)).f_prop(input_data)
        x = self._sublayer(
            "bn1", lambda: BatchNorm((int(x.shape[1]), int(x.shape[2]), out_ch))).f_prop(x)
        x = Activation(tf.nn.relu).f_prop(x)

        # Shortcut path: 1x1 convolution so the channel count matches the main path.
        data = self._sublayer(
            "shortcut", lambda: Conv((1, 1, in_ch, out_ch), strides)).f_prop(input_data)

        # Main path, second half: conv -> batch norm.
        x = self._sublayer(
            "conv2", lambda: Conv((k_h, k_w, out_ch, out_ch), strides)).f_prop(x)
        x = self._sublayer(
            "bn2", lambda: BatchNorm((int(x.shape[1]), int(x.shape[2]), out_ch))).f_prop(x)

        # Merge both paths, then apply the final ReLU.
        x = tf.add(x, data)
        x = Activation(tf.nn.relu).f_prop(x)
        return x
160                  
# Network definition: one convolution stem, three residual cells, and a
# two-layer fully connected head ending in a 10-way softmax.
unit_strides = [1, 1, 1, 1]
pool_window = (1, 2, 2, 1)
flat_dim = 8*8*64
num_classes = 10

layers = [
    # Stem: 3x3 convolution to 32 channels, ReLU, max pooling.
    Conv((3, 3, 3, 32), unit_strides),
    Activation(tf.nn.relu),
    Pooling(pool_window),

    # Residual cells.
    rescell((3, 3, 32, 64)),
    rescell((3, 3, 64, 64)),
    rescell((3, 3, 64, 64)),

    Avg_Pooling(pool_window),

    # Classifier head.
    Flatten(),
    Dense(flat_dim, flat_dim, tf.nn.relu),
    Dense(flat_dim, num_classes, tf.nn.softmax),
]

# Placeholders for a batch of 32x32 3-channel images and their one-hot targets.
x = tf.placeholder(tf.float32, [None, 32, 32, 3])
t = tf.placeholder(tf.float32, [None, num_classes])
180
181
def f_props(layers, x):
    """Feed x through every layer in order and return the final output.

    Each element of `layers` must provide an `f_prop` method; the output of
    one layer is the input of the next. With an empty sequence, x is
    returned unchanged.
    """
    output = x
    for current_layer in layers:
        output = current_layer.f_prop(output)
    return output
186
# Network output for the input placeholder.
out = f_props(layers, x)

# Loss: cross entropy between the targets and the network output, summed over
# classes and averaged over the batch. 1e-5 keeps the log argument positive.
log_out = tf.log(out + 1e-5)
per_example_loss = -tf.reduce_sum(t * log_out, axis = [1])
loss = tf.reduce_mean(per_example_loss)

# Training: plain gradient descent on the loss.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train_step = optimizer.minimize(loss)

# Evaluation: fraction of examples whose arg-max prediction matches the target.
predicted_class = tf.argmax(out, 1)
target_class = tf.argmax(t, 1)
correct = tf.equal(predicted_class, target_class)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
200
# Train with mini-batch gradient descent; every 5 epochs evaluate on the full
# training and validation sets and checkpoint the best validation accuracy.
with tf.Session() as sess:
    sess.run(init)
    saver = tf.train.Saver()

    epoch_num = 51
    data_num = x_train.shape[0]
    batch_size = 40
    batch_num = data_num // batch_size
    best_test_acc = 0
    for epoch in range(epoch_num):

        # Reshuffle the training set each epoch. The full permutation is
        # used: the former hard-coded [:12000] slice would have shrunk the
        # arrays (and then raised IndexError on the next epoch) for any
        # dataset larger than 12000 samples.
        shuffle_idx = np.random.permutation(data_num)
        x_train = x_train[shuffle_idx]
        t_train = t_train[shuffle_idx]

        for i in range(batch_num):
            start = i * batch_size
            end = start + batch_size
            sess.run(train_step, feed_dict = {x: x_train[start:end], t: t_train[start:end]})

        if epoch%5 == 0:
            # Fetch loss and accuracy in one run so each dataset needs only
            # a single forward pass instead of two.
            train_loss, train_acc = sess.run([loss, accuracy], feed_dict = {x: x_train, t: t_train})
            test_loss, test_acc = sess.run([loss, accuracy], feed_dict = {x: x_test, t: t_test})
            if best_test_acc < test_acc:
                best_test_acc = test_acc
                saver.save(sess, "テェックポイント保存用のディレクトリパス")
                print("best_test_acc: " + str(best_test_acc))

            print('epoch:{} \n \
                   tr_loss:{}\n \
                   tr_acc:{} \n \
                   tes_loss:{} \n \
                   tes_acc:{}'.format(epoch,
                                      train_loss,
                                      train_acc,
                                      test_loss,
                                      test_acc))
241

Q71

2020/03/19 23:06

畳み込み層は1層？

行動規範の内容に同意します

回答1件

精度向上のためのアイデアがございましたら、お教えいただけると幸いです。

ImageNet の学習済みモデルを使って、転移学習してはどうでしょうか。
モデルも自作するのではなく、VGG や ResNet など実績のあるものをパラメータ変更せずにそのまま流用するのがいいと思います。
TensorFlow に同梱されている Keras では、学習済みモデルが簡単に利用できるような API が提供されています。

Applications - Keras Documentation

投稿2020/03/18 06:09