cifar-10で90%到達する方法。

今TensorFlowを使ってCifar-10の識別率を90%に到達させようとしています。
今はResNetを使っているのですがどうもうまくいきません。
良くて86%といった感じでそれ以上は望めないという状況です。
どこに着目してモデルを改善していくべきなのかわかりません。
もしわかる方がいれば教えてください。
ResNetは２層ごとに２層前に出力を合計していくもので、
8層ごとにフィルターのサイズを２倍ずつにしました。

フィルターの数	フィルターのサイズ	層の数
16	3×3	1
16	3×3	12
32	3×3	12
64	3×3	12
128	3×3	12

画像の前処理は画像の左右を反転させるものと明るさとコントラストをランダムに変更してから標準化しています。
以下、コードになります。
文字数の関係でNumPyの配列に画像データを落とし込むところはカットしました。

python
1import tensorflow as tf
2import numpy as np
3import pickle
4import time
5
6dtype = 'float64'
7
8dtype2 = tf.float32
9x = tf.placeholder(dtype2, shape = [None, 32, 32, 3], name='x')
10with tf.name_scope('Const'):
11    keep_prob = tf.placeholder(dtype2, name='keep_prob') # CNN内でのドロップアウト率
12with tf.name_scope('Input'):
13    mm = tf.placeholder(shape = [32, 32, 3], dtype=dtype2, name = 'input')
14    distorted_image = tf.image.random_flip_left_right(mm)
15    distorted_image = tf.image.random_brightness(distorted_image,
16                                                 max_delta=63)
17    distorted_image = tf.image.random_contrast(distorted_image,lower=0.2, upper=1.8)
18    
19    float_image = tf.image.per_image_standardization(distorted_image)
20
21
22with tf.name_scope('Input_for_Test'):
23    test_image = tf.image.per_image_standardization(mm)
24
25def weight_variable(shape, name=None):
26    initial = tf.truncated_normal(shape,stddev=0.1, dtype=dtype2)
27    return tf.Variable(initial, name=name)
28
29def softmax_layer(inpt, shape):
30    fc_w = weight_variable(shape)
31    fc_b = tf.Variable(tf.constant(0.1, shape=[shape[1]]))
32    fc_h = tf.nn.softmax(tf.matmul(inpt, fc_w) + fc_b)
33
34    return fc_h
35def relu_layer(inpt, shape):
36    fc_w_1 = weight_variable(shape)
37    fc_b_1 = tf.Variable(tf.constant(0.1, shape=[shape[1]]))
38
39    fc_h_1 = tf.nn.relu(tf.matmul(inpt, fc_w_1) + fc_b_1)
40
41    return fc_h_1
42def conv_layer(inpt, filter_shape, stride):
43    out_channels = filter_shape[3]
44
45    filter_ = weight_variable(filter_shape)
46    conv = tf.nn.conv2d(inpt, filter=filter_, strides=[1, stride, stride, 1], padding='SAME')
47    mean, var = tf.nn.moments(conv, axes=[0,1,2])
48    beta = tf.Variable(tf.zeros([out_channels], name="beta"))
49    gamma = weight_variable([out_channels], name="gamma")
50
51    batch_norm = tf.nn.batch_norm_with_global_normalization(
52        conv, mean, var, beta, gamma, 0.001,
53        scale_after_normalization=True)
54    return batch_norm
55
56def residual_block(inpt, output_depth, down_sample, projection=False):
57    input_depth = inpt.get_shape().as_list()[3]
58    if down_sample:
59        filter_ = [1,2,2,1]
60        inpt = tf.nn.max_pool(inpt, ksize=filter_, strides=filter_, padding='SAME')
61
62    conv1 = conv_layer(inpt, [3,3, input_depth, output_depth], 1)
63    conv2 = conv_layer(conv1, [3,3, output_depth, output_depth],1)
64
65    if input_depth != output_depth:
66        if projection:
67            input_layer = conv_layer(inpt, [1,1, input_depth, output_depth], 2)
68        else:
69            input_layer = tf.pad(inpt, [[0,0], [0,0],[0,0],[0, output_depth-input_depth]])
70
71    else:
72        input_layer = inpt
73
74    res = conv2 + input_layer
75    out = tf.nn.relu(res)
76    return out
77
78
79def resnet(inpt, n):
80    num_conv = (n-1)//16 + 1
81    layers = []
82
83    with tf.variable_scope('conv1'):
84        conv1 = conv_layer(inpt, [3,3,3,8], 1)
85        layers.append(conv1)
86
87    for i in range(num_conv):
88        with tf.variable_scope('conv2_%d' % (i+1)):
89            conv2_x = residual_block(layers[-1], 8, False)
90            conv2 = residual_block(conv2_x, 8, False)
91            conv2_dropout = tf.nn.dropout(conv2, keep_prob=keep_prob)
92            layers.append(conv2_x)
93            layers.append(conv2)
94            layers.append(conv2_dropout)
95
96        assert conv2.get_shape().as_list()[1:] == [32, 32, 8]
97
98    for i in range (num_conv):
99        down_sample = True if i==0 else False
100        with tf.variable_scope('conv3_%d' % (i+1)):
101            conv3_x = residual_block(layers[-1], 16, down_sample)
102            conv3 = residual_block(conv3_x, 16, False)
103            conv3_dropout = tf.nn.dropout(conv3, keep_prob=keep_prob)
104            layers.append(conv3_x)
105            layers.append(conv3)
106            layers.append(conv3_dropout)
107
108        assert conv3.get_shape().as_list()[1:] == [16, 16, 16]
109
110    for i in range(num_conv):
111        down_sample = True if i==0 else False
112        with tf.variable_scope('conv4_%d' % (i+1)):
113            conv4_x = residual_block(layers[-1], 32, down_sample)
114            conv4 = residual_block(conv4_x, 32, False)
115            conv4_dropout = tf.nn.dropout(conv4, keep_prob = keep_prob)
116            layers.append(conv4_x)
117            layers.append(conv4)
118            layers.append(conv4_dropout)
119        print(conv4.get_shape())
120        assert conv4.get_shape().as_list()[1:] == [8, 8, 32]
121    for i in range(num_conv):
122        down_sample = True if i==0 else False
123        with tf.variable_scope('conv5_%d' % (i+1)):
124            conv5_x = residual_block(layers[-1], 64, down_sample)
125            conv5 = residual_block(conv5_x, 64, False)
126            conv5_dropout = tf.nn.dropout(conv5, keep_prob = keep_prob)
127            layers.append(conv5_x)
128            layers.append(conv5)
129            layers.append(conv5_dropout)
130        print(conv5.get_shape())
131        assert conv5.get_shape().as_list()[1:] == [4, 4, 64]
132
133    with tf.variable_scope('fc'):
134        global_pool = tf.reduce_mean(layers[-1], [1,2])
135        assert global_pool.get_shape().as_list()[1:] == [64]
136        conv6 = conv_layer(inpt, [1,1,3,64], 1)
137        relu_conv6 = tf.nn.relu(conv6)
138        global_pool2 = tf.reduce_mean(relu_conv6, [1, 2])
139        global_pool3 = tf.add(global_pool, global_pool2)
140        hidden = relu_layer(global_pool3, [64, 1000])
141        out = softmax_layer(hidden, [1000, 10])
142        layers.append(out)
143
144    return layers[-1]
145
146p = resnet(inpt=x, n=49)
147with tf.name_scope('Loss'):
148        t = tf.placeholder(dtype2, [None, 10], name='labels')
149        loss = -tf.reduce_sum(t * tf.log(tf.clip_by_value(p, 1e-10, 1.0)), name='loss')
150
151with tf.name_scope('Train'):
152        learning_rate = tf.placeholder(dtype2)
153        train_step = tf.train.MomentumOptimizer(learning_rate,momentum=0.85).minimize(loss)
154
155with tf.name_scope('Accuracy'):
156        correct_prediction = tf.equal(tf.argmax(p, 1), tf.argmax(t, 1))
157        accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=dtype2), name='accuracy')
158
159sess = tf.Session()
160sess.run(tf.global_variables_initializer())
161loss_list  = []
162test_acc_list = []
163test_loss_list = []
164i = 0
165saver = tf.train.Saver()
166batch_size = 25
167height = width = 32
168rate = 0.00004
169decay = np.exp(1/80000)
170start_time = time.time()
171max_epoch = 150
172training_time = 50000//batch_size
173
174for epoch in range(max_epoch):
175    for i in range(training_time):
176        ra = np.random.randint(50000, size= batch_size)
177        batch_xs, batch_ts = X1[ra], Y_train[ra]
178        batch_xs2 = np.empty([batch_size, height, width, 3], dtype='float32')
179
180        for k in range(batch_size):
181            batch_xs2[k] = sess.run(float_image, feed_dict={mm: batch_xs[k]})
182
183        sess.run(train_step, feed_dict = {x: batch_xs2,t:batch_ts, learning_rate: rate,keep_prob:0.95})
184        if epoch==0 and i%50==0:
185            print("Training step is %d" % (i))
186    if epoch == 100 or epoch == 130:
187        rate /= 10
188        print("Learning rate turned to be %e" % (rate))
189    if epoch >= 0:
190        loss_val = np.empty(400)
191        acc_val = np.empty(400)
192        for k in range(400):
193            X_test2 = np.empty((25, 32, 32, 3))
194            for l in range(25):
195                X_test2[l] = sess.run(test_image, feed_dict={mm:X1_test[(25*k)+l]})
196            loss_val[k], acc_val[k] = sess.run([loss, accuracy],
197                            feed_dict={x: X_test2,
198                            t: Y_test2[25*k:25*(k+1)], keep_prob:1.0})
199        loss_val = np.sum(loss_val)
200        acc_val = np.mean(acc_val)
201        test_loss_list.append(loss_val)
202        test_acc_list.append(acc_val)
203        print('Epoch: %d, Loss: %f, Accuracy: %f'
204                        % (epoch, loss_val, acc_val))
205    if epoch % 5 == 0:
206        saver = tf.train.Saver()
207        saver.save(sess, "0119ver10_resnet49_normalize_set_0.0015_%d.ckpt" %
208                            (epoch))
209
210    if epoch%5==0:
211        loss_val = np.empty(2000)
212        acc_val = np.empty(2000)
213        for k in range(2000):
214            X_test2 = np.empty((25, 32, 32, 3))
215            for l in range(25):
216                X_test2[l] = sess.run(test_image, feed_dict={mm:X1[(25*k)+l]})
217            loss_val[k], acc_val[k] = sess.run([loss, accuracy],
218                                        feed_dict={x: X_test2,
219                                                t: Y_train[25*k:25*(k+1)], keep_prob:1.0})
220        loss_val = np.sum(loss_val)
221        acc_val = np.mean(acc_val)
222        print('Train data --','Epoch: %d, Loss: %f, Accuracy: %f'
223                            % (epoch, loss_val, acc_val))
224  
225