バッチ正規化(Batch Normalization)を使用したモデルに、1枚のデータだけを入力して識別を行いたい

前提・実現したいこと

PythonとTensorFlowでResNetを用いて、男女の顔を識別する機械学習モデルを作っています。しかし、学習済みのモデルに顔写真を1枚だけ入力すると、うまく判別できませんでした。原因は、活性化関数の直前で行っているバッチ正規化(Batch Normalization)にあるのではないかと考えています。バッチ正規化を使用したモデルでも、1枚のデータだけで識別を行えるようにしたいです。

発生している問題・エラーメッセージ

バッチ正規化を含めて学習したモデルに対して、バッチではなく
1つのデータだけを入力するには、どのような工夫をすればよいのでしょうか。

該当のソースコード

python
1# ============================import============================
2import tensorflow as tf
3import os
4# GPUの無効化
5os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
6import sys
7os.chdir("path")
8sys.path.append('path')
9import data_set
10
# ============================init============================
# --- Input geometry ---
IMAGE_SIZE = 56
CHANNEL = 3
# Length of one flattened image vector fed into `x`.
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * CHANNEL

# --- Model / label configuration ---
LAYER_NUM = 3
LABEL_NUM = 6  # width of the one-hot label placeholder `y_`

# --- Optimisation settings ---
LEARNING_RATE = 1e-3
MOMENTUM_RATE = 0.9
LOOP_NUM = 1000
alpha = 1e-3
batch_size = 1

# --- Data split sizes and checkpoint location ---
VALIDATION_SIZE = 300
TEST_SIZE = 120
CKPT_PATH = "path"

# --- Graph inputs ---
# Flattened images; the leading dimension is left open so any batch size fits.
x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS])
# Label rows, one per example.
y_ = tf.placeholder(tf.float32, [None, LABEL_NUM])
# Restore the (height, width, channel) layout expected by the conv layers.
x_image = tf.reshape(x, [-1, IMAGE_SIZE, IMAGE_SIZE, CHANNEL])
keep_prob = tf.placeholder("float")
30
31# leaky-Relu
def leaky_relu(x, alpha):
    """Apply a leaky ReLU to ``x``.

    Positive values pass through unchanged; negative values are scaled by
    ``alpha``.

    Args:
        x: Input tensor.
        alpha: Slope applied to the negative part of ``x``.

    Returns:
        A float32 tensor equal to ``relu(x) - relu(-x) * alpha``.
    """
    positive_part = tf.cast(tf.nn.relu(x), tf.float32)
    # relu(-x) is the magnitude of the negative part of x; subtracting the
    # scaled magnitude restores its sign.
    negative_magnitude = tf.cast(tf.nn.relu(-x), tf.float32)
    return positive_part - negative_magnitude * alpha
39
40# w:init
def weight_variable(shape, name=None):
    """Create a variable initialised from a truncated normal distribution.

    Args:
        shape: Shape of the variable to create.
        name: Optional name passed through to ``tf.Variable``.

    Returns:
        A ``tf.Variable`` whose initial value is drawn with ``stddev=0.1``.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=name)
44
def fc_layer(inpt, shape):
    """Fully connected layer without an activation: ``inpt @ W + b``.

    Args:
        inpt: Input tensor that is matrix-multiplied with the weights.
        shape: Weight shape; ``shape[1]`` is also used as the bias length.

    Returns:
        The result of ``tf.matmul(inpt, W) + b``.
    """
    # Weights are created before the bias so automatic variable naming
    # stays in the same order.
    weights = weight_variable(shape)
    bias = tf.Variable(tf.zeros([shape[1]]))
    return tf.matmul(inpt, weights) + bias
51
52# conv(relu):init
def conv_layer(inpt, filter_shape, stride, is_training=True, decay=0.99):
    """Convolution -> batch normalization -> leaky ReLU.

    During training the layer normalizes with the statistics of the current
    batch and, as a side effect, folds them into a running (population)
    mean/variance. At inference time (``is_training=False``) those stored
    population statistics are used instead, so the output for an example no
    longer depends on what else is in the batch. This is what makes
    prediction on a single image possible.

    Args:
        inpt: Input tensor; it is passed to ``tf.nn.conv2d`` with 4-element
            strides ``[1, stride, stride, 1]``.
        filter_shape: Convolution filter shape; ``filter_shape[3]`` is the
            number of output channels.
        stride: Stride used for both spatial dimensions.
        is_training: Python bool chosen when the graph is built. ``True``
            (the default, matching the previous behavior) normalizes with
            batch statistics and updates the population statistics;
            ``False`` normalizes with the population statistics.
        decay: Exponential-moving-average decay for the population
            statistics. Values very close to 1 need many training steps
            before the averages move away from their initial values.

    Returns:
        The activated output tensor.

    Note:
        Variables are created with ``tf.Variable``, so every call creates
        new variables. For inference, rebuild the graph with
        ``is_training=False`` and restore the trained values from a
        checkpoint. Checkpoints written before ``pop_mean``/``pop_var``
        existed do not contain them, so the model must be retrained once.
    """
    epsilon = 0.001
    out_channels = filter_shape[3]

    filter_ = weight_variable(filter_shape)
    conv = tf.nn.conv2d(inpt, filter=filter_, strides=[1, stride, stride, 1], padding="SAME")

    beta = tf.Variable(tf.zeros([out_channels]), name="beta")
    gamma = weight_variable([out_channels], name="gamma")

    # Running statistics: not trainable, but stored in checkpoints so that
    # they are available at inference time.
    pop_mean = tf.Variable(tf.zeros([out_channels]), trainable=False, name="pop_mean")
    pop_var = tf.Variable(tf.ones([out_channels]), trainable=False, name="pop_var")

    if is_training:
        batch_mean, batch_var = tf.nn.moments(conv, axes=[0, 1, 2])
        update_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
        update_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
        # Tie the moving-average updates to the forward pass so they run on
        # every training step without the caller having to fetch them.
        with tf.control_dependencies([update_mean, update_var]):
            batch_norm = tf.nn.batch_norm_with_global_normalization(
                conv, batch_mean, batch_var, beta, gamma, epsilon,
                scale_after_normalization=True)
    else:
        batch_norm = tf.nn.batch_norm_with_global_normalization(
            conv, pop_mean, pop_var, beta, gamma, epsilon,
            scale_after_normalization=True)

    return leaky_relu(batch_norm, 1e-3)


def residual_block(inpt, output_depth, down_sample, projection=False, is_training=True):
    """Residual block: two 3x3 conv layers plus a shortcut connection.

    Args:
        inpt: Input tensor; its last (4th) dimension is read as the input
            depth.
        output_depth: Number of output channels of the block.
        down_sample: If true, the input is first max-pooled with a 2x2
            window and stride 2.
        projection: Only relevant when input and output depth differ.
            ``True`` uses a 1x1 conv_layer as the shortcut (option B);
            ``False`` zero-pads the channel dimension (option A).
        is_training: Forwarded to every ``conv_layer`` in the block; see
            ``conv_layer`` for its meaning.

    Returns:
        The sum of the second conv layer's output and the shortcut.
    """
    input_depth = inpt.get_shape().as_list()[3]
    if down_sample:
        filter_ = [1, 2, 2, 1]
        inpt = tf.nn.max_pool(inpt, ksize=filter_, strides=filter_, padding='SAME')

    conv1 = conv_layer(inpt, [3, 3, input_depth, output_depth], 1, is_training=is_training)
    conv2 = conv_layer(conv1, [3, 3, output_depth, output_depth], 1, is_training=is_training)

    if input_depth != output_depth:
        if projection:
            # Option B: Projection shortcut.
            # Stride must be 1: the main path above runs at stride 1 on the
            # same (already pooled) input, so a stride-2 shortcut would
            # produce a different spatial size and the addition would fail.
            input_layer = conv_layer(inpt, [1, 1, input_depth, output_depth], 1,
                                     is_training=is_training)
        else:
            # Option A: Zero-padding of the channel dimension.
            input_layer = tf.pad(inpt, [[0, 0], [0, 0], [0, 0], [0, output_depth - input_depth]])
    else:
        input_layer = inpt

    res = conv2 + input_layer
    return res
88
# Depth -> blocks-per-stage table; it is not referenced elsewhere in this listing.
n_dict = {20: 1, 32: 2, 44: 3, 56: 4}

# ============resnet====================
n = 32
inpt = x_image

# Accepted depths are 20 plus a non-negative multiple of 12.
if not (n >= 20 and (n - 20) % 12 == 0):
    print("ResNet depth invalid.")
    sys.exit()

num_conv = (n - 20) // 12 + 1
layers = []

# Stem: a single 3x3 convolution from 3 input channels to 16.
with tf.variable_scope('conv1'):
    conv1 = conv_layer(inpt, [3, 3, 3, 16], 1)
    layers.append(conv1)

# Stage with 16 channels; no down-sampling.
for i in range(num_conv):
    with tf.variable_scope('conv2_%d' % (i+1)):
        conv2_x = residual_block(layers[-1], 16, False)
        conv2 = residual_block(conv2_x, 16, False)
        layers.extend([conv2_x, conv2])

# Stage with 32 channels; only the first block of the stage down-samples.
for i in range(num_conv):
    down_sample = (i == 0)
    with tf.variable_scope('conv3_%d' % (i+1)):
        conv3_x = residual_block(layers[-1], 32, down_sample)
        conv3 = residual_block(conv3_x, 32, False)
        layers.extend([conv3_x, conv3])

# Stage with 64 channels; only the first block of the stage down-samples.
for i in range(num_conv):
    down_sample = (i == 0)
    with tf.variable_scope('conv4_%d' % (i+1)):
        conv4_x = residual_block(layers[-1], 64, down_sample)
        conv4 = residual_block(conv4_x, 64, False)
        layers.extend([conv4_x, conv4])

# Head: average over the two spatial axes, then a linear layer and softmax.
with tf.variable_scope('fc'):
    global_pool = tf.reduce_mean(layers[-1], [1, 2])
    assert global_pool.get_shape().as_list()[1:] == [64]

    out = fc_layer(global_pool, [64, LABEL_NUM])
    layers.append(out)

    # Un-normalized outputs of the linear layer and their softmax.
    y_fc = layers[-1]
    y_conv = tf.nn.softmax(y_fc)

試したこと

1枚のみを入力する際は、全学習データの平均と分散をそれぞれmean, varとして使用しようとしました。しかし、畳み込み層が複数あり(1層目〜4層目)、それぞれに固定値を設定するのは合理的でないと考え、やめました。
現在は学習とテストを分けずに行っていますが、この課題の解決の糸口が見えたら分けようと思っています。

補足情報（FW/ツールのバージョンなど）

このサイトのコードを参考に作成しました。
http://www.iandprogram.net/entry/2016/06/06/180806

行動規範の内容に同意します

回答1件

ベストアンサー

https://stackoverflow.com/questions/44807038/single-prediction-when-using-batch-normalization
から辿れる、以下の記事が参考になります。
https://r2rt.com/implementing-batch-normalization-in-tensorflow.html

投稿2018/03/20 05:17

mkgrei

総合スコア8560

jo-jo-

2018/03/22 15:51

適切な回答ありがとうございます。以下のように修正したところ、is_training = False とするとうまくいきません。実装が間違っているのでしょうか？

# conv(relu):init
def conv_layer(inpt, filter_shape, stride):
    # out_channels = filter_shape[3]
    filter_ = weight_variable(filter_shape)
    conv = tf.nn.conv2d(inpt, filter=filter_, strides=[1, stride, stride, 1], padding="SAME")
    batch_norm = batch_norm_wrapper(conv, filter_shape, is_training=True)
    # beta = tf.Variable(tf.zeros([out_channels]), name="beta")
    # gamma = weight_variable([out_channels], name="gamma")
    # mean, var = tf.nn.moments(conv, axes=[0,1,2])
    # batch_norm = tf.nn.batch_norm_with_global_normalization(conv, mean, var, beta, gamma, 0.001,
    #                                                         scale_after_normalization=True)
    out = leaky_relu(batch_norm, 1e-3)
    return out

def residual_block(inpt, output_depth, down_sample, projection=False):
    input_depth = inpt.get_shape().as_list()[3]
    if down_sample:
        filter_ = [1,2,2,1]
        inpt = tf.nn.max_pool(inpt, ksize=filter_, strides=filter_, padding='SAME')

    conv1 = conv_layer(inpt, [3, 3, input_depth, output_depth], 1)
    conv2 = conv_layer(conv1, [3, 3, output_depth, output_depth], 1)

    if input_depth != output_depth:
        if projection:
            # Option B: Projection shortcut
            input_layer = conv_layer(inpt, [1, 1, input_depth, output_depth], 2)
        else:
            # Option A: Zero-padding
            input_layer = tf.pad(inpt, [[0,0], [0,0], [0,0], [0, output_depth - input_depth]])
    else:
        input_layer = inpt

    res = conv2 + input_layer
    return res

行動規範の内容に同意します