Background and what I want to achieve
I implemented MobileNet V1 in TensorFlow and trained it on ImageNet image classification, but the loss barely decreases and the accuracy stays stuck at a constant value (neither the training nor the test accuracy moves, and on the training set both top-1 and top-5 accuracy stay at 0).
What kinds of causes should I suspect in this situation? (Is my implementation wrong, or is the model architecture itself unreasonable?)
Also, since the paper does not describe them in detail, I would appreciate details on the hyperparameters, the training environment, the input preprocessing, and so on. (I want to run under the same conditions as the paper so that I can judge whether this is a bug.)
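For concreteness, the kind of input pipeline I have in mind is the minimal sketch below (resize to 224x224 and scale pixels to [-1, 1]); this is purely my own assumption rather than anything taken from the paper, so please correct it if the paper's setup differs.

import tensorflow as tf

def preprocess_image(image_bytes):
    # Assumed preprocessing (not from the paper): decode the JPEG, resize to
    # the 224x224 input resolution of MobileNet V1, and scale pixels to [-1, 1].
    image = tf.image.decode_jpeg(image_bytes, channels=3)
    image = tf.cast(image, tf.float32)
    image = tf.image.resize_images(image, [224, 224])
    return image / 127.5 - 1.0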
Problem / error messages
The loss of the model I built (MobileNet V1) does not decrease (more precisely, it keeps oscillating between roughly 6.8 and 6.9).
From various experiments with modified versions of the source code, the problem appears to lie either in the depthwise separable convolution or in how the model is assembled.
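For reference, the value the loss is stuck at is essentially the cross-entropy of a uniform prediction over the 1000 ImageNet classes, which is why I believe the network is only producing chance-level outputs:

import math
# Cross-entropy of a uniform guess over 1000 classes:
# -log(1/1000) = log(1000) ≈ 6.91, matching the 6.8-6.9 range I observe.
print(math.log(1000))  # 6.907755...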
Relevant source code
Definition of a standard convolution
import tensorflow as tf

# _variable_with_weight_decay / _variable_on_device are helper functions
# defined elsewhere in my code (variable creation, with an optional weight-decay term).
def conv(batch_input, c_i, c_o, k_h, k_w, s_h, s_w, padding, weight_decay, name, variance=0.01, bias=0.0):
    # standard convolution + bias + ReLU (note: no batch normalization in this layer)
    with tf.variable_scope(name):
        weights = _variable_with_weight_decay('weight', [k_h, k_w, c_i, c_o],
                                              initializer=tf.random_normal_initializer(0.0, variance),
                                              weight_decay=weight_decay)
        biases = _variable_on_device('bias', [c_o], initializer=tf.constant_initializer(bias))
        return tf.nn.relu(tf.nn.bias_add(
            tf.nn.conv2d(batch_input, weights, [1, s_h, s_w, 1], padding=padding), biases))
Definition of the depthwise convolution
def depthwise_conv(batch_input, c_i, k_h, k_w, s_h, s_w, padding, weight_decay, name, variance=0.01, bias=0.0):
    # depthwise convolution + bias + batch normalization + ReLU
    # (tf.nn.batch_normalization is called with the constants mean=0, variance=1,
    #  offset=0, scale=1, epsilon=1e-8)
    with tf.variable_scope(name):
        weights = _variable_with_weight_decay('weight', [k_h, k_w, c_i, 1],
                                              initializer=tf.random_normal_initializer(0.0, variance),
                                              weight_decay=weight_decay)
        biases = _variable_on_device('bias', [c_i], initializer=tf.constant_initializer(bias))
        return tf.nn.relu(tf.nn.batch_normalization(
            tf.nn.bias_add(tf.nn.depthwise_conv2d(batch_input, weights, [1, s_h, s_w, 1], padding=padding), biases),
            0, 1, 0, 1, 1e-8))
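For comparison, my understanding of how tf.nn.batch_normalization is usually driven, with per-batch statistics from tf.nn.moments and a learnable offset/scale, is roughly the sketch below; in the code above I instead pass the constants 0, 1, 0, 1. Whether that difference matters is part of what I am asking.

def batch_norm(x, name, eps=1e-8):
    # Sketch only: normalize with the statistics of the current batch and a
    # learnable offset (beta) / scale (gamma). A full implementation would also
    # track moving averages for use at test time; that part is omitted here.
    with tf.variable_scope(name):
        channels = x.get_shape().as_list()[-1]
        beta = tf.get_variable('beta', [channels], initializer=tf.zeros_initializer())
        gamma = tf.get_variable('gamma', [channels], initializer=tf.ones_initializer())
        mean, variance = tf.nn.moments(x, axes=[0, 1, 2])
        return tf.nn.batch_normalization(x, mean, variance, beta, gamma, eps)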
Definition of the pointwise convolution
def pointwise_conv(batch_input, c_i, c_o, s_h, s_w, padding, weight_decay, name, variance=0.01, bias=0.0):
    # 1x1 (pointwise) convolution + bias + batch normalization + ReLU
    # (same constant batch-normalization parameters as in depthwise_conv)
    with tf.variable_scope(name):
        weights = _variable_with_weight_decay('weight', [1, 1, c_i, c_o],
                                              initializer=tf.random_normal_initializer(0.0, variance),
                                              weight_decay=weight_decay)
        biases = _variable_on_device('bias', [c_o], initializer=tf.constant_initializer(bias))
        return tf.nn.relu(tf.nn.batch_normalization(
            tf.nn.bias_add(tf.nn.conv2d(batch_input, weights, [1, s_h, s_w, 1], padding=padding), biases),
            0, 1, 0, 1, 1e-8))
Definition of average pooling
def avg_pool(batch_input, kernel_size, stride, name):
    with tf.variable_scope(name):
        return tf.nn.avg_pool(batch_input,
                              ksize=[1, kernel_size, kernel_size, 1],
                              strides=[1, stride, stride, 1],
                              padding='VALID')
Definition of the fully connected layer
def output(batch_input, c_i, c_o, weight_decay, name, variance=0.01, bias=0.0):
    with tf.variable_scope(name):
        weights = _variable_with_weight_decay('weight', [c_i, c_o],
                                              initializer=tf.random_normal_initializer(0.0, variance),
                                              weight_decay=weight_decay)
        biases = _variable_on_device('bias', [c_o], initializer=tf.constant_initializer(bias))
        return tf.nn.bias_add(tf.matmul(batch_input, weights), biases)
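This layer returns raw logits with no softmax applied. To make the setup concrete, a standard way to turn such logits into a loss in TF 1.x is the sketch below (placeholder names; not necessarily identical to my training script):

labels = tf.placeholder(tf.int64, [None])
# `logits` stands for the (batch, 1000) output of the model function defined next.
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))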
Definition of the model
def separable(inputs, weight_decay):
    conv1_1   = conv(inputs, c_i=3, c_o=32, k_h=3, k_w=3, s_h=2, s_w=2, padding='SAME', weight_decay=weight_decay, name='conv1_1')
    conv2_1_1 = depthwise_conv(conv1_1, c_i=32, k_h=3, k_w=3, s_h=1, s_w=1, padding='SAME', weight_decay=None, name='conv2_1_1')
    conv2_1_2 = pointwise_conv(conv2_1_1, c_i=32, c_o=64, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv2_1_2')
    conv2_2_1 = depthwise_conv(conv2_1_2, c_i=64, k_h=3, k_w=3, s_h=2, s_w=2, padding='SAME', weight_decay=None, name='conv2_2_1')
    conv2_2_2 = pointwise_conv(conv2_2_1, c_i=64, c_o=128, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv2_2_2')
    conv3_1_1 = depthwise_conv(conv2_2_2, c_i=128, k_h=3, k_w=3, s_h=1, s_w=1, padding='SAME', weight_decay=None, name='conv3_1_1')
    conv3_1_2 = pointwise_conv(conv3_1_1, c_i=128, c_o=128, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv3_1_2')
    conv3_2_1 = depthwise_conv(conv3_1_2, c_i=128, k_h=3, k_w=3, s_h=2, s_w=2, padding='SAME', weight_decay=None, name='conv3_2_1')
    conv3_2_2 = pointwise_conv(conv3_2_1, c_i=128, c_o=256, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv3_2_2')
    conv4_1_1 = depthwise_conv(conv3_2_2, c_i=256, k_h=3, k_w=3, s_h=1, s_w=1, padding='SAME', weight_decay=None, name='conv4_1_1')
    conv4_1_2 = pointwise_conv(conv4_1_1, c_i=256, c_o=256, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv4_1_2')
    conv4_2_1 = depthwise_conv(conv4_1_2, c_i=256, k_h=3, k_w=3, s_h=2, s_w=2, padding='SAME', weight_decay=None, name='conv4_2_1')
    conv4_2_2 = pointwise_conv(conv4_2_1, c_i=256, c_o=512, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv4_2_2')
    conv5_1_1 = depthwise_conv(conv4_2_2, c_i=512, k_h=3, k_w=3, s_h=1, s_w=1, padding='SAME', weight_decay=None, name='conv5_1_1')
    conv5_1_2 = pointwise_conv(conv5_1_1, c_i=512, c_o=512, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv5_1_2')
    conv5_2_1 = depthwise_conv(conv5_1_2, c_i=512, k_h=3, k_w=3, s_h=1, s_w=1, padding='SAME', weight_decay=None, name='conv5_2_1')
    conv5_2_2 = pointwise_conv(conv5_2_1, c_i=512, c_o=512, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv5_2_2')
    conv5_3_1 = depthwise_conv(conv5_2_2, c_i=512, k_h=3, k_w=3, s_h=1, s_w=1, padding='SAME', weight_decay=None, name='conv5_3_1')
    conv5_3_2 = pointwise_conv(conv5_3_1, c_i=512, c_o=512, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv5_3_2')
    conv5_4_1 = depthwise_conv(conv5_3_2, c_i=512, k_h=3, k_w=3, s_h=1, s_w=1, padding='SAME', weight_decay=None, name='conv5_4_1')
    conv5_4_2 = pointwise_conv(conv5_4_1, c_i=512, c_o=512, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv5_4_2')
    conv5_5_1 = depthwise_conv(conv5_4_2, c_i=512, k_h=3, k_w=3, s_h=1, s_w=1, padding='SAME', weight_decay=None, name='conv5_5_1')
    conv5_5_2 = pointwise_conv(conv5_5_1, c_i=512, c_o=512, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv5_5_2')
    conv5_6_1 = depthwise_conv(conv5_5_2, c_i=512, k_h=3, k_w=3, s_h=2, s_w=2, padding='SAME', weight_decay=None, name='conv5_6_1')
    conv5_6_2 = pointwise_conv(conv5_6_1, c_i=512, c_o=1024, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv5_6_2')
    conv6_1_1 = depthwise_conv(conv5_6_2, c_i=1024, k_h=3, k_w=3, s_h=1, s_w=1, padding='SAME', weight_decay=None, name='conv6_1_1')
    conv6_1_2 = pointwise_conv(conv6_1_1, c_i=1024, c_o=1024, s_h=1, s_w=1, padding='SAME', weight_decay=weight_decay, name='conv6_1_2')
    pool6 = avg_pool(conv6_1_2, kernel_size=7, stride=1, name='pool6')
    flat = tf.reshape(pool6, [-1, 1 * 1 * 1024])
    fc1 = output(flat, c_i=1 * 1 * 1024, c_o=1000, weight_decay=weight_decay, name='fc1', variance=0.01, bias=0.0)
    return fc1
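As a quick structural sanity check (a rough sketch reusing the functions above), building the graph with a dummy 224x224x3 input should give logits of shape (?, 1000), since the five stride-2 stages reduce 224 down to 7 before the 7x7 average pool:

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits = separable(inputs, weight_decay=0.0)  # weight_decay value here is a placeholder
print(logits.get_shape())  # expecting (?, 1000)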
What I have tried
・I implemented VGG16 and trained it under the same conditions, and it learned fine, so the bug does not appear to be in the loss or gradient computation.
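・Another check I am considering, to confirm that gradients actually reach the depthwise kernels (a rough sketch, assuming a `loss` tensor like the one sketched after the fully connected layer definition):

# Rough sketch of a gradient-flow check.
grads = tf.gradients(loss, tf.trainable_variables())
for var, grad in zip(tf.trainable_variables(), grads):
    if grad is None:
        print('no gradient flows into', var.name)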
Additional information (framework / tool versions, etc.)
The TensorFlow version is 1.14.
I realize the source code is fragmentary and may be hard to follow; I will add more details as needed in response to replies.