chainerでのin_types[0].shape[1] == in_types[1].shape[1]エラーについて

###前提・実現したいこと
chainerで作成したモデルを読み込みたいのですが、
Expect: in_types[0].shape[1] == in_types[1].shape[1]
Actual: 1800 != 300
というエラーが出て読み込めませんでした。

###発生している問題・エラーメッセージ

Traceback (most recent call last):
  File "eval_sc-cnn.py", line 192, in <module>
    main()
  File "eval_sc-cnn.py", line 189, in main
    eval(model, args)
  File "eval_sc-cnn.py", line 142, in eval
    y = model(x, False)
  File "/home/chainer-cnn/CNNSC.py", line 56, in __call__
    h_l1 = F.dropout(F.tanh(self[self.cnv_num+0](concat)), ratio=0.5, train=train)
  File "/home/anaconda3/envs/py2.7/lib/python2.7/site-packages/chainer/links/connection/linear.py", line 129, in __call__
    return linear.linear(x, self.W, self.b)
  File "/home/anaconda3/envs/py2.7/lib/python2.7/site-packages/chainer/functions/connection/linear.py", line 118, in linear
    y, = LinearFunction().apply(args)
  File "/home/anaconda3/envs/py2.7/lib/python2.7/site-packages/chainer/function_node.py", line 230, in apply
    self._check_data_type_forward(in_data)
  File "/home/anaconda3/envs/py2.7/lib/python2.7/site-packages/chainer/function_node.py", line 298, in _check_data_type_forward
    self.check_type_forward(in_type)
  File "/home/anaconda3/envs/py2.7/lib/python2.7/site-packages/chainer/functions/connection/linear.py", line 20, in check_type_forward
    x_type.shape[1] == w_type.shape[1],
  File "/home/anaconda3/envs/py2.7/lib/python2.7/site-packages/chainer/utils/type_check.py", line 524, in expect
    expr.expect()
  File "/home/anaconda3/envs/py2.7/lib/python2.7/site-packages/chainer/utils/type_check.py", line 482, in expect
    '{0} {1} {2}'.format(left, self.inv, right))
chainer.utils.type_check.InvalidType:
Invalid operation is performed in: LinearFunction (Forward)

Expect: in_types[0].shape[1] == in_types[1].shape[1]
Actual: 1800 != 300

###該当のソースコード

eval_sccnn.py

def eval(model, args):
    # Prepare dataset
    dataset, height, width = util.load_data(args.data)
    #dataset, height, width = util.load_data_with_rand_vec(args.data)

    print 'height (max length of sentences):', height
    print 'width (size of wordembedding vecteor ):', width
    dataset['source'] = dataset['source'].astype(np.float32) #特徴量
    dataset['target'] = dataset['target'].astype(np.int32) #ラベル

#    x_train, x_test, y_train, y_test = train_test_split(dataset['source'], dataset['target'], test_size=1.0)
    x_test=dataset['source']
    y_test =dataset['target']
    N_test = y_test.size         # test data size
#    N = len(x_train)
#    print N             # train data size
#    in_units = y_test.shape[1]  # 入力層のユニット数 (語彙数)

    # (nsample, channel, height, width) の4次元テンソルに変換
    input_channel = 1
#    x_train = x_train.reshape(len(x_train), input_channel, height, width)
    x_test  = x_test.reshape(len(x_test), input_channel, height, width)


    n_label = 2 # ラベル数
    filter_height = [3,4,5] # フィルタの高さ
    baseline_filter_height = [3]
    filter_width  = width # フィルタの幅 (embeddingの次元数)
    output_channel = 100
    decay = 0.0001 # 重み減衰
    grad_clip = 3  # gradient norm threshold to clip
    max_sentence_len = height # max length of sentences    
    filter_width  = width # フィルタの幅 (embeddingの次元数)
    max_sentence_len = height # max length of sentences

    # モデルの定義

    # Setup optimizer
    optimizer = optimizers.AdaDelta()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(grad_clip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(decay))

    #GPUを使うかどうか
    if args.gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(args.gpu).use()
        model.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy #args.gpu <= 0: use cpu, otherwise: use gpu

    N = len(x_test)
    # Learning loop
    for epoch in range(1):
        # evaluation
        sum_test_loss     = 0.0
        sum_test_accuracy = 0.0
        for i in six.moves.range(0, N_test, N_test):
            # all test data
            x = chainer.Variable(xp.asarray(x_test[i:i + N_test]))
            t = chainer.Variable(xp.asarray(y_test[i:i + N_test]))
#            print x
            y = model(x, False)
            print y
            print t
            print len(t)
            loss = F.softmax_cross_entropy(y, t) # 損失の計算
            accuracy = F.accuracy(y, t) # 正解率の計算

            sum_test_loss += loss.data * len(t)
            sum_test_accuracy += accuracy.data * len(t)
#        print(accuracy)
        print(' test mean loss={}, accuracy={}'.format(sum_test_loss / N, sum_test_accuracy / N)) #平均誤差

        sys.stdout.flush()

    return model

def main():
    parser = get_parser()
    args = parser.parse_args()
#    model, optimizer = train(args)
    
 #   file_path='sc_cnn.model'
    input_channel = 1
    n_label = 2 # ラベル数
    filter_height = [3,4,5] # フィルタの高さ
    baseline_filter_height = [3]
    filter_width  = 200 # フィルタの幅 (embeddingの次元数)
    output_channel = 100
    decay = 0.0001 # 重み減衰
    grad_clip = 3  # gradient norm threshold to clip
    max_sentence_len = 227 # max length of sentences
    #model = CNNSC(input_channel,
    #                  output_channel,
    #                  filter_height,
    #                 filter_width,
    #                 n_label,
    #                 max_sentence_len)
    model = CNNSC(input_channel,
                  output_channel,
                  filter_height,
                  filter_width,
                  n_label,
                  max_sentence_len)
    filename=args.model_name
#    serializers.load_npz(args.model_name, model)
    print filename
    serializers.load_npz(filename, model)
    eval(model, args)

if __name__ == "__main__":
    main()

CNNSC.py

#encoding: utf8


from chainer import ChainList
import chainer.functions as F
import chainer.links as L


# リンク数を可変にしたいのでChainListを使用する
class CNNSC(ChainList):
    def __init__(self,
                 input_channel,
                 output_channel,
                 filter_height,
                 filter_width,
                 n_label,
                 max_sentence_len):
        # フィルター数、使用されたフィルター高さ、最大文長は後から使う
        self.cnv_num = len(filter_height)
        self.filter_height = filter_height
        self.max_sentence_len = max_sentence_len
        
        # Convolution層用のLinkをフィルター毎に追加
        # Convolution2D(　入力チャンネル数, 出力チャンネル数（形毎のフィルターの数）, フィルターの形（タプル形式で）, パディングサイズ )
        link_list = [L.Convolution2D(input_channel, output_channel, (i, filter_width), pad=0) for i in filter_height]
        # Dropoff用のLinkを追加
        link_list += [L.Linear(output_channel * self.cnv_num, output_channel * self.cnv_num)]
        # 出力層へのLinkを追加
        link_list += [L.Linear(output_channel * self.cnv_num, n_label)]

        # ここまで定義したLinkのリストを用いてクラスを初期化する
        super(CNNSC, self).__init__(*link_list)
        
        # ちなみに
        # self.add_link(link)
        # みたいにリンクを列挙して順々に追加していってもOKです

    def __call__(self, x, train=True):
        # フィルタを通した中間層を準備
        h_conv = [None for _ in self.filter_height]
        h_pool = [None for _ in self.filter_height]
        
        # フィルタ形毎にループを回す
        for i, filter_size in enumerate(self.filter_height):
            # Convolition層を通す
            h_conv[i] = F.relu(self[i](x))
            # Pooling層を通す
            h_pool[i] = F.max_pooling_2d(h_conv[i], (self.max_sentence_len+1-filter_size))
        # Convolution+Poolingを行った結果を結合する
        concat = F.concat(h_pool, axis=2)
        # 結合した結果に対してDropoutをかける
        h_l1 = F.dropout(F.tanh(self[self.cnv_num+0](concat)), ratio=0.5, train=train)
        # Dropoutの結果を出力層まで圧縮する
        y = self[self.cnv_num+1](h_l1)

        return y

if __name__ == '__main__':
    model = L.Classifier(CNNSC(input_channel=1,
                           output_channel=100,
                           filter_height=[3,4,5],
                           filter_width=20,
                           n_label=2,
                           max_sentence_len=20))
    print('done process')

###試したこと
データの次元数の問題なのかなと考えましたが、イマイチよく分かりませんでした。

###補足情報(言語/FW/ツール等のバージョンなど)
より詳細な情報

行動規範の内容に同意します

回答1件

ベストアンサー

このエラーメッセージの示すとおりです。
Convolution2D → Linear の部分の入力データのサイズが1800と300で食い違っていることが原因になります。
畳み込み層のパラメータを決定しているfilter_width か output_channel のあたりの設定を見直して、適切なデータ数になるように調整する必要があります。

python
1Expect: in_types[0].shape[1] == in_types[1].shape[1]
2Actual: 1800 != 300

投稿2018/01/21 09:53

diningyo

総合スコア379

hanpanai-tumi

2018/01/21 13:14

ご回答ありがとうございます。 concat = F.concat(h_pool, axis=2) h_l1 = F.dropout(F.tanh(self[self.cnv_num+0](concat)), ratio=0.5, train=train) の部分を concat = F.concat(h_pool, axis=2) concat = chainer.functions.reshape(concat, (concat.data.shape[0], -1, 1)) h_l1 = F.dropout(F.tanh(self[self.cnv_num+0](concat)), ratio=0.5, train=train) のように単にreshapeを挟むだけではだめなのでしょうか。