ディープラーニングで音声分離をしたいが配列の次元が違うというエラーが出てしまいます

前提・実現したいこと

音声分離をやりたくて、まず下記サイトの再現をしようとしています。

https://qiita.com/cvusk/items/61cdbce80785eaf28349

発生している問題・エラーメッセージ

ファイルのパス(と現在行っている処理を示す用のprint)以外はコピペしているにも関わらず

Error when checking target: expected activation_19 to have 2 dimensions, but got array with shape (500, 50, 50)

というエラーが出てしまいます。

該当のソースコード

python
1
2import keras
3from keras.models import Model
4from keras.layers import Input, Dense, Dropout, Activation
5from keras.layers import Conv2D, GlobalAveragePooling2D
6from keras.layers import BatchNormalization, Add
7from keras.callbacks import EarlyStopping, ModelCheckpoint
8from keras.models import load_model
9
10# dataset files
11print("\n***********************************************")
12print("dataset files")
13train_files = ["esc_melsp_train_raw.npz",
14               "esc_melsp_train_ss.npz",
15               "esc_melsp_train_st.npz",
16               "esc_melsp_train_wn.npz",
17               "esc_melsp_train_com.npz"]
18test_file = "esc_melsp_test.npz"
19
20train_num = 1500
21test_num = 500
22
23# define dataset placeholders
24print("\n***********************************************")
25print("define dataset placeholders")
26x_train = np.zeros(freq * time * train_num * len(train_files)).reshape(train_num * len(train_files), freq, time)
27y_train = np.zeros(train_num * len(train_files))
28
29# load dataset
30print("\n***********************************************")
31print("load dataset")
32for i in range(len(train_files)):
33    data = np.load(train_files[i])
34    x_train[i * train_num:(i + 1) * train_num] = data["x"]
35    y_train[i * train_num:(i + 1) * train_num] = data["y"]
36
37# load test dataset
38print("\n***********************************************")
39print("load test dataset")
40test_data = np.load(test_file)
41x_test = test_data["x"]
42y_test = test_data["y"]
43
44# redefine target data into one hot vector
45print("\n***********************************************")
46print("redefine target data into one hot vector")
47classes = 50
48y_train = keras.utils.to_categorical(y_train, classes)
49y_test = keras.utils.to_categorical(y_test, classes)
50
51# reshape training dataset
52print("\n***********************************************")
53print("reshape training dataset")
54x_train = x_train.reshape(train_num * 5, freq, time, 1)
55x_test = x_test.reshape(test_num, freq, time, 1)
56
57classes = 50
58y_test = keras.utils.to_categorical(y_test, classes)
59x_test = x_test.reshape(test_num, freq, time, 1)
60
61print("x train:{0}\ny train:{1}\nx test:{2}\ny test:{3}".format(x_train.shape,
62                                                                y_train.shape,
63                                                                x_test.shape,
64                                                                y_test.shape))
65
66
67def cba(inputs, filters, kernel_size, strides):
68    x = Conv2D(filters, kernel_size=kernel_size, strides=strides, padding='same')(inputs)
69    x = BatchNormalization()(x)
70    x = Activation("relu")(x)
71    return x
72
73
74# define CNN
75print("\n***********************************************")
76print("define CNN")
77inputs = Input(shape=(x_train.shape[1:]))
78
79x_1 = cba(inputs, filters=32, kernel_size=(1, 8), strides=(1, 2))
80x_1 = cba(x_1, filters=32, kernel_size=(8, 1), strides=(2, 1))
81x_1 = cba(x_1, filters=64, kernel_size=(1, 8), strides=(1, 2))
82x_1 = cba(x_1, filters=64, kernel_size=(8, 1), strides=(2, 1))
83
84x_2 = cba(inputs, filters=32, kernel_size=(1, 16), strides=(1, 2))
85x_2 = cba(x_2, filters=32, kernel_size=(16, 1), strides=(2, 1))
86x_2 = cba(x_2, filters=64, kernel_size=(1, 16), strides=(1, 2))
87x_2 = cba(x_2, filters=64, kernel_size=(16, 1), strides=(2, 1))
88
89x_3 = cba(inputs, filters=32, kernel_size=(1, 32), strides=(1, 2))
90x_3 = cba(x_3, filters=32, kernel_size=(32, 1), strides=(2, 1))
91x_3 = cba(x_3, filters=64, kernel_size=(1, 32), strides=(1, 2))
92x_3 = cba(x_3, filters=64, kernel_size=(32, 1), strides=(2, 1))
93
94x_4 = cba(inputs, filters=32, kernel_size=(1, 64), strides=(1, 2))
95x_4 = cba(x_4, filters=32, kernel_size=(64, 1), strides=(2, 1))
96x_4 = cba(x_4, filters=64, kernel_size=(1, 64), strides=(1, 2))
97x_4 = cba(x_4, filters=64, kernel_size=(64, 1), strides=(2, 1))
98
99x = Add()([x_1, x_2, x_3, x_4])
100
101x = cba(x, filters=128, kernel_size=(1, 16), strides=(1, 2))
102x = cba(x, filters=128, kernel_size=(16, 1), strides=(2, 1))
103
104x = GlobalAveragePooling2D()(x)
105x = Dense(classes)(x)
106x = Activation("softmax")(x)
107
108model = Model(inputs, x)
109
110model.summary()
111
112# initiate Adam optimizer
113print("\n***********************************************")
114print("initiate Adam optimizer")
115opt = keras.optimizers.adam(lr=0.00001, decay=1e-6, amsgrad=True)
116
117# Let's train the model using Adam with amsgrad
118print("\n***********************************************")
119print("Let's train the model using Adam with amsgrad")
120model.compile(loss='categorical_crossentropy',
121              optimizer=opt,
122              metrics=['accuracy'])
123
124# directory for model checkpoints
125print("\n***********************************************")
126print("directory for model checkpoints")
127model_dir = "./models"
128if not os.path.exists(model_dir):
129    os.mkdir(model_dir)
130
131# early stopping and model checkpoint# early
132print("\n***********************************************")
133print("early stopping and model checkpoint# early")
134es_cb = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='auto')
135chkpt = os.path.join(model_dir, 'esc50_.{epoch:02d}_{val_loss:.4f}_{val_acc:.4f}.hdf5')
136cp_cb = ModelCheckpoint(filepath=chkpt, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
137
138# between class data generator
139print("\n***********************************************")
140print("between class data generator")
141
142
143class MixupGenerator():
144    def __init__(self, x_train, y_train, batch_size=16, alpha=0.2, shuffle=True):
145        self.x_train = x_train
146        self.y_train = y_train
147        self.batch_size = batch_size
148        self.alpha = alpha
149        self.shuffle = shuffle
150        self.sample_num = len(x_train)
151
152    def __call__(self):
153        while True:
154            indexes = self.__get_exploration_order()
155            itr_num = int(len(indexes) // (self.batch_size * 2))
156
157            for i in range(itr_num):
158                batch_ids = indexes[i * self.batch_size * 2:(i + 1) * self.batch_size * 2]
159                x, y = self.__data_generation(batch_ids)
160
161                yield x, y
162
163    def __get_exploration_order(self):
164        indexes = np.arange(self.sample_num)
165
166        if self.shuffle:
167            np.random.shuffle(indexes)
168
169        return indexes
170
171    def __data_generation(self, batch_ids):
172        _, h, w, c = self.x_train.shape
173        _, class_num = self.y_train.shape
174        x1 = self.x_train[batch_ids[:self.batch_size]]
175        x2 = self.x_train[batch_ids[self.batch_size:]]
176        y1 = self.y_train[batch_ids[:self.batch_size]]
177        y2 = self.y_train[batch_ids[self.batch_size:]]
178        l = np.random.beta(self.alpha, self.alpha, self.batch_size)
179        x_l = l.reshape(self.batch_size, 1, 1, 1)
180        y_l = l.reshape(self.batch_size, 1)
181
182        x = x1 * x_l + x2 * (1 - x_l)
183        y = y1 * y_l + y2 * (1 - y_l)
184
185        return x, y
186
187
188# train model
189print("\n***********************************************")
190print("train model")
191batch_size = 16
192epochs = 1000
193print("training_generator")
194training_generator = MixupGenerator(x_train, y_train)()
195print("model.fit_generator")
196
197
198
199y_train =
200
201model.fit_generator(generator=training_generator,
202                    steps_per_epoch=x_train.shape[0] // batch_size,
203                    validation_data=(x_test, y_test),
204                    epochs=epochs,
205                    verbose=1,
206                    shuffle=True,
207                    callbacks=[es_cb, cp_cb])
208
209
210
211print("model = load_model")
212model = load_model("./models/esc50_.105_0.8096_0.8200.hdf5")
213
214# evaluation
215print("\n***********************************************")
216print("evaluation")
217evaluation = model.evaluate(x_test, y_test)
218print(evaluation)
219
220print("\n***********************************************")
221print("CNN program finish")
222