前提・実現したいこと
2層のGRU-VAEをkerasで実装しています.入力データが(データ数,タイムステップ,特徴量)=(11414,615,13)なのですが,データサイズが大きすぎてGPUのメモリに収まりきっていない状態です.そこでこの方(https://qiita.com/simonritchie/items/d7168d1d9cea9ceb6af7)のサイトを参考に一旦訓練データをmemmapに保存し,保存したファイルX_train_data.npyからデータを読み込んでkerasのfit_generatorで解決できるとのことなので,実装してみたのですが,エラーが出てしまいます.何が問題なのでしょうか.またこの方法以外で大量のデータを扱う方法はありますか?どなたか回答よろしくお願いいたします.
GRUVAE
1import math 2import os 3import subprocess 4import keras 5import numpy as np 6from keras import backend as K 7from keras.models import Sequential, Model 8from keras.layers import Input,InputLayer, CuDNNGRU, RepeatVector,TimeDistributed 9from keras.layers.core import Flatten, Dense, Dropout, Lambda 10from keras.optimizers import SGD, RMSprop, Adam 11from keras import objectives 12from keras.utils import Sequence 13 14def gru_vae(): 15 16 LATENT_DIM1=200 #潜在次元1 17 LATENT_DIM2=50 #潜在次元2 18 CODING_DIM=50 #コーディング層 19 NUM_TIMESTEPS=615 #MFCCの最大タイムステップ 20 NUM_INPUT_DIM=13 #MFCCの次元 21 22 #コーディング層からのサンプリング,reparameterization trick 23 def sampling(args): 24 z_mean, z_log_var = args 25 batch = K.shape(z_mean)[0] 26 dim = K.int_shape(z_mean)[1] 27 # 平均0 ,標準偏差1のランダム誤差 28 epsilon = K.random_normal(shape=(batch, dim),mean=0., stddev=1.) 29 return z_mean + K.exp(0.5 * z_log_var) * epsilon 30 31 # エンコーダの定義 32 #エンコーダの入力層の定義 33 inputs = Input(shape=(NUM_TIMESTEPS, NUM_INPUT_DIM)) 34 #入力から潜在次元数に次元圧縮する. 35 #2層のRNN層 36 x = CuDNNGRU(LATENT_DIM1,return_sequences=True)(inputs) 37 x = CuDNNGRU(LATENT_DIM2)(x) 38 #潜在次元からコーディング層の確率変数次元へ 39 z_mean = Dense(CODING_DIM, name='z_mean')(x) # コーディング層の平均を出力 40 z_log_var = Dense(CODING_DIM, name='z_log_var')(x) # コーディング層の標準偏差を出力 41 42 # reparameterization trick 43 z = Lambda(sampling, output_shape=(CODING_DIM,), name='z')([z_mean, z_log_var]) # コーディングの分布からサンプリング 44 #エンコーダ内の層をインスタンス化する. 45 encoder = Model(inputs, [z_mean, z_log_var, z], name="encoder") 46 # encoder部分は入力を受けて平均、分散、そこからランダムサンプリングしたものの3つを返す 47 48 # デコーダの定義 49 #デコーダの入力層はコーディング層からの出力zをタイムステップ分生成する. 50 latent_inputs = RepeatVector(NUM_TIMESTEPS)(z) 51 #コーディング次元から潜在次元へデコーディング 52 x = CuDNNGRU(LATENT_DIM2,return_sequences=True)(latent_inputs) 53 x = CuDNNGRU(LATENT_DIM1,return_sequences=True)(x) 54 outputs = CuDNNGRU(NUM_INPUT_DIM,return_sequences=True)(x) 55 56 57 # デコーダーとエンコーダーの結合 58 vae = Model(inputs, outputs, name='gru_vae') 59 60 # 損失関数をこのモデルに加える 61 def loss(inputs, outputs): 62 z_mean, z_log_var, _ = encoder(inputs) 63 rec_loss = objectives.mse(K.flatten(inputs), K.flatten(outputs)) 64 #rec_loss *= NUM_INPUT_DIM*NUM_TIMESTEPS 65 kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)) 66 BETA=1 67 total_loss = rec_loss + BETA*kl_loss 68 return total_loss 69 70 vae.add_loss(loss(inputs, outputs)) 71 print("gru_vaeの構成") 72 vae.summary() 73 return vae
class LearningSequence(Sequence): def __init__(self, batch_size): DATA_ROW_NUM = 11414 self.batch_size = batch_size self.X_train = np.memmap('X_train_data.npy', dtype='float32', mode='r', shape=(DATA_ROW_NUM, 615, 13)) self.y_train = np.memmap('y_train_data.npy', dtype='float32', mode='r', shape=(DATA_ROW_NUM)) self.length = math.ceil(DATA_ROW_NUM / batch_size) def __getitem__(self, idx): start_idx = idx * self.batch_size last_idx = start_idx + self.batch_size X = self.X_train[start_idx:last_idx] y = self.y_train[start_idx:last_idx] return X,y def __len__(self): return self.length def on_epoch_end(self): pass
#GRU-VAEの学習 BATCH_SIZE=32 EPOCH_NUM=10 gru_vae = gru_vae() optimizer=Adam(lr=0.001,beta_1=0.99,beta_2=0.99) gru_vae.compile(optimizer=optimizer) learning_sequence=LearningSequence(batch_size=BATCH_SIZE) gru_vae.fit_generator(generator=learning_sequence, epochs=EPOCH_NUM)
発生している問題・エラーメッセージ
gru_vaeの構成 __________________________________________________________________________________________________ Layer (type) Output Shape Param # Connected to ================================================================================================== input_1 (InputLayer) (None, 615, 13) 0 __________________________________________________________________________________________________ cu_dnngru_1 (CuDNNGRU) (None, 615, 200) 129000 input_1[0][0] __________________________________________________________________________________________________ cu_dnngru_2 (CuDNNGRU) (None, 50) 37800 cu_dnngru_1[0][0] __________________________________________________________________________________________________ z_mean (Dense) (None, 50) 2550 cu_dnngru_2[0][0] __________________________________________________________________________________________________ z_log_var (Dense) (None, 50) 2550 cu_dnngru_2[0][0] __________________________________________________________________________________________________ z (Lambda) (None, 50) 0 z_mean[0][0] z_log_var[0][0] __________________________________________________________________________________________________ repeat_vector_1 (RepeatVector) (None, 615, 50) 0 z[0][0] __________________________________________________________________________________________________ cu_dnngru_3 (CuDNNGRU) (None, 615, 50) 15300 repeat_vector_1[0][0] __________________________________________________________________________________________________ cu_dnngru_4 (CuDNNGRU) (None, 615, 200) 151200 cu_dnngru_3[0][0] __________________________________________________________________________________________________ cu_dnngru_5 (CuDNNGRU) (None, 615, 13) 8385 cu_dnngru_4[0][0] ================================================================================================== Total params: 346,785 Trainable params: 346,785 Non-trainable params: 0 __________________________________________________________________________________________________ Epoch 1/10 --------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-3-4f57106607b2> in <module> 15 #validation_split=0.15) 16 ---> 17 gru_vae.fit_generator(generator=learning_sequence, epochs=EPOCH_NUM) ~\Anaconda3\envs\master2\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs) 89 warnings.warn('Update your `' + object_name + '` call to the ' + 90 'Keras 2 API: ' + signature, stacklevel=2) ---> 91 return func(*args, **kwargs) 92 wrapper._original_function = func 93 return wrapper ~\Anaconda3\envs\master2\lib\site-packages\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch) 1416 use_multiprocessing=use_multiprocessing, 1417 shuffle=shuffle, -> 1418 initial_epoch=initial_epoch) 1419 1420 @interfaces.legacy_generator_methods_support ~\Anaconda3\envs\master2\lib\site-packages\keras\engine\training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch) 215 outs = model.train_on_batch(x, y, 216 sample_weight=sample_weight, --> 217 class_weight=class_weight) 218 219 outs = to_list(outs) ~\Anaconda3\envs\master2\lib\site-packages\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight) 1209 x, y, 1210 sample_weight=sample_weight, -> 1211 class_weight=class_weight) 1212 if self._uses_dynamic_learning_phase(): 1213 ins = x + y + sample_weights + [1.] ~\Anaconda3\envs\master2\lib\site-packages\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size) 787 feed_output_shapes, 788 check_batch_axis=False, # Don't enforce the batch size. --> 789 exception_prefix='target') 790 791 # Generate sample-wise weight values given the `sample_weight` and ~\Anaconda3\envs\master2\lib\site-packages\keras\engine\training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix) 61 raise ValueError('Error when checking model ' + 62 exception_prefix + ': ' ---> 63 'expected no data, but got:', data) 64 return [] 65 if data is None: ValueError: ('Error when checking model target: expected no data, but got:', memmap([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32))
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。