karasでVAEを実装する際に出るAttributeError: 'tuple' object has no attribute 'ndim'について

現状

kerasを用いてオリジナルのデータセットでVAEを実装しようとしています。
kerasのサンプル内のVAEを改造し、MNISTデータではなく、128*128、グレースケールのラベルなしデータセットを用いて試してみたところ以下のようなエラーが出ました。

発生している問題・エラーメッセージ

Traceback (most recent call last):
  File "MyVAE2.py", line 239, in <module>
    plot_results(models, data, batch_size=batch_size, model_name="vae_cnn")
  File "MyVAE2.py", line 68, in plot_results
    batch_size=batch_size)
  File "/home/tetsuro/.virtualenvs/Test/lib/python3.5/site-packages/keras/engine/training.py", line 1149, in predict
    x, _, _ = self._standardize_user_data(x)
  File "/home/tetsuro/.virtualenvs/Test/lib/python3.5/site-packages/keras/engine/training.py", line 751, in _standardize_user_data
    exception_prefix='input')
  File "/home/tetsuro/.virtualenvs/Test/lib/python3.5/site-packages/keras/engine/training_utils.py", line 92, in standardize_input_data
    data = [standardize_single_array(x) for x in data]
  File "/home/tetsuro/.virtualenvs/Test/lib/python3.5/site-packages/keras/engine/training_utils.py", line 92, in <listcomp>
    data = [standardize_single_array(x) for x in data]
  File "/home/tetsuro/.virtualenvs/Test/lib/python3.5/site-packages/keras/engine/training_utils.py", line 27, in standardize_single_array
    elif x.ndim == 1:
AttributeError: 'tuple' object has no attribute 'ndim'

該当のソースコード

python3
1from __future__ import absolute_import
2from __future__ import division
3from __future__ import print_function
4
5from keras.layers import Dense, Input
6from keras.layers import Conv2D, Flatten, Lambda
7from keras.layers import Reshape, Conv2DTranspose
8from keras.models import Model
9from keras.losses import mse, binary_crossentropy
10
11from keras.utils import plot_model
12from keras import backend as K
13from keras.utils import plot_model, np_utils
14import numpy as np
15import matplotlib.pyplot as plt
16import argparse
17import os
18import re
19
20from keras.preprocessing.image import array_to_img, img_to_array, load_img  
21from sklearn.model_selection import train_test_split
22
23
24#画像取ってくる
25def list_pictures(directory, ext='jpg|jpeg|bmp|png|ppm'):
26    return [os.path.join(root, f)
27            for root, _, files in os.walk(directory) for f in files
28            if re.match(r'([\w]+.(?:' + ext + '))', f.lower())]
29
30
31#潜在変数の定義　潜在空間の正規分布から点 z をランダムに１点サンプリングする
32def sampling(args):
33    """Reparameterization trick by sampling fr an isotropic unit Gaussian.
34    # Arguments:
35        args (tensor): mean and log of variance of Q(z|X)
36    # Returns:
37        z (tensor): sampled latent vector
38    """
39
40    z_mean, z_log_var = args
41    batch = K.shape(z_mean)[0]
42    dim = K.int_shape(z_mean)[1]
43    # by default, random_normal has mean=0 and std=1.0
44    epsilon = K.random_normal(shape=(batch, dim))
45    return z_mean + K.exp(0.5 * z_log_var) * epsilon
46
47
48def plot_results(models,
49                 data,
50                 batch_size=100,
51                 model_name="vae_mnist"):
52    """Plots labels and MNIST digits as function of 2-dim latent vector
53    # Arguments:
54        models (tuple): encoder and decoder models
55        data (tuple): test data and label
56        batch_size (int): prediction batch size
57        model_name (string): which model is using this function
58    """
59
60    encoder, decoder = models
61    x_test = data
62    os.makedirs(model_name, exist_ok=True)
63
64    filename = os.path.join(model_name, "vae_mean.png")
65    # ２次元マップにドットで分布を表示
66    # display a 2D plot of the digit classes in the latent space
67    z_mean, _, _ = encoder.predict(x_test,
68                                   batch_size=batch_size)
69    plt.figure(figsize=(12, 10))
70    plt.scatter(z_mean[:, 0], z_mean[:, 1])
71    #plt.colorbar()
72    plt.xlabel("z[0]")
73    plt.ylabel("z[1]")
74    plt.savefig(filename)
75    #plt.show()
76
77    filename = os.path.join(model_name, "digits_over_latent.png")
78    # display a 30x30 2D manifold of digits
79    n = 15
80    digit_size = 128
81    figure = np.zeros((digit_size * n, digit_size * n))
82    # linearly spaced coordinates corresponding to the 2D plot
83    # of digit classes in the latent space
84    grid_x = np.linspace(-4, 4, n)
85    grid_y = np.linspace(-4, 4, n)[::-1]
86
87    for i, yi in enumerate(grid_y):
88        for j, xi in enumerate(grid_x):
89            z_sample = np.array([[xi, yi]])
90            x_decoded = decoder.predict(z_sample)
91            digit = x_decoded[0].reshape(digit_size, digit_size)
92            figure[i * digit_size: (i + 1) * digit_size,
93                   j * digit_size: (j + 1) * digit_size] = digit
94
95    plt.figure(figsize=(10, 10))
96    start_range = digit_size // 2
97    end_range = n * digit_size + start_range + 1
98    pixel_range = np.arange(start_range, end_range, digit_size)
99    sample_range_x = np.round(grid_x, 1)
100    sample_range_y = np.round(grid_y, 1)
101    plt.xticks(pixel_range, sample_range_x)
102    plt.yticks(pixel_range, sample_range_y)
103    plt.xlabel("z[0]")
104    plt.ylabel("z[1]")
105    plt.imshow(figure, cmap='Greys_r')
106    plt.savefig(filename)
107    plt.show()
108
109
110x = []
111y = []
112
113for picture in list_pictures(r'valdata2/'):
114    img = img_to_array(load_img(picture, color_mode="grayscale",target_size=(128, 128)))
115
116    x.append(img)
117    y.append(0)
118
119x = np.asarray(x)
120y = np.asarray(y)
121print(y)
122x = x.astype('float32')
123x = x / 255.0
124print(x[0])
125y = np_utils.to_categorical(y, 1)
126
127#sklearnのtrain/test split
128x_train, x_test, y_train, y_test = train_test_split(
129    x, y, test_size=0.2, random_state=50)
130original_dim = 12288
131image_size = 128
132
133
134# network parameters
135input_shape = (image_size, image_size, 1)
136batch_size = 100
137kernel_size = 3
138filters = 64
139latent_dim = 2
140epochs = 50
141
142# VAE model = encoder + decoder
143# build encoder model
144#画像サイズとチャネル数のインプット
145inputs = Input(shape=input_shape, name='encoder_input')
146print(inputs.shape)
147x = inputs
148for i in range(3):
149    filters *= 2
150    x = Conv2D(filters=filters,
151               kernel_size=kernel_size,
152               activation='relu',
153               strides=2,
154               padding ='same')(x)
155
156# shape info needed to build decoder model
157shape = K.int_shape(x)
158
159# generate latent vector Q(z|X)
160x = Flatten()(x)
161x = Dense(32, activation='relu')(x)
162z_mean = Dense(latent_dim, name='z_mean')(x)
163z_log_var = Dense(latent_dim, name='z_log_var')(x)
164
165# use reparameterization trick to push the sampling out as input
166# note that "output_shape" isn't necessary with the TensorFlow backend
167z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
168
169# instantiate encoder model
170encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
171encoder.summary()
172plot_model(encoder, to_file='vae_cnn_encoder.png', show_shapes=True)
173
174# build decoder model
175latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
176x = Dense(shape[1] * shape[2] * shape[3], activation='relu')(latent_inputs)
177x = Reshape((shape[1], shape[2], shape[3]))(x)
178
179for i in range(3):
180    x = Conv2DTranspose(filters=filters,
181                        kernel_size=kernel_size,
182                        activation='relu',
183                        strides=2,
184                        padding='same')(x)
185    filters //= 2
186
187outputs = Conv2DTranspose(filters=1,
188                          kernel_size=kernel_size,
189                          activation='sigmoid',
190                          padding='same',
191                          name='decoder_output')(x)
192
193# instantiate decoder model
194decoder = Model(latent_inputs, outputs, name='decoder')
195decoder.summary()
196plot_model(decoder, to_file='vae_cnn_decoder.png', show_shapes=True)
197
198# instantiate VAE model
199outputs = decoder(encoder(inputs)[2])
200vae = Model(inputs, outputs, name='vae')
201
202if __name__ == '__main__':
203    parser = argparse.ArgumentParser()
204    help_ = "Load h5 model trained weights"
205    parser.add_argument("-w", "--weights", help=help_)
206    help_ = "Use mse loss instead of binary cross entropy (default)"
207    parser.add_argument("-m", "--mse", help=help_, action='store_true')
208    args = parser.parse_args()
209    models = (encoder, decoder)
210    data = (x_test, y_test)
211
212    # VAE loss = mse_loss or xent_loss + kl_loss
213    if args.mse:
214        reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs))
215    else:
216        reconstruction_loss = binary_crossentropy(K.flatten(inputs),
217                                                  K.flatten(outputs))
218
219    reconstruction_loss *= image_size * image_size
220    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
221    kl_loss = K.sum(kl_loss, axis=-1)
222    kl_loss *= -0.5
223    vae_loss = K.mean(reconstruction_loss + kl_loss)
224    vae.add_loss(vae_loss)
225    vae.compile(optimizer='rmsprop')
226    vae.summary()
227    plot_model(vae, to_file='vae_cnn.png', show_shapes=True)
228
229    if args.weights:
230        vae.load_weights(args.weights)
231    else:
232        # train the autoencoder
233        vae.fit(x_train,
234                epochs=epochs,
235                batch_size=batch_size,
236                validation_data=(x_test, None))
237        vae.save_weights('vae_cnn_mnist.h5')
238
239    plot_results(models, data, batch_size=batch_size, model_name="vae_cnn")
240