機械学習（CycleGAN）のエラー

質問内容

機械学習（GAN）を勉強中の者です．自作データセットでCycleGANを動かそうとした際にエラーが出てしまいました．エラーメッセージから，入力は4次元以上でなければならないということはわかったのですが，そこから何を直せばよいのかがわかりません．何を書けばよいのかわからないので，とりあえずわかっていることを書きました．よろしければご教示ください．

環境

GoogleColaboratory
Python3.6
tensorflow =2.4.0
テストを動かそうとしたときに動かなかったため，keras-contribを追加

発生しているエラーメッセージ

ValueError: Input 0 of layer conv2d_10 is incompatible with the layer: : expected min_ndim=4,
found ndim=3. Full shape received: (None, 512, 512)

試したこと

入力画像がRGBではないためチャネル数を１にしてみたが変化がなかった
載せたコードはいじる前のサンプルが動く状態のものです．

ソースコード

main
1from __future__ import print_function, division
2import scipy
3
4from keras.datasets import mnist
5from keras_contrib.layers.normalization.instancenormalization import InstanceNormalization
6from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Concatenate
7from keras.layers import BatchNormalization, Activation, ZeroPadding2D
8from keras.layers.advanced_activations import LeakyReLU
9from keras.layers.convolutional import UpSampling2D, Conv2D
10from keras.models import Sequential, Model
11from tensorflow.keras.optimizers import Adam
12import datetime
13import matplotlib.pyplot as plt
14import sys
15from data_loader import DataLoader
16import numpy as np
17import os
18
class CycleGAN():
    """Wire up the CycleGAN networks: two generators, two discriminators
    and the combined model used to train the generators.

    NOTE(review): ``build_discriminator`` is called here but is not part of
    this listing; it is assumed to be defined elsewhere on the class.
    """

    def __init__(self):
        """Create the data loader and build/compile every model."""
        # Shape of the images every network consumes: (rows, cols, channels)
        self.img_rows = 512
        self.img_cols = 512
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)

        # Data loader for the dataset stored under ./datasets/<dataset_name>
        self.dataset_name = 'eye'
        self.data_loader = DataLoader(dataset_name=self.dataset_name,
                                      img_res=(self.img_rows, self.img_cols))

        # Output shape of D (PatchGAN): image height divided by 2**4
        patch = int(self.img_rows / 2**4)
        self.disc_patch = (patch, patch, 1)

        # Filter counts for the first layer of G and of D
        self.gf = 32
        self.df = 64

        # Loss weights: cycle-consistency, and identity as a fraction of it
        self.lambda_cycle = 10.0
        self.lambda_id = 0.1 * self.lambda_cycle

        optimizer = Adam(0.0002, 0.5)

        # Discriminators are built first and compiled while still trainable
        self.d_A = self.build_discriminator()
        self.d_B = self.build_discriminator()
        for discriminator in (self.d_A, self.d_B):
            discriminator.compile(loss='mse',
                                  optimizer=optimizer,
                                  metrics=['accuracy'])

        # ---------------------------------------------
        #  Computational graph of the two generators
        # ---------------------------------------------
        self.g_AB = self.build_generator()
        self.g_BA = self.build_generator()

        # One input per domain
        img_A = Input(shape=self.img_shape)
        img_B = Input(shape=self.img_shape)

        # A -> B and B -> A
        fake_B = self.g_AB(img_A)
        fake_A = self.g_BA(img_B)
        # ... and back again to the domain each image came from
        reconstr_A = self.g_BA(fake_B)
        reconstr_B = self.g_AB(fake_A)
        # Identity mapping: each generator applied to its own target domain
        img_A_id = self.g_BA(img_A)
        img_B_id = self.g_AB(img_B)

        # Inside the combined model only the generators are trained
        self.d_A.trainable = False
        self.d_B.trainable = False

        # Discriminator verdicts on the translated images
        valid_A = self.d_A(fake_A)
        valid_B = self.d_B(fake_B)

        # Combined model: generators are trained to fool the discriminators
        combined_outputs = [valid_A, valid_B,
                            reconstr_A, reconstr_B,
                            img_A_id, img_B_id]
        combined_losses = ['mse', 'mse',
                           'mae', 'mae',
                           'mae', 'mae']
        combined_weights = [1, 1,
                            self.lambda_cycle, self.lambda_cycle,
                            self.lambda_id, self.lambda_id]

        self.combined = Model(inputs=[img_A, img_B], outputs=combined_outputs)
        self.combined.compile(loss=combined_losses,
                              loss_weights=combined_weights,
                              optimizer=optimizer)

    def build_generator(self):
        """Build the U-Net generator and return it as a Keras ``Model``."""

        def downsample(tensor, filters, f_size=4):
            """Stride-2 convolution, LeakyReLU, then instance normalisation."""
            out = Conv2D(filters, kernel_size=f_size, strides=2, padding='same')(tensor)
            out = LeakyReLU(alpha=0.2)(out)
            return InstanceNormalization()(out)

        def upsample(tensor, skip, filters, f_size=4, dropout_rate=0):
            """2x upsampling, convolution, optional dropout, instance
            normalisation, then concatenation with the skip connection."""
            out = UpSampling2D(size=2)(tensor)
            out = Conv2D(filters, kernel_size=f_size, strides=1, padding='same', activation='relu')(out)
            if dropout_rate:
                out = Dropout(dropout_rate)(out)
            out = InstanceNormalization()(out)
            return Concatenate()([out, skip])

        # Image input
        image_in = Input(shape=self.img_shape)

        # Encoder: four downsampling stages; every output is kept for skips
        encoder_outputs = []
        current = image_in
        for multiplier in (1, 2, 4, 8):
            current = downsample(current, self.gf * multiplier)
            encoder_outputs.append(current)

        # Decoder: three upsampling stages, each joined with the encoder
        # output of matching resolution (deepest skip first)
        skips = reversed(encoder_outputs[:-1])
        for skip, multiplier in zip(skips, (4, 2, 1)):
            current = upsample(current, skip, self.gf * multiplier)

        # Final upsampling back to the input resolution, tanh output
        current = UpSampling2D(size=2)(current)
        output_img = Conv2D(self.channels, kernel_size=4, strides=1, padding='same', activation='tanh')(current)

        return Model(image_in, output_img)
139
140

dataloader
1import scipy
2from glob import glob
3import numpy as np
4import imageio
5from PIL import Image
6
class DataLoader():
    """Load images from ``./datasets/<dataset_name>/`` for the two domains.

    Every image returned by ``load_data``, ``load_batch`` and ``load_img`` is
    resized to ``img_res``, forced to have exactly ``channels`` channels and
    scaled from [0, 255] to [-1, 1].

    Forcing the channel axis matters: a grayscale file is decoded as a 2-D
    (rows, cols) array, and a batch of those has shape (batch, rows, cols).
    A ``Conv2D`` layer rejects that with
    ``expected min_ndim=4, found ndim=3``.
    """

    def __init__(self, dataset_name, img_res=(512, 512), channels=3):
        """
        Args:
            dataset_name: sub-directory of ``./datasets`` to read from.
            img_res: target size as (rows, cols).
            channels: number of channels every returned image must have
                (1 or 3). Must match the model's input shape.
        """
        self.dataset_name = dataset_name
        self.img_res = img_res
        self.channels = channels

    def load_data(self, domain, batch_size=1, is_testing=False):
        """Return ``batch_size`` randomly chosen images of one domain.

        Reads from ``train<domain>`` (or ``test<domain>`` when ``is_testing``)
        and, when training, flips each image left-right with probability 0.5.

        Returns:
            Array of shape (batch_size, rows, cols, channels) in [-1, 1].
        """
        data_type = "train%s" % domain if not is_testing else "test%s" % domain
        path = glob('./datasets/%s/%s/*' % (self.dataset_name, data_type))

        batch_images = np.random.choice(path, size=batch_size)

        imgs = []
        for img_path in batch_images:
            img = self._load_resized(img_path)
            # Random horizontal flip is a training-only augmentation
            if not is_testing and np.random.random() > 0.5:
                img = np.fliplr(img)
            imgs.append(img)

        return np.array(imgs) / 127.5 - 1.

    def load_batch(self, batch_size=1, is_testing=False):
        """Yield (imgs_A, imgs_B) batches covering one pass over the data.

        Reads from ``trainA``/``trainB`` (or ``valA``/``valB`` when
        ``is_testing``). Sets ``self.n_batches`` to the number of batches
        that will be yielded.
        """
        data_type = "train" if not is_testing else "val"
        path_A = glob('./datasets/%s/%sA/*' % (self.dataset_name, data_type))
        path_B = glob('./datasets/%s/%sB/*' % (self.dataset_name, data_type))

        self.n_batches = int(min(len(path_A), len(path_B)) / batch_size)
        total_samples = self.n_batches * batch_size

        # Sample n_batches * batch_size from each path list so that model sees all
        # samples from both domains
        path_A = np.random.choice(path_A, total_samples, replace=False)
        path_B = np.random.choice(path_B, total_samples, replace=False)

        # Iterate over every batch; stopping at n_batches - 1 would drop the
        # last one (and yield nothing at all when there is a single batch).
        for i in range(self.n_batches):
            batch_A = path_A[i*batch_size:(i+1)*batch_size]
            batch_B = path_B[i*batch_size:(i+1)*batch_size]
            imgs_A, imgs_B = [], []
            for img_A, img_B in zip(batch_A, batch_B):
                img_A = self._load_resized(img_A)
                img_B = self._load_resized(img_B)

                # One draw decides the flip for both images of the pair
                if not is_testing and np.random.random() > 0.5:
                    img_A = np.fliplr(img_A)
                    img_B = np.fliplr(img_B)

                imgs_A.append(img_A)
                imgs_B.append(img_B)

            imgs_A = np.array(imgs_A)/127.5 - 1.
            imgs_B = np.array(imgs_B)/127.5 - 1.

            yield imgs_A, imgs_B

    def load_img(self, path):
        """Load one image as a batch of shape (1, rows, cols, channels)."""
        # scipy.misc.imresize no longer exists in SciPy; reuse the PIL-based
        # resize that the other loaders already rely on.
        img = self._load_resized(path)
        img = img/127.5 - 1.
        return img[np.newaxis, :, :, :]

    def imread(self, path):
        """Read the file at ``path`` into a float64 array, shape as decoded."""
        # np.float was only an alias of the builtin float and has been removed
        # from NumPy; np.float64 is the same dtype.
        return imageio.imread(path).astype(np.float64)

    def _load_resized(self, path):
        """Read an image, resize it to ``img_res`` and fix its channel count."""
        img = self.imread(path)
        rows, cols = self.img_res
        # PIL takes (width, height), i.e. (cols, rows); resample=2 is bilinear.
        resized = Image.fromarray(img.astype(np.uint8)).resize((cols, rows), resample=2)
        return self._ensure_channels(np.array(resized), self.channels)

    @staticmethod
    def _ensure_channels(img, channels):
        """Return ``img`` as a (rows, cols, channels) array.

        A 2-D grayscale array gains a trailing channel axis, an alpha channel
        is dropped when it is not wanted, one channel is replicated to three,
        and RGB is reduced to one luminance channel.

        Raises:
            ValueError: if the array is not 2-D/3-D or the conversion between
                the found and the requested channel count is not supported.
        """
        img = np.asarray(img)
        if img.ndim == 2:
            img = img[:, :, np.newaxis]
        if img.ndim != 3:
            raise ValueError(
                "expected a 2-D or 3-D image array, got shape %s" % (img.shape,))

        if img.shape[2] == channels:
            return img
        # Gray+alpha or RGBA: discard the alpha channel, then re-check
        if img.shape[2] in (2, 4):
            img = img[:, :, :-1]
            if img.shape[2] == channels:
                return img

        found = img.shape[2]
        if found == 1 and channels == 3:
            return np.repeat(img, 3, axis=2)
        if found == 3 and channels == 1:
            # ITU-R BT.601 luma weights
            weights = np.array([0.299, 0.587, 0.114])
            return (img.astype(np.float64) @ weights)[:, :, np.newaxis]
        raise ValueError(
            "cannot convert an image with %d channel(s) to %d channel(s)"
            % (found, channels))
78

jbpb0

2021/02/01 11:29

https://github.com/simontomaskarlsson/CycleGAN-Keras/blob/master/load_data.py の def create_image_array(... の中の image = image[:, :, np.newaxis] みたいなことをやって、次元を一つ水増しすればいけると思います。追加する場所は、 def load_data(... の imgs.append(img) のすぐ上あたりかな。ただし、単純にそれを追加するだけだと、入力がカラー画像の場合にうまくいかないかもしれないので、上記Webサイトの def create_image_array(... のように、 def load_data(... の引数に入力のチャネル数を追加して、それが1の場合だけ実行されるようにした方がいいかも。