Python3 Dimensions must be equalのエラー解消法について

前提・実現したいこと

kerasでCNNを使った多クラス分類モデルを作っています。
その際、以下のようなエラーが発生してしまいました。
おそらくArcFace層の次元の数が統一されていないのかと思うのですが、どう修正したら良いかわからなかったため、質問させていただきました。
こういった質問はこれまでにもたくさんあったかと思いますが、今回の自分のケースに適用して考えることができなかったため、お手数をおかけしますが修正方法についてどなたか教えて頂けますと大変助かります。

ValueError: Dimensions must be equal, but are 3 and 4 for 'arc_face_1/mul_1' (op: 'Mul') with input shapes: [3,3], [?,4].

該当のソースコード

Python
1from __future__ import absolute_import
2from __future__ import division
3from __future__ import print_function
4
5from keras.layers import Lambda, Input, Dense
6from keras.models import Model
7from keras.models import Sequential, model_from_json
8from keras.losses import mse, binary_crossentropy
9from keras.layers import Conv2D, Flatten
10from keras.layers import Reshape, Conv2DTranspose
11from keras.layers.pooling import MaxPool2D
12from keras.utils import plot_model, np_utils 
13from keras.callbacks import Callback, EarlyStopping, TensorBoard, ModelCheckpoint, LearningRateScheduler, CSVLogger
14from keras import optimizers
15from keras import backend as K
16from keras.layers import Layer
17from keras import regularizers
18from keras.preprocessing.image import array_to_img, img_to_array,load_img
19from keras.preprocessing.image import ImageDataGenerator
20from keras.layers import Activation, BatchNormalization, Dropout
21
22from sklearn.model_selection import train_test_split
23
24import numpy as np
25import matplotlib.pyplot as plt
26import argparse
27import os
28import re
29import glob
30import random as rn
31import tensorflow as tf
32import cv2
33from PIL import Image
34
35import warnings
36warnings.filterwarnings('ignore')
37
38%matplotlib inline
39
40
41
42# network parameters
43image_size_width, image_size_height = (512, 496)
44input_shape = (image_size_width,image_size_height,1)
45batch_size = 16#25#50
46kernel_size = 3
47filters = 16
48latent_dim = 2
49epochs = 100
50num_classes = 4
51weight_decay = 1e-4
52
53class ArcFace(Layer):
54    def __init__(self, n_classes=4, s=30.0, m=0.50, regularizer=None, **kwargs):
55        super(ArcFace, self).__init__(**kwargs)
56        self.n_classes = n_classes
57        self.s = s
58        self.m = m
59        self.regularizer = regularizers.get(regularizer)
60
61    def build(self, input_shape):
62        super(ArcFace, self).build(input_shape[0])
63        self.W = self.add_weight(name='W',
64                                shape=(input_shape[0][-1], self.n_classes),
65                                initializer='glorot_uniform',
66                                trainable=True,
67                                regularizer=self.regularizer)
68
69    def call(self, inputs):
70        x, y = inputs
71        c = K.shape(x)[-1]
72        # normalize feature
73        x = tf.nn.l2_normalize(x, axis=1)
74        # normalize weights
75        W = tf.nn.l2_normalize(self.W, axis=0)
76        # dot product
77        logits = x * W
78        # add margin
79        # clip logits to prevent zero division when backward
80        theta = tf.acos(K.clip(logits, -1.0 + K.epsilon(), 1.0 - K.epsilon()))
81        target_logits = tf.cos(theta + self.m)
82        # sin = tf.sqrt(1 - logits**2)
83        # cos_m = tf.cos(logits)
84        # sin_m = tf.sin(logits)
85        # target_logits = logits * cos_m - sin * sin_m
86        #
87        logits = logits * (1 - y) + target_logits * y
88        # feature re-scale
89        logits *= self.s
90        out = tf.nn.softmax(logits)
91
92        return out
93
94    def compute_output_shape(self, input_shape):
95        return (None, self.n_classes)
96
97def vgg_block(x, filters, layers):
98    for _ in range(layers):
99        x = Conv2D(filters, (3, 3), padding='same', kernel_initializer='he_normal',
100                    kernel_regularizer=regularizers.l2(weight_decay))(x)
101        x = BatchNormalization()(x)
102        x = Activation('relu')(x)
103
104    return x
105
106def vgg8_arcface(args):
107    input = Input(shape=(512, 496, 1))
108    y = Input(shape=(4,))
109
110    x = vgg_block(input, 16, 2)
111    x = MaxPool2D(pool_size=(2, 2))(x)
112    x = vgg_block(x, 32, 2)
113    x = MaxPool2D(pool_size=(2, 2))(x)
114    x = vgg_block(x, 64, 2)
115    x = MaxPool2D(pool_size=(2, 2))(x)
116
117    x = BatchNormalization()(x)
118    x = Dropout(0.5)(x)
119    x = Flatten()(x)
120    x = Dense(3, kernel_initializer='he_normal',
121                kernel_regularizer=regularizers.l2(weight_decay))(x)
122    x = BatchNormalization()(x)
123    output = ArcFace(4, regularizer=regularizers.l2(weight_decay))([x, y])
124
125    return Model([input, y], output)
126
127#データセットの準備
128folder = ["A","B","C","D"]
129 
130X = []
131Y = []
132for index, name in enumerate(folder):
133    dir = "./" + name
134    files = glob.glob(dir + "/*.jpeg")
135    for i, file in enumerate(files):
136        image = Image.open(file)
137        image = image.convert("L")#("RGB")
138        image = image.resize((image_size_width, image_size_height))
139        data = np.asarray(image)
140        X.append(data)
141        Y.append(index)
142 
143X = np.array(X)
144Y = np.array(Y)
145
146X = np.reshape(X, [-1, image_size_width,image_size_height,1])
147X = X.astype('float32') / 255
148
149print(X.shape)
150
151# 正解ラベルの形式を変換
152Y = np_utils.to_categorical(Y, num_classes)
153
154# 学習用データとテストデータ
155X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20)
156print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
157
158# learn model
159model = vgg8_arcface(4)# 重みloadしない
160model.summary()
161
162Adam = optimizers.Adam(lr=1e-4, decay=1e-4,beta_1=0.9, beta_2=0.999, epsilon=1e-8)
163model.compile(optimizer=Adam, loss='categorical_crossentropy', metrics=["accuracy"])
164
165callbacks = []
166callbacks.append(ModelCheckpoint(filepath="model.ep{epoch:02d}.h5", save_best_only=True))
167callbacks.append(EarlyStopping(monitor='val_loss', patience=5, verbose=1))
168callbacks.append(CSVLogger("history.csv"))
169
170os.environ['PYTHONHASHSEED'] = '0'
171np.random.seed(5)
172rn.seed(5)
173
174config = tf.ConfigProto()
175config.gpu_options.per_process_gpu_memory_fraction = 0.9
176
177tf.set_random_seed(5)
178sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
179K.set_session(sess)
180
181history = model.fit(X_train, y_train,
182                    batch_size=batch_size,
183                    epochs=epochs,
184                    verbose=1,
185                    validation_data=(X_test, y_test),
186                   callbacks=callbacks)
187
188score = model.evaluate(X_test, y_test, verbose=0)
189print('Test loss:', score[0])
190print('Test accuracy:', score[1])