Background / What I want to achieve
Following a blog post, I implemented Grad-CAM for VGGFace, which produces a heatmap showing which parts of an image the model is looking at.
I then added MTCNN, which detects and crops the face region of an image, and tried to feed the cropped face into the model so that I can pass in an arbitrary photo and get a heatmap for the face, but this does not work.
Separately from the above, I would also like to save the cropped face image under a name derived from the input image filename given as a command-line argument. How should I change the code to do that?
Problem / error message
File "vggface-gradcam+mtcnn.py", line 200, in <module> predictions = model.predict(preprocessed_input) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/engine/training.py", line 1149, in predict x, _, _ = self._standardize_user_data(x) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/engine/training.py", line 751, in _standardize_user_data exception_prefix='input') File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/engine/training_utils.py", line 128, in standardize_input_data 'with shape ' + str(data_shape)) ValueError: Error when checking input: expected input_1 to have 4 dimensions, but got array with shape (224, 224, 3)
Relevant source code
I am not sure which part needs to be fixed, so the code below is long, but here is all of it.
Python
def extract_face(filename, required_size=(224, 224)):
    # load image from file
    pixels = pyplot.imread(filename)
    # create the detector, using default weights
    detector = MTCNN()
    # detect faces in the image
    results = detector.detect_faces(pixels)
    # extract the bounding box from the first face
    x1, y1, width, height = results[0]['box']
    x2, y2 = x1 + width, y1 + height
    # extract the face
    face = pixels[y1:y2, x1:x2]
    # resize pixels to the model size
    image = Image.fromarray(face)
    image = image.resize(required_size)
    face_array = asarray(image)
    return face_array

# load the photo and extract the face
pixels = extract_face(sys.argv[1])
img = Image.fromarray(pixels, 'RGB')
img.save('1.jpg')
#img.show()

def target_category_loss(x, category_index, nb_classes):
    return tf.multiply(x, K.one_hot([category_index], nb_classes))

def target_category_loss_output_shape(input_shape):
    return input_shape

def normalize(x):
    # utility function to normalize a tensor by its L2 norm
    return x / (K.sqrt(K.mean(K.square(x))) + 1e-5)

'''
def load_image(path):
    img_path = sys.argv[1]
    img = image.load_img(img_path, target_size=(224,224)) #299,299)) #224, 224)
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x
'''

def register_gradient():
    if "GuidedBackProp" not in ops._gradient_registry._registry:
        @ops.RegisterGradient("GuidedBackProp")
        def _GuidedBackProp(op, grad):
            dtype = op.inputs[0].dtype
            return grad * tf.cast(grad > 0., dtype) * \
                tf.cast(op.inputs[0] > 0., dtype)

def compile_saliency_function(model, activation_layer='conv5_3'): #mixed10 'activation_49' add_16 add_32 activation_98
    input_img = model.input
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    #print(layer_dict)
    layer_output = layer_dict[activation_layer].output
    max_output = K.max(layer_output, axis=3)
    saliency = K.gradients(K.sum(max_output), input_img)[0]
    return K.function([input_img, K.learning_phase()], [saliency])

def modify_backprop(model, name):
    g = tf.get_default_graph()
    with g.gradient_override_map({'Relu': name}):

        # get layers that have an activation
        layer_dict = [layer for layer in model.layers[1:]
                      if hasattr(layer, 'activation')]

        # replace relu activation
        for layer in layer_dict:
            if layer.activation == keras.activations.relu:
                layer.activation = tf.nn.relu

        # re-instanciate a new model
        new_model = VGGFace(weights='vggface')
        #new_model = ResNet50(weights='imagenet')
        new_model.summary()
    return new_model

def deprocess_image(x):
    '''
    Same normalization as in:
    https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py
    '''
    if np.ndim(x) > 3:
        x = np.squeeze(x)
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255
    if K.image_dim_ordering() == 'th':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x

def _compute_gradients(tensor, var_list):
    grads = tf.gradients(tensor, var_list)
    return [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(var_list, grads)]

def grad_cam(input_model, image, category_index, layer_name):
    nb_classes = 2622
    target_layer = lambda x: target_category_loss(x, category_index, nb_classes)
    x = Lambda(target_layer, output_shape = target_category_loss_output_shape)(input_model.output)
    model = Model(inputs=input_model.input, outputs=x)
    #model.summary()
    loss = K.sum(model.output)
    conv_output = [l for l in model.layers if l.name == layer_name][0].output #is
    grads = normalize(_compute_gradients(loss, [conv_output])[0])
    gradient_function = K.function([model.input], [conv_output, grads])

    output, grads_val = gradient_function([image])
    output, grads_val = output[0, :], grads_val[0, :, :, :]

    weights = np.mean(grads_val, axis = (0, 1))
    cam = np.zeros(output.shape[0 : 2], dtype = np.float32)

    for i, w in enumerate(weights):
        cam += w * output[:, :, i]

    cam = cv2.resize(cam, (224,224))
    cam = np.maximum(cam, 0)
    #heatmap = cam / np.max(cam)
    heatmap = (cam - np.min(cam))/(np.max(cam) - np.min(cam))

    #Return to BGR [0..255] from the preprocessed image
    image = image[0, :]
    image -= np.min(image)
    image = np.minimum(image, 255)

    cam = cv2.applyColorMap(np.uint8(255*heatmap), cv2.COLORMAP_JET)
    cam = np.float32(cam) + np.float32(image)
    cam = 255 * cam / np.max(cam)
    return np.uint8(cam), heatmap

preprocessed_input = extract_face(sys.argv[1]) #if tmp[1:] else '',
model = VGGFace()
target_layer = 'conv5_3' #'activation_49' add_16 "block5_conv3"
predictions = model.predict(preprocessed_input)
register_gradient()
guided_model = modify_backprop(model, 'GuidedBackProp')
guided_model.summary()
for i in range(5):
    top_1 = decode_predictions(predictions)[0][i]
    print('label番号',predictions.argsort()[0][::-1][i])
    print('%s (%s) with probability %.2f',(top_1))
    predicted_class = predictions.argsort()[0][::-1][i]
    cam, heatmap = grad_cam(model, preprocessed_input, predicted_class, target_layer)
    cv2.imwrite(str(i)+"gradcam"+str(predictions.argsort()[0][::-1][i])+".jpg", cam)
    saliency_fn = compile_saliency_function(guided_model)
    saliency = saliency_fn([preprocessed_input, 0])
    gradcam = saliency[0] * heatmap[..., np.newaxis]
    cv2.imwrite(str(i)+"guided"+str(predictions.argsort()[0][::-1][i])+".jpg", deprocess_image(gradcam))
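Putting that shape change together with the preprocessing that the commented-out load_image used to do, I imagine the call site would need to look roughly like the sketch below before model.predict (and before grad_cam / saliency_fn, which receive the same array). I am not sure whether the preprocess_input that load_image relied on was the one from keras_vggface.utils or from keras.applications, so the import here is my assumption:

Python
import sys
import numpy as np
from keras_vggface.utils import preprocess_input  # assumption: the VGGFace-specific preprocessing

face_array = extract_face(sys.argv[1]).astype('float32')   # MTCNN crop, shape (224, 224, 3)
preprocessed_input = np.expand_dims(face_array, axis=0)    # add batch axis -> (1, 224, 224, 3)
preprocessed_input = preprocess_input(preprocessed_input)  # assumption: same call the old load_image made
predictions = model.predict(preprocessed_input)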
Saving the extracted face image
Python
pixels = extract_face(sys.argv[1])
img = Image.fromarray(pixels, 'RGB')
img.save('1.jpg')
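For the second question, I imagine the output filename could be derived from the command-line argument with os.path, roughly as below (the "_face" suffix is just a name I made up for illustration), but I am not sure whether this is the right way to do it:

Python
import os
import sys
from PIL import Image

in_path = sys.argv[1]                                    # e.g. photos/person.jpg
base, ext = os.path.splitext(os.path.basename(in_path))  # ('person', '.jpg')
out_name = base + '_face' + ext                          # '_face' is an illustrative suffix

pixels = extract_face(in_path)       # extract_face is defined in the code above
img = Image.fromarray(pixels, 'RGB')
img.save(out_name)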
My wording may be awkward, but any help would be appreciated.