### Background / what I want to achieve
I am writing a program in Python (Keras) that uses a CNN to classify images into two classes.
Using the trained model, I would like to visualize with Grad-CAM which regions the model focused on when making its prediction.
For Grad-CAM I am referring to this code:
https://github.com/jacobgil/keras-grad-cam/blob/master/grad-cam.py
and I believe I have changed the relevant parts to match my own model.
Here is my model:
```
Layer (type)                 Output Shape              Param #
=================================================================
conv2d_1 (Conv2D)            (None, 62, 62, 32)        896
max_pooling2d_1 (MaxPooling2 (None, 31, 31, 32)        0
dropout_1 (Dropout)          (None, 31, 31, 32)        0
zero_padding2d_1 (ZeroPaddin (None, 33, 33, 32)        0
conv2d_2 (Conv2D)            (None, 31, 31, 96)        27744
max_pooling2d_2 (MaxPooling2 (None, 15, 15, 96)        0
dropout_2 (Dropout)          (None, 15, 15, 96)        0
zero_padding2d_2 (ZeroPaddin (None, 17, 17, 96)        0
conv2d_3 (Conv2D)            (None, 15, 15, 96)        83040
max_pooling2d_3 (MaxPooling2 (None, 7, 7, 96)          0
flatten_1 (Flatten)          (None, 4704)              0
dense_1 (Dense)              (None, 1024)              4817920
dropout_3 (Dropout)          (None, 1024)              0
dense_2 (Dense)              (None, 2)                 2050
=================================================================
Total params: 4,931,650
Trainable params: 4,931,650
Non-trainable params: 0
```
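For reference, the summary above corresponds to a model definition roughly like the sketch below (kernel sizes and shapes can be inferred from the parameter counts; the activation functions and dropout rates shown here are only placeholders and may not match my actual training script):

```python
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, ZeroPadding2D, Flatten, Dense

# Sketch reconstructed from the model.summary() output above.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))  # conv2d_1
model.add(MaxPooling2D(pool_size=(2, 2)))                                  # max_pooling2d_1
model.add(Dropout(0.25))                                                   # dropout_1
model.add(ZeroPadding2D(padding=(1, 1)))                                   # zero_padding2d_1
model.add(Conv2D(96, (3, 3), activation='relu'))                           # conv2d_2
model.add(MaxPooling2D(pool_size=(2, 2)))                                  # max_pooling2d_2
model.add(Dropout(0.25))                                                   # dropout_2
model.add(ZeroPadding2D(padding=(1, 1)))                                   # zero_padding2d_2
model.add(Conv2D(96, (3, 3), activation='relu'))                           # conv2d_3
model.add(MaxPooling2D(pool_size=(2, 2)))                                  # max_pooling2d_3
model.add(Flatten())                                                       # flatten_1
model.add(Dense(1024, activation='relu'))                                  # dense_1
model.add(Dropout(0.5))                                                    # dropout_3
model.add(Dense(2, activation='softmax'))                                  # dense_2
```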
### Problem / error message
```
IndexError                                Traceback (most recent call last)
<ipython-input-3-2fe13b62daac> in <module>()
    128
    129 predicted_class = np.argmax(predictions)
--> 130 cam, heatmap = grad_cam(model, preprocessed_input, predicted_class, "conv2d_3")
    131 cv2.imwrite("gradcam.jpg", cam)
    132

<ipython-input-3-2fe13b62daac> in grad_cam(input_model, image, category_index, layer_name)
     92
     93     loss = K.sum(model.layers[-1].output)
---> 94     conv_output = [l for l in model.layers[0].layers if l.name is layer_name][0].output
     95     grads = normalize(K.gradients(loss, conv_output)[0])
     96     gradient_function = K.function([model.layers[0].input], [conv_output, grads])

IndexError: list index out of range
```
### Relevant source code
```python
from keras.preprocessing import image
from keras.layers.core import Lambda
from keras.models import Sequential, load_model
from tensorflow.python.framework import ops
import keras.backend as K
import tensorflow as tf
import numpy as np
import keras
import sys
import cv2

def target_category_loss(x, category_index, nb_classes):
    return tf.multiply(x, K.one_hot([category_index], nb_classes))

def target_category_loss_output_shape(input_shape):
    return input_shape

def normalize(x):
    # utility function to normalize a tensor by its L2 norm
    return x / (K.sqrt(K.mean(K.square(x))) + 1e-5)

def load_image(path):
    img_path = path
    img = image.load_img(img_path, target_size=(64, 64))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    return x

def register_gradient():
    if "GuidedBackProp" not in ops._gradient_registry._registry:
        @ops.RegisterGradient("GuidedBackProp")
        def _GuidedBackProp(op, grad):
            dtype = op.inputs[0].dtype
            return grad * tf.cast(grad > 0., dtype) * \
                tf.cast(op.inputs[0] > 0., dtype)

def compile_saliency_function(model, activation_layer='conv2d_3'):
    input_img = model.input
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    layer_output = layer_dict[activation_layer].output
    max_output = K.max(layer_output, axis=3)
    saliency = K.gradients(K.sum(max_output), input_img)[0]
    return K.function([input_img, K.learning_phase()], [saliency])

def modify_backprop(model, name):
    g = tf.get_default_graph()
    with g.gradient_override_map({'Relu': name}):
        # get layers that have an activation
        layer_dict = [layer for layer in model.layers[1:]
                      if hasattr(layer, 'activation')]
        # replace relu activation
        for layer in layer_dict:
            if layer.activation == keras.activations.relu:
                layer.activation = tf.nn.relu
        # re-instanciate a new model
        new_model = model
    return new_model

def deprocess_image(x):
    '''
    Same normalization as in:
    https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py
    '''
    if np.ndim(x) > 3:
        x = np.squeeze(x)
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1
    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)
    # convert to RGB array
    x *= 255
    if K.image_dim_ordering() == 'th':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x

def grad_cam(input_model, image, category_index, layer_name):
    model = Sequential()
    model.add(input_model)
    nb_classes = 2
    target_layer = lambda x: target_category_loss(x, category_index, nb_classes)
    model.add(Lambda(target_layer, output_shape=target_category_loss_output_shape))

    loss = K.sum(model.layers[-1].output)
    conv_output = [l for l in model.layers[0].layers if l.name is layer_name][0].output
    grads = normalize(K.gradients(loss, conv_output)[0])
    gradient_function = K.function([model.layers[0].input], [conv_output, grads])

    output, grads_val = gradient_function([image])
    output, grads_val = output[0, :], grads_val[0, :, :, :]

    weights = np.mean(grads_val, axis=(0, 1))
    cam = np.ones(output.shape[0:2], dtype=np.float32)

    for i, w in enumerate(weights):
        cam += w * output[:, :, i]

    cam = cv2.resize(cam, (64, 64))
    cam = np.maximum(cam, 0)
    heatmap = cam / np.max(cam)

    # Return to BGR [0..255] from the preprocessed image
    image = image[0, :]
    image -= np.min(image)
    image = np.minimum(image, 255)

    cam = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
    cam = np.float32(cam) + np.float32(image)
    cam = 255 * cam / np.max(cam)
    return np.uint8(cam), heatmap

# Load the image to be classified
preprocessed_input = load_image('image.jpeg')

model = load_model('mymodel.h5')
model.summary()

predictions = model.predict(preprocessed_input)

predicted_class = np.argmax(predictions)
cam, heatmap = grad_cam(model, preprocessed_input, predicted_class, "conv2d_3")
cv2.imwrite("gradcam.jpg", cam)

register_gradient()
guided_model = modify_backprop(model, 'GuidedBackProp')
saliency_fn = compile_saliency_function(guided_model)
saliency = saliency_fn([preprocessed_input, 0])
gradcam = saliency[0] * heatmap[..., np.newaxis]
cv2.imwrite("guided_gradcam.jpg", deprocess_image(gradcam))
```
### What I tried
I went through the code line by line, but I could not figure out where the problem is.
This is outside my field of expertise and I am a beginner, so I am at a loss...
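As a next step, I am thinking of printing the layer names of the loaded model to confirm that the name I pass to grad_cam ("conv2d_3") actually exists in it. A minimal check, assuming the same mymodel.h5 as in the code above:

```python
from keras.models import load_model

model = load_model('mymodel.h5')
# print every layer name in the loaded model so I can confirm
# that "conv2d_3" is really among them
for layer in model.layers:
    print(layer.name)
```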
### Supplementary information (language/framework/tool versions, etc.)
tensorflow (1.1.0)
Keras (2.0.4)