cv2.rectangleで画像に矩形が表示されない

前提・実現したいこと

pascalVOCデータセットを読み込んで、確認のために画像にバウンディングボックスを描画してみたいです。

エラー

表示結果です。

矩形の描画がなされません、、、

該当のソースコード

python
# Pull a single sample out of the dataset to eyeball its annotations.
image, target, image_id = train_dataset[1]

# Channels-first tensor -> channels-last array for display.
# NOTE(review): the discussion further down reports that cv2.rectangle only
# draws once a .copy() is inserted after .numpy(); presumably the permuted
# array is not C-contiguous -- confirm.
nim = image.permute(1, 2, 0).cpu().numpy().astype(float)

# cv2.rectangle takes integer pixel coordinates, hence the int32 cast.
boxes = target['boxes'].cpu().numpy().astype(np.int32)

fig, ax = plt.subplots(1, 1, figsize=(16, 8))
ax.set_axis_off()

# Each row of boxes is (xmin, ymin, xmax, ymax).
for xmin, ymin, xmax, ymax in boxes:
    cv2.rectangle(nim, (xmin, ymin), (xmax, ymax), (0, 1, 0), 2)
ax.imshow(nim)

参考
https://www.kaggle.com/shonenkov/training-efficientdet
データセット作成は自分で行い、描画のところだけ参照しています。
同じようにデータセットからはimage, target, image_idが返されるようにはしているはずです。

試したこと

numpyで適当に真っ黒の画像を作成してやってみるとそれに関してはうまくいっています。
なぜか描画したい画像ではcv2.rectangleが機能していないように思います。

補足情報（FW/ツールのバージョンなど）

補足の情報を詳細に記載します。
nimは描画したい画像
boxesは矩形の座標(xmin, ymin, xmax, ymax)

nim
1array([[[0.51372552, 0.57254905, 0.59215689],
2        [0.50588238, 0.56470591, 0.58431375],
3        [0.50588238, 0.56470591, 0.58431375],
4        ...,
5        [0.48627451, 0.54509807, 0.53333336],
6        [0.45490196, 0.51372552, 0.50196081],
7        [0.43137255, 0.49019608, 0.47843137]],
8
9       [[0.51372552, 0.56470591, 0.58823532],
10        [0.50588238, 0.55686277, 0.58039218],
11        [0.50588238, 0.55686277, 0.58039218],
12        ...,
13        [0.42352942, 0.48235294, 0.4627451 ],
14        [0.41568628, 0.47450981, 0.45490196],
15        [0.41176471, 0.47058824, 0.4509804 ]],
16
17       [[0.50588238, 0.5411765 , 0.56862748],
18        [0.50588238, 0.5411765 , 0.56862748],
19        [0.50980395, 0.54509807, 0.57254905],
20        ...,
21        [0.41960785, 0.47450981, 0.47450981],
22        [0.42745098, 0.48235294, 0.48235294],
23        [0.43921569, 0.49411765, 0.49411765]],
24
25       ...,
26
27       [[0.43529412, 0.48627451, 0.50980395],
28        [0.43529412, 0.48627451, 0.50980395],
29        [0.43529412, 0.48627451, 0.50980395],
30        ...,
31        [0.41960785, 0.47843137, 0.50588238],
32        [0.42352942, 0.48235294, 0.50980395],
33        [0.42745098, 0.48627451, 0.51372552]],
34
35       [[0.43921569, 0.49019608, 0.51372552],
36        [0.43529412, 0.48627451, 0.50980395],
37        [0.43137255, 0.48235294, 0.50588238],
38        ...,
39        [0.41960785, 0.47843137, 0.50588238],
40        [0.41960785, 0.47843137, 0.50588238],
41        [0.41960785, 0.47843137, 0.50588238]],
42
43       [[0.43137255, 0.48235294, 0.50588238],
44        [0.42745098, 0.47843137, 0.50196081],
45        [0.43137255, 0.48235294, 0.50588238],
46        ...,
47        [0.42352942, 0.48235294, 0.50980395],
48        [0.41960785, 0.47843137, 0.50588238],
49        [0.42352942, 0.48235294, 0.50980395]]])

nim.dtype
1dtype('float64')

nim.shape
1(334, 500, 3)

boxes
1array([[ 31, 140, 116, 173],
2       [193, 183, 260, 201]], dtype=int32)

データセット

class MyDataset(torch.utils.data.Dataset):
    """Detection dataset backed by an annotation DataFrame and a JPEG directory.

    Each item is one image together with every annotation row of ``df``
    that shares its ``image_id``.
    """

    def __init__(self, df, image_dir):
        """Remember the annotation table and where the images live.

        Args:
            df: DataFrame with the columns ``image_id``, ``x``, ``y``, ``w``,
                ``h`` and ``class`` (one row per box).
            image_dir: Path prefix that is concatenated directly with
                ``image_id + '.jpg'``, so it must end with a path separator.
        """
        super().__init__()

        self.image_ids = df["image_id"].unique()
        self.df = df
        self.image_dir = image_dir

    def __getitem__(self, index):
        """Load the ``index``-th image and its annotations.

        Returns:
            ``(image, target, image_id)``. ``image`` is the result of
            ``transforms.ToTensor()`` applied to the RGB image scaled to
            [0, 1]. ``target`` is a dict holding ``boxes`` (float32 rows of
            ``[xmin, ymin, xmax, ymax]``), ``labels``, ``image_id``,
            ``area`` and ``iscrowd`` tensors.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        transform = transforms.Compose([
            transforms.ToTensor()
        ])

        # Load the input image.
        image_id = self.image_ids[index]
        image_path = self.image_dir + image_id + '.jpg'
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; fail here with the offending path instead of
            # with an opaque error inside cvtColor.
            raise FileNotFoundError(f"could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float64)
        image /= 255.0
        image = transform(image)

        # Load the annotation rows for this image.
        records = self.df[self.df["image_id"] == image_id]
        boxes = torch.tensor(records[["x", "y", "w", "h"]].values, dtype=torch.float32)
        # Convert (x, y, w, h) into (xmin, ymin, xmax, ymax).
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

        # Already float32 because boxes is float32.
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        labels = torch.tensor(records["class"].values, dtype=torch.int64)

        iscrowd = torch.zeros((records.shape[0], ), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = torch.tensor([index])
        target["area"] = area
        target["iscrowd"] = iscrowd

        return image, target, image_id

    def __len__(self):
        """Return the number of distinct image ids."""
        return self.image_ids.shape[0]

いろいろいじっている中で、UMat形式とやらが関係しているような気もするのですが、確かなことは私の方ではわかっておりません。

大変長くなり恐縮ですが、どなたか分かる方がいらっしゃればご教示のほどよろしくお願いいたします。

jbpb0

2021/08/03 11:05

> cv2.rectangle(nim, (box[0], box[1]), (box[2], box[3]), (0, 1, 0), 2) の「(0, 1, 0)」は「(0, 255, 0)」では？

shu214

2021/08/03 11:08

ご意見ありがとうございます！早くて助かります！！実はそれも試しまして、、できませんでした。。参考元も(0, 1, 0)です。

jbpb0

2021/08/03 11:43

下記のようにして画像を読み込んでやってみましたけど、二つの長方形は、ちゃんと描画されました。この場合と何が違うのでしょうね？ import numpy as np import cv2 nim = cv2.imread(r"画像ファイルのパス").astype(float) / 255.0 boxes = np.array([[ 31, 140, 116, 173], [193, 183, 260, 201]]) for box in boxes: cv2.rectangle(nim, (box[0], box[1]), (box[2], box[3]), (0, 1, 0), 2) cv2.imshow("test", nim) cv2.waitKey(0) cv2.destroyAllWindows() 上記はforループのインデントが消えて表示されてしまいますが、実行する際はインデントが付いてます。

shu214

2021/08/04 02:29

そうなんですよね、、そうやってやるとできるんですよね。。何が違うのかはいまのところ分かってません、、

jbpb0

2021/08/04 03:57 編集

質問のコードと実質同じ処理になるように書きました。「nim」が質問のコードの処理と同じで、「nimo」が昨日のコードの処理と同じです。 import numpy as np import matplotlib.pyplot as plt import cv2 from torchvision import transforms transform = transforms.Compose([transforms.ToTensor()]) image = cv2.imread(r"画像ファイルのパス") image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float64) image /= 255.0 nimo = image image = transform(image) nim = image.permute(1,2,0).cpu().numpy().astype(float) boxes = np.array([[ 31, 140, 116, 173], [193, 183, 260, 201]]) for box in boxes: cv2.rectangle(nimo, (box[0], box[1]), (box[2], box[3]), (0, 1, 0), 2) plt.imshow(nimo) plt.show() for box in boxes: cv2.rectangle(nim, (box[0], box[1]), (box[2], box[3]), (0, 1, 0), 2) plt.imshow(nim) plt.show() 上記を実行すると、「nimo」は正常に処理できて二つの長方形が描画されます。一方、「nim」は「cv2.rectangle()」がエラーになり実行されませんので、長方形は描画されません。 Jupyterで実行すると、エラーが隠蔽されて表示されないかもしれないので、Pythonの対話モードで実行してみてください。

shu214

2021/08/04 04:02

ありがとうございます！！これはnimが描画されないということで合ってますか？（文章ではどちらもnimoですので、、）だとすると、transformかpermuteあたりが怪しいのでしょうか、、

jbpb0

2021/08/04 04:22

> nimが描画されないということで合ってますか？
そうです。
> 文章ではどちらもnimoですので
文章はすぐに直したのですが、間に合いませんでしたか。

shu214

2021/08/04 04:26

いまはなおっています！ありがとうございます！いちど別の方法(があれば)でテンソルをnumpyに変換してrectangleしてみようと思います。

jbpb0

2021/08/04 04:52

nim = image.permute(1,2,0).cpu().numpy().astype(float) ↓ 変更 nim = image.permute(1,2,0).cpu().numpy().copy().astype(float) としたら、エラーにならず長方形が描画されました

shu214

2021/08/04 05:36

ありがとうございます！！確かにそうすればできました！ただ、直接的な解決策を探っていたところさきほど申し上げていた通り、transformsの部分でなにやらおかしくなっていることが判明しました。 pytorchのtransformsではなく、albumentationsライブラリを用いることによってまさかの解決ができました！ jbpb0様、ご検討ほんとうにありがとうございました！あなたのおかげで解決までたどりつけました！

行動規範の内容に同意します

回答1件

自己解決

画像を変換する際に、torchvisionのtransformsではなく、albumentationsライブラリを使用することによって解決しました。
参考は、質問にも記載させていただいたkaggleのノートブックです
https://www.kaggle.com/shonenkov/training-efficientdet

（実行結果の画像）
（3匹目の鳥に矩形がないのはおそらくプログラムのミスではなくアノテーションのミスかと思われます。）

以下、長くなりますが全体のコードを載せておきます。

python
def get_transforms():
    """Build the albumentations pipeline applied to every sample.

    The pipeline resizes the image to 512x512 and converts it with
    ``ToTensorV2``. Bounding boxes are declared as ``pascal_voc`` and are
    tied to the ``labels`` field.
    """
    steps = [
        A.Resize(height=512, width=512, p=1),
        ToTensorV2(p=1.0),
    ]
    bbox_params = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(steps, p=1.0, bbox_params=bbox_params)

class MyDataset(torch.utils.data.Dataset):
    """Detection dataset backed by an annotation DataFrame and a JPEG directory.

    Each item is one image together with every annotation row of ``df``
    that shares its ``image_id``. An optional albumentations-style callable
    transforms the image and its boxes together.
    """

    def __init__(self, df, image_dir, transforms=None):
        """Remember the annotation table, image location and transform.

        Args:
            df: DataFrame with the columns ``image_id``, ``x``, ``y``, ``w``,
                ``h`` and ``class`` (one row per box).
            image_dir: Path prefix that is concatenated directly with
                ``image_id + '.jpg'``, so it must end with a path separator.
            transforms: Optional callable invoked as
                ``transforms(image=..., bboxes=..., labels=...)`` that
                returns a mapping with ``'image'`` and ``'bboxes'``.
        """
        super().__init__()

        self.image_ids = df["image_id"].unique()
        self.df = df
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index):
        """Load the ``index``-th image and its annotations.

        Returns:
            ``(image, target, image_id)``. When a transform is set and it
            yields at least one box, ``image`` is the transformed image and
            ``target['boxes']`` is a tensor in (ymin, xmin, ymax, xmax)
            order. Otherwise ``image`` is the RGB array scaled to [0, 1] and
            ``target['boxes']`` is the (xmin, ymin, xmax, ymax) NumPy array.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # Load the input image.
        image_id = self.image_ids[index]
        image_path = self.image_dir + image_id + '.jpg'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; fail here with the offending path instead of
            # with an opaque error inside cvtColor.
            raise FileNotFoundError(f"could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float64)
        image /= 255.0

        # Load the annotation rows for this image.
        records = self.df[self.df["image_id"] == image_id]
        # copy=True guarantees a writable array that does not alias the
        # DataFrame, which the in-place column updates below rely on.
        boxes = records[["x", "y", "w", "h"]].to_numpy(copy=True)
        # Convert (x, y, w, h) into (xmin, ymin, xmax, ymax).
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

        labels = torch.tensor(records["class"].values, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = torch.tensor([index])

        if self.transforms:
            # Retry up to 10 times until the transform keeps at least one box.
            # NOTE(review): retrying only helps when the transform is random;
            # if every attempt drops all boxes, the untransformed image and
            # boxes are returned as-is.
            for _ in range(10):
                sample = self.transforms(**{
                    'image': image,
                    'bboxes': target['boxes'],
                    'labels': labels
                })
                if len(sample['bboxes']) > 0:
                    image = sample['image']
                    target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
                    # Swap columns so boxes become yxyx; callers must draw
                    # and consume them in that order.
                    target['boxes'][:, [0, 1, 2, 3]] = target['boxes'][:, [1, 0, 3, 2]]
                    break

        return image, target, image_id

    def __len__(self):
        """Return the number of distinct image ids."""
        return self.image_ids.shape[0]

image_dir = "VOCtrainval_11-May-2012/VOCdevkit/VOC2012/JPEGImages/"

train_dataset = MyDataset(train, image_dir, transforms=get_transforms())
val_dataset = MyDataset(val, image_dir, transforms=get_transforms())
test_dataset = MyDataset(test, image_dir, transforms=get_transforms())

# Draw the boxes of one training sample as a sanity check.
image, target, image_id = train_dataset[1]
boxes = target['boxes'].cpu().numpy().astype(np.int32)

# Channels-first tensor -> channels-last array. cv2 draws into the array in
# place, so hand it a C-contiguous buffer regardless of how the tensor was
# laid out; ascontiguousarray does not copy when the array already is one.
nim = np.ascontiguousarray(image.permute(1, 2, 0).cpu().numpy())

fig, ax = plt.subplots(1, 1, figsize=(16, 8))

# The dataset returns boxes as (ymin, xmin, ymax, xmax) while cv2 expects
# (x, y) points. tolist() yields plain Python ints for the coordinates.
for ymin, xmin, ymax, xmax in boxes.tolist():
    # The image was scaled to [0, 1] in the dataset, so green is (0, 1, 0);
    # 255 would be outside the range imshow accepts for float RGB data.
    cv2.rectangle(nim, (xmin, ymin), (xmax, ymax), (0, 1, 0), 2)
ax.set_axis_off()
ax.imshow(nim)