画像認識です。データの定義やdataset[1]などのエラーを解消したいです

前提

ここに質問の内容を詳しく書いてください。
自分のデータをインスタンス化できず、またdataset[7]などでデータを試したが、入っていない模様。
class MyDataset　を定義するところで問題があるが、どこが間違っているのか分からない。

機械学習で画像認識するものを作っています。
データセットを定義するところ、インスタンス化するところ、データセットの中身を確かめるところを実装中に以下のエラーメッセージが発生しました。

実現したいこと

ここに実現したいことを箇条書きで書いてください。

動作するようにする

　　dataset[7]などと実行してきちんと表示させる

発生している問題・エラーメッセージ

エラーメッセージ
 in __init__(self, csv_path, class_names, img_size, augmentation, min_size)
     43         self.random_croppig_base = False
     44 
---> 45         _f = open(csv_path, "/content/valid/_annotations.coco.json")
     46         _raw = _f.read()
     47         _raw = _raw.strip()

ValueError: invalid mode: '/content/valid/_annotations.coco.json'

＃エラーメッセージ２個目
__getitem__
image:  5 {'id': 5, 'license': 1, 'file_name': '67_jpg.rf.6603179f8119953b3da405866ed57d2d.jpg', 'height': 1080, 'width': 1920, 'date_captured': '2022-08-14T02:55:42+00:00'}
67_jpg.rf.6603179f8119953b3da405866ed57d2d.jpg
/content/valid/67_jpg.rf.6603179f8119953b3da405866ed57d2d.jpg
/content/valid/
        "contributor": "",
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-60-6d95f5d99674> in <module>
----> 1 dataset[5]

<ipython-input-57-c85c3c190af9> in __getitem__(self, index)
     82 
     83         print(_row)
---> 84         img_path = _row.split("/content/valid/_annotations.coco.json")[419]
     85         anno_path = _row.split("/content/valid/_annotations.coco.json")[694]
     86 

IndexError: list index out of range

#### 該当のソースコード

csv_path = "/content/valid/_annotations.coco.json"


class_names = [
    "Road-Marking",
]

cfg_augmentation = {
    "RANDRESIZE": False,
    "JITTER": False,
    "RANDOM_PLACING": False,
    "HUE": False,
    "SATURATION": False,
    "EXPOSURE": False,
    "LRFLIP": False,
    "RANDOM_DISTORT": False,
}

dataset = MyDataset(csv_path=csv_path, class_names=class_names, augmentation=cfg_augmentation)

#２個目
dataset[7]

# dataset

class MyDataset(Dataset):
    """
    dataset class.
    img_size=416
    """
    def __init__(self,
                 csv_path="/content/valid/_annotations.coco.json",
                 class_names="Road-Marking",
                 img_size=608,
                 augmentation=None, 
                 min_size=1):
        """
        Args:
        """

        if csv_path is "/content/valid/_annotations.coco.json":
            raise Exception("csv path is not specified.")

        if class_names is "Road-Marking":
            raise Exception("please specify class names.")

        self.max_labels = 30

        self.img_size = img_size
        self.min_size = min_size

        self.lrflip = augmentation['LRFLIP']
        self.jitter = augmentation['JITTER']
        self.random_placing = augmentation['RANDOM_PLACING']
        self.hue = augmentation['HUE']
        self.saturation = augmentation['SATURATION']
        self.exposure = augmentation['EXPOSURE']
        self.random_distort = augmentation['RANDOM_DISTORT']
        self.random_croppig_target = False
        self.random_croppig_base = False

        _f = open(csv_path, "/content/valid/_annotations.coco.json")
        _raw = _f.read()
        _raw = _raw.strip()
        _csv_list = _raw.split("\n")

        self.csv_list = _csv_list
        
        self.class_names = class_names
        self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}

    def __len__(self):
        print("__getaaa__")
        #print(self.csv_list)

        #return len(self.csv_list)
        return len(image_list)
        

    def __getitem__(self, index):
        print("__getitem__")
        #print("image: ", image_list[])
        print("image: ", index, image_list[index])
        
        image_name = image_list[index]["file_name"]
        print(image_name)


        img_path = "/content/valid/" + image_name
        print(img_path)

        anno_path = "/content/valid/"
        print(anno_path)



        _row = self.csv_list[index]


        print(_row)
        img_path = _row.split("/content/valid/_annotations.coco.json")[4]
        anno_path = _row.split("/content/valid/_annotations.coco.json")[5]



        lrflip = False

        # load image and preprocess
        img = self._read_image(img_path)

        # load labels
        boxes, labels = self._get_annotation(anno_path)

        img, info_img = preprocess(img, 
                                   self.img_size, 
                                   jitter=self.jitter,
                                   random_placing=self.random_placing)

        img = np.transpose(img / 255., (2, 0, 1))

        # concat class + box
        label_list = []

        for l, b in zip(labels, boxes):

            # refine box
            x1 = float(b[0])
            y1 = float(b[1])
            x2 = float(b[2])
            y2 = float(b[3])

            # seems _get_annotation convert (x1, y1, w, h) > (x1, y1, x2, y2)
            b = [x1, y1, x2-x1, y2-y1]

            if b[2] > self.min_size and b[3] > self.min_size:
                label_list.append([])
                label_list[-1].append(l)
                label_list[-1].extend(b)

        labels = label_list

        padded_labels = np.zeros((self.max_labels, 5))
        if len(labels) > 0:
            labels = np.stack(labels)
            labels = label2yolobox(labels, info_img, self.img_size, lrflip)
            padded_labels[range(len(labels))[:self.max_labels]] = labels[:self.max_labels]

        padded_labels = torch.from_numpy(padded_labels)

        return img, padded_labels, info_img

    def _get_annotation(self, annotation_file_path):

        objects = json.load(codecs.open(annotation_file_path, '/content/valid/_annotations.coco.json', 'utf-8-sig'))
        objects = objects["annotations"]

        boxes = []
        labels = []

        for item in objects:

            class_name = "Road-Marking"

            # we're only concerned with clases in our list
            if class_name in self.class_dict:

                bbox = item["bbox"]
                if bbox is not None:

                    x1 = bbox.split("id")[0]
                    y1 = bbox.split("iamge_id")[1]
                    w = bbox.split("category_id")[2]
                    h = bbox.split("area")[3]

                    # VOC dataset format follows Matlab, in which indexes start from 0
                    # We do not need this here...
                    x1 = float(x1) - 1
                    y1 = float(y1) - 1
                    x2 = x1 + float(w)
                    y2 = y1 + float(h)
                    boxes.append([x1, y1, x2, y2])

                    # you shold append index of category here
                    labels.append(self.class_dict[class_name])

        return (np.array(boxes, dtype=np.float32),
                np.array(labels, dtype=np.int64))


    def _read_image(self, image_path):
        print("image_path: ", image_path)
        image = cv2.imread(str(image_path))
        print("image: ", image.shape)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image


```ここに言語名を入力
ソースコード

試したこと

CSV_pathをいじったり、

補足情報（FW/ツールのバージョンなど）

プログラミング初心者ですよろしくおねがいします。
ここにより詳細な情報を記載してください。

行動規範の内容に同意します

回答1件

Python
1エラーメッセージ
2 in __init__(self, csv_path, class_names, img_size, augmentation, min_size)
3     43         self.random_croppig_base = False
4     44 
5---> 45         _f = open(csv_path, "/content/valid/_annotations.coco.json")
6     46         _raw = _f.read()
7     47         _raw = _raw.strip()
8
9ValueError: invalid mode: '/content/valid/_annotations.coco.json'

open関数の使い方を理解されていないようです。
二つ目の引数に"/content/valid/_annotations.coco.json"を渡す意図はなんでしょうか。

リファレンスによれば、二つ目の引数はファイルのオープンモードを指定します。

投稿2022/08/29 07:21

kazto

総合スコア7196

tanaka_tarou_00

2022/08/29 08:17 編集

回答ありがとうございます。ご指摘の所を"r"に変更いたしました。その上でdataset[7]の箇所でエラーがまだ出ているのでもう一つお伺いしたいのですが、list index out of rangeと出てしまいます。下記に添付したコード部分に問題があるのですが、どうゆう風に変更すべきか教えていただきたいです。 ####エラー表示 __getitem__ image: 2 {'id': 2, 'license': 1, 'file_name': '628a_jpg.rf.69b9ee8ff91bdaa7b650f78c31e61f84.jpg', 'height': 1080, 'width': 1920, 'date_captured': '2022-08-14T02:55:42+00:00'} 628a_jpg.rf.69b9ee8ff91bdaa7b650f78c31e61f84.jpg /content/valid/628a_jpg.rf.69b9ee8ff91bdaa7b650f78c31e61f84.jpg /content/valid/ "year": "2022", --------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-61-dfc7bb704f63> in <module> ----> 1 dataset[2] <ipython-input-58-0a5d87346061> in __getitem__(self, index) 75 76 print(_row) ---> 77 img_path = _row.split("/content/valid")[4] 78 anno_path = _row.split("/content/valid")[5] 79 IndexError: list index out of range ####コード def __getitem__(self, index): print("__getitem__") print("image:", index, image_list[index]) image_name = image_list[index]["file_name"] print(image_name) img_path = "/content/valid/" + image_name print(img_path) anno_path = "/content/valid/" print(anno_path) _row = self.csv_list[index] print(_row) img_path = _row.split("/content/valid")[4] anno_path = _row.split("/content/valid")[5] lrflip = False

kazto

2022/08/29 09:09

``` ---> 77 img_path = _row.split("/content/valid")[4] 78 anno_path = _row.split("/content/valid")[5] 79 IndexError: list index out of range ``` _row.split で分割した結果の、[4] が out of range ということでしょう。 splitがなにをやっているか、理解していますか？

tanaka_tarou_00

2022/08/30 11:02

spilitの認識が誤っておりました。_row.split("r")[0]と[1]に変更いたしました。ありがとうございます。アノテーションからbbboxを引っ張ってくる箇所を変更する必要があるのですが、 x1 = bbox.split("")[0] y2 = bbox.split("")[1] w = bbox.split("")[2] h = bbox.split("")[3] と,もうひとつの # you shold append index of category here labels.append(self.class_dict["class_name"]) をどう変更していけば良いか分からないです。一つ目のx1,y１などにはアノテーションにある情報をセットすること、二つ目では、アノテーションにある「category_id」をセットするという目標は把握しているのですが、どこに何を挿入していけばよいか分からない状況です。エラーコード表示はdataset[]を確認する際に in _read_image(self, image_path) 166 print("image_path: ", image_path) 167 image = cv2.imread(str(image_path)) --> 168 print("image: ", image.shape) 169 image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 170 return image AttributeError: 'NoneType' object has no attribute 'shape' と表示されております。立て続けですがよろしくお願いいたします。下記に該当コードを記述しております。 def _get_annotation(self, annotation_file_path): objects = json.load(codecs.open(annotation_file_path, 'r', 'utf-8-sig')) objects = objects["annotations"] boxes = [] labels = [] for item in objects: class_name = "Road-Marking" # we're only concerned with clases in our list if class_name in self.class_dict: bbox = item["bbox"] if bbox is not None: x1 = bbox.split("")[0] y2 = bbox.split("")[1] w = bbox.split("")[2] h = bbox.split("")[3] # VOC dataset format follows Matlab, in which indexes start from 0 # We do not need this here... x1 = float(x1) - 1 y1 = float(y1) - 1 x2 = x1 + float(w) y2 = y1 + float(h) boxes.append([x1, y1, x2, y2]) # you shold append index of category here labels.append(self.class_dict["class_name"]) return (np.array(boxes, dtype=np.float32), np.array(labels, dtype=np.int64)) def _read_image(self, image_path): print("image_path: ", image_path) image = cv2.imread(str(image_path)) print("image: ", image.shape) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) return image

kazto

2022/08/30 12:24

splitの引数に"r"を渡した理由はなんでしょうか？まだsplitについて誤解していると見受けられます。 tanaka_tarou_00さんは、これをどういうことをするメソッドとお考えですか？

tanaka_tarou_00

2022/08/30 12:40

すみません。入力ミスでした。_row.split(",")にいたしました。 splitは文字列を分割する関数だと考えています。

kazto

2022/09/01 01:04

現時点の問題は以下であっていますか？ ``` in _read_image(self, image_path) 166 print("image_path: ", image_path) 167 image = cv2.imread(str(image_path)) --> 168 print("image: ", image.shape) 169 image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 170 return image AttributeError: 'NoneType' object has no attribute 'shape' ``` 上記でAttributeErrorが発生する。この場合、imreadで画像を開くことに失敗していると考えられます。直前でimage_pathをprintしていますが、これと実際の画像ファイルへのパスが一致していますか？

tanaka_tarou_00

2022/09/01 07:28

imageのパスは一致していると思います。imageに.shapeというプロパティ？が無いかもしれないです。 .shapeはtensor型、あるいはnumpy.array型でないと使えないと勉強したのですが、入れ方が分からないです。これより前にtorch.tensorに変換する必要があると思うのですが、どのように入れたらよろしいでしょうか？

tanaka_tarou_00

2022/09/01 07:29

現時点での問題はご提示頂いた箇所であっております。

kazto

2022/09/01 07:51 編集

正しく画像が読み込めていれば、imageには画像のインスタンスが入っており、shapeも取れるはずです。しかしながら、imageにはNoneTypeが入り、imreadがエラー終了していることがわかります。imreadが失敗する原因は、入力したパスがおかしいことが大半です（画像ファイルが壊れているなどなければの話）。 > imageのパスは一致していると思います。その確証はありますか？ご自身のコードが間違っている前提で、疑ってかかりましょう。 image_pathは相対パス、絶対パスどちらでしょうか？今いるディレクトリと画像ファイルの位置関係は？

tanaka_tarou_00

2022/09/02 06:53

csv_path は"/content/valid/_annotations.coco.json"で設定していますがこちらのことでしょうか？画像は/content/valid/に入っています。また、print("image:", image)とやるとname 'image' is not definedと返ってきます。画像をtensor化したりノーマライズする部分は_read_imageが動いていれば、後はgetitem内のpreprocessという関数の中で処理されるの把握できたのですが、class内のどこを変更したらよいか分からないです。ご教授頂いたパスを変更する場合も、どこのことを指しているかまだ理解できていないです。下記にコードを添付いたします。 from torch.utils.data import Dataset import json import cv2 import codecs import numpy as np # dataset class MyDataset(Dataset): """ dataset class. img_size=416 """ def __init__(self, csv_path="/content/valid/_annotations.coco.json", class_names="Road-Marking", img_size=608, augmentation=None, min_size=1): """ Args: """ if csv_path is"/content/valid/_annotations.coco.json": raise Exception("csv path is not specified.") if class_names is "Road-Marking": raise Exception("please specify class names.") self.max_labels = 30 self.img_size = img_size self.min_size = min_size self.lrflip = augmentation['LRFLIP'] self.jitter = augmentation['JITTER'] self.random_placing = augmentation['RANDOM_PLACING'] self.hue = augmentation['HUE'] self.saturation = augmentation['SATURATION'] self.exposure = augmentation['EXPOSURE'] self.random_distort = augmentation['RANDOM_DISTORT'] self.random_croppig_target = False self.random_croppig_base = False _f = open(csv_path, "r") _raw = _f.read() _raw = _raw.strip() _csv_list = _raw.split("\n") self.csv_list = _csv_list self.class_names = class_names self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} def __len__(self): print("__getaaa_") return len(image_list) def __getitem__(self, index): print("__getitem__") print("image:", index, image_list[index]) image_name = image_list[index]["file_name"] print(image_name) img_path = "/content/valid/" + image_name print(img_path) anno_path = "/content/valid/" print(anno_path) _row = self.csv_list[index] print(_row) img_path = _row.split(",")[0] anno_path = _row.split(",")[1] lrflip = False # load image and preprocess img = self._read_image(img_path) # load labels boxes, labels = self._get_annotation(anno_path) img, info_img = preprocess(img, self.img_size, jitter=self.jitter, random_placing=self.random_placing) img = np.transpose(img / 255., (2, 0, 1)) # concat class + box label_list = [] for l, b in zip(labels, boxes): # refine box x1 = float(b[0]) y1 = float(b[1]) x2 = float(b[2]) y2 = float(b[3]) # seems _get_annotation convert (x1, y1, w, h) > (x1, y1, x2, y2) b = [x1, y1, x2-x1, y2-y1] if b[2] > self.min_size and b[3] > self.min_size: label_list.append([]) label_list[-1].append(l) label_list[-1].extend(b) labels = label_list padded_labels = np.zeros((self.max_labels, 5)) if len(labels) > 0: labels = np.stack(labels) labels = label2yolobox(labels, info_img, self.img_size, lrflip) padded_labels[range(len(labels))[:self.max_labels]] = labels[:self.max_labels] padded_labels = torch.from_numpy(padded_labels) return img, padded_labels, info_img def _get_annotation(self, annotation_file_path): objects = json.load(codecs.open(annotation_file_path, 'r', 'utf-8-sig')) objects = objects["annotations"] boxes = [] labels = [] for item in objects: class_names = "Road-Marking" # we're only concerned with clases in our list if class_names in self.class_dict: bbox = item["bbox"] if bbox is not None: x1 = bbox.split(",")[0] y2 = bbox.split(",")[1] w = bbox.split(",")[2] h = bbox.split(",")[3] # VOC dataset format follows Matlab, in which indexes start from 0 # We do not need this here... x1 = float(x1) - 1 y1 = float(y1) - 1 x2 = x1 + float(w) y2 = y1 + float(h) boxes.append([x1, y1, x2, y2]) # you shold append index of category here labels.append(self.class_dict["category_id"]) return (np.array(boxes, dtype=np.float32), np.array(labels, dtype=np.int64)) def _read_image(self, image_path): print("image_path: ", image_path) image = cv2.imread(str(image_path)) print("image: ",image.shape) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) return image

kazto

2022/09/03 01:41

いいえ、cvs_pathではなく、image_pathです。 def _read_image(self, image_path): print("image_path: ", image_path) image = cv2.imread(str(image_path)) print("image: ",image.shape) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) return image ここでエラーがでているのですよね？

tanaka_tarou_00

2022/09/04 12:02

ご教授頂いたパスを変更する場合も、どこのことを指しているかまだ理解できていないです。 image_pathはどのようにセットしていけばよいのでしょうか？

行動規範の内容に同意します