編集履歴

質問編集履歴

コンフィデンスが格納されている部分からの抜き出し方を教えていただきたく、predictionの出力を追記しました。

2019/10/23 12:22

投稿

Pinkun

スコア13

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -1,273 +1,169 @@
-YOLOv3におけるdetect.pyの出力として実際に検出した物体のコンフィデンスレベルの取得方法がわかりません.
+YOLOv3ホームページに公開されているdetect.pyというファイルにおいて検出された物体のコンフィデンスの値を取得したいと考えています。
+ソースコード中のpredictionにそれらの情報が格納されていると考え、そこから抽出しようとしているのですが、中身の値の解釈に困っています。検出された物体のコンフィデンスを正確に出力するためにはどのようにしたら良いか教えていただけると幸いです。
-これらのコンフィデンスレベルがどこから引用可能か教えていただきたいです.
+ソースコードはdetect.pyの一部抜粋となります。
 ```python
-from __future__ import division
-import time
-import torch
-import torch.nn as nn
-from torch.autograd import Variable
-import numpy as np
-import cv2
-from util import *
-import argparse
-import os
-import os.path as osp
-from darknet import Darknet
-from preprocess import prep_image, inp_to_image
-import pandas as pd
-import random
-import pickle as pkl
-import itertools
-class test_net(nn.Module):
-    def __init__(self, num_layers, input_size):
-        super(test_net, self).__init__()
-        self.num_layers= num_layers
-        self.linear_1 = nn.Linear(input_size, 5)
-        self.middle = nn.ModuleList([nn.Linear(5,5) for x in range(num_layers)])
-        self.output = nn.Linear(5,2)
-    def forward(self, x):
-        x = x.view(-1)
-        fwd = nn.Sequential(self.linear_1, *self.middle, self.output)
-        return fwd(x)
-def get_test_input(input_dim, CUDA):
-    img = cv2.imread("dog-cycle-car.png")
-    img = cv2.resize(img, (input_dim, input_dim))
-    img_ =  img[:,:,::-1].transpose((2,0,1))
-    img_ = img_[np.newaxis,:,:,:]/255.0
-    img_ = torch.from_numpy(img_).float()
-    img_ = Variable(img_)
+def arg_parse():
+    parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')
+    parser.add_argument("--images", dest = 'images', help =
+                        "Image / Directory containing images to perform detection upon",
+                        default = "imgs", type = str)
+    parser.add_argument("--det", dest = 'det', help =
+                        "Image / Directory to store detections to",
+                        default = "det", type = str)
+    parser.add_argument("--bs", dest = "bs", help = "Batch size", default = 1)
+    parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
+    parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
+    parser.add_argument("--cfg", dest = 'cfgfile', help =
+                        "Config file",
+                        default = "cfg/yolov3.cfg", type = str)
+    parser.add_argument("--weights", dest = 'weightsfile', help =
+                        "weightsfile",
+                        default = "yolov3.weights", type = str)
+    parser.add_argument("--reso", dest = 'reso', help =
+                        "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
+                        default = "416", type = str)
+    parser.add_argument("--scales", dest = "scales", help = "Scales to use for detection",
+                        default = "1,2,3", type = str)
+    return parser.parse_args()
+if __name__ ==  '__main__':
+    args = arg_parse()
+    scales = args.scales
+    images = args.images
+    batch_size = int(args.bs)
+    confidence = float(args.confidence)
+    nms_thesh = float(args.nms_thresh)
+    start = 0
+    CUDA = torch.cuda.is_available()
+    num_classes = 80
+    classes = load_classes('data/coco.names')
+    model = Darknet(args.cfgfile)
+    model.load_weights(args.weightsfile)
+    model.net_info["height"] = args.reso
+    inp_dim = int(model.net_info["height"])
+    assert inp_dim % 32 == 0
+    assert inp_dim > 32
+    #If there's a GPU availible, put the model on GPU
     if CUDA:
-        img_ = img_.cuda()
-    num_classes
-    return img_
-def arg_parse():
-    """
-    Parse arguements to the detect module
-    """
-    parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')
-    parser.add_argument("--images", dest = 'images', help =
-                        "Image / Directory containing images to perform detection upon",
-                        default = "imgs", type = str)
-    parser.add_argument("--det", dest = 'det', help =
-                        "Image / Directory to store detections to",
-                        default = "det", type = str)
-    parser.add_argument("--bs", dest = "bs", help = "Batch size", default = 1)
-    parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
-    parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
-    parser.add_argument("--cfg", dest = 'cfgfile', help =
-                        "Config file",
-                        default = "cfg/yolov3.cfg", type = str)
-    parser.add_argument("--weights", dest = 'weightsfile', help =
-                        "weightsfile",
-                        default = "yolov3.weights", type = str)
-    parser.add_argument("--reso", dest = 'reso', help =
-                        "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
-                        default = "416", type = str)
-    parser.add_argument("--scales", dest = "scales", help = "Scales to use for detection",
-                        default = "1,2,3", type = str)
-    return parser.parse_args()
-if __name__ ==  '__main__':
-    args = arg_parse()
-    scales = args.scales
-    images = args.images
-    batch_size = int(args.bs)
-    confidence = float(args.confidence)
-    nms_thesh = float(args.nms_thresh)
-    start = 0
-    CUDA = torch.cuda.is_available()
-    num_classes = 80
-    classes = load_classes('data/coco.names')
-    print("Loading network.....")
-    model = Darknet(args.cfgfile)
-    model.load_weights(args.weightsfile)
-    print("Network successfully loaded")
-    model.net_info["height"] = args.reso
-    inp_dim = int(model.net_info["height"])
-    assert inp_dim % 32 == 0
-    assert inp_dim > 32
-    #If there's a GPU availible, put the model on GPU
+        model.cuda()
+    model.eval()
+    read_dir = time.time()
+    #Detection phase
+    try:
+        imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images) if os.path.splitext(img)[1] == '.png' or os.path.splitext(img)[1] =='.jpeg' or os.path.splitext(img)[1] =='.jpg']
+    except NotADirectoryError:
+        imlist = []
+        imlist.append(osp.join(osp.realpath('.'), images))
+    except FileNotFoundError:
+        print ("No file or directory with the name {}".format(images))
+        exit()
+    if not os.path.exists(args.det):
+        os.makedirs(args.det)
+    load_batch = time.time()
+    batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
+    im_batches = [x[0] for x in batches]
+    orig_ims = [x[1] for x in batches]
+    im_dim_list = [x[2] for x in batches]
+    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
     if CUDA:
-        model.cuda()
-    #Set the model in evaluation mode
-    model.eval()
-    read_dir = time.time()
-    #Detection phase
-    try:
-        imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images) if os.path.splitext(img)[1] == '.png' or os.path.splitext(img)[1] =='.jpeg' or os.path.splitext(img)[1] =='.jpg']
-    except NotADirectoryError:
-        imlist = []
-        imlist.append(osp.join(osp.realpath('.'), images))
-    except FileNotFoundError:
-        print ("No file or directory with the name {}".format(images))
-        exit()
-    if not os.path.exists(args.det):
-        os.makedirs(args.det)
-    load_batch = time.time()
-    batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
-    im_batches = [x[0] for x in batches]
-    orig_ims = [x[1] for x in batches]
-    im_dim_list = [x[2] for x in batches]
-    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
-    if CUDA:
         im_dim_list = im_dim_list.cuda()
@@ -312,6 +208,8 @@
     for batch in im_batches:
+        #load the image
         start = time.time()
         if CUDA:
@@ -324,10 +222,18 @@
             prediction = model(Variable(batch), CUDA)
+# prediction here
+            print ("prediction", prediction)
         prediction = write_results(prediction, confidence, num_classes, nms = True, nms_conf = nms_thesh)
+# prediction here
+        print ("prediction2", prediction)
         if type(prediction) == int:
@@ -340,7 +246,7 @@
         end = time.time()
-#        print(end - start)
         prediction[:,0] += i*batch_size
@@ -356,136 +262,50 @@
             output = torch.cat((output,prediction))
-        for im_num, image in enumerate(imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]):
-            im_id = i*batch_size + im_num
-            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
-            print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
-            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
-            print("----------------------------------------------------------")
-        i += 1
-        if CUDA:
-            torch.cuda.synchronize()
-    try:
-        output
-    except NameError:
-        print("No detections were made")
-        exit()
-    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
-    scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)
-    output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
-    output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
-    output[:,1:5] /= scaling_factor
-    for i in range(output.shape[0]):
-        output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
-        output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
-    output_recast = time.time()
-    class_load = time.time()
-    colors = pkl.load(open("pallete", "rb"))
-    draw = time.time()
-    def write(x, batches, results):
-        c1 = tuple(x[1:3].int())
-        c2 = tuple(x[3:5].int())
-        img = results[int(x[0])]
-        cls = int(x[-1])
-        label = "{0}".format(classes[cls])
-        color = random.choice(colors)
-        cv2.rectangle(img, c1, c2,color, 1)
-        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
-        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
-        cv2.rectangle(img, c1, c2,color, -1)
-        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
-        return img
-    list(map(lambda x: write(x, im_batches, orig_ims), output))
-    det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args.det,x.split("/")[-1]))
-    list(map(cv2.imwrite, det_names, orig_ims))
-    end = time.time()
-    print()
-    print("SUMMARY")
-    print("----------------------------------------------------------")
-    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
-    print()
-    print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
-    print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
-    print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) +  " images)", output_recast - start_det_loop))
-    print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
-    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
-    print("{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch)/len(imlist)))
-    print("----------------------------------------------------------")
-    torch.cuda.empty_cache()
 ```
+これらのpredictionの出力は以下のようになっています。
+prediction tensor([[[1.5383e+01, 1.2399e+01, 9.3864e+01,  ..., 7.5703e-04,
+          9.0208e-04, 5.9246e-04],
+         [1.8194e+01, 1.4778e+01, 1.0411e+02,  ..., 2.1265e-04,
+          1.1475e-03, 1.6560e-03],
+         [2.1265e+01, 1.2748e+01, 3.8478e+02,  ..., 3.6203e-03,
+          7.6282e-03, 6.8394e-03],
+         ...,
+         [4.1259e+02, 4.1129e+02, 3.3664e+00,  ..., 2.8758e-05,
+          3.9763e-05, 2.3203e-05],
+         [4.1155e+02, 4.0989e+02, 7.5316e+00,  ..., 1.7735e-04,
+          2.2018e-04, 2.0052e-04],
+         [4.1110e+02, 4.1259e+02, 5.2966e+01,  ..., 9.5141e-05,
+          1.5668e-04, 2.1929e-04]]])
+prediction2 tensor([[  0.0000,  89.3013, 110.7477, 303.7198, 294.3178,   0.9951,   0.9997,
+           1.0000],
+        [  0.0000, 256.5005,  98.3645, 373.2559, 144.1284,   0.9953,   0.9431,
+           7.0000],
+        [  0.0000,  69.5096, 173.2218, 170.4211, 343.0221,   0.9997,   0.9882,
+          16.0000]])