質問編集履歴

1

コンフィデンスが格納されている部分からの抜き出し方を教えていただきたく、predictionの出力を追記しました。

2019/10/23 12:22

投稿

Pinkun
Pinkun

スコア13

test CHANGED
File without changes
test CHANGED
@@ -1,273 +1,169 @@
1
- YOLOv3におけるdetect.pyの出力で実際に検出した物体のコンフィデンスレベルの取得方法がわかりません.
1
+ YOLOv3のホームページで公開されているdetect.pyというファイルにおいて、検出された物体のコンフィデンスの値を取得したいと考えています。
2
+
2
-
3
+ ソースコード中のpredictionにそれらの情報が格納されていると考え抽出しようと考えているのですが、中身の値の認識に困っています。検出された物体のコンフィデンスを正確に出力するためにはどのようにしたら良いか教えていただけると幸いです。
4
+
3
- これらのコンフィデンスレベルがどこから引用可能か教えていただきたいです.
5
+ ソースコードはdetect.pyの一部抜粋となります。
4
6
 
5
7
 
6
8
 
7
9
  ```python
8
10
 
9
- from __future__ import division
10
-
11
- import time
12
-
13
- import torch
14
-
15
- import torch.nn as nn
16
-
17
- from torch.autograd import Variable
18
-
19
- import numpy as np
20
-
21
- import cv2
22
-
23
- from util import *
24
-
25
- import argparse
26
-
27
- import os
28
-
29
- import os.path as osp
30
-
31
- from darknet import Darknet
32
-
33
- from preprocess import prep_image, inp_to_image
34
-
35
- import pandas as pd
36
-
37
- import random
38
-
39
- import pickle as pkl
40
-
41
- import itertools
42
-
43
-
44
-
45
- class test_net(nn.Module):
46
-
47
- def __init__(self, num_layers, input_size):
48
-
49
- super(test_net, self).__init__()
50
-
51
- self.num_layers= num_layers
52
-
53
- self.linear_1 = nn.Linear(input_size, 5)
54
-
55
- self.middle = nn.ModuleList([nn.Linear(5,5) for x in range(num_layers)])
56
-
57
- self.output = nn.Linear(5,2)
58
-
59
-
60
-
61
- def forward(self, x):
62
-
63
- x = x.view(-1)
64
-
65
- fwd = nn.Sequential(self.linear_1, *self.middle, self.output)
66
-
67
- return fwd(x)
68
-
69
-
70
-
71
- def get_test_input(input_dim, CUDA):
72
-
73
- img = cv2.imread("dog-cycle-car.png")
74
-
75
- img = cv2.resize(img, (input_dim, input_dim))
76
-
77
- img_ = img[:,:,::-1].transpose((2,0,1))
78
-
79
- img_ = img_[np.newaxis,:,:,:]/255.0
80
-
81
- img_ = torch.from_numpy(img_).float()
82
-
83
- img_ = Variable(img_)
84
-
85
-
11
+ def arg_parse():
12
+
13
+
14
+
15
+ parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')
16
+
17
+
18
+
19
+ parser.add_argument("--images", dest = 'images', help =
20
+
21
+ "Image / Directory containing images to perform detection upon",
22
+
23
+ default = "imgs", type = str)
24
+
25
+ parser.add_argument("--det", dest = 'det', help =
26
+
27
+ "Image / Directory to store detections to",
28
+
29
+ default = "det", type = str)
30
+
31
+ parser.add_argument("--bs", dest = "bs", help = "Batch size", default = 1)
32
+
33
+ parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
34
+
35
+ parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
36
+
37
+ parser.add_argument("--cfg", dest = 'cfgfile', help =
38
+
39
+ "Config file",
40
+
41
+ default = "cfg/yolov3.cfg", type = str)
42
+
43
+ parser.add_argument("--weights", dest = 'weightsfile', help =
44
+
45
+ "weightsfile",
46
+
47
+ default = "yolov3.weights", type = str)
48
+
49
+ parser.add_argument("--reso", dest = 'reso', help =
50
+
51
+ "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
52
+
53
+ default = "416", type = str)
54
+
55
+ parser.add_argument("--scales", dest = "scales", help = "Scales to use for detection",
56
+
57
+ default = "1,2,3", type = str)
58
+
59
+
60
+
61
+ return parser.parse_args()
62
+
63
+
64
+
65
+ if __name__ == '__main__':
66
+
67
+ args = arg_parse()
68
+
69
+ scales = args.scales
70
+
71
+ images = args.images
72
+
73
+ batch_size = int(args.bs)
74
+
75
+ confidence = float(args.confidence)
76
+
77
+ nms_thesh = float(args.nms_thresh)
78
+
79
+ start = 0
80
+
81
+
82
+
83
+ CUDA = torch.cuda.is_available()
84
+
85
+
86
+
87
+ num_classes = 80
88
+
89
+ classes = load_classes('data/coco.names')
90
+
91
+
92
+
93
+ model = Darknet(args.cfgfile)
94
+
95
+ model.load_weights(args.weightsfile)
96
+
97
+
98
+
99
+ model.net_info["height"] = args.reso
100
+
101
+ inp_dim = int(model.net_info["height"])
102
+
103
+ assert inp_dim % 32 == 0
104
+
105
+ assert inp_dim > 32
106
+
107
+
108
+
109
+ #If there's a GPU availible, put the model on GPU
86
110
 
87
111
  if CUDA:
88
112
 
89
- img_ = img_.cuda()
90
-
91
- num_classes
92
-
93
- return img_
94
-
95
-
96
-
97
- def arg_parse():
98
-
99
- """
100
-
101
- Parse arguements to the detect module
102
-
103
-
104
-
105
- """
106
-
107
-
108
-
109
- parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')
110
-
111
-
112
-
113
- parser.add_argument("--images", dest = 'images', help =
114
-
115
- "Image / Directory containing images to perform detection upon",
116
-
117
- default = "imgs", type = str)
118
-
119
- parser.add_argument("--det", dest = 'det', help =
120
-
121
- "Image / Directory to store detections to",
122
-
123
- default = "det", type = str)
124
-
125
- parser.add_argument("--bs", dest = "bs", help = "Batch size", default = 1)
126
-
127
- parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
128
-
129
- parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
130
-
131
- parser.add_argument("--cfg", dest = 'cfgfile', help =
132
-
133
- "Config file",
134
-
135
- default = "cfg/yolov3.cfg", type = str)
136
-
137
- parser.add_argument("--weights", dest = 'weightsfile', help =
138
-
139
- "weightsfile",
140
-
141
- default = "yolov3.weights", type = str)
142
-
143
- parser.add_argument("--reso", dest = 'reso', help =
144
-
145
- "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
146
-
147
- default = "416", type = str)
148
-
149
- parser.add_argument("--scales", dest = "scales", help = "Scales to use for detection",
150
-
151
- default = "1,2,3", type = str)
152
-
153
-
154
-
155
- return parser.parse_args()
156
-
157
-
158
-
159
- if __name__ == '__main__':
160
-
161
- args = arg_parse()
162
-
163
-
164
-
165
- scales = args.scales
166
-
167
-
168
-
169
- images = args.images
170
-
171
- batch_size = int(args.bs)
172
-
173
- confidence = float(args.confidence)
174
-
175
- nms_thesh = float(args.nms_thresh)
176
-
177
- start = 0
178
-
179
-
180
-
181
- CUDA = torch.cuda.is_available()
182
-
183
-
184
-
185
- num_classes = 80
186
-
187
- classes = load_classes('data/coco.names')
188
-
189
-
190
-
191
- print("Loading network.....")
192
-
193
- model = Darknet(args.cfgfile)
194
-
195
- model.load_weights(args.weightsfile)
196
-
197
- print("Network successfully loaded")
198
-
199
-
200
-
201
- model.net_info["height"] = args.reso
202
-
203
- inp_dim = int(model.net_info["height"])
204
-
205
- assert inp_dim % 32 == 0
206
-
207
- assert inp_dim > 32
208
-
209
-
210
-
211
- #If there's a GPU availible, put the model on GPU
113
+ model.cuda()
114
+
115
+
116
+
117
+ model.eval()
118
+
119
+
120
+
121
+ read_dir = time.time()
122
+
123
+ #Detection phase
124
+
125
+ try:
126
+
127
+ imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images) if os.path.splitext(img)[1] == '.png' or os.path.splitext(img)[1] =='.jpeg' or os.path.splitext(img)[1] =='.jpg']
128
+
129
+ except NotADirectoryError:
130
+
131
+ imlist = []
132
+
133
+ imlist.append(osp.join(osp.realpath('.'), images))
134
+
135
+ except FileNotFoundError:
136
+
137
+ print ("No file or directory with the name {}".format(images))
138
+
139
+ exit()
140
+
141
+
142
+
143
+ if not os.path.exists(args.det):
144
+
145
+ os.makedirs(args.det)
146
+
147
+
148
+
149
+ load_batch = time.time()
150
+
151
+
152
+
153
+ batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
154
+
155
+ im_batches = [x[0] for x in batches]
156
+
157
+ orig_ims = [x[1] for x in batches]
158
+
159
+ im_dim_list = [x[2] for x in batches]
160
+
161
+ im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
162
+
163
+
212
164
 
213
165
  if CUDA:
214
166
 
215
- model.cuda()
216
-
217
-
218
-
219
- #Set the model in evaluation mode
220
-
221
- model.eval()
222
-
223
-
224
-
225
- read_dir = time.time()
226
-
227
- #Detection phase
228
-
229
- try:
230
-
231
- imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images) if os.path.splitext(img)[1] == '.png' or os.path.splitext(img)[1] =='.jpeg' or os.path.splitext(img)[1] =='.jpg']
232
-
233
- except NotADirectoryError:
234
-
235
- imlist = []
236
-
237
- imlist.append(osp.join(osp.realpath('.'), images))
238
-
239
- except FileNotFoundError:
240
-
241
- print ("No file or directory with the name {}".format(images))
242
-
243
- exit()
244
-
245
-
246
-
247
- if not os.path.exists(args.det):
248
-
249
- os.makedirs(args.det)
250
-
251
-
252
-
253
- load_batch = time.time()
254
-
255
-
256
-
257
- batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
258
-
259
- im_batches = [x[0] for x in batches]
260
-
261
- orig_ims = [x[1] for x in batches]
262
-
263
- im_dim_list = [x[2] for x in batches]
264
-
265
- im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
266
-
267
-
268
-
269
- if CUDA:
270
-
271
167
  im_dim_list = im_dim_list.cuda()
272
168
 
273
169
 
@@ -312,6 +208,8 @@
312
208
 
313
209
  for batch in im_batches:
314
210
 
211
+ #load the image
212
+
315
213
  start = time.time()
316
214
 
317
215
  if CUDA:
@@ -324,10 +222,18 @@
324
222
 
325
223
  prediction = model(Variable(batch), CUDA)
326
224
 
225
+ # prediction here
226
+
227
+ print ("prediction", prediction)
228
+
327
229
 
328
230
 
329
231
  prediction = write_results(prediction, confidence, num_classes, nms = True, nms_conf = nms_thesh)
330
232
 
233
+ # prediction here
234
+
235
+ print ("prediction2", prediction)
236
+
331
237
 
332
238
 
333
239
  if type(prediction) == int:
@@ -340,7 +246,7 @@
340
246
 
341
247
  end = time.time()
342
248
 
343
- # print(end - start)
249
+
344
250
 
345
251
  prediction[:,0] += i*batch_size
346
252
 
@@ -356,136 +262,50 @@
356
262
 
357
263
  output = torch.cat((output,prediction))
358
264
 
359
-
360
-
361
- for im_num, image in enumerate(imlist[i*batch_size: min((i + 1)*batch_size, len(imlist))]):
362
-
363
- im_id = i*batch_size + im_num
364
-
365
- objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
366
-
367
- print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
368
-
369
- print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
370
-
371
- print("----------------------------------------------------------")
372
-
373
- i += 1
374
-
375
-
376
-
377
- if CUDA:
378
-
379
- torch.cuda.synchronize()
380
-
381
-
382
-
383
- try:
384
-
385
- output
386
-
387
- except NameError:
388
-
389
- print("No detections were made")
390
-
391
- exit()
392
-
393
-
394
-
395
- im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
396
-
397
- scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)
398
-
399
- output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
400
-
401
- output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
402
-
403
- output[:,1:5] /= scaling_factor
404
-
405
-
406
-
407
- for i in range(output.shape[0]):
408
-
409
- output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
410
-
411
- output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
412
-
413
- output_recast = time.time()
414
-
415
- class_load = time.time()
416
-
417
- colors = pkl.load(open("pallete", "rb"))
418
-
419
- draw = time.time()
420
-
421
- def write(x, batches, results):
422
-
423
- c1 = tuple(x[1:3].int())
424
-
425
- c2 = tuple(x[3:5].int())
426
-
427
- img = results[int(x[0])]
428
-
429
- cls = int(x[-1])
430
-
431
- label = "{0}".format(classes[cls])
432
-
433
- color = random.choice(colors)
434
-
435
- cv2.rectangle(img, c1, c2,color, 1)
436
-
437
- t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
438
-
439
- c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
440
-
441
- cv2.rectangle(img, c1, c2,color, -1)
442
-
443
- cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
444
-
445
- return img
446
-
447
-
448
-
449
- list(map(lambda x: write(x, im_batches, orig_ims), output))
450
-
451
-
452
-
453
- det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args.det,x.split("/")[-1]))
454
-
455
- list(map(cv2.imwrite, det_names, orig_ims))
456
-
457
- end = time.time()
458
-
459
-
460
-
461
- print()
462
-
463
- print("SUMMARY")
464
-
465
- print("----------------------------------------------------------")
466
-
467
- print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
468
-
469
- print()
470
-
471
- print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
472
-
473
- print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
474
-
475
- print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) + " images)", output_recast - start_det_loop))
476
-
477
- print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
478
-
479
- print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
480
-
481
- print("{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch)/len(imlist)))
482
-
483
- print("----------------------------------------------------------")
484
-
485
-
486
-
487
- torch.cuda.empty_cache()
488
-
489
-
490
-
491
265
  ```
266
+
267
+
268
+
269
+ これらのpredictionの出力は以下のようになっています。
270
+
271
+
272
+
273
+ prediction tensor([[[1.5383e+01, 1.2399e+01, 9.3864e+01, ..., 7.5703e-04,
274
+
275
+ 9.0208e-04, 5.9246e-04],
276
+
277
+ [1.8194e+01, 1.4778e+01, 1.0411e+02, ..., 2.1265e-04,
278
+
279
+ 1.1475e-03, 1.6560e-03],
280
+
281
+ [2.1265e+01, 1.2748e+01, 3.8478e+02, ..., 3.6203e-03,
282
+
283
+ 7.6282e-03, 6.8394e-03],
284
+
285
+ ...,
286
+
287
+ [4.1259e+02, 4.1129e+02, 3.3664e+00, ..., 2.8758e-05,
288
+
289
+ 3.9763e-05, 2.3203e-05],
290
+
291
+ [4.1155e+02, 4.0989e+02, 7.5316e+00, ..., 1.7735e-04,
292
+
293
+ 2.2018e-04, 2.0052e-04],
294
+
295
+ [4.1110e+02, 4.1259e+02, 5.2966e+01, ..., 9.5141e-05,
296
+
297
+ 1.5668e-04, 2.1929e-04]]])
298
+
299
+
300
+
301
+ prediction2 tensor([[ 0.0000, 89.3013, 110.7477, 303.7198, 294.3178, 0.9951, 0.9997,
302
+
303
+ 1.0000],
304
+
305
+ [ 0.0000, 256.5005, 98.3645, 373.2559, 144.1284, 0.9953, 0.9431,
306
+
307
+ 7.0000],
308
+
309
+ [ 0.0000, 69.5096, 173.2218, 170.4211, 343.0221, 0.9997, 0.9882,
310
+
311
+ 16.0000]])