yolov3のcam_demo.pyのIndexError: list index out of rangeとエラーになる原因がわかりません．

こちらの記事を参考にリアルタイムカメラで画像認識を行うcam_demo.pyのカメラ指定をmjpg-streamerのソフトを用いたネットワークカメラのIPアドレスに指定し

python
1cap = cv2.VideoCapture("http://192.168.xxx.xxx:8080/?action=stream")

実行すると以下の２パターンのオーバーフローのようなエラー

FPS of the video is  1.00
Traceback (most recent call last):
  File "cam_demo.py", line 151, in <module>
    list(map(lambda x: write(x, orig_im), output))
  File "cam_demo.py", line 151, in <lambda>
    list(map(lambda x: write(x, orig_im), output))
  File "cam_demo.py", line 47, in write
    label = "{0}".format(classes[cls])
IndexError: list index out of range

FPS of the video is  3.71
Traceback (most recent call last):
  File "cam_demo.py", line 151, in <module>
    list(map(lambda x: write(x, orig_im), output))
  File "cam_demo.py", line 151, in <lambda>
    list(map(lambda x: write(x, orig_im), output))
  File "cam_demo.py", line 47, in write
    label = "{0}".format(classes[cls])
IndexError: cannot fit 'int' into an index-sized integer

が起こります．
そこで label = "{0}".format(classes[cls])のCLSを出力してみると
値が-36902482152534310912となった後にエラーが出ていました．
YOLOのcam_demo.pyの内容を理解できていないため，
なぜ，cv2.VideoCaptureの指定を内蔵カメラからIPアドレスしてするとエラーになってしまうのかわかりません．
なお，デフォルト指定（cv2.VideoCapture(0)）の内蔵カメラの場合だと上記のようなエラーは起きませんでした．
原因を教えていただければ幸いです．

---cam_demo.py---
エラー47行付近のコード

def write(x, img):
    c1 = tuple(x[1:3].int())
    c2 = tuple(x[3:5].int())
    cls = int(x[-1])
    print(cls)
    label = "{0}".format(classes[cls])

ソースコード

python
1from __future__ import division
2import time
3import torch 
4import torch.nn as nn
5from torch.autograd import Variable
6import numpy as np
7import cv2 
8from util import *
9from darknet import Darknet
10from preprocess import prep_image, inp_to_image
11import pandas as pd
12import random 
13import argparse
14import pickle as pkl
15
16def get_test_input(input_dim, CUDA):
17    img = cv2.imread("imgs/messi.jpg")
18    img = cv2.resize(img, (input_dim, input_dim)) 
19    img_ =  img[:,:,::-1].transpose((2,0,1))
20    img_ = img_[np.newaxis,:,:,:]/255.0
21    img_ = torch.from_numpy(img_).float()
22    img_ = Variable(img_)
23    
24    if CUDA:
25        img_ = img_.cuda()
26    
27    return img_
28
29def prep_image(img, inp_dim):
30    """
31    Prepare image for inputting to the neural network. 
32    
33    Returns a Variable 
34    """
35
36    orig_im = img
37    dim = orig_im.shape[1], orig_im.shape[0]
38    img = cv2.resize(orig_im, (inp_dim, inp_dim))
39    img_ = img[:,:,::-1].transpose((2,0,1)).copy()
40    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
41    return img_, orig_im, dim
42
43def write(x, img):
44    c1 = tuple(x[1:3].int())
45    c2 = tuple(x[3:5].int())
46    cls = int(x[-1])
47    print(cls)
48    label = "{0}".format(classes[cls])
49    color = random.choice(colors)
50    cv2.rectangle(img, c1, c2,color, 1)
51    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
52    c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
53    cv2.rectangle(img, c1, c2,color, -1)
54    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1);
55    return img
56
57def arg_parse():
58    """
59    Parse arguements to the detect module
60    
61    """
62    
63    
64    parser = argparse.ArgumentParser(description='YOLO v3 Cam Demo')
65    parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.25)
66    parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
67    parser.add_argument("--reso", dest = 'reso', help = 
68                        "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
69                        default = "160", type = str)
70    return parser.parse_args()
71
72
73
74if __name__ == '__main__':
75    cfgfile = "cfg/yolov3.cfg"
76    weightsfile = "yolov3.weights"
77    num_classes = 80
78
79    args = arg_parse()
80    confidence = float(args.confidence)
81    nms_thesh = float(args.nms_thresh)
82    start = 0
83    CUDA = torch.cuda.is_available()
84    
85
86    
87    
88    num_classes = 80
89    bbox_attrs = 5 + num_classes
90    
91    model = Darknet(cfgfile)
92    model.load_weights(weightsfile)
93    
94    model.net_info["height"] = args.reso
95    inp_dim = int(model.net_info["height"])
96    
97    assert inp_dim % 32 == 0 
98    assert inp_dim > 32
99
100    if CUDA:
101        model.cuda()
102            
103    model.eval()
104    
105    videofile = 'video.avi'
106    
107    cap = cv2.VideoCapture("http://192.168.179.5:8080/?action=stream")
108    
109    assert cap.isOpened(), 'Cannot capture source'
110    
111    frames = 0
112    start = time.time()    
113    while cap.isOpened():
114        
115        ret, frame = cap.read()
116        if ret:
117            
118            img, orig_im, dim = prep_image(frame, inp_dim)
119            
120#            im_dim = torch.FloatTensor(dim).repeat(1,2)                        
121            
122            
123            if CUDA:
124                im_dim = im_dim.cuda()
125                img = img.cuda()
126            
127            
128            output = model(Variable(img), CUDA)
129            output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)
130
131            if type(output) == int:
132                frames += 1
133                print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
134                cv2.imshow("frame", orig_im)
135                key = cv2.waitKey(1)
136                if key & 0xFF == ord('q'):
137                    break
138                continue
139            
140
141        
142            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))/inp_dim
143            
144#            im_dim = im_dim.repeat(output.size(0), 1)
145            output[:,[1,3]] *= frame.shape[1]
146            output[:,[2,4]] *= frame.shape[0]
147
148            
149            classes = load_classes('data/coco.names')
150            colors = pkl.load(open("pallete", "rb"))
151            
152            list(map(lambda x: write(x, orig_im), output))
153            
154            
155            cv2.imshow("frame", orig_im)
156            key = cv2.waitKey(1)
157            if key & 0xFF == ord('q'):
158                break
159            frames += 1
160            print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
161
162            
163        else:
164            break
165    
166
167    
168    
169
170

退会済みユーザー

2020/01/12 01:13

OpenCVでネットワークカメラにURLで直接アクセスできることを初めて知りました。大変勉強になります。 Q.1 cv2.imshow("frame", orig_im)　で画像は表示されていますか？ Q.2 list(map(lambda x: write(x, orig_im), output))のx,orig_im, outputのshape、typeは何が表示されますか？