
こちらの記事を参考にリアルタイムカメラで画像認識を行うcam_demo.pyのカメラ指定をmjpg-streamerのソフトを用いたネットワークカメラのIPアドレスに指定し
python
1cap = cv2.VideoCapture("http://192.168.xxx.xxx:8080/?action=stream")
実行すると以下の2パターンのオーバーフローのようなエラー
FPS of the video is 1.00 Traceback (most recent call last): File "cam_demo.py", line 151, in <module> list(map(lambda x: write(x, orig_im), output)) File "cam_demo.py", line 151, in <lambda> list(map(lambda x: write(x, orig_im), output)) File "cam_demo.py", line 47, in write label = "{0}".format(classes[cls]) IndexError: list index out of range
FPS of the video is 3.71 Traceback (most recent call last): File "cam_demo.py", line 151, in <module> list(map(lambda x: write(x, orig_im), output)) File "cam_demo.py", line 151, in <lambda> list(map(lambda x: write(x, orig_im), output)) File "cam_demo.py", line 47, in write label = "{0}".format(classes[cls]) IndexError: cannot fit 'int' into an index-sized integer
が起こります.
そこで label = "{0}".format(classes[cls]) の cls を出力してみると
値が-36902482152534310912となった後にエラーが出ていました.
YOLOのcam_demo.pyの内容を理解できていないため,
なぜ,cv2.VideoCaptureの指定を内蔵カメラからIPアドレスに変更するとエラーになってしまうのかわかりません.
なお,デフォルト指定(cv2.VideoCapture(0))の内蔵カメラの場合だと上記のようなエラーは起きませんでした.
原因を教えていただければ幸いです.
---cam_demo.py---
エラー47行付近のコード
def write(x, img): c1 = tuple(x[1:3].int()) c2 = tuple(x[3:5].int()) cls = int(x[-1]) print(cls) label = "{0}".format(classes[cls])
ソースコード
python
1from __future__ import division 2import time 3import torch 4import torch.nn as nn 5from torch.autograd import Variable 6import numpy as np 7import cv2 8from util import * 9from darknet import Darknet 10from preprocess import prep_image, inp_to_image 11import pandas as pd 12import random 13import argparse 14import pickle as pkl 15 16def get_test_input(input_dim, CUDA): 17 img = cv2.imread("imgs/messi.jpg") 18 img = cv2.resize(img, (input_dim, input_dim)) 19 img_ = img[:,:,::-1].transpose((2,0,1)) 20 img_ = img_[np.newaxis,:,:,:]/255.0 21 img_ = torch.from_numpy(img_).float() 22 img_ = Variable(img_) 23 24 if CUDA: 25 img_ = img_.cuda() 26 27 return img_ 28 29def prep_image(img, inp_dim): 30 """ 31 Prepare image for inputting to the neural network. 32 33 Returns a Variable 34 """ 35 36 orig_im = img 37 dim = orig_im.shape[1], orig_im.shape[0] 38 img = cv2.resize(orig_im, (inp_dim, inp_dim)) 39 img_ = img[:,:,::-1].transpose((2,0,1)).copy() 40 img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 41 return img_, orig_im, dim 42 43def write(x, img): 44 c1 = tuple(x[1:3].int()) 45 c2 = tuple(x[3:5].int()) 46 cls = int(x[-1]) 47 print(cls) 48 label = "{0}".format(classes[cls]) 49 color = random.choice(colors) 50 cv2.rectangle(img, c1, c2,color, 1) 51 t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 52 c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4 53 cv2.rectangle(img, c1, c2,color, -1) 54 cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1); 55 return img 56 57def arg_parse(): 58 """ 59 Parse arguements to the detect module 60 61 """ 62 63 64 parser = argparse.ArgumentParser(description='YOLO v3 Cam Demo') 65 parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.25) 66 parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4) 67 parser.add_argument("--reso", dest = 'reso', help = 68 "Input 
resolution of the network. Increase to increase accuracy. Decrease to increase speed", 69 default = "160", type = str) 70 return parser.parse_args() 71 72 73 74if __name__ == '__main__': 75 cfgfile = "cfg/yolov3.cfg" 76 weightsfile = "yolov3.weights" 77 num_classes = 80 78 79 args = arg_parse() 80 confidence = float(args.confidence) 81 nms_thesh = float(args.nms_thresh) 82 start = 0 83 CUDA = torch.cuda.is_available() 84 85 86 87 88 num_classes = 80 89 bbox_attrs = 5 + num_classes 90 91 model = Darknet(cfgfile) 92 model.load_weights(weightsfile) 93 94 model.net_info["height"] = args.reso 95 inp_dim = int(model.net_info["height"]) 96 97 assert inp_dim % 32 == 0 98 assert inp_dim > 32 99 100 if CUDA: 101 model.cuda() 102 103 model.eval() 104 105 videofile = 'video.avi' 106 107 cap = cv2.VideoCapture("http://192.168.179.5:8080/?action=stream") 108 109 assert cap.isOpened(), 'Cannot capture source' 110 111 frames = 0 112 start = time.time() 113 while cap.isOpened(): 114 115 ret, frame = cap.read() 116 if ret: 117 118 img, orig_im, dim = prep_image(frame, inp_dim) 119 120# im_dim = torch.FloatTensor(dim).repeat(1,2) 121 122 123 if CUDA: 124 im_dim = im_dim.cuda() 125 img = img.cuda() 126 127 128 output = model(Variable(img), CUDA) 129 output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh) 130 131 if type(output) == int: 132 frames += 1 133 print("FPS of the video is {:5.2f}".format( frames / (time.time() - start))) 134 cv2.imshow("frame", orig_im) 135 key = cv2.waitKey(1) 136 if key & 0xFF == ord('q'): 137 break 138 continue 139 140 141 142 output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))/inp_dim 143 144# im_dim = im_dim.repeat(output.size(0), 1) 145 output[:,[1,3]] *= frame.shape[1] 146 output[:,[2,4]] *= frame.shape[0] 147 148 149 classes = load_classes('data/coco.names') 150 colors = pkl.load(open("pallete", "rb")) 151 152 list(map(lambda x: write(x, orig_im), output)) 153 154 155 cv2.imshow("frame", orig_im) 156 key 
= cv2.waitKey(1) 157 if key & 0xFF == ord('q'): 158 break 159 frames += 1 160 print("FPS of the video is {:5.2f}".format( frames / (time.time() - start))) 161 162 163 else: 164 break 165 166 167 168 169 170


あなたの回答
tips
プレビュー
