どの部分でWEBカメラの情報を取り入れているか知りたい

前提・実現したいこと

こちらのソースコードを使用してマスクの有無を検知させました。
WEBカメラを２台使用して２か所で検知を行いたいのですが、そもそもどの部分でWEBカメラの情報を受け取っているかわかりません。

↓こちらからダウンロードしました。
リンク内容

該当のソースコード

Python
1# USAGE
2# python detect_mask_video.py
3
4# import the necessary packages
5from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
6from tensorflow.keras.preprocessing.image import img_to_array
7from tensorflow.keras.models import load_model
8from imutils.video import VideoStream
9import numpy as np
10import argparse
11import imutils
12import time
13import cv2
14import os
15
def detect_and_predict_mask(frame, faceNet, maskNet):
	"""Detect faces in ``frame`` and classify each one with ``maskNet``.

	Args:
		frame: image array whose first two dimensions are (height, width).
			The face crop is converted with ``cv2.COLOR_BGR2RGB``, so the
			frame is assumed to be in BGR channel order -- TODO confirm
			against the capture source.
		faceNet: face detector exposing ``setInput(blob)`` and ``forward()``.
			The result is indexed as ``detections[0, 0, i, 2]`` (confidence)
			and ``detections[0, 0, i, 3:7]`` (box scaled by ``[w, h, w, h]``).
		maskNet: classifier exposing ``predict(batch, batch_size=...)``.

	Returns:
		A 2-tuple ``(locs, preds)``. ``locs`` is a list of
		``(startX, startY, endX, endY)`` boxes in frame pixel coordinates and
		``preds`` holds the matching ``maskNet.predict`` output, in the same
		order. Both are empty when no face passes the confidence threshold.

	Note:
		Reads the minimum confidence from the module-level ``args`` dict
		(``args["confidence"]``).
	"""
	# grab the dimensions of the frame and then construct a blob from it
	(h, w) = frame.shape[:2]
	blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
		(104.0, 177.0, 123.0))

	# pass the blob through the network and obtain the face detections
	faceNet.setInput(blob)
	detections = faceNet.forward()

	# faces[i] is the preprocessed crop for the box stored in locs[i];
	# the two lists must always be appended to together
	faces = []
	locs = []
	preds = []

	# loop over the detections
	for i in range(0, detections.shape[2]):
		# extract the confidence (i.e., probability) associated with
		# the detection
		confidence = detections[0, 0, i, 2]

		# filter out weak detections by ensuring the confidence is
		# greater than the minimum confidence
		if confidence <= args["confidence"]:
			continue

		# compute the (x, y)-coordinates of the bounding box for the object
		box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
		(startX, startY, endX, endY) = box.astype("int")

		# ensure the bounding boxes fall within the dimensions of the frame
		(startX, startY) = (max(0, startX), max(0, startY))
		(endX, endY) = (min(w - 1, endX), min(h - 1, endY))

		# extract the face ROI; an empty (or all-zero) crop is skipped so
		# that cvtColor/resize are never called on it
		face = frame[startY:endY, startX:endX]
		if not face.any():
			continue

		# convert from BGR to RGB channel ordering, resize to 224x224,
		# and preprocess for the mask network
		face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
		face = cv2.resize(face, (224, 224))
		face = img_to_array(face)
		face = preprocess_input(face)

		# record the face and its bounding box; without these appends
		# the batch below is always empty and nothing is ever predicted
		faces.append(face)
		locs.append((startX, startY, endX, endY))

	# only make predictions if at least one face was detected
	if len(faces) > 0:
		# for faster inference we make one batch prediction on *all*
		# faces rather than one-by-one predictions in the loop above
		faces = np.array(faces, dtype="float32")
		preds = maskNet.predict(faces, batch_size=32)

	# return a 2-tuple of the face locations and their corresponding
	# predictions
	return (locs, preds)
72
# describe the command-line options and collect them into the `args` dict
ap = argparse.ArgumentParser()
ap.add_argument(
	"-f", "--face",
	type=str,
	default="face_detector",
	help="path to face detector model directory",
)
ap.add_argument(
	"-m", "--model",
	type=str,
	default="mask_detector.model",
	help="path to trained face mask detector model",
)
ap.add_argument(
	"-c", "--confidence",
	type=float,
	default=0.5,
	help="minimum probability to filter weak detections",
)
args = vars(ap.parse_args())

# the face detector is stored as two files inside the --face directory:
# the network definition and its trained weights
print("[INFO] loading face detector model...")
prototxtPath = os.path.sep.join((args["face"], "deploy.prototxt"))
weightsPath = os.path.sep.join(
	(args["face"], "res10_300x300_ssd_iter_140000.caffemodel"))
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)

# the mask classifier is loaded from the --model path
print("[INFO] loading face mask detector model...")
maskNet = load_model(args["model"])

# this is where the webcam is selected: `src` is the camera index handed
# to the video stream; pause afterwards so the sensor can warm up
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)
100
# loop over the frames from the video stream; the try/finally guarantees
# that the window and the stream are released even if the loop fails
try:
	while True:
		# grab the frame from the threaded video stream
		frame = vs.read()

		# read() yields None when the camera could not be opened (e.g. a
		# wrong device index); stop cleanly instead of crashing in resize
		if frame is None:
			print("[ERROR] no frame received from the video stream")
			break

		# resize it to have a maximum width of 400 pixels
		frame = imutils.resize(frame, width=400)

		# detect faces in the frame and determine if they are wearing a
		# face mask or not
		(locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)

		# loop over the detected face locations and their corresponding
		# predictions
		for (box, pred) in zip(locs, preds):
			# unpack the bounding box and predictions
			(startX, startY, endX, endY) = box
			(mask, withoutMask) = pred

			# determine the class label and color we'll use to draw
			# the bounding box and text (colors are BGR tuples)
			label = "Mask" if mask > withoutMask else "No Mask"
			color = (0, 255, 0) if label == "Mask" else (0, 0, 255)

			# include the probability in the label
			label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)

			# display the label and bounding box rectangle on the output
			# frame
			cv2.putText(frame, label, (startX, startY - 10),
				cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
			cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

		# show the output frame
		cv2.imshow("Frame", frame)
		key = cv2.waitKey(1) & 0xFF

		# if the `q` key was pressed, break from the loop
		if key == ord("q"):
			break
finally:
	# do a bit of cleanup
	cv2.destroyAllWindows()
	vs.stop()
144

jbpb0

2021/12/21 23:22

pythonのコードの一番最初の行のすぐ上に ```python だけの行を追加してください。また、pythonのコードの一番最後の行のすぐ下に ``` だけの行を追加してください。または、 https://teratail.storage.googleapis.com/uploads/contributed_images/56957fe805d9d7befa7dba6a98676d2b.gif を見て、そのようにしてみてください。現状、コードがとても読み辛いです。質問にコードを載せる際に上記をやってくれたら、他人がコードを読みやすくなり、コードの実行による現象確認もやりやすくなるので、回答されやすくなります。

1T2R3M4

2021/12/21 23:23

nampleさんが書いたコードではないのでしょうか。

nample

2021/12/21 23:30

GitHubからダウンロードしました

jbpb0

2021/12/22 00:14

そういう場合は、コードを引用したURLも質問に書くといいですよ

行動規範の内容に同意します

回答1件

ベストアンサー

調べてないのであてずっぽうで投げやりな回答です。

調べた結果、動かしてませんが読みは正しいと確信しました。
理由： VideoStream(src=0)のsrcの引数が、最終的にcv2.VideoCapture(src)という形式で使われており、事実上VideoStream(src=0)はcv2.VideoCapture(src)のラッパーに過ぎないため。

（分かる人がいて、この回答が違っていればきっとマイナス評価をした上でちゃんと回答がもらえるかもしれません）

■カメラ自体を認識させる方法について

VideoStreamは使ったことがないですが、cv2.VideoCapture()と互換の機能と思います。
で、これ系の関数で大事なのは「何番目のカメラを」という情報になります。

■OpenCVでカメラ2台を認識させる方法について
cv2.VideoCapture()であれば、
cv2.VideoCapture(0)で0番目にOSが認識したカメラ
cv2.VideoCapture(1)で1番目にOSが認識したカメラ
です。

■VideoStreamでカメラ2台を認識させる方法について

以下に掲載のコードの上の方の部分を抜粋します。

Python3
1
2# initialize the video stream and allow the camera sensor to warm up
3print("[INFO] starting video stream...")
4vs = VideoStream(src=0).start()
5time.sleep(2.0)
6
7# loop over the frames from the video stream
8while True:
9    # grab the frame from the threaded video stream and resize it
10    # to have a maximum width of 400 pixels
11    frame = vs.read()

コードのコメントにもある通り、
vs = VideoStream(src=0).start()
でカメラを認識してカメラを使える状態にしているようです。

であれば、
vs0 = VideoStream(src=0).start()
vs1 = VideoStream(src=1).start()
のようにしてやれば、カメラ2台を認識できるようになるはずです。

■実際どうよ？
ですので、

Python3
1
# initialize one video stream per camera and allow the sensors to warm up
# NOTE: `src` is the camera index assigned by the OS and the indices are
# not necessarily consecutive (the second camera may be e.g. 2, not 1);
# on Linux `v4l2-ctl --list-devices` should list them
print("[INFO] starting video stream...")
vs0 = VideoStream(src=0).start()  # <--- changed: first camera
vs1 = VideoStream(src=1).start()  # <--- changed: second camera (must be vs1, not vs0)
time.sleep(2.0)

# loop over the frames from the video streams
while True:
    # grab one frame from each threaded video stream
    frame0 = vs0.read()  # <--- changed
    frame1 = vs1.read()  # <--- changed

    # resize both to have a maximum width of 400 pixels
    frame0 = imutils.resize(frame0, width=400)  # <--- changed
    frame1 = imutils.resize(frame1, width=400)  # <--- changed

    frames = [frame0, frame1]  # <--- changed: keep both frames in a list

    for frame in frames:  # <--- changed: run the existing processing on each frame
        # detect faces in the frame and determine if they are wearing a
        # face mask or not
        (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
        # (the rest of the original per-frame processing is omitted here)

でいけるはずです。
コケたら連絡ください。なるべく対応します。

投稿2021/12/21 23:54

編集2021/12/22 00:00

退会済みユーザー

総合スコア0

nample

2021/12/22 00:40

回答ありがとうございます。環境を言うの忘れていました。ラズパイ４で行っています早速試してみたのですが、 VIDEOIO ERROR: V4L2: Could not obtain specifics of capture window. VIDEOIO ERROR: V4L: can't open camera by index 1 /dev/video1 does not support memory mapping Traceback (most recent call last): File "detect_mask_video.py", line 124, in <module> frame1 = imutils.resize(frame1, width=400) # <--- ここを修正 File "/home/pi/.virtualenvs/cv/lib/python3.7/site-packages/imutils/convenience.py", line 69, in resize (h, w) = image.shape[:2] AttributeError: 'NoneType' object has no attribute 'shape' このようなエラー文？がかえってきました。

nample

2021/12/22 01:12 編集

カメラのIDを変えてみました vs0 = VideoStream(src=1).start() # <--- ここを修正この文を vs0 = VideoStream(src=2).start() # <--- ここを修正にしました VIDIOC_QBUF: Invalid argument Traceback (most recent call last): File "detect_mask_video.py", line 118, in <module> frame1 = imutils.resize(frame1, width=400) # <--- ここを修正 File "/home/pi/.virtualenvs/cv/lib/python3.7/site-packages/imutils/convenience.py", line 69, in resize (h, w) = image.shape[:2] AttributeError: 'NoneType' object has no attribute 'shape' 少し違うエラー文になりました

jbpb0

2021/12/22 02:01 編集

vs0 = VideoStream(src=1).start() # <--- ここを修正 ↓ vs0→vs1 vs1 = VideoStream(src=1).start() # <--- ここを修正

退会済みユーザー

2021/12/22 10:43

jbpb0さんありがとうございます、完全にポカでした。

nample

2021/12/22 13:22

返信遅れました私もそこには気が付き修正しましたが、上記のエラー文が出ました… 元のWEBカメラ1台の場合、IDを変えればカメラが切り替わっている事は確認出来ています。エラーの原因は、別のファイルで定義されているものと今回追加したコードが干渉？しているということが原因とか考えられますか？

jbpb0

2021/12/22 14:01

> 私もそこには気が付き修正しましたがとありますが、 > vs0 = VideoStream(src=2).start() # <--- ここを修正にしましたは違いますけど、そこは大丈夫でしょうか？

nample

2021/12/22 15:33

すみません、コメントからコピペしてコメントしたので訂正しきれず間違いのままでした実行はvs1、src=2と正して行った結果になります今確認のために再び実行してみた結果、１つのwindow(frame)にWEBカメラの映像が交互に表示されましたが数秒して select timeout VIDEOC_DQBUF: リソースが一時的に利用できませんという文と前コメント同様のエラー文が表示されました

jbpb0

2021/12/22 15:44 編集

> 実行はvs1、src=2と正して行った src=2 ではありません src=1 → vs1 → frame1 1繋がりです

nample

2021/12/22 15:43

cv2.imshow("Frame", frame1) を追加して2つのwindowで表示することができました！ラズパイ4のスペックが原因かと思うのでラズパイの方は映像のフレーム数を落として試してみるのとPCの方でも同じ環境を用意して試してみたいと思います考えられる原因に心当たりなどあればご教授願いたいです

nample

2021/12/22 15:46

src=1ではcan't open camera by index 1とエラーが出るのでカメラのID？が違うのかなと思いsrc=2にしました

nample

2021/12/22 15:48

度々すみません vs0の方はsrc=0 vs1の方はsrc=2にしていますこの理由は前コメントの通りです

jbpb0

2021/12/22 16:22 編集

> can't open camera by index 1 /dev/video1 does not support memory mapping 「1」は使えないって言われてましたね失礼しました v4l2-ctl --list-devices を実行したら、つながってるカメラの番号が分かるようですおそらく、一つ目のカメラが「0」と「1」、二つ目のカメラが「2」と「3」になっていて、どちらも小さい方の数字を使わないとダメなのでしょう参考 https://leico.github.io/TechnicalNote/Linux/webcam-usage https://qiita.com/naoppy/items/74bdfa8216c7223f584b

行動規範の内容に同意します