darkflow(yolo)の物体検出にrealsenseから得た深度をラベル横に出力したい。（YOLO(RGB,3ch)に1ch(Depth)を加えたい）

https://github.com/thtrieu/darkflow.git のdarkflow(yolo)の物体検出にrealsense(pyrealsense2)から得た深度をラベル横(confの部分)に出力したい。（YOLO(RGB,3ch)に1ch(Depth)を加えたい）```python
コード

import
1from darkflow.net.build import TFNet
2import cv2
3import numpy as np
4
5options = {"model": "cfg/yolo.cfg", "load": "bin/yolo.weights", "threshold": 0.1}
6tfnet = TFNet(options)
7
8class_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
9              'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
10              'dog', 'horse', 'motorbike', 'person', 'pottedplant',
11              'sheep', 'sofa', 'train', 'tvmonitor']
12
13num_classes = len(class_names)
14class_colors = []
15for i in range(0, num_classes):
16    hue = 255*i/num_classes
17    col = np.zeros((1,1,3)).astype("uint8")
18    col[0][0][0] = hue
19    col[0][0][1] = 128
20    col[0][0][2] = 255
21    cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
22    col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
23    class_colors.append(col)
24
25config = rs.config()
26config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
27config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
28
29pipeline = rs.pipeline()
30profile = pipeline.start(config)
31
32while True:
33       frames = pipeline.wait_for_frames()
34       result = tfnet.return_predict(frames)
35       depth_frame = frames.get_depth_frame()
36       if not depth_frame: continue
37           
38       for item in result:
39            tlx = item['topleft']['x']
40            tly = item['topleft']['y']
41            brx = item['bottomright']['x']
42            bry = item['bottomright']['y']
43            label = item['label']
44            dist_to_center = depth_frame.get_distance(int(tlx+ brx/2), int(tly + bry/2))
45            dep = item['dist_to_center']
46
47            if dep > 0.4:
48
49                for i in class_names:
50                    if label == i:
51                        class_num = class_names.index(i)
52                        break       
53
54                
55                cv2.rectangle(frame, (tlx, tly), (brx, bry), class_colors[class_num], 2)
56
57                
58                text = label + " " + ('%.2f' % dep)
59                cv2.rectangle(frame, (tlx, tly - 15), (tlx + 100, tly + 5), class_colors[class_num], -1)
60                cv2.putText(frame, text, (tlx, tly), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1)
61        
62       
63                cv2.imshow("Show FLAME Image", frame)
64
65        
66                k = cv2.waitKey(10);
67                if k == ord('q'):  break;
68
69       pipeline.stop()
70       cv2.destroyAllWindows()
71
72if __name__ == '__main__':
73    main()
74
75```python
76エラー
77```  File "yolodistancetry2.py", line 35, in <module>
78    result = tfnet.return_predict(frames)
79  File "C:\Users\Public\darkflow\darkflow\net\flow.py", line 78, in return_predict
80    'Image is not a np.ndarray'
81AssertionError: Image is not a np.ndarray
82
83```python
84コード
85```darkflow/darkflow/net
86flow.py内76～102行目
87
88def return_predict(self, im):
89    assert isinstance(im, np.ndarray), \
90				'Image is not a np.ndarray'
91    h, w, _ = im.shape
92    im = self.framework.resize_input(im)
93    this_inp = np.expand_dims(im, 0)
94    feed_dict = {self.inp : this_inp}
95
96    out = self.sess.run(self.out, feed_dict)[0]
97    boxes = self.framework.findboxes(out)
98    threshold = self.FLAGS.threshold
99    boxesInfo = list()
100    for box in boxes:
101        tmpBox = self.framework.process_box(box, h, w, threshold)
102        if tmpBox is None:
103            continue
104        boxesInfo.append({
105            "label": tmpBox[4],
106            "confidence": tmpBox[6],
107            "topleft": {
108                "x": tmpBox[0],
109                "y": tmpBox[2]},
110            "bottomright": {
111                "x": tmpBox[1],
112                "y": tmpBox[3]}
113        })
114    return boxesInfo
115	
上記コードの confidence の部分を書き換えれば、深度を出力することは可能でしょうか？
プログラミング初心者のため投げやりになってしまい、申し訳ありません。
ご教示頂けると幸いです。

行動規範の内容に同意します

回答3件

ベストアンサー

python
1import pyrealsense2 as rs
2from darkflow.net.build import TFNet
3import cv2
4import numpy as np
5import sys
6
7options = {"model": "cfg/yolo.cfg", "load": "bin/yolo.weights", "threshold": 0.1}
8tfnet = TFNet(options)
9
10class_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
11'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
12'dog', 'horse', 'motorbike', 'person', 'pottedplant',
13'sheep', 'sofa', 'train', 'tvmonitor']
14
15num_classes = len(class_names)
16class_colors = []
17for i in range(0, num_classes):
18    hue = 255*i/num_classes
19    col = np.zeros((1,1,3)).astype("uint8")
20    col[0][0][0] = hue
21    col[0][0][1] = 128
22    col[0][0][2] = 255
23    cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
24    col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
25    class_colors.append(col)
26
27config = rs.config()
28config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
29config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
30
31pipeline = rs.pipeline()
32profile = pipeline.start(config)
33
34while True:
35    # Wait for a coherent pair of frames: depth and color
36    frames = pipeline.wait_for_frames()
37
38    depth_frame = frames.get_depth_frame()
39    color_frame = frames.get_color_frame()
40    if not depth_frame or not color_frame:
41        continue
42
43    # Convert images to numpy arrays
44    depth_image = np.asanyarray(depth_frame.get_data())
45    color_image = np.asanyarray(color_frame.get_data())
46
47    result = tfnet.return_predict(color_image)
48    print(result)
49
50    sys.exit(0)

実行結果は下記です。特にエラー無く動作しました。

console
1$ python3 test.py 
2WARNING:tensorflow:
3The TensorFlow contrib module will not be included in TensorFlow 2.0.
4For more information, please see:
5  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
6  * https://github.com/tensorflow/addons
7  * https://github.com/tensorflow/io (for I/O related ops)
8If you depend on functionality not listed there, please file an issue.
9
10=============== 中略 ===============
11
12Finished in 2.767444133758545s
13
14[{'label': 'clock', 'confidence': 0.2793427, 'topleft': {'x': 0, 'y': 16}, 'bottomright': {'x': 122, 'y': 148}}]

Firmware と SDK のバージョンを一度確認してみてください。現時点の製品版のものに統一しておくことをおすすめします。Intel製品の最新版は不安定であることが多いです。

Firmware 5.11.6.250
SDK 2.30.1

投稿2019/11/12 06:46

編集2019/11/12 07:05

PINTO

総合スコア351

退会済みユーザー

2019/11/13 02:15

ご丁寧にありがとうございます。 CPUonlyで動かしているのでめちゃめちゃ重いですが、無事距離表示できるようになりました。

PINTO

2019/11/13 02:58

なによりです。当初の問題が解消したようでしたら、解決済みとしてマークしてください。

行動規範の内容に同意します

depth_image の中身、サイズがどのようになっているか確認してみてください。深度の数値のみが縦横の２次元で格納されていたかもしれません。失礼しました。color_image を指定してみてください。なお、エラーになっている flow.py の return_predict 内の下記の箇所は、im.shape から３つの値が取得できることを想定しているようです。２次元配列の depth_image を渡すと値が２つしか返らないため、「expected 3, got 2」のエラーになります。

python
1h, w, _ = im.shape

おそらく、高さ、幅、チャンネル(RGB) だと思いますが、color_image を渡していればそのままでも正常に動作する可能性があります。

ご参考までに、私が過去に作成したロジックをご紹介します。darkflow(Yolo)ではありませんが、t-1996-kさんが実現したいことがSSDでほぼそのまま再現されているロジックです。深度情報の取得の仕方の部分はご参考になるかと思います。
MobileNetV2-SSD + RealSense D435

投稿2019/11/10 15:26

PINTO

総合スコア351

退会済みユーザー

2019/11/11 15:36

ご丁寧にありがとうございます。参考にさせていただきます。

退会済みユーザー

2019/11/11 15:49

＜コード＞ import pyrealsense2 as rs from darkflow.net.build import TFNet import cv2 import numpy as np options = {"model": "cfg/yolo.cfg", "load": "bin/yolo.weights", "threshold": 0.1} tfnet = TFNet(options) class_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] num_classes = len(class_names) class_colors = [] for i in range(0, num_classes): hue = 255*i/num_classes col = np.zeros((1,1,3)).astype("uint8") col[0][0][0] = hue col[0][0][1] = 128 col[0][0][2] = 255 cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR) col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2])) class_colors.append(col) config = rs.config() config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30) pipeline = rs.pipeline() profile = pipeline.start(config) while True: # Wait for a coherent pair of frames: depth and color frames = pipeline.wait_for_frames() depth_frame = frames.get_depth_frame() color_frame = frames.get_color_frame() if not depth_frame or not color_frame: continue # Convert images to numpy arrays depth_image = np.asanyarray(depth_frame.get_data()) color_image = np.asanyarray(color_frame.get_data()) result = tfnet.return_predict(color_image) for item in result: tlx = item['topleft']['x'] tly = item['topleft']['y'] brx = item['bottomright']['x'] bry = item['bottomright']['y'] label = item['label'] #dist_to_center = depth_frame.get_distance(int(tlx+ brx/2), int(tly + bry/2)) dep = item['confidence'] if dep > 0.4: for i in class_names: if label == i: class_num = class_names.index(i) break #枠の作成 cv2.rectangle(frame, (tlx, tly), (brx, bry), class_colors[class_num], 2) #ラベルの作成 text = label + " " + ('%.2f' % dep) cv2.rectangle(frame, (tlx, tly - 15), (tlx + 100, tly + 5), class_colors[class_num], -1) cv2.putText(frame, text, (tlx, tly), 
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1) # 表示 cv2.imshow("Show FLAME Image", frame) # escを押したら終了。 k = cv2.waitKey(10); if k == ord('q'): break; pipeline.stop() cv2.destroyAllWindows() if __name__ == '__main__': main() ＜エラー＞ File "yolodistancetry2.py", line 35, in <module> frames = pipeline.wait_for_frames() RuntimeError: wait_for_frames cannot be called before start() このようなエラーが出てしまいました。 start() の前に wait_for_frames() を呼び出すことはできないとのことですが、どのようにコードを変えればよいのでしょうか？度々の質問になってしまい、申し訳ありません。

PINTO

2019/11/12 06:38

コードを整形できないため、回答欄に記載します。

行動規範の内容に同意します

まずは下記でエラーが解消できるかどうかをお試しください。pipeline.wait_for_frames() は画像そのものを返しません。深度画像とRGB画像のワンセットになっています。また、 tfnet.return_predict に渡す前にnumpy配列への変換が必要だと思います。 depth_image を渡すのか color_image を渡すのかは要件次第ですので適宜入れ替えてみてください。

python
1        # Wait for a coherent pair of frames: depth and color
2        frames = pipeline.wait_for_frames()
3        depth_frame = frames.get_depth_frame()
4        color_frame = frames.get_color_frame()
5        if not depth_frame or not color_frame:
6            continue
7
8        # Convert images to numpy arrays
9        depth_image = np.asanyarray(depth_frame.get_data())
10        color_image = np.asanyarray(color_frame.get_data())
11
12        result = tfnet.return_predict(color_image)

投稿2019/11/10 06:11

PINTO

総合スコア351

退会済みユーザー

2019/11/10 14:49

貴重なご意見・ご指摘ありがとうございます。ご指摘の通り、コードを以下のように修正しました。＜コード＞ import pyrealsense2 as rs from darkflow.net.build import TFNet import cv2 import numpy as np options = {"model": "cfg/yolo.cfg", "load": "bin/yolo.weights", "threshold": 0.1} tfnet = TFNet(options) class_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] num_classes = len(class_names) class_colors = [] for i in range(0, num_classes): hue = 255*i/num_classes col = np.zeros((1,1,3)).astype("uint8") col[0][0][0] = hue col[0][0][1] = 128 col[0][0][2] = 255 cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR) col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2])) class_colors.append(col) config = rs.config() config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30) pipeline = rs.pipeline() profile = pipeline.start(config) while True: # Wait for a coherent pair of frames: depth and color frames = pipeline.wait_for_frames() depth_frame = frames.get_depth_frame() color_frame = frames.get_color_frame() if not depth_frame or not color_frame: continue # Convert images to numpy arrays depth_image = np.asanyarray(depth_frame.get_data()) color_image = np.asanyarray(color_frame.get_data()) result = tfnet.return_predict(depth_image) for item in result: tlx = item['topleft']['x'] tly = item['topleft']['y'] brx = item['bottomright']['x'] bry = item['bottomright']['y'] label = item['label'] dist_to_center = depth_frame.get_distance(int(tlx+ brx/2), int(tly + bry/2)) dep = item['dist_to_center'] if dep > 0.4: for i in class_names: if label == i: class_num = class_names.index(i) break #枠の作成 cv2.rectangle(frame, (tlx, tly), (brx, bry), class_colors[class_num], 2) #ラベルの作成 text = label + " " + ('%.2f' % dep) cv2.rectangle(frame, (tlx, tly - 15), (tlx + 100, tly + 5), class_colors[class_num], -1) 
cv2.putText(frame, text, (tlx, tly), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1) # 表示 cv2.imshow("Show FLAME Image", frame) # escを押したら終了。 k = cv2.waitKey(10); if k == ord('q'): break; pipeline.stop() cv2.destroyAllWindows() if __name__ == '__main__': main() 下記のエラーが出てしまいました。＜エラー＞ File "yolodistancetry2.py", line 45, in <module> result = tfnet.return_predict(depth_image) File "C:\Users\Public\darkflow\darkflow\net\flow.py", line 79, in return_predict h, w, _ = im.shape ValueError: not enough values to unpack (expected 3, got 2) こちらの対処方法を知っていたら、教えていただけないでしょうか？よろしくお願いします。

行動規範の内容に同意します

あなたの回答