Yolov7でOnnxRuntime推論

実現したいこと

YOLOv7でトレーニングした.ptファイルをexport.pyでONNX形式に変換し、ONNX Runtimeで推論したい。

前提

Yolov7でトレーニングした.ptファイルを

terminal
python export.py --weights runs/train/yolov7-tiny-fruits/weights/best.pt --end2end --topk-all 100 --iou-thres 0.65 --conf-thres 0.6 --img-size 480 640 --max-wh 480 --grid

でONNXファイルに変換して

python
1import cv2
2import numpy as np
3import onnxruntime
4
def preprocess_image(img):
    """Convert an OpenCV frame into the NCHW float32 tensor fed to the model.

    Args:
        img: Image array of shape (H, W, 3) in OpenCV's BGR channel order,
            e.g. a frame returned by ``cv2.VideoCapture.read``.

    Returns:
        A C-contiguous float32 array of shape (1, 3, H, W) in RGB channel
        order, with every value divided by 255.
    """
    # OpenCV delivers BGR, but YOLOv7 is trained on RGB images; feeding BGR
    # swaps red and blue and degrades detection confidence.
    img = img[:, :, ::-1]
    img = img.astype(np.float32) / 255
    # HWC -> CHW, then add the batch dimension.
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, 0)
    # The transpose above only produces a strided view; hand the runtime a
    # plain C-ordered buffer.
    img = np.ascontiguousarray(img)
    # Debug dump of the tensor that is passed to the model.
    print(img)
    return img
11
12
13
# Load the ONNX model.
onnx_file = "C:/Users/user/Desktop/yolov7/yolov7/runs/train/yolov7-tiny-fruits/weights/best1_iou65_conf30.onnx"
session = onnxruntime.InferenceSession(onnx_file)

# Read the model's input height/width when it was exported with a static
# shape; dynamic axes are reported as non-int entries and are skipped.
input_shape = session.get_inputs()[0].shape
if len(input_shape) == 4 and all(isinstance(d, int) for d in input_shape[2:4]):
    input_hw = (input_shape[2], input_shape[3])
else:
    input_hw = None

# Open the webcam. 0 selects the default camera; change the index to use
# another device.
cap = cv2.VideoCapture(0)

try:
    while True:
        # Grab one frame; stop when the camera yields nothing.
        ret, frame = cap.read()
        if not ret:
            break

        # A frame whose size differs from the exported input size makes
        # session.run fail, so resize it first. Boxes are then drawn on the
        # resized frame, which keeps coordinates consistent.
        if input_hw is not None and frame.shape[:2] != input_hw:
            frame = cv2.resize(frame, (input_hw[1], input_hw[0]))

        preprocessed_img = preprocess_image(frame)

        # Run inference.
        outputs = session.run(["output"], {"images": preprocessed_img})
        print(outputs)

        # Draw the results. Iterating an empty array simply does nothing, so
        # no separate emptiness check is needed.
        # NOTE(review): each row is presumably
        # [batch_id, x0, y0, x1, y1, class_id, score] as produced by the
        # YOLOv7 --end2end export -- confirm against the exporter.
        for detection in outputs[0]:
            # Convert to plain Python ints. The previous np.int8 cast wrapped
            # any coordinate above 127, placing boxes at the wrong position.
            xmin, ymin, xmax, ymax = (int(v) for v in detection[1:5])
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            label = "aaaaaaaaa"
            cv2.putText(frame, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        cv2.imshow('Live Object Detection', frame)

        # Press 'q' to leave the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if inference raised.
    cap.release()
    cv2.destroyAllWindows()

このコードを実行したところ

9行目（preprocess_image 内の print(img)）の出力結果
[[[[0.69803923 0.7137255  0.7058824  ... 0.41568628 0.41568628
    0.41960785]
   [0.6862745  0.69803923 0.69803923 ... 0.41960785 0.41960785
    0.41960785]
   [0.6862745  0.7019608  0.7058824  ... 0.4117647  0.41960785
    0.41960785]
   ...
   [0.25882354 0.26666668 0.27058825 ... 0.36078432 0.3764706
    0.3882353 ]
   [0.25882354 0.27058825 0.27450982 ... 0.34901962 0.37254903
    0.3882353 ]
   [0.25882354 0.2627451  0.26666668 ... 0.32156864 0.34117648
    0.3764706 ]]

  [[0.6392157  0.64705884 0.6392157  ... 0.3882353  0.38431373
    0.39215687]
   [0.6509804  0.6509804  0.6509804  ... 0.39215687 0.39215687
    0.39215687]
   [0.6509804  0.654902   0.654902   ... 0.39215687 0.39215687
    0.39215687]
   ...
   [0.25882354 0.2627451  0.2627451  ... 0.41960785 0.43529412
    0.44705883]
   [0.25882354 0.25882354 0.25490198 ... 0.40784314 0.43137255
    0.44705883]
   [0.25882354 0.25882354 0.25882354 ... 0.3882353  0.40784314
    0.4392157 ]]

  [[0.69803923 0.7137255  0.7058824  ... 0.5058824  0.5137255
    0.52156866]
   [0.69803923 0.7058824  0.70980394 ... 0.5137255  0.50980395
    0.50980395]
   [0.69803923 0.70980394 0.7137255  ... 0.5019608  0.50980395
    0.5137255 ]
   ...
   [0.3764706  0.38039216 0.38039216 ... 0.5568628  0.57254905
    0.58431375]
   [0.3764706  0.3764706  0.3764706  ... 0.54901963 0.5686275
    0.58431375]
   [0.3764706  0.3764706  0.3764706  ... 0.53333336 0.5529412
    0.58431375]]]]

また、img の dtype と shape は次のとおりです。
dtype: float32
shape: (1, 3, 480, 640)


31行目（推論直後の print(outputs)）の出力結果
[array([[0.0000000e+00, 2.7053705e+02, 2.7021649e+02, 4.8089246e+02, 4.5904602e+02, 0.0000000e+00, 3.8479486e-01]], dtype=float32)]
.
.
.
.