表情認識AIで、動画内の表情を0.1秒刻みで解析したい

前提・実現したいこと

下記サイトのプログラムにおける、
real_time_video.pyにのみ修正を加え、
動画における表情推移を計測するコードを作成しました。
https://github.com/omar178/Emotion-recognition

再生されている動画に対し、
0.1秒刻みで解析処理を行う(print(preds, file=f)を出力する)には、
コードをどのように改変すれば良いでしょうか？

発生している問題・エラーメッセージ

time(0.1)と追記したところ、
動画の再生自体も0.1秒刻みになってしまい、
print(preds, file=f)での出力値は、
time(0.1)追記前と変わりませんでした。

どうしたら、
自然な速度で再生されている動画に対して、
0.1秒刻みでprint(preds, file=f)
を出力することができるようになるでしょうか？

下記見づらいコードですが、
何卒、よろしくお願いいたします。

該当のソースコード

python
1from keras.preprocessing.image import img_to_array
2import imutils
3import cv2
4from keras.models import load_model
5import numpy as np
6import time
7###
8import os
9
10filename = "MMM"
11filenameMP4 = filename + ".mp4"
12filenameTXT = filename + ".txt"
13print(filenameTXT)
14###
15
16# parameters for loading data and images
17detection_model_path = 'haarcascade_files/haarcascade_frontalface_default.xml'
18emotion_model_path = 'models/_mini_XCEPTION.102-0.66.hdf5'
19
20# hyper-parameters for bounding boxes shape
21# loading models
22face_detection = cv2.CascadeClassifier(detection_model_path)
23emotion_classifier = load_model(emotion_model_path, compile=False)
24EMOTIONS = ["angry" ,"disgust","scared", "happy", "sad", "surprised",
25 "neutral"]
26
27
28#feelings_faces = []
29#for index, emotion in enumerate(EMOTIONS):
30   # feelings_faces.append(cv2.imread('emojis/' + emotion + '.png', -1))
31
32# starting video streaming
33cv2.namedWindow('your_face')
34camera = cv2.VideoCapture(r'C:\Users\yukak\OneDrive\experiment_videos\Mari_Elka_Pangestu_M.mp4')
35
36while True:
37    frame = camera.read()[1]
38    #reading the frame
39    frame = imutils.resize(frame,width=300)
40    ####
41    prev_time = time.time()
42    while True:
43        curr_time = time.time()
44        if curr_time - prev_time >= 0.1:
45        
46            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
47            faces = face_detection.detectMultiScale(gray,scaleFactor=1.1,minNeighbors=5,minSize=(30,30),flags=cv2.CASCADE_SCALE_IMAGE)
48            
49            canvas = np.zeros((250, 300, 3), dtype="uint8")
50            frameClone = frame.copy()
51            if len(faces) > 0:
52                faces = sorted(faces, reverse=True,
53                key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))[0]
54                (fX, fY, fW, fH) = faces
55                            # Extract the ROI of the face from the grayscale image, resize it to a fixed 28x28 pixels, and then prepare
56                    # the ROI for classification via the CNN
57                roi = gray[fY:fY + fH, fX:fX + fW]
58                roi = cv2.resize(roi, (64, 64))
59                roi = roi.astype("float") / 255.0
60                roi = img_to_array(roi)
61                roi = np.expand_dims(roi, axis=0)
62                
63                
64                preds = emotion_classifier.predict(roi)[0]
65                with open(filenameTXT, 'a') as f:
66                #print(preds)
67                    print(preds, file=f)
68                emotion_probability = np.max(preds)
69                label = EMOTIONS[preds.argmax()]
70                ###
71                time.sleep(1)
72                ###
73            else: continue
74
75         
76            for (i, (emotion, prob)) in enumerate(zip(EMOTIONS, preds)):
77                        # construct the label text
78                        text = "{}: {:.2f}%".format(emotion, prob * 100)
79
80                        # draw the label + probability bar on the canvas
81                       # emoji_face = feelings_faces[np.argmax(preds)]
82
83                        
84                        w = int(prob * 300)
85                        cv2.rectangle(canvas, (7, (i * 35) + 5),
86                        (w, (i * 35) + 35), (0, 0, 255), -1)
87                        cv2.putText(canvas, text, (10, (i * 35) + 23),
88                        cv2.FONT_HERSHEY_SIMPLEX, 0.45,
89                        (255, 255, 255), 2)
90                        cv2.putText(frameClone, label, (fX, fY - 10),
91                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
92                        cv2.rectangle(frameClone, (fX, fY), (fX + fW, fY + fH),
93                                      (0, 0, 255), 2)
94        #    for c in range(0, 3):
95        #        frame[200:320, 10:130, c] = emoji_face[:, :, c] * \
96        #        (emoji_face[:, :, 3] / 255.0) + frame[200:320,
97        #        10:130, c] * (1.0 - emoji_face[:, :, 3] / 255.0)
98
99        prev_time = curr_time
100    ####
101    cv2.imshow('your_face', frameClone)
102    cv2.imshow("Probabilities", canvas)
103    if cv2.waitKey(1) & 0xFF == ord('q'):
104        break
105
106camera.release()
107cv2.destroyAllWindows()
108

行動規範の内容に同意します

回答1件

ベストアンサー

ループの中で、gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) の行から cv2.imshow('your_face', frameClone) より1つ前の行が検出処理に該当する部分ですので、この範囲の処理を「前回実行したときから 0.1 秒以上経過していたら実行する」という if 節の中に入れればいいと思います。

python
1import time
2
3prev_time = time.time()
4while True:
5    curr_time = time.time()
6    if curr_time - prev_time >= 0.1:
7        # 0.1 秒ごとに実行したい処理
8        print(curr_time - prev_time)
9
10        prev_time = curr_time

追記

「前回表情検出を行った時刻から 0.1 秒以上経過しているかどうか」という判定をする処理だったので、以下のように修正していただく意図でした。
動かしての確認はしていません。

from keras.preprocessing.image import img_to_array
import imutils
import cv2
from keras.models import load_model
import numpy as np
import time
###
import os

filename = "MMM"
filenameMP4 = filename + ".mp4"
filenameTXT = filename + ".txt"
print(filenameTXT)
###

# parameters for loading data and images
detection_model_path = 'haarcascade_files/haarcascade_frontalface_default.xml'
emotion_model_path = 'models/_mini_XCEPTION.102-0.66.hdf5'

# hyper-parameters for bounding boxes shape
# loading models
face_detection = cv2.CascadeClassifier(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
EMOTIONS = ["angry" ,"disgust","scared", "happy", "sad", "surprised",
 "neutral"]


#feelings_faces = []
#for index, emotion in enumerate(EMOTIONS):
   # feelings_faces.append(cv2.imread('emojis/' + emotion + '.png', -1))

# starting video streaming
cv2.namedWindow('your_face')
camera = cv2.VideoCapture(r'C:\Users\yukak\OneDrive\experiment_videos\Mari_Elka_Pangestu_M.mp4')

prev_time = time.time()
while True:
    frame = camera.read()[1]
    #reading the frame
    frame = imutils.resize(frame,width=300)
    ################################################################
    curr_time = time.time()
    if curr_time - prev_time >= 0.1:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_detection.detectMultiScale(gray,scaleFactor=1.1,minNeighbors=5,minSize=(30,30),flags=cv2.CASCADE_SCALE_IMAGE)

        canvas = np.zeros((250, 300, 3), dtype="uint8")
        frameClone = frame.copy()
        if len(faces) > 0:
            faces = sorted(faces, reverse=True,
            key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))[0]
            (fX, fY, fW, fH) = faces
                        # Extract the ROI of the face from the grayscale image, resize it to a fixed 28x28 pixels, and then prepare
                # the ROI for classification via the CNN
            roi = gray[fY:fY + fH, fX:fX + fW]
            roi = cv2.resize(roi, (64, 64))
            roi = roi.astype("float") / 255.0
            roi = img_to_array(roi)
            roi = np.expand_dims(roi, axis=0)


            preds = emotion_classifier.predict(roi)[0]
            with open(filenameTXT, 'a') as f:
            #print(preds)
                print(preds, file=f)
            emotion_probability = np.max(preds)
            label = EMOTIONS[preds.argmax()]
        else: continue


        for (i, (emotion, prob)) in enumerate(zip(EMOTIONS, preds)):
                    # construct the label text
                    text = "{}: {:.2f}%".format(emotion, prob * 100)

                    # draw the label + probability bar on the canvas
                    # emoji_face = feelings_faces[np.argmax(preds)]


                    w = int(prob * 300)
                    cv2.rectangle(canvas, (7, (i * 35) + 5),
                    (w, (i * 35) + 35), (0, 0, 255), -1)
                    cv2.putText(canvas, text, (10, (i * 35) + 23),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45,
                    (255, 255, 255), 2)
                    cv2.putText(frameClone, label, (fX, fY - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
                    cv2.rectangle(frameClone, (fX, fY), (fX + fW, fY + fH),
                                    (0, 0, 255), 2)
        prev_time = curr_time
    ##########################################
    cv2.imshow('your_face', frameClone)
    cv2.imshow("Probabilities", canvas)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

camera.release()
cv2.destroyAllWindows()

投稿2020/06/30 18:46

編集2020/07/01 01:49

tiitoi

総合スコア21960

YYJP

2020/07/01 01:38

ご回答頂きありがとうございます！書いて頂いたコードを追記したところ、プログラムは動くものの、動画の読み込みができなくなってしまいました。(動画表示画面は立ち上がるものの、動画は再生されず、ずっとグレー画面のままです) 質問欄のコードを、ご回答内容追記後のコードに変更しました。修正必要箇所は、どこになるのでしょうか？何卒、宜しくお願い致します。