Python VOICEVOX読み上げ中のGUIフリーズ

実現したいこと

文章中に含まれる感情に反応して、対応した表情画像を選ぶ

前提

PythonでChat GPTに疑似感情プロンプトを入力して
返答された感情を元に、対応した表情画像を切り替わりVOICEVOX
で発言するプログラムを作っています。

発生している問題・エラーメッセージ

実際に実行すると、VOICEVOXで読み上げが発生している途中
uxpythonのWindowがフリーズし、表情が変化しません。
VOICEVOXの読み上げが終わりきった後に表情が変化します。

想定では
表情変化 → 読み上げ
現在は
読み上げ終了 → 表情変化
となってしまっています。
どうすればよいでしょうか

該当のソースコード

Python
1import wx
2import io
3import re
4import json
5import openai
6import requests
7import speech_recognition as sr
8import time
9from PIL import Image, ImageTk
10from pydub import AudioSegment, playback
11from PIL import Image
12
13#APIKeyを設定
14openai.api_key = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
15
16class MyFrame(wx.Frame):
17    def __init__(self, parent, title):
18        super(MyFrame, self).__init__(parent, title=title, size=(600, 800))
19
20        self.panel = wx.Panel(self)
21
22        # 画像の読み込みとリサイズ
23        img = wx.Image('normal.png', wx.BITMAP_TYPE_ANY)
24        img = img.Scale(500, 650, wx.IMAGE_QUALITY_HIGH)
25        self.bmp = wx.Bitmap(img)
26
27        # 画像を表示するStaticBitmapの作成
28        self.imgCtrl = wx.StaticBitmap(self.panel, wx.ID_ANY, wx.Bitmap(img))
29
30        # ボタンの作成
31        self.btn = wx.Button(self.panel, label='REC', size=(150,100))
32        self.btn.Bind(wx.EVT_BUTTON, self.onChangeImage)
33        self.btn.SetForegroundColour('#FF0000')
34        self.btn.SetBackgroundColour('#ffffff')
35
36        # レイアウトの設定
37        vbox = wx.BoxSizer(wx.VERTICAL)
38        vbox.Add(self.imgCtrl, 0, wx.ALIGN_CENTER|wx.TOP|wx.BOTTOM, 10)
39        vbox.Add(self.btn, 0, wx.ALIGN_CENTER|wx.TOP|wx.BOTTOM, 10)
40        self.panel.SetSizer(vbox)
41
42    def onChangeImage(self, event):
43        sound_rec(self)
44
45
46def sound_rec(self):
47
48    r = sr.Recognizer()
49    r.pause_threshold = 0.5
50
51    #マイクから音声を受け取る
52    with sr.Microphone() as source:
53
54        self.btn.Disable()
55
56        print("話してください")
57        audio = r.listen(source)
58
59    try:
60        global UserInput
61        UserInput = r.recognize_google(audio, language="ja-JP")
62        print(UserInput)
63        GPTStart(self)
64
65    except sr.UnknownValueError:
66        print("音声を理解できませんでした")
67        self.btn.Enable()
68
69    except sr.RequestError as e:
70        print("音声認識に失敗しました; {0}".format(e))
71        self.btn.Enable()
72
73def GPTStart(self):
74
75    response = openai.ChatCompletion.create(
76        model="gpt-3.5-turbo",
77    messages=[
78         {"role": "system", "content": "前提プロンプトわ～"},
79         {"role": "user", "content": UserInput},
80     ]
81    )
82
83    global GPToutput
84    GPToutput = response["choices"][0]["message"]["content"]
85    print(GPToutput)
86    FEiS(self)
87
88def FEiS(self):
89
90    global TempOutput
91    global BEDCOutput
92
93    if "5CCC2C28C3" in GPToutput:
94        img = wx.Image('happy.png', wx.BITMAP_TYPE_ANY)
95        img = img.Scale(500, 650, wx.IMAGE_QUALITY_HIGH)
96        self.bmp = wx.Bitmap(img)
97        self.imgCtrl.SetBitmap(self.bmp)
98        TempOutput = re.sub(r"【.*?】", "", GPToutput)
99        BEDCOutput = re.sub(r"喜び", "", TempOutput)
100        TalkEngine(self)
101
102
103
104    if "2AAC7BF95B" in GPToutput:
105        img = wx.Image('angry.png', wx.BITMAP_TYPE_ANY)
106        img = img.Scale(500, 650, wx.IMAGE_QUALITY_HIGH)
107        self.bmp = wx.Bitmap(img)
108        self.imgCtrl.SetBitmap(self.bmp)
109        TempOutput = re.sub(r"【.*?】", "", GPToutput)
110        BEDCOutput = re.sub(r"怒り", "", TempOutput)
111        TalkEngine(self)
112
113
114    if "35E52EF5D5" in GPToutput:
115        img = wx.Image('cry.png', wx.BITMAP_TYPE_ANY)
116        img = img.Scale(500, 650, wx.IMAGE_QUALITY_HIGH)
117        self.bmp = wx.Bitmap(img)
118        self.imgCtrl.SetBitmap(self.bmp)
119        TempOutput = re.sub(r"【.*?】", "", GPToutput)
120        BEDCOutput = re.sub(r"悲しみ", "", TempOutput)
121        TalkEngine(self)
122
123
124    if "274A916769" in GPToutput:
125        img = wx.Image('confidence.png', wx.BITMAP_TYPE_ANY)
126        img = img.Scale(500, 650, wx.IMAGE_QUALITY_HIGH)
127        self.bmp = wx.Bitmap(img)
128        self.imgCtrl.SetBitmap(self.bmp)
129        TempOutput = re.sub(r"【.*?】", "", GPToutput)
130        BEDCOutput = re.sub(r"自信", "", TempOutput)
131        TalkEngine(self)
132
133    if "D3779F3734" in GPToutput:
134        img = wx.Image('interest.png', wx.BITMAP_TYPE_ANY)
135        img = img.Scale(500, 650, wx.IMAGE_QUALITY_HIGH)
136        self.bmp = wx.Bitmap(img)
137        self.imgCtrl.SetBitmap(self.bmp)
138        TempOutput = re.sub(r"【.*?】", "", GPToutput)
139        BEDCOutput = re.sub(r"好奇心", "", TempOutput)
140        TalkEngine(self)
141
142    if "603286C614" in GPToutput:
143        img = wx.Image('confusion.png', wx.BITMAP_TYPE_ANY)
144        img = img.Scale(500, 650, wx.IMAGE_QUALITY_HIGH)
145        self.bmp = wx.Bitmap(img)
146        self.imgCtrl.SetBitmap(self.bmp)
147        TempOutput = re.sub(r"【.*?】", "", GPToutput)
148        BEDCOutput = re.sub(r"困惑", "", TempOutput)
149        TalkEngine(self)
150
151    if "0545348B1F" in GPToutput:
152        img = wx.Image('normal.png', wx.BITMAP_TYPE_ANY)
153        img = img.Scale(500, 650, wx.IMAGE_QUALITY_HIGH)
154        self.bmp = wx.Bitmap(img)
155        self.imgCtrl.SetBitmap(self.bmp)
156        TempOutput = re.sub(r"【.*?】", "", GPToutput)
157        BEDCOutput = re.sub(r"中立", "", TempOutput)
158        TalkEngine(self)
159
160
161def TalkEngine(self):
162    text = BEDCOutput
163    speaker_id = 0  # スピーカーID (０：つくよみちゃん)
164
165    # 音声合成のクエリの作成
166    response = requests.post(
167        "http://localhost:50031/audio_query",
168        params={
169            'text': text,
170            'speaker': speaker_id,
171            'core_version': '0.0.0'
172        })
173    query = response.json()
174
175    # 音声合成のwavの生成
176    response = requests.post(
177        'http://localhost:50031/synthesis',
178        params={
179            'speaker': speaker_id,
180            'core_version': "0.0.0",
181            'enable_interrogative_upspeak': 'true'
182        },
183        data=json.dumps(query))
184
185    # フェードインして再生する
186    sound = AudioSegment.from_file(io.BytesIO(response.content), format="wav")
187    sound = sound.fade_in(100) # 1/10秒でフェードインする
188    playback.play(sound)
189    print(BEDCOutput)
190    print("END")
191
192    self.btn.Enable()
193
194
195
196app = wx.App()
197frame = MyFrame(None, title='Image Viewer')
198frame.Show()
199app.MainLoop()