PythonでVTubeStudioを制御したい

実現したいこと

PythonでVTubeStudioの3Dモデルを、wavファイルの音声にあわせて動かしたい。

発生している問題・分からないこと

3Dモデルの口の開閉が上手くいかない。

ソースコードの、以下の処理が上手くいってないように見える。
・async def send_mouth_open(websocket, name, value):
・async def Openmouth(audio_data, chunk_size, threshold, stream, uri):

該当のソースコード

python
1import asyncio  # 非同期処理をサポート
2import json  # JSON形式のデータ操作
3import os  # OSの機能、特にファイルパス操作
4import re  # 正規表現
5import random  # ランダムな選択
6import websockets  # WebSocket通信
7from pygame import mixer  # オーディオ再生
8import sys
9import pprint  # データの整形出力
10import time  # 時間操作
11import librosa
12import numpy as np
13import pyaudio
14import wave
15
16sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
17sys.path.append(os.path.join(os.path.dirname(__file__), '../api'))
18
19from api.authentication import request_token, authenticate
20
21async def get_hotkeys(websocket): #ホットキーを入手
22    request = {
23        "apiName": "VTubeStudioPublicAPI",
24        "apiVersion": "1.0",
25        "requestID": "UniqueRequestIDForHotkeys",
26        "messageType": "HotkeysInCurrentModelRequest",
27        "data": {}
28    }
29    await websocket.send(json.dumps(request))
30    response = await websocket.recv()
31    response_json = json.loads(response)
32    if "data" in response_json and "availableHotkeys" in response_json["data"]:
33        return response_json["data"]["availableHotkeys"]
34    return []
35
36async def trigger_random_hotkey(websocket, hotkeys): #ランダムホットキー作成
37    if hotkeys:
38        hotkey = random.choice(hotkeys)
39        hotkey_id = hotkey.get("hotkeyID")
40        if hotkey_id:
41            request = {
42                "apiName": "VTubeStudioPublicAPI",
43                "apiVersion": "1.0",
44                "requestID": "UniqueRequestIDForTriggering",
45                "messageType": "HotkeyTriggerRequest",
46                "data": {
47                    "hotkeyID": hotkey_id
48                }
49            }
50            await websocket.send(json.dumps(request))
51            response = await websocket.recv()
52            print(f"Triggered Hotkey Response: {response}")
53
54async def play_audio_and_trigger_hotkeys(websocket, folder_path='audio/Word2Motion'): 
55    await trigger_random_hotkey(websocket, await get_hotkeys(websocket))
56
57async def send_mouth_open(websocket, name, value):
58    message = {
59        "type": "SetParameterValue",
60        "data": {
61            "name": name,
62            "value": value
63        }
64    }
65    print(f"Sending message: {message}")  # メッセージの内容を出力
66    await websocket.send(json.dumps(message))
67
68async def Openmouth(audio_data, chunk_size, threshold, stream, uri):  #音声と口を連動させる処理
69    async with websockets.connect(uri) as websocket:
70        # 音声データをチャンクに分割して処理
71        for i in range(0, len(audio_data), chunk_size):
72            chunk = audio_data[i:i + chunk_size]
73
74            # 音量を計算
75            audio_array = np.frombuffer(chunk, dtype=np.int16)
76            rms = np.sqrt(np.mean(audio_array**2))
77
78            # 口パクの判定
79            if rms > threshold:
80                await send_mouth_open(websocket, "ParamMouthOpen", 1.0)
81            else:
82                await send_mouth_open(websocket, "ParamMouthOpen", 0.0)  # 口を閉じる
83            # 音声を再生
84            stream.write(chunk)
85
86async def main():
87    uri = "ws://localhost:8001"
88    async with websockets.connect(uri) as websocket:
89        plugin_name = "My Cool Plugin"
90        plugin_developer = "My Name"
91        authentication_token = await request_token(websocket, plugin_name, plugin_developer)
92        if authentication_token:
93            print(f"Token: {authentication_token}")
94            is_authenticated = await authenticate(websocket, plugin_name, plugin_developer, authentication_token)
95            if is_authenticated:
96                # 音声ファイルのパス
97
98                wav_file = r"aaa\bbb\〇〇〇〇.wav"  # raw文字列を使用
99
100                # パラメータ
101                chunk_size = 1024  # 音声データを読み込むチャンクサイズ
102                threshold = 0.01  # 口を開けるかどうかの閾値
103
104                # wavファイルを読み込む
105                wf = wave.open(wav_file, 'rb')
106                sample_rate = wf.getframerate()
107                audio_data = wf.readframes(-1)
108
109                # pyaudioを初期化
110                p = pyaudio.PyAudio()
111
112                # ストリームを開く
113                stream = p.open(format=pyaudio.paInt16,
114                                channels=1,
115                                rate=sample_rate,
116                                output=True)
117
118                await Openmouth(audio_data, chunk_size, threshold, stream, uri)
119
120                # ストリームを閉じる
121                stream.stop_stream()
122                stream.close()
123
124                # pyaudioを終了
125                p.terminate()
126                await play_audio_and_trigger_hotkeys(websocket)
127
128asyncio.run(main())

試したこと・調べたこと

teratailやGoogle等で検索した
ソースコードを自分なりに変更した
知人に聞いた
その他

上記の詳細・結果

恐らく、変更したいパラメータを上手く指定できていないと考えている

①処理結果
正常終了するが、3Dモデルの口は全く動かない。

②処理ログ
口の開閉に使用する'ParamMouthOpen'のステータスは変更できているように思う。
以下抜粋
＞Sending message: {'type': 'SetParameterValue', 'data': {'name': 'ParamMouthOpen', 'value': 1.0}}
＞Sending message: {'type': 'SetParameterValue', 'data': {'name': 'ParamMouthOpen', 'value': 0.0}}
＞Sending message: {'type': 'SetParameterValue', 'data': {'name': 'ParamMouthOpen', 'value': 0.0}}
＞Sending message: {'type': 'SetParameterValue', 'data': {'name': 'ParamMouthOpen', 'value': 1.0}}

③別処理の動作
以下の処理は正常で、ホットキーで3Dモデルの表情変化はできていた。
・await trigger_random_hotkey(websocket, await get_hotkeys(websocket))

④VTubeStudio上のパラメータ
VTubeStudio上でパラメータを確認※モデル設定タブ
口の開閉に関わりそうなパーツ名「Mouth Open」、IN「VoceVolumePlusMouthOpen」、OUT「ParanMouthOpen」
をそれぞれ"name"に指定・"value"の変更も試したが、口は動かなかった