pythonで音声認識とレベルバーの同時処理について

解決したいこと

音声認識のpyファイルと、音量レベルバーのpyファイルを一つにして、認識とレベルバーを同時に処理したい。
音声認識はできているのですが、レベルバーが動かずに困っています。
解決方法を教えて下さい。

完成したら、下記のようにしたい。
「認識中」の上がレベルバー、「認識中」のところには、認識した文字が表示されるようにしたい。

試したこと

下記の「音声認識用pyファイル　voice.py」と「音量レベルバー表示pyファイル　bar.py」を用意し、voice.pyのvoice_inをbar.pyにimportして、bar.pyのapp()に呼び出しを追記しました（下記のbar2.py）。
この時点で、音声認識はするものの、ボタンのクリック等出来なくなります。

その他に、音声認識した文字列を、ラベル変更用のキー「key='-TEXT-'」を使って表示したいと考えています。

bar2.py
1from VoicePaste import voice_in
2-------省略------
3def app():
4    while True:     #ボタン動作　　
5        event, values = _VARS['window'].read(timeout=100)
6        if event == sg.WIN_CLOSED or event == '終了':
7            pAud.terminate()
8            break
9        if event == '開始':
10            listen()
11        if event == '停止':
12            stop()
13        voice_in()    #追加
14        update_text = voice_text
15        _VARS['window']['-TEXT-'].update(update_text)

音声認識用pyファイル

voice.py
1import time
2import speech_recognition as sr
3import pyperclip
4import pyautogui
5
6###### 音声入力
7def voice_in():
8    while True:
9        r = sr.Recognizer()
10        with sr.Microphone() as source:
11            print("何かお話しして下さい。")
12            r.adjust_for_ambient_noise(source , duration = 1 ) #雑音対策
13            audio = r.listen(source)
14
15        try:
16            # Google Web Speech APIで音声認識
17            text =""
18            text = r.recognize_google(audio, language="ja-JP")
19
20        except sr.UnknownValueError:
21            print("音声認識できませんでした。")
22        except sr.RequestError as e:
23            print("音声認識を要求できませんでした;"
24                " {0}".format(e))
25        else:
26            if text == "停止" : #停止で終了
27                break
28            else:
29                print(text)
30                pyperclip.copy(text)
31                pyautogui.press('enter')     #メモ帳などを選択した状態で
32                time.sleep(0.2)              #話すと、貼り付けられる
33                pyautogui.hotkey('ctrl', 'v')
34                voice_text = text
35
36if __name__ == "__main__":
37    app = voice_in()
38
39print("停止を確認。終わります。")

###音量レベルバー表示pyファイル

bar.py
1import PySimpleGUI as sg
2import pyaudio
3import numpy as np
4
5_VARS = {'window': False,'stream': False}
6
7AppFont = 'Any 16'
8sg.theme('Black')
9layout = [[sg.ProgressBar(10000, orientation='h',
10                          size=(30, 10), key='-PROG-')],
11            [sg.Text('開始を押してね', size=(40, 2) ,font=(AppFont,11,'bold'),key='-TEXT-'))],     #ラベルを変更用のキー「key='-TEXT-'」
12            [sg.Button('開始', font=(AppFont,10)),
13            sg.Button('停止', font=(AppFont,10)),
14            sg.Button('終了', font=(AppFont,10))]]
15_VARS['window'] = sg.Window('音声入力レベルバー', layout,no_titlebar=False,finalize=True,transparent_color=True,keep_on_top=True)
16
17CHUNK = 1024
18RATE = 44100
19INTERVAL = 1
20pAud = pyaudio.PyAudio()
21
22def stop():
23    if _VARS['stream']:
24        _VARS['stream'].stop_stream()
25        _VARS['stream'].close()
26        _VARS['window']['-PROG-'].update(0)
27
28def callback(in_data, frame_count, time_info, status):
29    data = np.frombuffer(in_data, dtype=np.int16)
30    _VARS['window']['-PROG-'].update(np.amax(data))
31    return (in_data, pyaudio.paContinue)
32
33def listen():
34    _VARS['stream'] = pAud.open(format=pyaudio.paInt16, channels=1, rate=RATE,input=True, frames_per_buffer=CHUNK, stream_callback=callback)
35    _VARS['stream'].start_stream()
36
37def app():
38    while True:     #ボタン動作　　
39        event, values = _VARS['window'].read(timeout=100)
40        if event == sg.WIN_CLOSED or event == '終了':
41            pAud.terminate()
42            break
43        if event == '開始':
44            listen()
45        if event == '停止':
46            stop()
47if __name__ == "__main__":
48    application = app()
49    application.mainloop()
50_VARS['window'].close()

###解決した内容
長いので、こちらに記入します。これで、アクティブにしたメモ帳等に、音声が反応しているか等を確認しながら入力できるようになりました。

import PySimpleGUI as sg
import pyaudio
import numpy as np
import time
import speech_recognition as sr
import pyperclip
import pyautogui
import threading

# Shared mutable state used by every function below: the PySimpleGUI window
# and the PyAudio input stream. Both hold False until they are created.
_VARS = {'window': False,'stream': False}

# Frames per buffer handed to pAud.open() in listen().
CHUNK = 1024
# Sampling rate handed to pAud.open() in listen().
RATE = 44100
# NOTE(review): INTERVAL is not referenced anywhere in the visible code.
INTERVAL = 1
# PyAudio instance used to open the input stream; terminated when the GUI exits.
pAud = pyaudio.PyAudio()

###### 音声入力部分 ######
def long_operation_thread(window):
    """Recognise speech in a loop and paste each phrase into the focused app.

    Intended to run on a background thread started by the GUI, so that the
    blocking microphone capture and the Google Web Speech request do not
    stall the window's event loop.

    The loop ends when the phrase "停止" is recognised, or when the GUI sets
    ``long_operation_thread.alive = False``. The flag is checked once per
    captured phrase because ``Recognizer.listen`` blocks until a phrase has
    been recorded.

    Args:
        window: The PySimpleGUI window whose ``'-TEXT-'`` element displays
            the recognition status and the recognised text.
    """
    # NOTE(review): the label is updated directly from this worker thread.
    # PySimpleGUI's multithreading demo passes results back with
    # window.write_event_value() instead -- consider switching to that.
    status = window['-TEXT-']

    # Arm the run flag; the GUI clears it when the stop button is pressed.
    long_operation_thread.alive = True
    while long_operation_thread.alive:
        r = sr.Recognizer()
        with sr.Microphone() as source:
            print("何かお話しして下さい。")
            # Calibrate against background noise before each phrase.
            r.adjust_for_ambient_noise(source, duration=1)
            audio = r.listen(source)
            if not long_operation_thread.alive:
                # Stop was requested while we were waiting for speech:
                # do not recognise or paste the phrase that was captured.
                break
            status.update("認識中")
        try:
            # Recognise with the Google Web Speech API.
            text = r.recognize_google(audio, language="ja-JP")
        except sr.UnknownValueError:
            print("音声認識できませんでした。")
            status.update("音声認識できませんでした。")
        except sr.RequestError as e:
            print("音声認識を要求できませんでした。"
                " {0}".format(e))
            status.update("音声認識を要求できませんでした。")
        else:
            if text == "停止":
                # Voice command: close the audio stream and end this worker
                # instead of continuing to listen.
                stop()
                break
            print(text)
            pyperclip.copy(text)
            # With an editor such as Notepad focused, the phrase is pasted
            # on a new line.
            pyautogui.press('enter')
            time.sleep(0.2)
            pyautogui.hotkey('ctrl', 'v')
            status.update(text)
    long_operation_thread.alive = False

def stop():
    """Stop and close the capture stream, then reset the meter and the label.

    Does nothing when no stream is registered in ``_VARS['stream']``.
    """
    stream = _VARS['stream']
    if not stream:
        return
    stream.stop_stream()
    stream.close()
    # Forget the closed handle so that a second stop (for example the stop
    # button after the "停止" voice command) does not act on a closed stream.
    _VARS['stream'] = False
    _VARS['window']['-PROG-'].update(0)
    _VARS['window']['-TEXT-'].update("停止中")

def callback(in_data, frame_count, time_info, status):
    """Stream callback: show the largest sample of the buffer on the meter.

    Args:
        in_data: Raw audio buffer, interpreted here as 16-bit integers.
        frame_count: Unused.
        time_info: Unused.
        status: Unused.

    Returns:
        A tuple of the unchanged buffer and ``pyaudio.paContinue``.
    """
    meter = _VARS['window']['-PROG-']
    samples = np.frombuffer(in_data, dtype=np.int16)
    peak = np.amax(samples)
    meter.update(peak)
    return in_data, pyaudio.paContinue

def listen():
    """Open the microphone input stream and start feeding the level meter.

    The stream is stored in ``_VARS['stream']`` and delivers buffers to
    ``callback``. If a stream is already capturing, this call is a no-op, so
    pressing the start button twice does not open (and leak) a second stream.
    """
    existing = _VARS['stream']
    if existing:
        try:
            if existing.is_active():
                return
        except OSError:
            # A handle that has already been closed is expected to raise here
            # (assumption -- confirm against the installed PyAudio version).
            # Fall through and open a fresh stream.
            pass
    _VARS['stream'] = pAud.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
        stream_callback=callback,
    )
    _VARS['stream'].start_stream()

def the_gui():
    """Build the level-meter window and run its event loop until it closes.

    Buttons:
        開始: start the audio level stream and make sure exactly one speech
            recognition worker thread is running.
        停止: stop the level stream and ask the worker to finish.
        終了 / window close: terminate PyAudio and leave the loop.
    """
    AppFont = 'Any 16'
    sg.theme('Black')
    layout = [[sg.ProgressBar(10000, orientation='h',
                              size=(30, 10), key='-PROG-')],
              # key='-TEXT-' lets the label be changed later;
              # size=(40, 2) gives 40 characters by 2 lines.
              [sg.Text('開始を押してね', size=(40, 2), font=(AppFont, 11, 'bold'), key='-TEXT-')],
              [sg.Button('開始', font=(AppFont, 10)),
               sg.Button('停止', font=(AppFont, 10)),
               sg.Button('終了', font=(AppFont, 10))]]
    # NOTE(review): transparent_color=True is kept from the original;
    # confirm that a boolean is the intended value for this option.
    _VARS['window'] = sg.Window('音声入力レベルバー', layout, no_titlebar=False, finalize=True, transparent_color=True, keep_on_top=True)

    worker = None  # the recognition thread, once it has been started

    while True:
        # The timeout keeps the loop (and the window) responsive.
        event, _ = _VARS['window'].read(timeout=100)
        if event == sg.WIN_CLOSED or event == '終了':
            long_operation_thread.alive = False
            pAud.terminate()
            break
        if event == '開始':
            listen()
            print('音声認識中')
            _VARS['window']['-TEXT-'].update('音声認識中')
            # Re-arm the run flag in case a previous stop request is pending.
            long_operation_thread.alive = True
            # Start a worker only if none is running, so that repeated
            # clicks do not stack up threads that all paste the same speech.
            if worker is None or not worker.is_alive():
                worker = threading.Thread(target=long_operation_thread, args=(_VARS['window'],), daemon=True)
                worker.start()
        if event == '停止':
            stop()
            long_operation_thread.alive = False

    _VARS['window'].close()

if __name__ == '__main__':
    # Run the GUI only when executed as a script; the_gui() loops until the
    # window is closed or the exit button is pressed.
    the_gui()
    # Runtime message: "Stop confirmed. Exiting."
    print("停止を確認。終わります。")

いろいろ改善できそうなコードですが、とりあえずできました。

行動規範の内容に同意します

回答1件

ベストアンサー

voice_in 関数は、時間の掛かるループ処理なので
GUI と同じスレッドで実行することはできません。

GUIライブラリは大抵イベント駆動形式で稼働していて
ウィンドウの描画やマウス・キーボード等の入力処理を受け持ちます。

python
1def mainloop():
2    while True:
3        描画更新
4        イベント処理
5            while True: # voice_in 関数のループ処理
6                ... 終わる迄、他のイベントや描画が処理されない

PySimpleGUI の場合は、 window.read がこれに相当して、
この部分が定期的に呼び出されないと、GUIは応答なしになります。

アニメーション等のGUI関連の定期処理 -> timeout イベントを使う
その他の時間の掛かる処理 -> Thread 等

参考:
https://github.com/PySimpleGUI/PySimpleGUI/blob/master/DemoPrograms/Demo_Multithreaded_Long_Tasks.py

python
1
2    application = app()
3    application.mainloop()

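関数 app は戻り値を持ちません（None を返します）。
内部でループ処理しているので、ウィンドウを閉じるまで app() から戻らず、次の行は実行されていないはずです。戻った場合も application は None なので、application.mainloop() は AttributeError になります。
.mainloop は恐らく tkinter の関数で、PySimpleGUI では window.read のループがその役割を担うため不要です。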

投稿2021/11/21 22:45

teamikl

総合スコア8817

hiro04kon

2021/11/22 02:55

ありがとうございます。早速読んで試してみます！

hiro04kon

2021/11/23 02:38

参考のリンクを読んで、Threadを活用してできました！ありがとうございます！！

行動規範の内容に同意します

あなたの回答

tips

プレビュー

行動規範の内容に同意します

質問の解決につながる回答をしましょう。サンプルコードなど、より具体的な説明があると質問者の理解の助けになります。また、読む側のことを考えた、分かりやすい文章を心がけましょう。

15分調べてもわからないことは
teratailで質問しよう！

ただいまの回答率
85.31%

質問をまとめることで
思考を整理して素早く解決

テンプレート機能で
簡単に質問をまとめる

質問する

質問をすることでしか得られない、回答やアドバイスがある。

15分調べてもわからないことは、質問しよう！

pythonで音声認識とレベルバーの同時処理について

解決したいこと

試したこと

音声認識用pyファイル

関連した質問