###前提・実現したいこと
Google Speech API を使って、音声のテキスト出力を行っています。
その出力とあわせて、各単語が音声ファイル内のどの時刻に発話されたものであるかを知りたいのですが、どのように行えばよいかわかりません。お詳しい方、よろしくお願いします。
上記のコードにマイナーチェンジを行ったもので出力を行っています。ここにどのようなコードを足せば、時間つきのファイルになるのかを知りたいです。
###該当のソースコード
#!/usr/bin/env python
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Google Cloud Speech API sample application using the REST API for async
batch processing.

Example usage:
    python transcribe_async.py resources/audio.raw
    python transcribe_async.py gs://cloud-samples-tests/speech/brooklyn.flac
"""
import argparse
import io
import time
def transcribe_file(speech_file):
    """Transcribe the given audio file asynchronously.

    Reads ``speech_file`` as raw bytes, submits it as a FLAC sample at
    16000 Hz for asynchronous recognition (language code ``en-GB``, with
    one speech-context phrase), polls the returned operation, and prints
    the transcript and confidence of every result.

    Args:
        speech_file: Path of a local audio file to transcribe.

    Returns:
        None. Results are printed to stdout. If the operation is still
        incomplete after the retry budget (100 polls, 2 seconds apart) is
        spent, a message is printed and the function returns early.
    """
    from google.cloud import speech
    speech_client = speech.Client()

    # Only the read needs the file handle; close it before the slow
    # network round-trips below.
    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()

    audio_sample = speech_client.sample(
        content,
        source_uri=None,
        encoding='FLAC',
        sample_rate=16000)

    operation = speech_client.speech_api.async_recognize(
        audio_sample,
        language_code='en-GB',
        speech_context=['<登録した単語>'])

    # Poll until the operation reports completion or the budget runs out.
    retry_count = 100
    while retry_count > 0 and not operation.complete:
        retry_count -= 1
        time.sleep(2)
        operation.poll()

    if not operation.complete:
        print('Operation not complete and retry limit reached.')
        return

    # NOTE(review): only transcript and confidence are printed here.
    # Per-word timestamps would presumably need the API's word time offset
    # option (`enable_word_time_offsets`) -- confirm that the installed
    # client library version exposes it.
    alternatives = operation.results
    for alternative in alternatives:
        print('Transcript: {}'.format(alternative.transcript))
        print('Confidence: {}'.format(alternative.confidence))
    # [END send_request]
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribes the audio file specified by the gcs_uri.

    Builds a FLAC sample at 16000 Hz that points at ``gcs_uri`` (no inline
    content), submits it for asynchronous recognition, polls the returned
    operation, and prints the transcript and confidence of every result.

    Args:
        gcs_uri: URI of the audio file, passed to the sample as
            ``source_uri``.

    Returns:
        None. Results are printed to stdout. If the operation is still
        incomplete after the retry budget (100 polls, 2 seconds apart) is
        spent, a message is printed and the function returns early.
    """
    from google.cloud import speech
    speech_client = speech.Client()

    audio_sample = speech_client.sample(
        content=None,
        source_uri=gcs_uri,
        encoding='FLAC',
        sample_rate=16000)

    operation = speech_client.speech_api.async_recognize(audio_sample)

    # Poll until the operation reports completion or the budget runs out.
    retry_count = 100
    while retry_count > 0 and not operation.complete:
        retry_count -= 1
        time.sleep(2)
        operation.poll()

    if not operation.complete:
        print('Operation not complete and retry limit reached.')
        return

    alternatives = operation.results
    for alternative in alternatives:
        print('Transcript: {}'.format(alternative.transcript))
        print('Confidence: {}'.format(alternative.confidence))
    # [END send_request_gcs]
if __name__ == '__main__':
    # The module docstring doubles as the --help description; the raw
    # formatter keeps its line breaks instead of re-wrapping them.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    args = parser.parse_args()

    # Paths starting with gs:// go to the GCS variant; anything else is
    # treated as a local file.
    if args.path.startswith('gs://'):
        transcribe_gcs(args.path)
    else:
        transcribe_file(args.path)
あなたの回答
tips
プレビュー