前提
話題になっているWhisperで音声認識を試そうとしたところ、以下のようなエラーが出ました。
ある動画から2分間の音声ファイルを抽出し、out_exe.mp3という名前で出力しました。
ファイルパスはGoogle Colab上で /content/out_exe.mp3 になっています。
実現したいこと
音声認識ができるようにしたい。
発生している問題・エラーメッセージ
Traceback (most recent call last) <ipython-input-64-cad93362898e> in <module> ----> 1 model.transcribe("out_exe.mp3") 18 frames /usr/local/lib/python3.7/dist-packages/whisper/transcribe.py in transcribe(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, **decode_options) 88 print("Detecting language using up to the first 30 seconds. Use `--language` to specify the language") 89 segment = pad_or_trim(mel, N_FRAMES).to(model.device).to(dtype) ---> 90 _, probs = model.detect_language(segment) 91 decode_options["language"] = max(probs, key=probs.get) 92 if verbose is not None: /usr/local/lib/python3.7/dist-packages/torch/autograd/grad_mode.py in decorate_context(*args, **kwargs) 25 def decorate_context(*args, **kwargs): 26 with self.clone(): ---> 27 return func(*args, **kwargs) 28 return cast(F, decorate_context) 29 /usr/local/lib/python3.7/dist-packages/whisper/decoding.py in detect_language(model, mel, tokenizer) 59 for j, c in zip(tokenizer.all_language_tokens, tokenizer.all_language_codes) 60 } ---> 61 for i in range(n_audio) 62 ] 63 /usr/local/lib/python3.7/dist-packages/whisper/decoding.py in <listcomp>(.0) 59 for j, c in zip(tokenizer.all_language_tokens, tokenizer.all_language_codes) 60 } ---> 61 for i in range(n_audio) 62 ] 63 /usr/local/lib/python3.7/dist-packages/whisper/tokenizer.py in all_language_codes(self) 226 @lru_cache() 227 def all_language_codes(self) -> Tuple[str]: --> 228 return tuple(self.decode([l]).strip("<|>") for l in self.all_language_tokens) 229 230 @property /usr/local/lib/python3.7/dist-packages/whisper/tokenizer.py in <genexpr>(.0) 226 @lru_cache() 227 def all_language_codes(self) -> Tuple[str]: --> 228 return tuple(self.decode([l]).strip("<|>") for l in self.all_language_tokens) 229 230 @property /usr/local/lib/python3.7/dist-packages/whisper/tokenizer.py in decode(self, token_ids, **kwargs) 139 140 def decode(self, token_ids: Union[int, List[int], 
np.ndarray, torch.Tensor], **kwargs): --> 141 return self.tokenizer.decode(token_ids, **kwargs) 142 143 def decode_with_timestamps(self, tokens) -> str: /usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py in decode(self, token_ids, skip_special_tokens, clean_up_tokenization_spaces, **kwargs) 3428 """ 3429 # Convert inputs to python lists -> 3430 token_ids = to_py_obj(token_ids) 3431 3432 return self._decode( /usr/local/lib/python3.7/dist-packages/transformers/utils/generic.py in to_py_obj(obj) 115 return {k: to_py_obj(v) for k, v in obj.items()} 116 elif isinstance(obj, (list, tuple)): --> 117 return [to_py_obj(o) for o in obj] 118 elif is_tf_available() and _is_tensorflow(obj): 119 return obj.numpy().tolist() /usr/local/lib/python3.7/dist-packages/transformers/utils/generic.py in <listcomp>(.0) 115 return {k: to_py_obj(v) for k, v in obj.items()} 116 elif isinstance(obj, (list, tuple)): --> 117 return [to_py_obj(o) for o in obj] 118 elif is_tf_available() and _is_tensorflow(obj): 119 return obj.numpy().tolist() /usr/local/lib/python3.7/dist-packages/transformers/utils/generic.py in to_py_obj(obj) 116 elif isinstance(obj, (list, tuple)): 117 return [to_py_obj(o) for o in obj] --> 118 elif is_tf_available() and _is_tensorflow(obj): 119 return obj.numpy().tolist() 120 elif is_torch_available() and _is_torch(obj): /usr/local/lib/python3.7/dist-packages/transformers/utils/generic.py in _is_tensorflow(x) 97 98 def _is_tensorflow(x): ---> 99 import tensorflow as tf 100 101 return isinstance(x, tf.Tensor) /usr/local/lib/python3.7/dist-packages/tensorflow/__init__.py in <module> 35 import typing as _typing 36 ---> 37 from tensorflow.python.tools import module_util as _module_util 38 from tensorflow.python.util.lazy_loader import LazyLoader as _LazyLoader 39 /usr/local/lib/python3.7/dist-packages/tensorflow/python/__init__.py in <module> 35 36 from tensorflow.python import pywrap_tensorflow as _pywrap_tensorflow ---> 37 from 
tensorflow.python.eager import context 38 39 # pylint: enable=wildcard-import /usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/context.py in <module> 27 import six 28 ---> 29 from tensorflow.core.framework import function_pb2 30 from tensorflow.core.protobuf import config_pb2 31 from tensorflow.core.protobuf import coordination_config_pb2 /usr/local/lib/python3.7/dist-packages/tensorflow/core/framework/function_pb2.py in <module> 14 15 ---> 16 from tensorflow.core.framework import attr_value_pb2 as tensorflow_dot_core_dot_framework_dot_attr__value__pb2 17 from tensorflow.core.framework import node_def_pb2 as tensorflow_dot_core_dot_framework_dot_node__def__pb2 18 from tensorflow.core.framework import op_def_pb2 as tensorflow_dot_core_dot_framework_dot_op__def__pb2 /usr/local/lib/python3.7/dist-packages/tensorflow/core/framework/attr_value_pb2.py in <module> 14 15 ---> 16 from tensorflow.core.framework import tensor_pb2 as tensorflow_dot_core_dot_framework_dot_tensor__pb2 17 from tensorflow.core.framework import tensor_shape_pb2 as tensorflow_dot_core_dot_framework_dot_tensor__shape__pb2 18 from tensorflow.core.framework import types_pb2 as tensorflow_dot_core_dot_framework_dot_types__pb2 /usr/local/lib/python3.7/dist-packages/tensorflow/core/framework/tensor_pb2.py in <module> 14 15 ---> 16 from tensorflow.core.framework import resource_handle_pb2 as tensorflow_dot_core_dot_framework_dot_resource__handle__pb2 17 from tensorflow.core.framework import tensor_shape_pb2 as tensorflow_dot_core_dot_framework_dot_tensor__shape__pb2 18 from tensorflow.core.framework import types_pb2 as tensorflow_dot_core_dot_framework_dot_types__pb2 /usr/local/lib/python3.7/dist-packages/tensorflow/core/framework/resource_handle_pb2.py in <module> 148 , 149 'DESCRIPTOR' : _RESOURCEHANDLEPROTO, --> 150 '__module__' : 'tensorflow.core.framework.resource_handle_pb2' 151 # @@protoc_insertion_point(class_scope:tensorflow.ResourceHandleProto) 152 }) 
SystemError: google/protobuf/pyext/descriptor.cc:358: bad argument to internal function
該当のソースコード
python
import whisper

model = whisper.load_model("base")
print(model.transcribe("out_exe.mp3"))
回答1件
あなたの回答
tips
プレビュー