PythonのKerasの音声認識で、入力音声をモデルの入力サイズに合わせられず予測できない。
予め学習させたモデルを読み込み、PCのマイクで5秒間録音した後、予測確率が0.8を超えたら次の状態に遷移するプログラムなのですが、
入力音声を学習済みモデルの入力サイズに合わせられず、つまずいています。
機械学習もプログラミングも初心者なので、よろしくお願いします。
Python
"""Record 5-second clips from the microphone and classify them with a Keras model.

Each clip is converted to a log-amplitude STFT spectrogram, fitted to the
model's input shape and passed to ``model.predict``.  When the top class
probability exceeds ``THRESHOLD`` the current state switches to that class.
"""
import time

import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pyaudio
import tensorflow as tf
from tensorflow import keras  # the model was trained on Colab

model = keras.models.load_model('esc50-sp-tpu_epoch10.h5', compile=False)
# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()

# Basic settings
SAMPLING_RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 5
FFT_SIZE = 256
THRESHOLD = 0.8
HOP_LENGTH = 128

# Exact number of samples in one clip.  Reading whole CHUNKs only
# (215 * 1024 = 220160 samples) yields 1721 STFT frames, whereas the full
# 220500 samples yield 1 + 220500 // 128 = 1723 frames.
TOTAL_SAMPLES = SAMPLING_RATE * RECORD_SECONDS

STATES = ['airplane', 'breathing', 'brushing_teeth', 'can_opening', 'car_horn', 'cat', 'chainsaw', 'chirping_birds', 'church_bells', 'clapping', 'clock_alarm', 'clock_tick', 'coughing', 'cow', 'crackling_fire', 'crickets', 'crow', 'crying_baby', 'dog', 'door_wood_creaks', 'door_wood_knock', 'drinking_sipping', 'engine', 'fireworks', 'footsteps', 'frog', 'glass_breaking', 'hand_saw', 'helicopter', 'hen', 'insects', 'keyboard_typing', 'laughing', 'mouse_click', 'pig', 'pouring_water', 'rain', 'rooster', 'sea_waves', 'sheep', 'siren', 'sneezing', 'snoring', 'thunderstorm', 'toilet_flush', 'train', 'vacuum_cleaner', 'washing_machine', 'water_drops', 'wind', 'Unknown']
last_state = STATES.index('Unknown')


def show_wave(x):
    """Plot the raw waveform ``x``."""
    plt.plot(x)
    plt.show()


def show_sp(sp, fs, HOP_LENGTH):
    """Display spectrogram ``sp`` (dB) for sampling rate ``fs``.

    The third parameter keeps its original upper-case name so existing
    callers keep working; the body now actually uses it (the original read
    an undefined lower-case ``hop_length``).
    """
    librosa.display.specshow(sp, sr=fs, x_axis="time", y_axis="log", hop_length=HOP_LENGTH)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.show()


def calculate_sp(x, n_fft=FFT_SIZE, hop_length=HOP_LENGTH):
    """Return the log-amplitude (dB) STFT spectrogram of waveform ``x``."""
    stft = librosa.stft(x, n_fft=n_fft, hop_length=hop_length, window='hamming')
    sp = librosa.amplitude_to_db(np.abs(stft))
    return sp


def fit_to_shape(sp, target_shape):
    """Crop or pad the 2-D array ``sp`` so that its shape equals ``target_shape``.

    A ``None`` entry in ``target_shape`` leaves that axis unchanged.  Extra
    rows/columns are dropped from the end; missing ones are appended and
    filled with the minimum value of ``sp``.
    """
    rows, cols = target_shape
    rows = sp.shape[0] if rows is None else rows
    cols = sp.shape[1] if cols is None else cols
    sp = sp[:rows, :cols]
    pad_rows = rows - sp.shape[0]
    pad_cols = cols - sp.shape[1]
    if pad_rows or pad_cols:
        sp = np.pad(sp, ((0, pad_rows), (0, pad_cols)),
                    mode='constant', constant_values=sp.min())
    return sp


count = 0
predictions_in_60_sec = np.empty((0, len(STATES) - 1))

# (height, width) the loaded model expects, e.g. (128, 1723).
MODEL_INPUT_SHAPE = tuple(model.input_shape[1:3])

audio_interface = pyaudio.PyAudio()
audio_stream = audio_interface.open(format=pyaudio.paInt16,
                                    channels=1,
                                    rate=SAMPLING_RATE,
                                    input=True,
                                    frames_per_buffer=CHUNK)
audio_stream.start_stream()

try:
    while True:
        # Record exactly TOTAL_SAMPLES samples; the last read may be shorter
        # than CHUNK.  (The stream is mono, so one frame is one sample.)
        frames = []
        remaining = TOTAL_SAMPLES
        while remaining > 0:
            n_frames = min(CHUNK, remaining)
            frames.append(audio_stream.read(n_frames))
            remaining -= n_frames
        data = b"".join(frames)
        # Scale int16 PCM to floats in [-1.0, 1.0).
        data = np.frombuffer(data, dtype="int16") / float(2**15)

        print(data)
        show_wave(data)

        # Pause the audio stream
        audio_stream.stop_stream()

        start = time.time()

        state = last_state

        D = calculate_sp(data)
        show_sp(D, SAMPLING_RATE, HOP_LENGTH)
        print("wave size:{0}\nspectrogram size:{1}\nsamping rate:{2}".format(data.shape, D.shape, SAMPLING_RATE))
        print(D)

        # NOTE(review): the training-time feature extraction is not shown here.
        # An STFT with n_fft=256 has 129 frequency bins while the model expects
        # 128 rows; cropping makes the shapes agree, but if the model was
        # trained on a different feature (e.g. a 128-band mel spectrogram) the
        # same feature must be computed here instead -- confirm against the
        # training code.
        if D.shape != MODEL_INPUT_SHAPE:
            print("adjusting spectrogram shape {0} to model input {1}".format(
                D.shape, MODEL_INPUT_SHAPE))
            D = fit_to_shape(D, MODEL_INPUT_SHAPE)

        # Reshape the array to the model's (batch, height, width, channel) layout
        D = D.reshape((1,) + D.shape + (1,))
        print("reshape:", D)

        magnitude = D
        predictions = model.predict(magnitude, verbose=0)
        predictions_mean = predictions.mean(axis=0)

        elapsed_time = time.time() - start

        print('{0:s} ({1:.3f}, processed in {2:.3f} seconds)'.format(
            STATES[predictions_mean.argmax()],
            predictions_mean.max(),
            elapsed_time))

        if predictions_mean.max() > THRESHOLD:
            state = predictions_mean.argmax()

        if last_state != state:
            print('CHANGED: {0} > {1}'.format(
                STATES[last_state], STATES[state]))
            last_state = state

        # Resume the audio stream
        audio_stream.start_stream()

except KeyboardInterrupt:
    print('Requested to terminate')

finally:
    audio_stream.stop_stream()
    audio_stream.close()
    audio_interface.terminate()
    print('Terminated')
エラーログ
ValueError Traceback (most recent call last)
<ipython-input-12-106cde585066> in <module>
73
74 magnitude = D
---> 75 predictions = model.predict(magnitude,verbose=False)
76 predictions_mean = predictions.mean(axis=0)
77
~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in predict(self, x, batch_size, verbose, steps, max_queue_size, workers, use_multiprocessing)
1094 # batch size.
1095 x, _, _ = self._standardize_user_data(
-> 1096 x, check_steps=True, steps_name='steps', steps=steps)
1097
1098 if (self.run_eagerly or (isinstance(x, iterator_ops.EagerIterator) and
~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle)
2380 feed_input_shapes,
2381 check_batch_axis=False, # Don't enforce the batch size.
-> 2382 exception_prefix='input')
2383
2384 if y is not None:
~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
360 'Error when checking ' + exception_prefix + ': expected ' +
361 names[i] + ' to have shape ' + str(shape) +
--> 362 ' but got array with shape ' + str(data_shape))
363 return data
364
ValueError: Error when checking input: expected input_1 to have shape (128, 1723, 1) but got array with shape (129, 1721, 1)