音声データを機械学習するために数値化を試みましたが、model.fitでエラーが起こってしまいます。正しい数値化の行い方を知っている方がいらっしゃいましたら、ご回答お願いします。
エラー文
ValueError: Input 0 of layer sequential_2 is incompatible with the layer: expected axis -1 of input shape to have value 20 but received input with shape [20, 400, 400, 1]
Python
1# ライブラリの呼び出し 2import os 3import numpy as np 4import glob 5import wave 6import struct 7import pandas as pd 8import matplotlib.pyplot as plt 9import tensorflow 10%matplotlib inline 11 12train_data = [] 13 14# ファイルを読み出し 15train_path = glob.glob('./train_normal/[0-9][0-9][0-9].wav') 16train_num = len([name for name in os.listdir('./train_normal/') if os.path.isfile(os.path.join('./train_normal/', name))]) 17file_name = [name for name in os.listdir('./train_normal/') if os.path.isfile(os.path.join('./train_normal/', name))] 18for i in range(0, train_num): 19 wavf = train_path[i] 20 wr = wave.open(wavf, 'r') 21 filename = file_name[i] 22 23 ch = wr.getnchannels() 24 width = wr.getsampwidth() 25 fr = wr.getframerate() 26 fn = wr.getnframes() 27 amp = (2**8) ** width / 2 28 st = 0 29 hammingWindow = np.hamming(train_num) 30 time = 10 #sec 31 chunk_size =16000*time #1sec = 16000 32 33 data = wr.readframes(chunk_size) # バイナリ読み込み 34 data = np.frombuffer(data,'int16') # intに変換 35 data = data / amp # 振幅正規化(-1~1) 36 data = data.reshape(400, 400, 1) 37 data = np.asarray(data).astype(np.float32) 38 train_data.append(data) 39 40# DataFrame化 41df_train = pd.DataFrame(np.arange(900).reshape(300, 3), index=None, columns=['filename', 'train_x', 'train_y']) 42df_train['filename'] = [name for name in os.listdir('./train_normal/') if os.path.isfile(os.path.join('./train_normal/', name))] 43df_train['train_x'] = train_data 44df_train['train_y'] = 0 45 46train_data = np.array(train_data) 47 48valn_data = [] 49 50# ファイルを読み出し 51valn_path = glob.glob('./valid_normal/[0-9][0-9][0-9].wav') 52valn_num = len([name for name in os.listdir('./valid_normal/') if os.path.isfile(os.path.join('./valid_normal/', name))]) 53file_name = [name for name in os.listdir('./valid_normal/') if os.path.isfile(os.path.join('./valid_normal/', name))] 54for i in range(0, valn_num): 55 wavf = valn_path[i] 56 wr = wave.open(wavf, 'r') 57 filename = file_name[i] 58 59 ch = wr.getnchannels() 60 width = wr.getsampwidth() 61 fr = wr.getframerate() 62 fn = wr.getnframes() 63 amp = (2**8) ** width / 2 64 st = 0 65 hammingWindow = np.hamming(valn_num) 66 time = 10 #sec 67 chunk_size =16000*time #1sec = 16000 68 69 data = wr.readframes(chunk_size) # バイナリ読み込み 70 data = np.frombuffer(data,'int16') # intに変換 71 data = data / amp # 振幅正規化(-1~1) 72 data = data.reshape(400, 400, 1) 73 data = np.asarray(data).astype(np.float32) 74 valn_data.append(data) 75 76# DataFrame化 77df_valn = pd.DataFrame(np.arange(450).reshape(150, 3), index=None, columns=['filename', 'val_x', 'val_y']) 78df_valn['filename'] = [name for name in os.listdir('./valid_normal/') if os.path.isfile(os.path.join('./valid_normal/', name))] 79df_valn['val_x'] = valn_data 80df_valn['val_y'] = 0 81 82valn_data = np.array(valn_data) 83 84vala_data = [] 85 86# ファイルを読み出し 87vala_path = glob.glob('./valid_anomaly/[0-9][0-9][0-9].wav') 88vala_num = len([name for name in os.listdir('./valid_anomaly/') if os.path.isfile(os.path.join('./valid_anomaly/', name))]) 89file_name = [name for name in os.listdir('./valid_anomaly/') if os.path.isfile(os.path.join('./valid_anomaly/', name))] 90for i in range(0, vala_num): 91 wavf = vala_path[i] 92 wr = wave.open(wavf, 'r') 93 filename = file_name[i] 94 95 ch = wr.getnchannels() 96 width = wr.getsampwidth() 97 fr = wr.getframerate() 98 fn = wr.getnframes() 99 amp = (2**8) ** width / 2 100 st = 0 101 hammingWindow = np.hamming(vala_num) 102 time = 10 #sec 103 chunk_size =16000*time #1sec = 16000 104 105 data = wr.readframes(chunk_size) # バイナリ読み込み 106 data = np.frombuffer(data,'int16') # intに変換 107 data = data / amp # 振幅正規化(-1~1) 108 data = data.reshape(400, 400, 1) 109 data = np.asarray(data).astype(np.float32) 110 vala_data.append(data) 111 112# DataFrame化 113df_vala = pd.DataFrame(np.arange(150).reshape(50, 3), index=None, columns=['filename', 'val_x', 'val_y']) 114df_vala['filename'] = [name for name in os.listdir('./valid_anomaly/') if os.path.isfile(os.path.join('./valid_anomaly/', name))] 115df_vala['val_x'] = vala_data 116df_vala['val_y'] = 1 117 118vala_data = np.array(vala_data) 119 120df_val = pd.concat([df_valn, df_vala]) 121val_data = np.append(valn_data, vala_data, axis=0) 122 123test_data = [] 124 125# ファイルを読み出し 126test_path = glob.glob('./test/[0-9][0-9][0-9].wav') 127test_num = len([name for name in os.listdir('./test/') if os.path.isfile(os.path.join('./test/', name))]) 128file_name = [name for name in os.listdir('./test/') if os.path.isfile(os.path.join('./test/', name))] 129for i in range(0, test_num): 130 wavf = test_path[i] 131 wr = wave.open(wavf, 'r') 132 filename = file_name[i] 133 134 ch = wr.getnchannels() 135 width = wr.getsampwidth() 136 fr = wr.getframerate() 137 fn = wr.getnframes() 138 amp = (2**8) ** width / 2 139 st = 0 140 hammingWindow = np.hamming(test_num) 141 time = 10 #sec 142 chunk_size =16000*time #1sec = 16000 143 144 data = wr.readframes(chunk_size) # バイナリ読み込み 145 data = np.frombuffer(data,'int16') # intに変換 146 data = data / amp # 振幅正規化(-1~1) 147 data = data.reshape(400, 400, 1) 148 data = np.asarray(data).astype(np.float32) 149 test_data.append(data) 150 151# DataFrame化 152df_test = pd.DataFrame(np.arange(600).reshape(200, 3), index=None, columns=['filename', 'test_x', 'test_y']) 153df_test['filename'] = [name for name in os.listdir('./test/') if os.path.isfile(os.path.join('./test/', name))] 154df_test['test_x'] = test_data 155 156test_data = np.array(test_data) 157 158batch_size = 20 159epochs = 120 160 161from tensorflow.keras.layers import Activation, Dense, Dropout, Conv2D, Flatten, MaxPooling2D 162from tensorflow.keras.models import Sequential, load_model 163from tensorflow.keras.optimizers import Adam 164 165model = Sequential() 166 167# 畳み込み処理1回目(Conv→Conv→Pool→Dropout) 168model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(400, 400, 500))) 169model.add(Conv2D(32, (3, 3), activation='relu', padding='same')) 170model.add(MaxPooling2D(pool_size=(2, 2))) 171model.add(Dropout(0.25)) 172 173# 畳み込み処理2回目(Conv→Conv→Pool→Dropout) 174model.add(Conv2D(64, (3, 3), activation='relu', padding='same')) 175model.add(Conv2D(64, (3, 3), activation='relu', padding='same')) 176model.add(MaxPooling2D(pool_size=(2, 2))) 177model.add(Dropout(0.25)) 178 179# ニューラルネットワークによる分類(Flatten→Dense→Dropout→Dense) 180model.add(Flatten()) 181model.add(Dense(512, activation='relu')) 182model.add(Dropout(0.5)) 183model.add(Dense(10, activation='softmax')) 184 185model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['acc']) 186 187history = model.fit(train_data, df_train['train_y'], batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(val_data, df_val['val_y']))
エラーの解決法の模索
エラー文を検索にかけて、近しいものがないか検索
音声データの数値化で検索し、様々な方法を模索
バージョン
windows 11
Python 3.8.8
tensorflow 2.8.0
あなたの回答
tips
プレビュー