Background
I used the model that ps_aux_grep kindly shared to run binary classification on a different dataset, but I get an error message and have not been able to resolve it.
Last time the files had varying numbers of rows; this time every file has exactly 30 rows of data (31 rows including the header). I have confirmed that the data itself has no problems such as missing values. I would appreciate any suggestions on other possible causes and how to fix this.
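For reference, the row count the loader actually compares against `roll` can be checked with the same counting logic as `DataLoader.__init__`; this is a minimal sketch, using the file path taken from the error message below:

```python
# Minimal check, counting data rows the same way DataLoader.__init__ does:
# total line count minus one header line. The path is the file named in
# the error message.
path = "/content/drive/MyDrive/data_FOG/noFOG/ID004_2.csv"
with open(path) as f:
    n_rows = len(f.readlines()) - 1  # subtract header row
print(n_rows)  # 30 for a 31-line file, which is exactly equal to roll
```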
Problem / error message
```
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-6-878f4bfc0c58> in <cell line: 89>()
     87 batch_size = 8
     88 time_stamp = 30
---> 89 generator = DataLoader(split = 3, roll = time_stamp, batch_size = batch_size, epochs = epochs)
     90 
     91 def reround(number, ndigits=0): # adjust the 0-to-1 decision threshold

<ipython-input-6-878f4bfc0c58> in __init__(self, split, batch_size, epochs, roll)
     24         self.y.append(self.category[name])
     25         self.l.append(len(open(file).readlines()) - 1) # subtract header row
---> 26         assert self.l[-1] > roll, f"Missing roll size: (roll, file length): ({roll}, {self.l[-1]}) on {file}"
     27         self.skf = StratifiedKFold(split, shuffle = True)
     28 

AssertionError: Missing roll size: (roll, file length): (30, 30) on /content/drive/MyDrive/data_FOG/noFOG/ID004_2.csv
```
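The assertion in `__init__` uses a strict greater-than, so a file with exactly `roll` data rows trips it. A minimal reproduction with the values from the message above:

```python
# Reproduction of the failing check with the values from the traceback:
# 30 data rows (31 lines minus the header) versus roll = 30.
roll = 30
file_length = 31 - 1  # subtract header row, as in DataLoader.__init__
# Strict '>' demands at least roll + 1 data rows, so 30 > 30 fails:
assert file_length > roll, f"Missing roll size: (roll, file length): ({roll}, {file_length})"
```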
Relevant source code
```python
import csv
import math
import numpy as np
from glob import glob
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from google.colab import drive
drive.mount('/content/drive')

class DataLoader:
    def __init__(self, split: int, batch_size: int, epochs: int, roll: int):
        self.batch_size = batch_size
        self.epochs = epochs
        self.roll = roll
        self.file, self.y, self.l = list(), list(), list()
        self.category = {"normal": 0, "abnormal": 1}
        for name in self.category.keys():
            for i, file in enumerate(glob(f"/content/drive/MyDrive/data/{name}/*.csv")):
                self.file.append(file)
                self.y.append(self.category[name])
                self.l.append(len(open(file).readlines()) - 1)  # subtract header row
                assert self.l[-1] > roll, f"Missing roll size: (roll, file length): ({roll}, {self.l[-1]}) on {file}"
        self.skf = StratifiedKFold(split, shuffle = True)

    def generator(self, idx, epochs):
        X1, X2, X3, y = list(), list(), list(), list()
        for e in range(epochs):
            np.random.shuffle(idx)
            for i in idx:
                start = np.random.randint(0, self.l[i] - self.roll - 1)
                data = pd.read_csv(self.file[i]).values[start: start + self.roll]
                data = StandardScaler().fit_transform(data.reshape(-1, 1)).reshape(data.shape)
                X1.append(np.concatenate([data[:, 8:26], data[:, 71:83]], axis = -1))
                X2.append(data[:, 107:113])
                X3.append(data[:, 117:120])
                y.append(self.y[i])
                if len(X1) == self.batch_size:
                    yield list(map(np.array, [X1, X2, X3])), np.array(y)  # return just one batch
                    X1, X2, X3, y = list(), list(), list(), list()
        if len(X1):
            yield list(map(np.array, [X1, X2, X3])), np.array(y)  # return the remaining batch

    def split(self):
        for train, test in self.skf.split(self.file, self.y):
            self.test_idx = test
            yield (
                self.generator(train, self.epochs),
                self.generator(test, self.epochs),
                math.ceil(len(train) / self.batch_size),
                math.ceil(len(test) / self.batch_size)
            )

import tensorflow as tf
from tensorflow.python import keras
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Concatenate, Flatten, Dropout
from keras.layers import Conv1D, AveragePooling1D, GlobalAveragePooling1D
from keras.layers import LSTM
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

def build_model2(time_stamp):
    inputs1 = Input(shape = (300, 30))
    inputs2 = Input(shape = (300, 6))
    inputs3 = Input(shape = (300, 3))

    x1 = Conv1D(32, 7, activation = "swish", kernel_initializer = "he_uniform")(inputs1)
    x1 = AveragePooling1D()(x1)
    x2 = Conv1D(32, 7, activation = "swish", kernel_initializer = "he_uniform")(inputs2)
    x2 = AveragePooling1D()(x2)
    x3 = Conv1D(32, 7, activation = "swish", kernel_initializer = "he_uniform")(inputs3)
    x3 = AveragePooling1D()(x3)

    combined = Concatenate(axis = -1)([x1, x2, x3])
    x = LSTM(32, dropout = 0.2)(combined)
    x = Dense(1, activation = "sigmoid")(x)
    return Model(inputs = [inputs1, inputs2, inputs3], outputs = x)

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import balanced_accuracy_score, accuracy_score, precision_score, recall_score, f1_score

epochs = 128
batch_size = 8
time_stamp = 30
generator = DataLoader(split = 3, roll = time_stamp, batch_size = batch_size, epochs = epochs)

def reround(number, ndigits=0):  # adjust the 0-to-1 decision threshold
    shift_amount = 10 ** ndigits
    shifted = number * shift_amount
    return np.floor(shifted + 0.6) / shift_amount

for train_gen, valid_gen, steps_per_epoch, validation_steps in generator.split():
    model = build_model2(time_stamp)  # Be sure to rebuild the model for each fold.
    model.summary()
    model.compile(
        loss = "binary_crossentropy",
        optimizer = Adam(),
        metrics = ["acc"]
    )
    es = EarlyStopping(
        monitor = "val_loss",         # if val_loss
        patience = 10,                # within 10 epochs
        mode = "min",                 # fails to reach a new minimum,
        restore_best_weights = True,  # stop and restore the best weights
        verbose = 1,
    )
    # feed the data to the model and train it
    model.fit(
        train_gen,
        epochs = epochs,
        steps_per_epoch = steps_per_epoch,
        class_weight = {0: 4, 1: 1},
        validation_data = valid_gen,
        validation_steps = validation_steps,
        callbacks = [es],
    )

    y_valid, y_pred = list(), list()
    test_generator = generator.generator(generator.test_idx, 1)
    for (X1, X2, X3), y in test_generator:
        y_pred.extend(reround(model.predict([X1, X2, X3], batch_size = batch_size)))
        y_valid.extend(y)

    # confusion matrix
    tn, fp, fn, tp = confusion_matrix(y_valid, y_pred).ravel()  # extract each cell of the confusion matrix
    print(f"TN {tn} / FP {fp} / FN {fn} / TP {tp}")
```
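Note that the strict assertion appears tied to the random window start in `generator()`: with exactly `roll` data rows, `self.l[i] - self.roll - 1` is negative, leaving `np.random.randint` no valid range even if the assertion were relaxed. A small sketch of that interaction, assuming `l = roll = 30` as in the failing file:

```python
import numpy as np

# Sketch of the window-start draw from DataLoader.generator(), assuming
# a file with l = 30 data rows and roll = 30 (the failing case above).
l, roll = 30, 30
try:
    start = np.random.randint(0, l - roll - 1)  # randint(0, -1)
except ValueError as e:
    print(e)  # "low >= high": no room to place a 30-row window randomly
```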