Python ValueError: invalid literal for int() with base 10: '' のエラーの改善

前提・実現したいこと

PythonでCNNモデルを用い音声感情分類をするシステムを作っています。
ディレクトリ内のfeatureフォルダにそれぞれSession1~5までのフォルダがあり、
その中のtestフォルダ、trainフォルダにそれぞれtestデータとtrainデータの特徴量ファイルが入っています。

Google Colaboratoryで実行しようとしたところ、以下のエラーメッセージが発生しました。
エラーの意味を調べてみたのですが原因と改善方法がどうしても分からなかったため、どなたかご教示いただきたいです。
よろしくお願いいたします。

発生している問題・エラーメッセージ

---> tf.random.set_seed(int(seed))
ValueError: invalid literal for int() with base 10: ''

該当のソースコード

Python
1# ライブラリ、パッケージのimport
2
# Number of emotion classes; the label files name them Happiness, Anger,
# Neutral and Sadness (see hard_label).
emo_classes = 4
# Fixed number of rows every feature matrix is zero-padded to (see load_data).
slen = 2913
# Number of feature columns per row.
f_dim = 40

args = sys.argv
argc = len(args)

if argc != 3:
    print('Error: python cnn_model.py test_fold initial_value\n')
    # sys.exit is always available (quit() is injected by the site module)
    # and the non-zero status lets a calling shell detect the usage error.
    sys.exit(1)

fold = sys.argv[1]
seed = sys.argv[2]

# Validate both arguments before they are used. Without this, a non-integer
# seed (for example an empty string) surfaces as
# "ValueError: invalid literal for int() with base 10" deep inside the
# seeding calls, and an unknown fold leaves l_train_path undefined.
if fold not in ('1', '2', '3', '4', '5'):
    sys.exit(f'Error: test_fold must be one of 1-5, got {fold!r}')
try:
    int(seed)
except ValueError:
    sys.exit(f'Error: initial_value must be an integer, got {seed!r}')

# Seed every random number generator the script relies on.
tf.random.set_seed(int(seed))
np.random.seed(int(seed))
random.seed(int(seed))
os.environ['TF_DETERMINISTIC_OPS'] = '1'
# NOTE(review): PYTHONHASHSEED is presumably read at interpreter start-up,
# so setting it here may only affect child processes -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed)

# All data is located relative to the current working directory.
_base_dir = os.getcwd()
f_test_path = _base_dir + '/feature/Session' + fold + '/test/'
f_train_path = _base_dir + '/feature/Session' + fold + '/train/'
l_test_path = _base_dir + '/label/impro/Session' + fold + '.csv'

# Training labels come from every session except the held-out fold. The glob
# character class lists the remaining session numbers, e.g. fold '1' yields
# 'Session[2345].csv'.
_train_sessions = ''.join(s for s in '12345' if s != fold)
l_train_path = _base_dir + '/label/impro/Session[' + _train_sessions + '].csv'
41
def hard_label(path):
    """Read emotion class ids from every CSV file matching a glob pattern.

    Only rows with exactly 10 comma-separated columns are considered. The
    10th column must be an emotion name (Happiness, Anger, Neutral, Sadness)
    or already one of the class ids '0'-'3'; every other row is skipped.
    The first column of each accepted row is printed.

    Args:
        path: Glob pattern of the label files; matches are read in sorted
            order.

    Returns:
        1-D int16 numpy array of class ids in file/row order. It is empty
        when no file or no row matched.
    """
    # Only the label column is translated. Replacing the emotion names across
    # the whole line would also rewrite other columns, e.g. an ID in column 0
    # that happens to contain "Anger".
    emotion_ids = {
        'Happiness': 0, 'Anger': 1, 'Neutral': 2, 'Sadness': 3,
        '0': 0, '1': 1, '2': 2, '3': 3,
    }
    label = []
    for file_path in sorted(glob.glob(path)):
        # The with-statement closes the file; no explicit close() is needed.
        with open(file_path, 'r') as file:
            for line in file:
                columns = line.rstrip('\n').split(',')
                if len(columns) != 10:
                    continue
                class_id = emotion_ids.get(columns[9])
                if class_id is None:
                    continue
                label.append(class_id)
                print(columns[0])
    return np.array(label, dtype='int16')
63
def load_data(path):
    """Load every ``*.npy`` feature file under a path prefix into one array.

    Files are read in sorted name order. Each file must hold a 2-D array with
    ``f_dim`` columns and at most ``slen`` rows; shorter arrays are
    zero-padded at the end up to ``slen`` rows.

    Args:
        path: Path prefix (normally a directory ending in '/') to which
            '*.npy' is appended for globbing.

    Returns:
        float32 array of shape (n_files, slen, f_dim, 1). ``n_files`` is 0
        when nothing matches.

    Raises:
        ValueError: If a file is not 2-D with ``f_dim`` columns or has more
            than ``slen`` rows.
    """
    f_list = sorted(glob.glob(path + '*.npy'))
    X = np.zeros((len(f_list), slen, f_dim), dtype='float32')

    for ii, fname in enumerate(f_list):
        tmp = np.load(fname)
        # Check the shape explicitly so a bad file is reported by name
        # instead of failing with an obscure "negative dimensions" error.
        if tmp.ndim != 2 or tmp.shape[1] != f_dim:
            raise ValueError(
                f'{fname}: expected a 2-D array with {f_dim} columns, '
                f'got shape {tmp.shape}')
        if len(tmp) > slen:
            raise ValueError(
                f'{fname}: {len(tmp)} rows exceed the maximum of {slen}')
        # X is already zero-filled, so writing the leading rows is the same
        # as stacking the data on top of a zero padding block.
        X[ii, :len(tmp)] = tmp
    return X.reshape(len(X), slen, f_dim, 1)
73
def create_model(height, width, depth):
    """Build and return the CNN model for inputs of the given dimensions.

    The model definition itself is omitted from this excerpt, so ``model``
    is not defined here.
    """
    # Model definition (omitted). The original comment line was indented with
    # full-width U+3000 spaces, which Python rejects; use ASCII spaces only.
    return model
77
def categorical_focal_loss(num_classes, alpha, gamma, smooth_alpha):
    """Return a Keras-style loss function implementing categorical focal loss.

    The loss computation is omitted from this excerpt. The inner function
    raises NotImplementedError so that the placeholder is valid Python
    instead of an IndentationError.
    """
    def categorical_focal_loss_fixed(y_true, y_pred):
        # Computation of the categorical focal loss (omitted from the excerpt).
        raise NotImplementedError(
            'categorical focal loss computation is omitted from this excerpt')
    return categorical_focal_loss_fixed
82
def categorical_crossentropy():
    """Return a loss function computing per-sample categorical cross-entropy.

    The returned function takes ``(y_true, y_pred)`` and uses the Keras
    backend ``K`` for all tensor operations.
    """
    def categorical_crossentropy_fixed(y_true, y_pred):
        # Renormalise so the class probabilities of each sample sum to 1.
        y_pred = y_pred / K.sum(y_pred, axis=-1, keepdims=True)
        # Keep probabilities away from 0 and 1 to avoid NaN/Inf in the log.
        y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())
        # Cross-entropy: negative sum over the last axis of y_true * log(p).
        return -K.sum(y_true * K.log(y_pred), -1)
    return categorical_crossentropy_fixed
93
class Metrics(Callback):
    """Keras callback that prints one table row of metrics per epoch.

    After every epoch it predicts on the validation data, computes the macro
    recall (printed as val_UA) and the per-class recall, and prints them next
    to the loss/accuracy values Keras supplies in ``logs``.

    Args:
        validation: Tuple ``(x, y)`` of validation inputs and targets.
            Targets may be class ids or one-hot rows.
        file_path: Path stored on the instance; not used by this class.
    """

    def __init__(self, validation, file_path):
        super().__init__()
        self.file_path = file_path
        self.validation = validation

    def on_train_begin(self, logs=None):
        # Keras calls this hook as on_train_begin(logs). The former signature
        # (epoch, logs={}) only worked because logs was passed positionally.
        self.best_val_recall = 0
        self.val_recalls = []

    def on_epoch_end(self, epoch, logs=None):
        # Avoid a shared mutable default argument.
        logs = logs or {}

        val_predict = np.argmax(self.model.predict(self.validation[0]), -1)
        val_targ = self.validation[1]

        # Convert one-hot targets to class ids.
        if len(val_targ.shape) == 2 and val_targ.shape[1] != 1:
            val_targ = np.argmax(val_targ, -1)

        val_recall = recall_score(val_targ, val_predict, average='macro')
        _val_recall = recall_score(val_targ, val_predict, average=None)

        self.val_recalls.append(val_recall)

        # Print the table header once, before the first row.
        if epoch == 0:
            print('epoch \t loss \t\t WA \t\t val_loss \t val_WA \t val_UA')
        epoch += 1  # Keras epochs are 0-based; show them 1-based.
        print(epoch, '\t',
              '{:.5f}'.format(logs['loss']), '\t',
              '{:.5f}'.format(logs['accuracy']), '\t',
              '{:.5f}'.format(logs['val_loss']), '\t',
              '{:.5f}'.format(logs['val_accuracy']), '\t',
              f'{val_recall:.5f}', '\t',
              _val_recall
              )
128
def main():
    """Train the CNN on the training sessions and evaluate on the test fold.

    Loads features and labels from the module-level paths, trains with a
    checkpoint on the lowest validation loss, reloads the best weights,
    prints weighted/unweighted accuracy and the row-normalised confusion
    matrix, and saves true labels, predicted labels and predicted
    probabilities to ``Result/Session<fold>/result_<seed>.npy``.
    """
    x_train = load_data(f_train_path)
    y_train = hard_label(l_train_path)
    Happiness = np.count_nonzero(y_train == 0)
    Anger = np.count_nonzero(y_train == 1)
    Neutral = np.count_nonzero(y_train == 2)
    Sadness = np.count_nonzero(y_train == 3)
    n_max = max(Happiness, Anger, Neutral, Sadness)
    y_train = to_categorical(y_train, emo_classes)

    x_test = load_data(f_test_path)
    y_test = hard_label(l_test_path)
    y_test = to_categorical(y_test, emo_classes)

    print(x_train.shape, y_train.shape)
    print(x_test.shape, y_test.shape)

    batch_size = 16
    epochs = 100

    w_dir = os.getcwd() + '/Result/Session' + fold + '/'
    os.makedirs(w_dir, exist_ok=True)
    w_file = w_dir + f'seed{int(seed):03d}.hdf5'

    model = create_model(slen, f_dim, 1)

    # Stop training when a monitored metric has stopped improving.
    # NOTE: this callback is constructed but not passed to fit() below, so
    # training always runs for the full number of epochs.
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.001,
                                   patience=1,
                                   verbose=0,
                                   mode='auto'
                                   )

    # Save model weights by the lowest validation loss
    chkPoint = ModelCheckpoint(w_file,
                               monitor='val_loss',
                               mode='min',
                               verbose=0,
                               save_best_only=True,
                               save_weights_only=True,
                               )

    valid_metrics = Metrics(validation=(x_test, y_test), file_path=w_file)
    # `lr` is a deprecated alias that newer Keras releases reject; use the
    # canonical `learning_rate` argument.
    opt = Adam(learning_rate=1e-4, amsgrad=True)

    # Configures the model for training
    model.compile(loss=categorical_crossentropy(), optimizer=opt, metrics=['accuracy'])

    # Inverse-frequency class weights (currently not passed to fit()). A
    # class with no samples gets weight 0.0 instead of raising
    # ZeroDivisionError.
    class_counts = {0: Happiness, 1: Anger, 2: Neutral, 3: Sadness}
    class_weight = {cls: (n_max / count if count else 0.0)
                    for cls, count in class_counts.items()}

    # Trains the model for a fixed number of epochs (iterations on a dataset)
    model_history = model.fit(x=x_train,
                              y=y_train,
                              batch_size=batch_size,
                              epochs=epochs,
                              #class_weight=class_weight,
                              verbose=0,
                              validation_data=(x_test, y_test),
                              callbacks=[valid_metrics, chkPoint]
                              )

    # Display loss and accuracy of learning process
    plot_history(model_history,
                 save_graph_img_path=w_dir,
                 fig_size_width=12,
                 fig_size_height=10,
                 lim_font_size=25
                 )

    # Evaluate performance by best model
    eval_model = create_model(slen, f_dim, 1)
    eval_model.load_weights(w_file)

    predict_prob = eval_model.predict(x_test)
    predict_classes = np.argmax(predict_prob, axis=1)
    predict_classes = predict_classes.reshape(len(predict_classes), 1)

    true_classes = np.argmax(y_test, axis=1)
    true_classes = true_classes.reshape(len(true_classes), 1)
    result = np.hstack((true_classes, predict_classes))
    result = np.hstack((result, predict_prob))
    # Pin the label set so the matrix is always emo_classes x emo_classes,
    # even when a class appears in neither the targets nor the predictions.
    # Otherwise the indexing and reshape below fail.
    C = confusion_matrix(true_classes, predict_classes,
                         labels=list(range(emo_classes)))
    line = np.sum(C, axis=1)

    # WA: overall accuracy. UA: mean of the per-class recalls.
    # A class with no test samples makes its recall term (and UA) NaN.
    WA, UA = 0, 0
    for ii in range(emo_classes):
        WA += C[ii, ii]
        UA += C[ii, ii] / line[ii]

    WA = (WA / np.sum(C)) * 100.0
    UA = UA * 100 / emo_classes
    # Normalise each row so it sums to 1 (per-class distribution).
    C = C / np.sum(C, axis=1).reshape(emo_classes, 1)
    np.set_printoptions(precision=3)
    print('Weigted Accuracy:', '{:.3f}'.format(WA))
    print('Unweigted Accuracy:', '{:.3f}'.format(UA))
    print('Confusion matrix \n', C*100)

    # save true label, predicted label, and predicted probability
    np.save(w_dir + f'result_{int(seed):03d}.npy', result)


if __name__ == '__main__':
    main()
    print('><')
234

補足情報

Session4フォルダのtestデータが元々存在しないことが原因なのかと考えましたが、どのように改善すれば良いか分かりませんでした…（他のSession1,2,3,5フォルダ内のデータは、testデータもtrainデータも複数ファイル存在します）
（分かりにくい説明で申し訳ございません。）

行動規範の内容に同意します

回答2件

ベストアンサー

ぶっちゃけ、10進数の数値の文字列じゃありません。というエラーですんで、
そこの文字列がどういうものなのかをprintしてみよう

投稿2021/12/15 11:04

編集2021/12/15 11:04

y_waiwai

総合スコア88040

odenhanpen

2021/12/16 03:28

ご回答ありがとうございます。上記のコードのtf.random.set_seed(int(seed))という部分でエラーが発生しているのですが、この部分をprintしてみても実行結果に反映されないです…

y_waiwai

2021/12/16 05:55

そのseedになにはいってるのかみてみよう

odenhanpen

2021/12/16 06:03

すみません、説明不足だったので補足させていただきます。 python cnn_model.py 1 0 と実行すると、No such file or directoryのエラーが発生しました。pathの表記は間違っていなかったため、原因が分からずGoogle Colabのコマンドに直接コードを記述すると、上記のエラーが発生しました。 seedの中身を確認するため、print(seed)と記述したのですが表示されませんでした…

odenhanpen

2021/12/16 06:39

大変失礼いたしました。現在のディレクトリを取得する文が抜けていました。そこを修正すると、上記のエラーは解消されました。

行動規範の内容に同意します

引数の二個目が入ってないんじゃないでしょうか。

投稿2021/12/15 09:56

irognodyci

総合スコア227

odenhanpen

2021/12/15 10:22

ご回答ありがとうございます。コマンドライン引数でしょうか？初めGoogle Colab上で2個コマンドライン引数を指定し実行すると、No such file or directoryエラーが出てしまい原因がわからなかったので、コマンドにコードを直書きし実行したところ、上記のエラーメッセージが発生してしまいました…

irognodyci

2021/12/15 10:42

そのエラーはどのタイミング(何行目)で出るのでしょうか？それと、コマンドライン引数を指定せずに実行しても動きますか？

odenhanpen

2021/12/15 10:52

上記記載のコードでは18行目、tf.random.set_seed(int(seed))の部分で出ています。コマンドライン引数を指定せず実行してもNo such file or directoryとエラーが出てしまいました。

行動規範の内容に同意します

あなたの回答