kerasを用いてCSVに各エポック毎に出力したい

kerasを用いて，画像を入力データとして，数値予測する回帰分析をしています．

CSVLoggerを用いて各エポック毎にloss, val_lossを出力しているのですが，予測値の出力の仕方が分かりません．以下のコードに加えたいのですが，どのようにコーディングすればよいでしょうか？

python
1#最大応力の値の予測
2from keras.models import Sequential
3from keras.layers import Activation, Dense, Dropout, LeakyReLU
4#from keras.layers.advanced_activations import LeakyReLU
5from keras.utils.np_utils import to_categorical
6from keras.optimizers import Adagrad
7from keras.optimizers import Adam
8from keras.models import load_model
9from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger
10from sklearn.model_selection import train_test_split
11from sklearn import datasets
12import numpy as np
13from PIL import Image
14import os
15import time
16import csv
17import cv2
18import math
19
20
# Record the wall-clock start so the total run time can be reported at the end.
start_time = time.time()
print("開始時刻: " + str(start_time))

# Number of images in each of the two groups ("a" and "b").
A = 50
B = 50
# Total number of images. Deliberately not named `sum`, which would shadow
# the builtin of the same name for the rest of the script.
num_images = A + B

# Accumulators for the training inputs (images) and targets (values).
image_list = []
location_list = []

# --- Hyperparameters ---
# Image size passed to PIL's resize() as (x, y).
x = 150
y = 75
Z = x * y  # number of nodes in the input layer
# Number of epochs
E = 5
# Batch size
BATCH_SIZE = 32
# Learning rate
LR = 0.00001
# Number of training samples (all images are used).
train = num_images
43
# Load the images: open -> resize -> flatten to one row -> scale by 1/255.
print("画像の読み込み　開始")
count = 1
# Both groups are handled identically; only the folder / file suffix and the
# number of files differ, so a single loop covers them (group "a" first).
for group, num_files in (("a", A), ("b", B)):
    for i in range(num_files):
        filename = str(i) + group + ".png"
        print(filename)
        # Open with PIL, resize to (x, y) and convert to a NumPy array.
        image = np.array(Image.open("data/image/" + group + "/" + filename).resize((x, y)))
        print(image.shape)
        # Flatten to a 1-D vector of Z elements.
        # NOTE(review): this reshape only succeeds when the array holds exactly
        # x*y values, i.e. a single-channel image — confirm the PNGs are grayscale.
        image = np.reshape(image, Z)
        print(image.shape)
        image_list.append(image / 255.)  # accumulate one image at a time
        print(str(count) + "/" + str(train))
        count += 1
        print('\n')

print("画像の読み込み　終了")
72
# Keras expects NumPy arrays, so convert the list of flattened images.
image_list = np.array(image_list)

# Read the maximum-stress values for group "a" and then group "b", i.e. in
# the same order in which the images were appended.
for csv_path in ("data/value/max_stress_value_a.csv",
                 "data/value/max_stress_value_b.csv"):
    location = np.loadtxt(csv_path, delimiter=",", skiprows=0)
    location_list.extend(location)
location_list = np.array(location_list)

print("\n最大応力の値の行列の形")
print(location_list.shape)
# Keep a copy of the combined ground-truth values next to the inputs.
np.savetxt("data/value/max_stress_value_true.csv", location_list, delimiter=",")
87
# Build the fully connected regression network.
model = Sequential()

# First hidden layer; it also declares the input size (Z flattened pixels).
model.add(Dense(5000, input_dim=Z, kernel_initializer='random_uniform', bias_initializer='zeros'))
model.add(LeakyReLU())
model.add(Dropout(0.2))

# Remaining hidden layers share the same Dense -> LeakyReLU -> Dropout pattern.
for units, drop_rate in ((100, 0.075), (10, 0.0), (5, 0.0)):
    model.add(Dense(units, kernel_initializer='random_uniform', bias_initializer='zeros'))
    model.add(LeakyReLU())
    model.add(Dropout(drop_rate))

# Single linear output unit: the predicted value.
model.add(Dense(1))
model.add(Activation("linear"))

# Use Adam as the optimizer.
opt = Adam(lr=LR)

# Compile the model. The loss is the mean absolute percentage error.
model.compile(loss="mean_absolute_percentage_error", optimizer=opt)
119
# Log the per-epoch training results (loss / val_loss) to a CSV file.
# Make sure the output directory exists before anything is written into it.
os.makedirs("result", exist_ok=True)
csv_logger = CSVLogger('result/training_process.csv')

# Train the model; 20% of the samples are held out for validation.
# `epochs` is the Keras 2 keyword (the old `nb_epoch` spelling is deprecated).
# NOTE(review): Keras takes the validation_split samples from the end of the
# arrays, and the images were appended group "a" first — confirm this split
# is what is wanted.
history = model.fit(image_list, location_list, epochs=E, verbose=1, callbacks=[csv_logger], batch_size=BATCH_SIZE, validation_split=0.2)

# Evaluate the loss on the full data set.
loss = model.evaluate(image_list, location_list)

# Write the final result to a text file; `with` closes it even on error.
with open("result/RESULT.txt", "w") as fp:
    fp.write("\nloss:{}".format(loss))

# Show the final loss.
print("\nloss:{}\n".format(loss))

# Predictions of the trained model for every input image.
predicted = model.predict(image_list)
np.savetxt("result/max_stress_value_predict_result.csv", predicted, delimiter=",")

image_list = np.array(image_list)
location_list = np.array(location_list)
print(image_list.shape, image_list.dtype)  # (A + B, Z) = (100, 11250) with the settings above
print(location_list.shape, location_list.dtype)  # presumably (100,) — depends on the CSV layout
148
149
def get_batch(image_list, location_list, batch_size, shuffle=False):
    '''Generate mini-batches of (inputs, targets).

    image_list and location_list must be NumPy arrays with the same number
    of samples along the first axis. Each yielded pair holds at most
    batch_size samples; the last batch is shorter when the number of samples
    is not a multiple of batch_size. With shuffle=True the samples are
    visited in a random order, otherwise in their original order.
    '''
    num_samples = len(image_list)  # number of samples, not the array itself
    if shuffle:
        indices = np.random.permutation(num_samples)
    else:
        indices = np.arange(num_samples)

    # Stepping by batch_size visits ceil(num_samples / batch_size) windows.
    for start in range(0, num_samples, batch_size):
        excerpt = indices[start:start + batch_size]
        # Index the arguments themselves (not the module-level x / y sizes).
        yield image_list[excerpt], location_list[excerpt]
166
# Directory for the per-epoch prediction files.
out_dirpath = 'prediction'
os.makedirs(out_dirpath, exist_ok=True)

x_train, x_test, y_train, y_test = train_test_split(image_list, location_list, test_size=0.2)

# Train manually, one mini-batch at a time, so predictions can be saved after
# every epoch.
epochs = E
for i in range(epochs):
    for x_batch, y_batch in get_batch(x_train, y_train, batch_size=BATCH_SIZE, shuffle=True):
        # Train on a single mini-batch.
        model.train_on_batch(x_batch, y_batch)

    # After each epoch, run inference on the training split.
    # predict() is used because this is a regression model with one linear
    # output; predict_classes() is meant for classifiers.
    y_pred = model.predict(x_train)
    # Prediction and true value side by side, one row per sample.
    result = np.c_[y_pred, y_train]

    # Save the predictions of this epoch.
    # NOTE(review): fmt='%.0f' writes the values without decimals — confirm
    # that this precision is sufficient.
    filepath = os.path.join(out_dirpath, 'prediction_{}.csv'.format(i))
    np.savetxt(filepath, result, fmt='%.0f')
189
190
191
# Report the total elapsed wall-clock time and store it in a text file.
end_time = time.time()
print("\n終了時刻: ",end_time)
print ("かかった時間: ", (end_time - start_time))
ttime = end_time - start_time
# `with` guarantees the file is closed even if the write fails.
with open("result/TIME.txt","w") as fa:
    fa.write("\nかかった時間:{} ".format(ttime))
199
200
201
202

行動規範の内容に同意します

回答1件

ベストアンサー

prediction の結果を各エポック毎に保存するコールバック関数はないので、以下のようにする必要があります。

手順

ミニバッチを作成するジェネレーターを作る。
1 epoch 学習する。
predict() を実行し、推論する。
numpy.savetxt() で結果をファイルに保存する。

サンプルコード

モデル作成

ここは本題とは関係ありません。

import os

import numpy as np
import seaborn as sn
from keras.datasets import mnist
from keras.layers import Activation, BatchNormalization, Dense, Dropout
from keras.models import Sequential
from keras.utils.np_utils import to_categorical
from sklearn import metrics

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Print the shape of each array (the values noted are the ones reported for MNIST).
for label, data in (
    ('x_train.shape', x_train),  # (60000, 28, 28)
    ('y_train.shape', y_train),  # (60000,)
    ('x_test.shape', x_test),    # (10000, 28, 28)
    ('y_test.shape', y_test),    # (10000,)
):
    print(label, data.shape)

# Build the model: three Dense(10) layers, each followed by batch
# normalization; ReLU after the first two and softmax at the output.
model = Sequential([
    Dense(10, input_dim=784),
    BatchNormalization(),
    Activation('relu'),
    Dense(10),
    BatchNormalization(),
    Activation('relu'),
    Dense(10),
    BatchNormalization(),
    Activation('softmax'),
])
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Flatten every image to match the model input: (28, 28) -> (784,).
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Convert the training labels to a one-hot representation.
y_train_onehot = to_categorical(y_train)

ミニバッチを作成するジェネレーターを作る。

python
def get_batch(x, y, batch_size, shuffle=False):
    '''Generator that yields mini-batches of (x, y).

    x and y are indexed with an array of sample indices, so they must support
    NumPy-style fancy indexing. The final batch is shorter when len(x) is not
    a multiple of batch_size.
    '''
    num_samples = len(x)  # number of samples

    # Order in which the samples are visited.
    #   shuffled, num_samples=10: e.g. array([7, 8, 1, 2, 4, 6, 9, 5, 3, 0])
    #   in order, num_samples=10:      array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    order = np.random.permutation(num_samples) if shuffle else np.arange(num_samples)

    # Number of steps = ceil(number of samples / batch size),
    # e.g. np.ceil(10 / 3) = np.ceil(3.33...) = 4.
    num_steps = np.ceil(num_samples / batch_size).astype(int)

    for step in range(num_steps):
        lo = batch_size * step
        # Samples at positions [lo, lo + batch_size) of the visiting order.
        picked = order[lo:lo + batch_size]
        yield x[picked], y[picked]

[A, B, C, D, E, F, G, H, I, J] という10個のデータがあったとしたら、
バッチサイズ3の場合、
1step目 [A, B, C]
2step目 [D, E, F]
3step目 [G, H, I]
4step目 [J]
と4stepで全部のデータを一回ネットワークに流すことになるので、4step で 1epoch である。

その下の for の部分は以下のことをしている。

# Standalone illustration of the batching loop: 10 samples, batch size 3,
# hence 4 steps.
num_steps = 4
batch_size = 3
num_samples = 10
indices = np.arange(num_samples)

for itr in range(num_steps):
    start = batch_size * itr
    # A slice that runs past the end of the array is simply truncated, which
    # is why the last batch printed below has a single element.
    excerpt = indices[start:start + batch_size]
    print(excerpt)
# [0 1 2]
# [3 4 5]
# [6 7 8]
# [9]

学習する。

train_on_batch() で1バッチずつ学習する。
1 epoch が終わったら、predict() 関数で推論を行う。
推論結果を np.savetxt() でファイルに保存する。

python
# Directory in which the prediction files are stored.
out_dirpath = 'prediction'
os.makedirs(out_dirpath, exist_ok=True)

# Training.
epochs = 1
for i in range(epochs):  # one pass per epoch
    batches = get_batch(x_train, y_train_onehot, batch_size=128, shuffle=True)
    for x_batch, y_batch in batches:
        # x_batch / y_batch form one generated mini-batch; train on it.
        model.train_on_batch(x_batch, y_batch)
    # One full epoch of training is complete at this point.

    # Run inference on the test data after every epoch and put the predicted
    # label next to the true label, one row per sample.
    y_pred = model.predict_classes(x_test)
    result = np.c_[y_pred, y_test]

    # Store the predictions of this epoch.
    filepath = os.path.join(out_dirpath, 'prediction_{}.csv'.format(i))
    np.savetxt(filepath, result, fmt='%.0f')

追記

前提として、image_list は (サンプル数, Width * Height) の float 型の numpy 配列
location_list は (サンプル数,) の float 型の numpy 配列です。

print(image_list.shape, image_list.dtype)  # (300, 11250) float64
print(location_list.shape, location_list.dtype)  # (300, 2) float64

その場合、質問のコードに各エポックごとに推論結果を CSV に保存する処理を加えると、以下のようになります。

# Build the model.
model = Sequential()
# First hidden layer; it also declares the input size (Z flattened pixels).
model.add(Dense(8000, input_dim=Z, kernel_initializer='random_uniform', bias_initializer='zeros'))
model.add(LeakyReLU())
model.add(Dropout(0.2))
# Remaining hidden layers follow the same Dense -> LeakyReLU -> Dropout pattern.
for units, drop_rate in ((100, 0.075), (50, 0.075), (10, 0.0)):
    model.add(Dense(units, kernel_initializer='random_uniform', bias_initializer='zeros'))
    model.add(LeakyReLU())
    model.add(Dropout(drop_rate))
# Linear output layer with two units.
model.add(Dense(2))
model.add(Activation("linear"))
# Mean absolute percentage error as the loss, optimized with Adam.
model.compile(loss="mean_absolute_percentage_error", optimizer=Adam(lr=LR))

def get_batch(x, y, batch_size, shuffle=False):
    '''Generator function that produces mini-batches.

    Yields (x_batch, y_batch) pairs holding at most batch_size samples each.
    x and y are indexed with an array of sample indices, so NumPy arrays are
    expected. The sample order is random when shuffle is true.
    '''
    num_samples = len(x)
    if shuffle:
        indices = np.random.permutation(num_samples)
    else:
        indices = np.arange(num_samples)

    # Number of steps = ceil(number of samples / batch size),
    # e.g. np.ceil(10 / 3) = np.ceil(3.33...) = 4.
    num_steps = np.ceil(num_samples / batch_size).astype(int)

    # Start offset of every batch within the index array.
    offsets = (batch_size * step for step in range(num_steps))
    for begin in offsets:
        window = indices[begin:begin + batch_size]
        yield x[window], y[window]

# Directory in which the prediction files are stored.
out_dirpath = 'prediction'
os.makedirs(out_dirpath, exist_ok=True)

# Training.
epochs = 5
for i in range(epochs):  # one pass per epoch
    batches = get_batch(image_list, location_list, batch_size=BATCH_SIZE, shuffle=True)
    for x_batch, y_batch in batches:
        # Train on a single mini-batch.
        model.train_on_batch(x_batch, y_batch)
    print('training... epoch {}'.format(i))

    # Run inference after every epoch. Note that the inputs here are the same
    # arrays that were used for training. Predictions and true values are
    # placed side by side, one row per sample.
    y_pred = model.predict(image_list)
    result = np.c_[y_pred, location_list]

    # Store the predictions of this epoch.
    filepath = os.path.join(out_dirpath, 'prediction_{}.csv'.format(i))
    np.savetxt(filepath, result, fmt='%.0f')

投稿2018/10/23 14:50

編集2018/10/25 06:18

tiitoi

総合スコア21956

質問をすることでしか得られない、回答やアドバイスがある。

15分調べてもわからないことは、質問しよう！

kerasを用いて，画像を入力データとして，数値予測する回帰分析をしています．

手順

サンプルコード

モデル作成

ミニバッチを作成するジェネレーターを作る。

学習する。

追記

関連した質問