Background and goal

I am a programming beginner. Using the book 「詳解・ディープラーニング」 as a reference, I am testing whether I can save and restore an LSTM model.

Both programs run, but the result before saving and the result after restoring do not match. Since the random seeds are set identically in both scripts, the two results should be the same. My guess is that the saving step is incomplete, but I cannot tell what is missing.

In particular, I would like to save the val_loss values so that I can plot them on a graph after restoring the model. If there are any missing pieces or problems in my code, I would appreciate your advice. I am a beginner and may be misunderstanding something basic; thank you in advance.
Environment
OS: win10
python: 3.6
tensorflow: 1.14.0
Source code (when saving)
```python
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import os

MODEL_DIR = os.path.join(os.path.dirname('./'), 'model')

if os.path.exists(MODEL_DIR) is False:
    os.mkdir(MODEL_DIR)

np.random.seed(0)
tf.set_random_seed(1234)


def inference(x, n_batch, maxlen=None, n_hidden=None, n_out=None):
    def weight_variable(shape, name=None):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial, name=name)

    def bias_variable(shape, name=None):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial, name=name)

    cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    initial_state = cell.zero_state(n_batch, tf.float32)

    state = initial_state
    outputs = []  # keep the hidden-layer output of every past time step
    with tf.variable_scope('LSTM'):
        for t in range(maxlen):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(x[:, t, :], state)
            outputs.append(cell_output)

    output = outputs[-1]

    V = weight_variable([n_hidden, n_out], name='V')
    c = bias_variable([n_out], name='c')
    y = tf.matmul(output, V) + c  # linear activation (?)

    return y


def loss(y, t):
    mse = tf.reduce_mean(tf.square(y - t))
    return mse


def training(loss):
    optimizer = \
        tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999)

    train_step = optimizer.minimize(loss)
    return train_step


if __name__ == '__main__':
    def mask(T=200):
        mask = np.zeros(T)
        indices = np.random.permutation(np.arange(T))[:2]
        mask[indices] = 1
        return mask

    def toy_problem(N=10, T=200):
        signals = np.random.uniform(low=0.0, high=1.0, size=(N, T))
        masks = np.zeros((N, T))
        for i in range(N):
            masks[i] = mask(T)

        data = np.zeros((N, T, 2))
        data[:, :, 0] = signals[:]
        data[:, :, 1] = masks[:]
        target = (signals * masks).sum(axis=1).reshape(N, 1)

        return (data, target)

    '''
    Generate the data
    '''
    N = 100
    T = 20
    maxlen = T

    X, Y = toy_problem(N=N, T=T)

    N_train = int(N * 0.9)
    N_validation = N - N_train

    X_train, X_validation, Y_train, Y_validation = \
        train_test_split(X, Y, test_size=N_validation)

    '''
    Model setup
    '''
    n_in = len(X[0][0])  # 2
    n_hidden = 10
    n_out = len(Y[0])  # 1

    x = tf.placeholder(tf.float32, shape=[None, maxlen, n_in])
    t = tf.placeholder(tf.float32, shape=[None, n_out])
    n_batch = tf.placeholder(tf.int32, shape=[])

    y = inference(x, n_batch, maxlen=maxlen, n_hidden=n_hidden, n_out=n_out)
    loss = loss(y, t)
    train_step = training(loss)

    history = {
        'val_loss': []
    }

    '''
    Train the model
    '''
    epochs = 30
    batch_size = 10

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    sess = tf.Session()
    sess.run(init)

    n_batches = N_train // batch_size

    for epoch in range(epochs):
        X_, Y_ = shuffle(X_train, Y_train)

        for i in range(n_batches):
            start = i * batch_size
            end = start + batch_size

            sess.run(train_step, feed_dict={
                x: X_[start:end],
                t: Y_[start:end],
                n_batch: batch_size
            })

        # Evaluate on the validation data
        val_loss = loss.eval(session=sess, feed_dict={
            x: X_validation,
            t: Y_validation,
            n_batch: N_validation
        })

        history['val_loss'].append(val_loss)
        print('epoch:', epoch,
              ' validation loss:', val_loss)

    '''
    Visualize the training progress
    '''
    loss = history['val_loss']

    model_path = saver.save(sess, MODEL_DIR + '/model.ckpt')
    print('Model saved to:', model_path)

    plt.rc('font', family='serif')
    fig = plt.figure()
    plt.plot(range(len(loss)), loss, label='loss', color='black')
    plt.xlabel('epochs')
    plt.show()
```
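As far as I understand, saver.save() writes only the TensorFlow variables into the checkpoint, so the Python list history['val_loss'] is probably not included in it. One idea I am considering (my own addition, not from the book; the file name val_loss.npy is just a placeholder) is to save the history separately right after the checkpoint:

```python
# My own idea (not from the book): saver.save() stores only TF variables,
# so persist the per-epoch validation losses separately as a .npy file.
np.save(os.path.join(MODEL_DIR, 'val_loss.npy'),
        np.array(history['val_loss']))
```

Is something like this the right direction?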
Result (when saving)
```
epoch: 0  validation loss: 1.2469753
epoch: 1  validation loss: 1.192979
epoch: 2  validation loss: 1.1173694
epoch: 3  validation loss: 1.0082648
epoch: 4  validation loss: 0.86058235
epoch: 5  validation loss: 0.6935085
epoch: 6  validation loss: 0.5343941
epoch: 7  validation loss: 0.40002504
epoch: 8  validation loss: 0.29248607
epoch: 9  validation loss: 0.20975745
epoch: 10  validation loss: 0.15537141
epoch: 11  validation loss: 0.12042664
epoch: 12  validation loss: 0.10096844
epoch: 13  validation loss: 0.09290114
epoch: 14  validation loss: 0.08924503
epoch: 15  validation loss: 0.0879887
epoch: 16  validation loss: 0.08780083
epoch: 17  validation loss: 0.087485485
epoch: 18  validation loss: 0.087364085
epoch: 19  validation loss: 0.08751848
epoch: 20  validation loss: 0.08775783
epoch: 21  validation loss: 0.08775983
epoch: 22  validation loss: 0.08768336
epoch: 23  validation loss: 0.087852776
epoch: 24  validation loss: 0.08764634
epoch: 25  validation loss: 0.08769642
epoch: 26  validation loss: 0.087558664
epoch: 27  validation loss: 0.0876438
epoch: 28  validation loss: 0.08760041
epoch: 29  validation loss: 0.0880862
Model saved to: .\model/model.ckpt
```
Source code (when restoring)
```python
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import os

MODEL_DIR = os.path.join(os.path.dirname('./'), 'model')

if os.path.exists(MODEL_DIR) is False:
    os.mkdir(MODEL_DIR)

np.random.seed(0)
tf.set_random_seed(1234)


def inference(x, n_batch, maxlen=None, n_hidden=None, n_out=None):
    def weight_variable(shape, name=None):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial, name=name)

    def bias_variable(shape, name=None):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial, name=name)

    cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    initial_state = cell.zero_state(n_batch, tf.float32)

    state = initial_state
    outputs = []  # keep the hidden-layer output of every past time step
    with tf.variable_scope('LSTM'):
        for t in range(maxlen):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(x[:, t, :], state)
            outputs.append(cell_output)

    output = outputs[-1]

    V = weight_variable([n_hidden, n_out], name='V')
    c = bias_variable([n_out], name='c')
    y = tf.matmul(output, V) + c  # linear activation (?)

    return y


def loss(y, t):
    mse = tf.reduce_mean(tf.square(y - t))
    return mse


def training(loss):
    optimizer = \
        tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999)

    train_step = optimizer.minimize(loss)
    return train_step


if __name__ == '__main__':
    def mask(T=200):
        mask = np.zeros(T)
        indices = np.random.permutation(np.arange(T))[:2]
        mask[indices] = 1
        return mask

    def toy_problem(N=10, T=200):
        signals = np.random.uniform(low=0.0, high=1.0, size=(N, T))
        masks = np.zeros((N, T))
        for i in range(N):
            masks[i] = mask(T)

        data = np.zeros((N, T, 2))
        data[:, :, 0] = signals[:]
        data[:, :, 1] = masks[:]
        target = (signals * masks).sum(axis=1).reshape(N, 1)

        return (data, target)

    '''
    Generate the data
    '''
    N = 100
    T = 20
    maxlen = T

    X, Y = toy_problem(N=N, T=T)

    N_train = int(N * 0.9)
    N_validation = N - N_train

    X_train, X_validation, Y_train, Y_validation = \
        train_test_split(X, Y, test_size=N_validation)

    '''
    Model setup
    '''
    n_in = len(X[0][0])  # 2
    n_hidden = 10
    n_out = len(Y[0])  # 1

    x = tf.placeholder(tf.float32, shape=[None, maxlen, n_in])
    t = tf.placeholder(tf.float32, shape=[None, n_out])
    n_batch = tf.placeholder(tf.int32, shape=[])

    y = inference(x, n_batch, maxlen=maxlen, n_hidden=n_hidden, n_out=n_out)
    loss = loss(y, t)
    train_step = training(loss)

    history = {
        'val_loss': []
    }

    '''
    Restore the model
    '''
    epochs = 30
    batch_size = 10

    # init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    sess = tf.Session()
    # sess.run(init)

    saver.restore(sess, MODEL_DIR + '/model.ckpt')
    print('Model restored.')

    n_batches = N_train // batch_size

    '''
    Visualize the training progress
    '''
    loss = history['val_loss']

    plt.rc('font', family='serif')
    fig = plt.figure()
    plt.plot(range(len(loss)), loss, label='loss', color='black')
    plt.xlabel('epochs')
    plt.show()
```
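To check whether the restore itself worked, I am thinking of evaluating the loss tensor on the validation data right after saver.restore() (and before loss is rebound to history['val_loss']), then comparing the value with the last val_loss printed during training (0.0880862 in my run). A sketch of what I mean (my own addition, not from the book; it assumes the val_loss.npy file from the save-side sketch above):

```python
# Sketch (my own addition): evaluate the restored model on the validation
# set; if restoring worked, this should match the final training val_loss.
restored_val_loss = sess.run(loss, feed_dict={
    x: X_validation,
    t: Y_validation,
    n_batch: N_validation
})
print('val_loss after restore:', restored_val_loss)

# Reload the separately saved per-epoch losses for plotting,
# since history['val_loss'] is empty in this script.
history['val_loss'] = np.load(
    os.path.join(MODEL_DIR, 'val_loss.npy')).tolist()
```

Would this be a valid way to verify the restore, or am I misunderstanding something?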
Result (when restoring)
```
Model restored.
```