tensorflow のセッションの仕様に関して

前提・実現したいこと

入力データをRNNとCNNにより特徴抽出し，その特徴量をK-meansによりクラスタリングを行います．
K-meansのクラスタリング結果を正解ラベルとして，損失関数を計算しパラメータを更新することを目指しています．

発生している問題・エラーメッセージ

上記のことを再現する上で，一度特徴抽出までを eval により feed_dict し実行し，その実行結果でK-meansを行い正解ラベルを作り，
さらに，モデルのパラメータを更新させるために train_step を再び feed_dict し実行しています．
その結果グラフを自動生成する問題なのかなんのエラーメッセージもなく，gpuでの実行を行っていることを伝えるメッセージだけでプログラムが終了してしまいます．
このメッセージは通常に動作している時も表示されます．
いまいちtensorflowのsessionの特性がわからず，これが仕様なのか工夫すれば実現できるのか，わかる方がいればご教授していただきたいです．

2019-08-23 03:26:01.306046: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-08-23 03:26:01.543479: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:964] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2019-08-23 03:26:01.543992: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: 
name: GeForce RTX 2080 major: 7 minor: 5 memoryClockRate(GHz): 1.86
pciBusID: 0000:01:00.0
totalMemory: 7.76GiB freeMemory: 7.59GiB
2019-08-23 03:26:01.544005: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0
2019-08-23 03:26:01.751846: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-08-23 03:26:01.751877: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988]      0 
2019-08-23 03:26:01.751882: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0:   N 
2019-08-23 03:26:01.752161: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7308 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080, pci bus id: 0000:01:00.0, compute capability: 7.5)

該当のソースコード

python
1def inference(x, n_batch, keep_prob, pool_size, filter_size, conv_stride, pool_stride,
2    n_convs, input_len=None, n_in = None, n_hidden=None, n_out=None):
3    def weight_variable(shape):
4        initial = tf.truncated_normal(shape, stddev=0.01)
5        return tf.Variable(initial, name='V')
6
7    def bias_variable(shape):
8        initial = tf.zeros(shape, dtype=tf.float32)
9        return tf.Variable(initial, name='c')
10
11    with tf.name_scope('cell_initial'):
12        cell = tf.contrib.rnn.GRUCell(n_hidden)
13        initial_state = cell.zero_state(tf.shape(x)[0], tf.float32)
14        state = initial_state
15        outputs = []
16
17    with tf.variable_scope('RNN'):
18        for t in range(input_len):
19            if t > 0:
20                tf.get_variable_scope().reuse_variables()
21            (cell_output, state) = cell(x[:, t, :], state)
22
23            outputs.append(cell_output)
24
25    outputs = tf.reshape(outputs, [n_out, n_batch, n_hidden * input_len])
26
27    V = weight_variable([n_out, n_hidden * input_len])
28    c = bias_variable([n_out, n_hidden * input_len])
29
30    rnn_output = outputs * V + c
31
32    rnn_output = tf.transpose(rnn_output, [1, 2, 0])
33    # convnet
34    def conv1d(x, W, stride):
35        return tf.nn.conv1d(x, W, stride=stride , padding='VALID')#x = [batch, in_width(input_len), in_channels(n_in)]
36#(1, stride, 1)
37    def max_pool(x, stride):
38        return tf.layers.max_pooling1d(x, pool_size=pool_size, #[1, 3, 1]\
39            strides=stride, padding='valid')#x = (batch, length, channels)
40
41    def weight_variable_cnn(shape):
42        initial = tf.truncated_normal(shape, stddev=0.01)
43        return tf.Variable(initial, name='V')
44
45    def bias_variable_cnn(shape):
46        initial = tf.zeros(shape, dtype=tf.float32)
47        return tf.Variable(initial, name='c')
48
49    # conv layers
50    with tf.variable_scope('CNN'):
51        for n_conv in range(n_convs):
52            if n_conv > 0:
53                tf.get_variable_scope().reuse_variables()
54            else:
55                pool = rnn_output # Convへの初期値定義
56                cnn_out_dim = input_len * n_hidden
57
58            w_conv = weight_variable_cnn([filter_size[n_conv], n_in, n_out])
59            b_conv = bias_variable_cnn([n_in])
60
61            # 畳み込み層
62            conv = tf.nn.relu(conv1d(pool, w_conv, conv_stride[n_conv]) + b_conv)
63            # プーリング層
64            pool = max_pool(conv, pool_stride[n_conv])
65
66            cnn_out_dim = (cnn_out_dim - filter_size[n_conv]) // conv_stride[n_conv] + 1
67            cnn_out_dim = (cnn_out_dim - pool_size) // pool_stride[n_conv] + 1
68            print(cnn_out_dim)
69
70    # 完全結合層（出力層）
71    w_out = weight_variable_cnn([cnn_out_dim, cnn_out_dim])
72    b_out = bias_variable_cnn([cnn_out_dim])
73
74    cnn_out = tf.nn.relu(tf.matmul(tf.reshape(pool, [n_batch, cnn_out_dim]), w_out) + b_out) #(n_batch, cnn_out_dim, n_out)
75    cnn_out_drop = tf.nn.dropout(cnn_out, keep_prob)
76
77    return cnn_out_drop, cnn_out_dim
78
79
80def loss(cnn_out, t, cnn_out_dim, n_clusters):
81    def softmax_weight(shape):
82        initial = tf.truncated_normal(shape, stddev=0.01)
83        return tf.Variable(initial, name='V')
84
85    def softmax_bias(shape):
86        initial = tf.zeros(shape, dtype=tf.float32)
87        return tf.Variable(initial, name='c')
88
89    w_soft = softmax_weight([cnn_out_dim, n_clusters])
90    b_soft = softmax_bias([n_clusters])
91
92    softmax_input = tf.matmul(cnn_out, w_soft) + b_soft #[n_batch, n_cluster]
93    softmax_output = tf.nn.softmax(softmax_input)
94
95    with tf.variable_scope('loss'):
96        cross_entropy = tf.reduce_mean(-tf.reduce_sum(t * tf.log(softmax_output), axis=1))
97        return cross_entropy
98
99def training(loss, learning_rate):
100    with tf.variable_scope('train_step'):
101        optimizer = \
102            tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999)
103        train_step = optimizer.minimize(loss)
104        return train_step
105
106x = tf.placeholder(tf.float32, shape=[None, input_len, n_in])
107t = tf.placeholder(tf.float32, shape=[None, n_clusters])
108n_batch = tf.placeholder(tf.int32, shape=[])
109keep_prob = tf.placeholder(tf.float32, shape=[])
110
111y, cnn_out_dim = inference(x, n_batch, keep_prob,
112              input_len=input_len,
113              n_hidden=n_hidden,
114              n_in=n_in,
115              n_out=n_out,
116              pool_size=pool_size,
117              filter_size=filter_size,
118              conv_stride=conv_stride,
119              pool_stride=pool_stride,
120              n_convs=n_convs)
121
122loss = loss(y, t, cnn_out_dim, n_clusters)
123train_step = training(loss = loss, learning_rate = learning_rate)
124
125init = tf.global_variables_initializer()
126sess = tf.Session()
127sess.run(init)
128
129input_data = eval_X[k * 24: (k + learning_data_day_len-1) * 24]
130indices = np.array(range(input_data.shape[0]))
131input_data_train, input_data_validation, indices_train, indices_validation = \
132    train_test_split(input_data, indices, test_size = N_validation)
133
134penalty = 0
135for epoch in range(epochs):
136    X_ = shuffle(input_data_train)
137    if epoch == 0 or penalty > 3:
138        tf.reset_default_graph()
139        centroid = 'k-means++'
140        penalty = 0
141        print('reset')
142
143    with tf.name_scope('train'):
144        for h in range(n_batches):
145            start = h * batch_size
146            end = start + batch_size
147
148            features = y.eval(
149                session = sess,
150                feed_dict={
151                    x: X_[start:end],
152                    n_batch: batch_size,
153                    keep_prob: 0.5
154                }
155            )
156
157            print(features)
158
159            # KMeans
160            kmeans_model = KMeans(n_clusters=n_clusters, init = centroid, n_jobs=-1).fit(features)
161            labels = kmeans_model.labels_
162            centroid = kmeans_model.cluster_centers_
163
164            # 擬似ラベル作成（one hot)
165            cent_one_hot = np.identity(n_clusters)[labels]
166            print(cent_one_hot)
167
168            sess.run(train_step, feed_dict={
169                x: X_[start:end],
170                t: cent_one_hot,
171                n_batch: batch_size,
172                keep_prob: 0.5,
173                # centroid: centroid_
174            })
175
176    cross_loss = loss.eval(session = sess,
177    feed_dict={
178        cnn_out: features,
179        t: cent_one_hot
180    })
181
182    if epoch != 0 and cross_loss > cross_loss_old and cross_loss > 1:
183        penalty += 1
184
185    cross_loss_old = cross_loss
186
187    print('epoch:', epoch,
188          ' validation loss:', cross_loss)
189
190    if cross_loss < 0.01:
191        break
192

試したこと

コードの途中にあるbreakが原因かとも思いましたが消去しても変わりませんでした．
また，print などを使ってどこまで実行できているか確認したところ， y.eval までは実行できていたのですが，
train_step は呼び出しもされずに終了してしまっているようです．
やはり複数回 feed を行うと正常に optimize が行われないのでしょうか．