モデルの学習のさせ方が分かりません

前提・実現したいこと

ここ[https://github.com/floydhub/pix2code-template/blob/master/pix2code.ipynb]で配布されている画像キャプション生成モデルにAttentionを追加しようと改良をしています。

発生している問題・エラーメッセージ

学習させようとすると、model.fitの部分でエラー表示はないものの、jupyter notebookuが止まってしまい学習ができない状態が続いています。
初心者なため、学習時のデータの与え方に問題があるのか、それ以外のモデルの構築の部分に問題があるのか分からなくて困っています。

該当のソースコード

Python
1from os import listdir
2from numpy import array
3import numpy as np
4import os
5from keras.layers import Activation
6from keras.preprocessing.text import Tokenizer, one_hot
7from keras.preprocessing.sequence import pad_sequences
8from keras.models import Model, Sequential, model_from_json
9from keras.utils import to_categorical
10from keras.layers.core import Dense, Dropout, Flatten
11from keras.optimizers import RMSprop
12from keras.layers.convolutional import Conv2D
13from keras.callbacks import ModelCheckpoint
14from keras.layers import Embedding, TimeDistributed, RepeatVector, LSTM, concatenate , Input, Reshape, Dense,MaxPooling2D,dot
15from keras.preprocessing.image import array_to_img, img_to_array, load_img
16import tensorflow as tf
17# Path to Dataset
18#DS_PATH = 'resources/eval/training_features/'
19DS_PATH = 'resources/eval/eval1/'
20DS_EVAL_PATH = 'resources/eval/eval1/' # edit to your /path/to/eval/ds
21import matplotlib.pyplot as plt
22
23# Read a file and return a string
24def load_doc(filename):
25    file = open(filename, 'r')
26    text = file.read()
27    file.close()
28    return text
29
30def load_data(data_dir):
31    text = []
32    images = []
33    # Load all the files and order them
34    all_filenames = listdir(data_dir)
35    all_filenames.sort()
36    for filename in (all_filenames):
37        if filename[-3:] == "npz":
38            # Load the images already prepared in arrays
39            image = np.load(data_dir+filename)
40            images.append(image['features'])
41        else:
42            # Load the boostrap tokens and rap them in a start and end tag
43            syntax = '<START> ' + load_doc(data_dir+filename) + ' <END>'
44            # Seperate all the words with a single space
45            syntax = ' '.join(syntax.split())
46            # Add a space after each comma
47            syntax = syntax.replace(',', ' ,')
48            text.append(syntax)
49    images = np.array(images, dtype=np.float32)
50    return images, text
51
52# Get images and text
53train_features, texts = load_data(DS_PATH)
54# Initialize the function to create the vocabulary 
55tokenizer = Tokenizer(filters='', split=" ", lower=False)
56# Create the vocabulary 
57tokenizer.fit_on_texts([load_doc('resources/bootstrap.vocab')])
58# Add one spot for the empty word in the vocabulary 
59VOCAB_SIZE = len(tokenizer.word_index) + 1
60# Map the input sentences into the vocabulary indexes
61train_sequences = tokenizer.texts_to_sequences(texts)
62# The longest set of boostrap tokens
63max_sequence = max(len(s) for s in train_sequences)
64# Specify how many tokens to have in each input sentence
65max_length = 48
66
67def preprocess_data(texts, features, max_sequence):
68    X, y, image_data = list(), list(), list()
69    sequences = tokenizer.texts_to_sequences(texts)
70    for img_no, seq in enumerate(sequences):
71        for i in range(1, len(seq)):
72            # Add the sentence until the current count(i) and add the current count to the output
73            in_seq, out_seq = seq[:i], seq[i]
74            # Pad all the input token sentences to max_sequence
75            in_seq = pad_sequences([in_seq], maxlen=max_sequence)[0]
76            # Turn the output into one-hot encoding
77            out_seq = to_categorical([out_seq], num_classes=VOCAB_SIZE)[0]
78            # Add the corresponding image to the boostrap token file
79            image_data.append(features[img_no])
80            # Cap the input sentence to MAX_LEN tokens and add it
81            X.append(in_seq[-MAX_LEN:])
82            y.append(out_seq)
83    return np.array(image_data), np.array(X), np.array(y)
84
85# Data generator, intended to be used in a call to model.fit_generator()
86def data_generator(descriptions, features, n_step, max_sequence):
87    # loop until we finish training
88    while 1:
89        # loop over photo identifiers in the dataset
90        for i in range(0, len(descriptions), n_step):
91            Ximages, XSeq, y = list(), list(),list()
92            for j in range(i, min(len(descriptions), i+n_step)):
93                image = features[j]
94                # retrieve text input
95                desc = descriptions[j]
96                # Generate input-output pairs
97                in_img, in_seq, out_word = preprocess_data([desc], [image], max_sequence)
98                for k in range(len(in_img)):
99                    Ximages.append(in_img[k])
100                    XSeq.append(in_seq[k])
101                    y.append(out_word[k])
102            # yield this batch of samples to the model
103    yield [[array(Ximages), array(XSeq)], array(y)]
104
105#Create the Image-encoder
106visual_input = Input(shape=(256, 256, 3,))
107image_model = Conv2D(32, (3, 3), padding='valid', activation='relu')(visual_input)
108image_model = Conv2D(32, (3, 3), padding='valid', activation='relu')(image_model)
109image_model = MaxPooling2D(pool_size=(2, 2))(image_model)
110image_model = Dropout(0.25)(image_model)
111
112image_model = Conv2D(64, (3, 3), padding='valid', activation='relu')(image_model)
113image_model = Conv2D(64, (3, 3), padding='valid', activation='relu')(image_model)
114image_model = MaxPooling2D(pool_size=(2, 2))(image_model)
115image_model = Dropout(0.25)(image_model)
116
117image_model = Conv2D(128, (3, 3), padding='valid', activation='relu')(image_model)
118image_model = Conv2D(128, (3, 3), padding='valid', activation='relu')(image_model)
119image_model = MaxPooling2D(pool_size=(2, 2))(image_model)
120image_model = Dropout(0.25)(image_model)
121u = Flatten()(image_model)
122encoded_image = RepeatVector(MAX_LEN)(u)
123
124#Create the Text-encoder
125language_input = Input(shape=(MAX_LEN,))
126language_model = Embedding(VOCAB_SIZE, 50, input_length=MAX_LEN, mask_zero=True)(language_input)
127language_model = LSTM(128, return_sequences=True)(language_model)
128language_model = LSTM(128, return_sequences=True)(language_model)
129
130#Create the decoder
131decoder = concatenate([encoded_image, language_model])
132decoder = LSTM(512, return_sequences=True)(decoder)
133decoder, _, _ = LSTM(18,return_sequences=True, return_state=True)(decoder)
134
135u_map = Reshape((28*28,128))(u)
136
137# 1. スコアの計算
138dense_att = Dense(18)
139score = dot([decoder, dense_att(u_map)], axes=-1)
140
141# 2. Attentionの重みの計算
142attention = Activation('softmax')(score)
143
144# 3. 文脈ベクトルの計算
145context = dot([attention, u_map], axes=(2,1))
146
147# 4. 出力ベクトルの計算
148attention_dense = Dense(512, activation='tanh')
149output_dense = Dense(18, activation='softmax')
150concat = concatenate([context, decoder], axis=2)
151attentional = attention_dense(concat)
152y_pred = output_dense(attentional)
153
154# Compile the model
155model = Model(inputs=[visual_input, language_input], outputs=y_pred)
156#image_model.summary()
157model.summary()
158
159# save model
160model.save("pix2code_model.h5")
161filepath="weights.hdf5"
162checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_weights_only=True, period=2)
163callbacks_list = [checkpoint]
164
165# Optimizer
166optimizer = RMSprop(lr=0.0001, clipvalue=1.0)
167model.compile(loss='categorical_crossentropy', optimizer=optimizer,  metrics = ["acc"])
168
169#Train the model
170history = model.fit_generator(data_generator(texts, train_features, 1, MAX_SEQUENCE),steps_per_epoch=5,epochs=2,callbacks=callbacks_list,verbose=1)

試したこと

参考になる論文や参考書を調べましたが、答えにたどり着けませんでした

補足情報（FW/ツールのバージョンなど）

keras
tensorflow
jupyter notebook

Q71

2019/07/28 13:37

どのようなマシンで、どのようなデータ（大きさ、量）を学習させましたか。また、「止まった」というのを、どのようにして確認しましたか。学習には、大変時間がかかります。学習しているのではなく、止まっていると判断した根拠は何ですか。

plito_zaemon

2019/07/29 05:40

PC：Alienware Aurora R6 メモリ：16GB GPU：NVIDIA GeForce GTX1080 学習させるデータ：.gui形式のプログラムコード(１個あたり1KB)が250個＋　　　　　　　　　.npz形式のファイル(１個あたり18KBぐらい)が250個　で合計3.78MBです学習が止まっていると判断した根拠：今は自分でプログラムを改良しているので学習データを軽量なものを使っていますが、改良の前は中身が同じ形式のデータ11MBを学習させていました。jupyter notebookで学習の経過を表示するようにしているのですが、途中経過のバーが表示されず、一日放置してもIn [*]から動きません。　説明が下手ですいません...