質問編集履歴

修正

2021/07/07 04:48

投稿

退会済みユーザー

スコア0

title CHANGED Viewed

File without changes

body CHANGED Viewed

@@ -66,26 +66,69 @@
     ValueError: Input 0 of layer dense is incompatible with the layer: expected axis -1 of input shape to have value 4096 but received input with shape [None, 1000]
+```
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.utils import to_categorical
+from numpy import array
+#画像と出力単語を紐づける関数
+def create_sequences(tokenizer, max_length, descriptions, photos):
+    X1, X2, y = list(), list(), list()#X1が入力画像、X2が入力語、yがX1とX2に対応する出力語
+    #各画像名でループ
+    for key, desc_list in descriptions.items():
+        #各画像のキャプションでループ
+        for desc in desc_list:
-### 該当のソースコード
+            #シーケンスをエンコードする
+            seq = tokenizer.texts_to_sequences([desc])[0]
+            #1つのシーケンスを複数のX、Yペアに分割する
+            for i in range(1, len(seq)):
+                #入力と出力のペアに分割する
+                in_seq, out_seq = seq[:i], seq[i]
+                #行列のサイズを最大の単語数に合わせる
+                in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
+                #出力シーケンス
+                out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
+                #全てをarrayに格納
+                X1.append(photos[key][0])
+                X2.append(in_seq)
+                y.append(out_seq)
+    return array(X1), array(X2), array(y)
+#トレーニングデータの入力画像、入力語、出力語を紐付ける
+X1train, X2train, ytrain = create_sequences(tokenizer, max_length, train_descriptions, train_features)
+print(X1train)
+#バリデーションデータの入力画像、入力語、出力語を紐付ける
+X1val, X2val, yval = create_sequences(tokenizer, max_length, val_descriptions, val_features)
 from tensorflow.keras.layers import Input,Dense,LSTM,Embedding,Dropout
 from keras.layers.merge import add
-モデルを定義する関数
+#モデルを定義する関数
 def define_model(vocab_size, max_length):
-    画像の特徴を入力するレイヤ
+    #画像の特徴を入力するレイヤ
     inputs1 = Input(shape=(4096,))
     fe1 = Dropout(0.5)(inputs1)
     fe2 = Dense(256, activation='relu')(fe1)
-    文章を入力するレイヤ
+    #文章を入力するレイヤ
     inputs2 = Input(shape=(max_length,))
     se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
     se2 = Dropout(0.5)(se1)
     se3 = LSTM(256)(se2)
-    上の二つの出力を統合する部分
+    #上の二つの出力を統合する部分
     decoder1 = add([fe2, se3])
     decoder2 = Dense(256, activation='relu')(decoder1)
     outputs = Dense(vocab_size, activation='softmax')(decoder2)
-    モデルの定義．二つを入力にとって一つを出力する形になる
+    #モデルの定義．二つを入力にとって一つを出力する形になる
     model = Model(inputs=[inputs1, inputs2], outputs=outputs)
     model.compile(loss='categorical_crossentropy', optimizer='adam')
-    return model
+    return model
+from tensorflow.keras.callbacks import ModelCheckpoint
+#モデルの定義
+model = define_model(vocab_size, max_length)
+#コールバックを定義する
+filepath = 'model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
+checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
+#学習
+model.fit([X1train, X2train], ytrain, epochs=10, verbose=2, callbacks=[checkpoint], validation_data=([X1val, X2val], yval))
+```

表記方法のミスのため訂正

2021/07/07 04:48

投稿

退会済みユーザー

スコア0

title CHANGED Viewed

File without changes

body CHANGED Viewed

@@ -70,7 +70,7 @@
 from tensorflow.keras.layers import Input,Dense,LSTM,Embedding,Dropout
 from keras.layers.merge import add
-#モデルを定義する関数
+モデルを定義する関数
 def define_model(vocab_size, max_length):
     画像の特徴を入力するレイヤ
     inputs1 = Input(shape=(4096,))