最近、Deep Learningの勉強を始めた、プログラミング初心者です。
この記事のコードを試そうと思い、
コピー&ペーストしたのはいいのですが、
lueError: Layer decoder_Dense expects 1 inputs, but it received 3 input tensors. Input received: [<tf.Tensor 'decoder_LSTM_35/transpose_2:0' shape=(?, ?, 800) dtype=float32>, <tf.Tensor 'decoder_LSTM_35/while/Exit_3:0' shape=(?, 800) dtype=float32>, <tf.Tensor 'decoder_LSTM_35/while/Exit_4:0' shape=(?, 800) dtype=float32>]
と、エラーが出てしまいます。
解決策をよければ教えてください。本当にお願いします。
文章が下手で申し訳ありません。
ソースコードは下記の通りです。import部分と、データの読み込みの部分は省略させて頂きます。
def __init__(self,maxlen_e,maxlen_d,n_hidden,input_dim,vec_dim,output_dim): self.maxlen_e=maxlen_e self.maxlen_d=maxlen_d self.n_hidden=n_hidden self.input_dim=input_dim self.vec_dim=vec_dim self.output_dim=output_dim def create_model(self): print('#2') #エンコーダー encoder_input = Input(shape=(self.maxlen_e,), dtype='int16', name='encoder_input') e_i = Embedding(output_dim=self.vec_dim, input_dim=self.input_dim, #input_length=self.maxlen_e, mask_zero=True, embeddings_initializer=uniform(seed=20170719))(encoder_input) e_i=BatchNormalization(axis=-1)(e_i) e_i=Masking(mask_value=0.0)(e_i) encoder_outputs, state_h, state_c =LSTM(self.n_hidden,name='encoder_LSTM',return_state=True,kernel_initializer=glorot_uniform(seed=20170719), recurrent_initializer=orthogonal(gain=1.0, seed=20170719), dropout=0.5, recurrent_dropout=0.5 )(e_i) encoder_states = [state_h, state_c] encoder_model = Model(inputs=encoder_input, outputs=[encoder_outputs,state_h,state_c]) #レイヤ定義 decoder_LSTM = LSTM(self.n_hidden, name='decoder_LSTM', return_sequences=True, return_state=True, kernel_initializer=glorot_uniform(seed=20170719), recurrent_initializer=orthogonal(gain=1.0, seed=20170719),dropout=0.5, recurrent_dropout=0.5) decoder_Dense = Dense(self.output_dim,activation='softmax', name='decoder_Dense',kernel_initializer=glorot_uniform(seed=20170719)) #入力 decoder_inputs = Input(shape=(self.maxlen_d,), dtype='int16', name='decoder_inputs') d_i = Embedding(output_dim=self.vec_dim, input_dim=self.input_dim,input_length=self.maxlen_d,mask_zero=True,embeddings_initializer=uniform(seed=20170719))(decoder_inputs) #LSTM d_outputs, _, _ =decoder_LSTM(d_i,initial_state=encoder_states) decoder_outputs = decoder_Dense(d_outputs) model = Model(inputs=[encoder_input, decoder_inputs], outputs=decoder_outputs) model.compile(loss="categorical_crossentropy",optimizer="Adam", metrics=['categorical_accuracy']) decoder_state_input_h = Input(shape=(self.n_hidden,),name='input_h') decoder_state_input_c = Input(shape=(self.n_hidden,),name='input_c') decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c] #LSTM decoder_lstm,state_h, state_c =decoder_LSTM(d_input, initial_state=decoder_states_inputs),decoder_LSTM(d_input,initial_state=decoder_states_inputs),decoder_LSTM(d_input,initial_state=decoder_states_inputs) decoder_states = [state_h,state_c] decoder_res = decoder_Dense(decoder_lstm) decoder_model = Model( [decoder_inputs] + decoder_states_inputs, [decoder_res] + decoder_states) return model ,encoder_model ,decoder_model def eval_perplexity(self,model,e_test,d_test,t_test,batch_size) : row=e_test.shape[0] list_loss=[] s_time = time.time() n_batch = math.ceil(row/batch_size) n_loss=0 sum_loss=0. for i in range(0,n_batch) : s = i*batch_size e = min([(i+1) * batch_size,row]) e_on_batch = e_test[s:e,:] d_on_batch = d_test[s:e,:] t_on_batch = t_test[s:e,:] t_on_batch = np_utils.to_categorical(t_on_batch,self.output_dim) mask1 = np.zeros((e-s,self.maxlen_d,self.output_dim),dtype=np.float32) for j in range(0,e-s) : n_dim = maxlen_d-list(d_on_batch[j,:]).count(0.) mask1[j,0:n_dim,:]=1 n_loss += n_dim mask2 = mask1.reshape(1,(e-s)*self.maxlen_d*self.output_dim) y_predict1 = model.predict_on_batch([e_on_batch, d_on_batch]) y_predict2 = np.maximum(y_predict1,1e-7) y_predict2 = -np.log(y_predict2) y_predict3 = y_predict2.reshape(1,(e-s)*self.maxlen_d*self.output_dim) target=t_on_batch.reshape(1,(e-s)*self.maxlen_d*self.output_dim) target1=target*mask2 #category_crossentropy計算 loss=np.dot(y_predict3,target.T) sum_loss += loss[0,0] #perplexity計算 perplexity=pow(math.e, sum_loss/n_loss) elapsed_time = time.time() - s_time sys.stdout.write("\r"+str(e)+"/"+str(row)+" "+str(int(elapsed_time))+"s "+"\t"+"{0:.4f}".format(perplexity)+" ") sys.stdout.flush() gc.collect() return perplexity #train_on_batchメイン処理 def on_batch(self,model,j,e_train,d_train,t_train,e_val,d_val,t_val,batch_size) : #損失関数、評価関数の平均計算用リスト list_loss =[] list_accuracy=[] s_time = time.time() row=e_train.shape[0] n_batch = math.ceil(row/batch_size) for i in range(0,n_batch) : s = i*batch_size e = min([(i+1) * batch_size,row]) e_on_batch = e_train[s:e,:] d_on_batch = d_train[s:e,:] t_on_batch = t_train[s:e,:] t_on_batch = np_utils.to_categorical(t_on_batch,self.output_dim) result = model.train_on_batch([e_on_batch, d_on_batch],t_on_batch) list_loss.append(result[0]) list_accuracy.append(result[1]) #perplexity=pow(math.e, np.average(list_loss)) elapsed_time = time.time() - s_time sys.stdout.write("\r"+str(e)+"/"+str(row)+" "+str(int(elapsed_time))+"s "+"\t"+"{0:.4f}".format(np.average(list_loss))+"\t"+"{0:.4f}".format(np.average(list_accuracy))) sys.stdout.flush() #perplexity評価 print() val_perplexity = self.eval_perplexity(model,e_val,d_val,t_val,batch_size) return val_perplexity #学習 def train(self, e_input, d_input,target,batch_size,epochs, emb_param) : model, _, _ = self.create_model() if os.path.isfile(emb_param) : model.load_weights(emb_param) e_i = e_input d_i = d_input t_l = target z = list(zip(e_i,d_i,t_l)) nr.shuffle(z) e_i,d_i,t_l = list(zip(*z)),list(zip(*z)),list(zip(*z)) e_i = np.array(e_i).reshape(len(e_i),self.maxlen_e) d_i = np.array(d_i).reshape(len(d_i),self.maxlen_d) t_l = np.array(t_l).reshape(len(t_l),self.maxlen_d) n_split = int(e_i.shape[0]*0.9) e_train,e_val=np.vsplit(e_i,[n_split]) d_train,d_val=np.vsplit(d_i,[n_split]) t_train,t_val=np.vsplit(t_l,[n_split]) print(e_train.shape) row = e_input.shape[0] loss_bk = 10000 for j in range(0,epochs) : print("Epoch ",j+1,"/",epochs) val_perplexity = self.on_batch(model,j,e_train,d_train,t_train,e_val,d_val,t_val,batch_size) return model #応答文生成 def response(self,e_input,length) : encoder_outputs, state_h, state_c = encoder_model.predict(e_input),encoder_model.predict(e_input),encoder_model.predict(e states_value=[state_h, state_c] target_seq = np.zeros((1,1)) target_seq[0, 0] = word_indices['SSSS'] decoded_sentence = '' for i in range(0,length) : output_tokens, h, c = decoder_model.predict([target_seq]+ states_value),decoder_model.predict([target_seq]+ states_value),decoder_model.predict([target_seq]+ states_value) sampled_token_index = np.argmax(output_tokens[0, 0, :]) sampled_char = indices_word[sampled_token_index] if sampled_char == 'SSSS' : break decoded_sentence += sampled_char if i==length-1: break target_seq[0,0] = sampled_token_index # Update states states_value = [h, c] return decoded_sentence vec_dim = 400 epochs = 10 batch_size = 100 input_dim = len(words) output_dim = input_dim n_hidden = int(vec_dim*2 ) prediction = Dialog(maxlen_e,maxlen_d,n_hidden,input_dim,vec_dim,output_dim) emb_param = 'param_seq2seq.hdf5' row = e_train.shape[0] e_train = e_train.reshape(row,maxlen_e) d_train = d_train.reshape(row,maxlen_d) t_train = t_train.reshape(row,maxlen_d) model = prediction.train(e_train, d_train,t_train,batch_size,epochs,emb_param) plot_model(model, show_shapes=True,to_file='seq2seq01.png') #ネットワーク図出力 model.save_weights(emb_param) row2 = e_test.shape[0] e_test = e_test.reshape(row2,maxlen_e) d_test = d_test.reshape(row2,maxlen_d) t_test=t_test.reshape(row2,maxlen_d) print() perplexity = prediction.eval_perplexity(model,e_test,d_test,t_test,batch_size) print('Perplexity=',perplexity))
あなたの回答
tips
プレビュー