こちらのサイトを参考に chatbot を実装して学習を行ったのですが、なぜか返答が常に「おはようございます」だけになってしまいます。コードのどこかを変更すればうまくいくようなのですが、config.py、main.py、loss.py など、どのコードを見ても変更すべき箇所がわかりません。
どなたか教えていただけないでしょうか。
main.py
python
# Training entry point: builds the tokenizer, dataset and model from Config,
# optionally resumes from a checkpoint, then trains epoch by epoch and prints
# a sample reply after each epoch.
import logging
import os
import pickle

import torch
import torch.nn as nn
import torch.optim as optim

from config import Config
from nn import build_model
from tokenizer import Tokenizer
from utils import (DialogDataset, one_cycle, evaluate,
                   seed_everything, BalancedDataLoader,
                   make_train_data_from_txt, make_itf)

logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    logging.info('*** Initializing ***')

    # Create the data directory if needed; exist_ok avoids the
    # check-then-create race and also handles nested paths.
    os.makedirs(Config.data_dir, exist_ok=True)

    seed_everything(Config.seed)
    device = torch.device(Config.device)

    start_epoch = 0
    tokenizer = Tokenizer.from_pretrained(Config.model_name)

    logging.info('Preparing training data')
    if Config.use_pickle:
        # SECURITY: pickle.load executes arbitrary code embedded in the file;
        # only load pickles you created yourself or fully trust.
        with open(Config.pickle_path, 'rb') as f:
            train_data = pickle.load(f)
    else:
        train_data = make_train_data_from_txt(Config, tokenizer)
    # NOTE(review): `itf` is not used anywhere else in this script; the call
    # is kept in case make_itf has side effects — TODO confirm and remove.
    itf = make_itf(train_data, Config.vocab_size)
    dataset = DialogDataset(train_data, tokenizer)

    logging.info('Define Models')
    model = build_model(Config).to(device)
    model.unfreeze()

    logging.info('Define Loss and Optimizer')
    # reduction='none' keeps the per-element losses; any weighting/averaging
    # is presumably done by the training loop — verify in one_cycle.
    criterion = nn.CrossEntropyLoss(reduction='none')
    optimizer = optim.AdamW(model.parameters(), lr=Config.lr,
                            betas=Config.betas, eps=1e-9)

    if Config.load:
        # map_location lets a checkpoint saved on another device be restored
        # onto the device selected in Config.
        state_dict = torch.load(f'{Config.data_dir}/{Config.fn}.pth',
                                map_location=device)
        # NOTE(review): the resume epoch is hard-coded. With the default
        # Config.n_epoch = 10 the loop below runs zero epochs after a load,
        # so raise n_epoch (or change this value) to continue training.
        start_epoch = 10
        print(f'Start Epoch: {start_epoch}')
        model.load_state_dict(state_dict['model'])
        optimizer.load_state_dict(state_dict['opt'])

    logging.info('Start Training')
    for epoch in range(start_epoch, Config.n_epoch):
        # A fresh loader is built for every epoch.
        one_cycle(epoch, Config, model, optimizer, criterion,
                  BalancedDataLoader(dataset, tokenizer.pad_token_id),
                  tokenizer, device)
        # Generate a reply to a fixed prompt as a quick sanity check.
        evaluate(Config, 'おはよーーー', tokenizer, model, device)
config.py
class Config:
    """Static configuration read by the training script.

    All settings are plain class attributes and are accessed directly as
    ``Config.<name>``; the class is not meant to be instantiated.
    """

    # Reproducibility and hardware.
    seed = 116
    device = 'cuda'

    # Training schedule and optimizer settings.
    n_epoch = 10
    batch_size = 64
    max_len = 22
    lr = 1e-3
    betas = (0.9, 0.98)

    # Model hyperparameters (presumably Transformer sizes — confirm against
    # the model-building code).
    vocab_size = 32000
    num_head = 8
    d_model = 768
    num_layer = 6
    d_ff = 2048
    drop_rate = 0.1

    # Regularization / learning-rate schedule knobs (exact semantics are
    # defined by the training and loss code — TODO confirm).
    max_grad_norm = 1.0
    smoothing = 0.1
    factor = 2
    warmup = 4000

    # FIXME: Change path of training data.
    # Default value is './data'; point it at the directory that contains
    # your training data, e.g. 'path/to/dir_contains_training_data'.
    data_dir = './data'
    train_data_path = f'{data_dir}/train_data.txt'
    pickle_path = f'{data_dir}/train_data.pkl'

    # Checkpoint file stem and whether to resume from that checkpoint.
    fn = 'ckpt'
    load = False

    # FIXME: if you use original data, change flag of this
    # True  -> load pre-built training data from pickle_path.
    # False -> build training data from the text file instead.
    use_pickle = True

    # Name passed to the tokenizer's from_pretrained().
    model_name = 'bert-base-japanese-whole-word-masking'
回答1件
あなたの回答
tips
プレビュー