What I want to achieve
To investigate how effective Mixup data augmentation is, I am running experiments on Fashion-MNIST.
Accuracy on the test data is almost identical either way, but compared to training without Mixup, the accuracy on the training data drops sharply once Mixup is applied, and I am stuck on why.
As far as I remember, Mixup was proposed as a technique that improves performance. Is this behavior simply to be expected, or is there a problem somewhere in my program?
I would be grateful if someone could point me in the right direction.
・Without Mixup (accuracy plot)
・With Mixup applied (accuracy plot)
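For reference, my understanding of the Mixup formulation (Zhang et al., 2018, "mixup: Beyond Empirical Risk Minimization") is

x̃ = λ·x₁ + (1 − λ)·x₂
ỹ = λ·y₁ + (1 − λ)·y₂,  with λ ~ Beta(α, α)

so the mixed labels are soft labels rather than one-hot vectors, and that is what the code below is meant to implement.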
Relevant source code
Python
# load required libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import models
from torchvision.models.feature_extraction import create_feature_extractor
#from memory_profiler import profile
#from pytorch_memlab import profile
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import display
import time
import pickle
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# load the dataset
train_data = torchvision.datasets.FashionMNIST('./fashion-mnist', train=True, download=True, transform=torchvision.transforms.ToTensor())
test_data = torchvision.datasets.FashionMNIST('./fashion-mnist', train=False, download=True, transform=torchvision.transforms.ToTensor())
random_state = 0   # seed used when splitting
np.random.seed(5)  # seed used for Mixup
mix = True         # whether to apply Mixup

train_data.data, train_data.targets = shuffle(train_data.data, train_data.targets)
x_train, y_train = train_data.data, train_data.targets
x_test, y_test = test_data.data, test_data.targets

# mnist_Net is defined in the second code block below
if torch.cuda.is_available():
    model = mnist_Net().to('cuda')
else:
    model = mnist_Net()

# one-hot encode the labels so that Mixup can be applied
y_train_enc = []
for i in range(len(y_train)):
    y_train_enc.append([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    y_train_enc[i][y_train[i]] = 1
y_train_enc = torch.tensor(y_train_enc)

mixup_rate = 0.5  # ratio of Mixup samples to original training samples
mixup_size = int(len(x_train) * mixup_rate)
mixup_data_x = []
mixup_data_y = []
for i in range(mixup_size):
    index_1 = np.random.randint(0, len(x_train) - 1)  # index of x_1, y_1
    index_2 = np.random.randint(0, len(x_train) - 1)  # index of x_2, y_2
    if index_1 == index_2:  # avoid mixing a sample with itself
        if index_1 == 0:
            index_2 += 1
        else:
            index_2 -= 1
    x_1 = x_train[index_1]
    y_1 = y_train_enc[index_1]
    x_2 = x_train[index_2]
    y_2 = y_train_enc[index_2]
    x, y = mixup(x_1, y_1, x_2, y_2)
    mixup_data_x.append(x)
    mixup_data_y.append(y)
mixup_data_x = torch.stack(mixup_data_x)
mixup_data_y = torch.stack(mixup_data_y)

x_train, y_train_enc = shuffle(x_train, y_train_enc)

# concatenate the Mixup samples with the original training data
x_train_mix = np.append(mixup_data_x, x_train, axis=0)
y_train_mix = np.append(mixup_data_y, y_train_enc, axis=0)

if mix:
    x_train = torch.tensor(x_train_mix)
    y_train_enc = torch.tensor(y_train_mix)
# otherwise keep x_train / y_train_enc unchanged

optimizer = torch.optim.Adam(model.parameters())
epoch = 150
batch = 1000
iteration = int(len(x_train) / batch)
loss_train = []
acc_train = []
loss_test = []
acc_test = []
ce = nn.CrossEntropyLoss()
for i in range(epoch):
    if i % 10 == 0:
        print('epoch:', i)

    loss_total_train = 0
    rmse_train = 0
    x_train, y_train_enc = shuffle(x_train, y_train_enc)
    y_output = []
    y_output_test = []
    model.train()
    for j in range(iteration):
        x_train_batch = x_train[j*batch:(j+1)*batch].to(torch.float32).to('cuda')
        x_train_batch = torch.unsqueeze(x_train_batch, dim=1)  # add the channel dimension
        y_train_batch = y_train_enc[j*batch:(j+1)*batch].to(torch.float32).to('cuda')

        optimizer.zero_grad()
        y = model(x_train_batch)
        #y_train_batch = y_train_batch.reshape(batch, 1)
        loss = ce(y, y_train_batch)
        loss_total_train += loss.to('cpu')
        loss.backward()
        optimizer.step()

        y_output.append(torch.argmax(y, dim=1).to('cpu').detach().numpy())

    loss_train.append(loss_total_train.detach().numpy() / len(x_train))

    # training accuracy: compare predictions with the argmax of the (possibly mixed) labels
    acc = 0
    y_output = np.stack(y_output).flatten()
    for n in range(len(x_train)):
        if y_output[n] == torch.argmax(y_train_enc[n], dim=0):
            acc += 1
    acc_train.append(acc / len(x_train))

    # evaluation on the test data
    model.eval()
    y = model(torch.unsqueeze(x_test.to(torch.float32).to('cuda'), dim=1))
    loss = ce(y, y_test.to(torch.int64).to('cuda'))
    y_output_test.append(torch.argmax(y, dim=1).to('cpu').detach().numpy())
    loss_test.append(loss.to('cpu').detach().numpy())
    acc = 0
    for n in range(len(x_test)):
        if y_output_test[0][n] == y_test[n]:
            acc += 1
    acc_test.append(acc / len(x_test))
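One thing I should mention: the training loop above passes the mixed (soft) labels directly to nn.CrossEntropyLoss. As far as I know, class-probability targets are only supported there from PyTorch 1.10 onward, which I checked with this standalone snippet (dummy tensors, independent of the script above):

Python
import torch
import torch.nn as nn

ce = nn.CrossEntropyLoss()
logits = torch.randn(4, 10)        # batch of 4 samples, 10 classes
hard = torch.randint(0, 10, (4,))  # class-index targets, as in the test loop
soft = torch.softmax(torch.randn(4, 10), dim=1)  # probability targets, as in the train loop

print(ce(logits, hard))  # works in all recent PyTorch versions
print(ce(logits, soft))  # requires PyTorch >= 1.10

The mixup function and the network used above are defined as follows: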
Python
def mixup(x_1, y_1, x_2, y_2, alpha=1):
    l = np.random.beta(alpha, alpha)  # mixing ratio λ ~ Beta(α, α); Beta(1, 1) is uniform
    x_l = l  #.reshape(1, 1, 1)
    y_l = l  #.reshape(1, )
    mix_x = x_1 * x_l + x_2 * (1 - x_l)
    mix_y = y_1 * y_l + y_2 * (1 - y_l)
    return mix_x, mix_y

class mnist_Net(nn.Module):
    def __init__(self):
        super(mnist_Net, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
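To rule out the mixup function itself, I also ran a quick standalone sanity check with dummy tensors (not part of the training script):

Python
import torch        # already imported in the script above
import numpy as np  # already imported in the script above

x_a = torch.zeros(28, 28)  # dummy "image" of all zeros
x_b = torch.ones(28, 28)   # dummy "image" of all ones
y_a = torch.eye(10)[0]     # one-hot label for class 0
y_b = torch.eye(10)[1]     # one-hot label for class 1

mx, my = mixup(x_a, y_a, x_b, y_b)
print(my)                # soft label, e.g. tensor([0.37, 0.63, 0., ...])
print(my.sum())          # the mixed label still sums to 1
print(torch.argmax(my))  # argmax only recovers the dominant class

The mixing ratio looked correct, and the mixed label always sums to 1.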
