CIFER-10の画像分類を学習するCNNをpytorchで組んだのですが、うまくいきません。
調べてもよくわからなかったので、だめなところ指摘してもらえるとうれしいです。
やりたいこと
CIFER-10 の画像分類を浅い CNN に学習させたところ、accuracy が73%程度であった。
層を増やしてより高い accuracy を得たい。
浅い CNN のコード
モジュールのインポートとデータローダの作成
python
1 2import torch 3device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 4print(device) 5 6import torchvision 7import torchvision.transforms as transforms 8import torch.nn as nn 9import torch.nn.functional as F 10import torch.optim as optim 11% matplotlib inline 12import matplotlib.pyplot as plt 13import numpy as np 14 15 16batch_size = 256 17loss_interval = 50 18 19 20transform_aug = transforms.Compose( 21 [transforms.RandomHorizontalFlip(p=0.5), 22 transforms.RandomAffine(degrees=0.2, scale=(0.8,1.2)), 23 transforms.ToTensor(), 24 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 25 ]) 26 27trainset = torchvision.datasets.CIFAR10(root='./data', train=True, 28 download=True, transform=transform_aug) 29trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, 30 shuffle=True, num_workers=2) 31 32 33testset = torchvision.datasets.CIFAR10(root='./data', train=False, 34 download=True, transform=transform_aug) 35testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, 36 shuffle=True, num_workers=2)
モデルの定義と訓練、評価をする関数の定義
python
1 2#ネットワークの定義 3class Net(nn.Module): 4 def __init__(self): 5 super(Net, self).__init__() 6 self.conv1 = nn.Conv2d(3, 64, 3, padding = 1) 7 self.conv7 = nn.Conv2d(64, 64, 3, padding = 1) 8 self.conv2 = nn.Conv2d(64, 128, 3, padding = 1) 9 self.conv6 = nn.Conv2d(128, 128, 3, padding = 1) 10 self.conv3 = nn.Conv2d(128, 256, 3, padding = 1) 11 self.conv5 = nn.Conv2d(256, 256, 3, padding = 1) 12 self.conv4 = nn.Conv2d(256, 16, 3, padding = 1) 13 self.pool = nn.MaxPool2d(2) 14 self.avgpool = nn.AvgPool2d(2) 15 self.fc1 = nn.Linear(16 * 8 * 8, 120) 16 self.fc2 = nn.Linear(120, 84) 17 self.fc3 = nn.Linear(84, 10) 18 self.sm = nn.Softmax(1) 19 20 # dropoutの定義 21 self.dropout1 = nn.Dropout2d(p=0.2) 22 self.dropout2 = nn.Dropout2d(p=0.25) 23 self.dropout3 = nn.Dropout(p=0.3) 24 self.dropout4 = nn.Dropout(p=0.35) 25 26 27 def forward(self, x): 28 x = F.relu(self.conv1(x)) 29 x = F.relu(self.conv7(x)) 30 x = F.relu(self.conv2(x)) 31 # x = F.relu(self.conv6(x)) 32 x = self.pool(x) 33 x = self.dropout1(x) 34 x = F.relu(self.conv3(x)) 35 # x = F.relu(self.conv5(x)) 36 x = F.relu(self.conv4(x)) 37 x = self.pool(x) 38 x = self.dropout2(x) 39 x = x.view(-1, 16 * 8 * 8) 40 x = F.relu(self.fc1(x)) 41 x = self.dropout3(x) 42 x = F.relu(self.fc2(x)) 43 x = self.dropout4(x) 44 x = self.fc3(x) 45 return x 46 47 48#ネットワークを訓練する関数 49def train(net, criterion, optimizer, n_epoch = 15): 50 # Batch normalization 51 net.train() 52 train_loss = [] 53 test_loss = [] 54 for epoch in range(n_epoch): 55 for i, data in enumerate(trainloader, 0): 56 inputs, labels = data[0].to(device), data[1].to(device) 57 58 optimizer.zero_grad() 59 60 outputs = net(inputs) 61 loss = criterion(outputs, labels) 62 loss.backward() 63 optimizer.step() 64 65 if i % loss_interval == (loss_interval - 1): 66 train_loss.append(loss.item()) 67 68 with torch.no_grad(): 69 data = iter(testloader).next() 70 inputs, labels = data[0].to(device), data[1].to(device) 71 outputs = net(inputs) 72 loss = criterion(outputs, labels) 73 test_loss.append(loss.item()) 74 75 print('epoch {}/{} finished'.format(epoch+1,n_epoch)) 76 77 print('Finished Training') 78 return train_loss, test_loss 79 80# 損失の変遷を表示する関数 81def show_loss(train_loss, test_loss): 82 plt.xlabel("iter") 83 plt.ylabel("loss") 84 x = [i*loss_interval for i in range(len(train_loss))] 85 plt.plot(x, train_loss, label='train_loss') 86 plt.plot(x, test_loss, label='test_loss') 87 plt.legend() 88 plt.show() 89 90# ネットワークの予測精度を計算する関数 91def check_accuracy(net): 92 net.eval() 93 ret = [] 94 with torch.no_grad(): 95 for loader, name in [[trainloader, 'train'], [testloader, 'test']]: 96 correct = 0 97 total = 0 98 for data in loader: 99 images, labels = data[0].to(device), data[1].to(device) 100 outputs = net(images) 101 _, predicted = torch.max(outputs.data, 1) 102 total += labels.size(0) 103 correct += (predicted == labels).sum().item() 104 ret.append(100 * correct / total) 105 print('Accuracy of the network on the {} images: {:.2f} %'.format(name, ret[-1])) 106 107 return ret
訓練、評価
python
1net = Net() 2net.to(device) 3 4criterion = nn.CrossEntropyLoss() 5optimizer_wd = optim.Adam(net.parameters(), lr=0.001, weight_decay=4e-3) 6 7train_loss, test_loss = train(net, criterion, optimizer_wd, n_epoch = 50) 8show_loss(train_loss, test_loss) 9 10trainset = torchvision.datasets.CIFAR10(root='./data', train=True, 11 download=True, transform=transform) 12trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, 13 shuffle=True, num_workers=2) 14testset = torchvision.datasets.CIFAR10(root='./data', train=False, 15 download=True, transform=transform) 16testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, 17 shuffle=True, num_workers=2) 18 19acc = check_accuracy(net)
このコードが accuracy 73% 程度でした。
より層の多い CNN
モデルの定義を次のように変更しました。
python
1class Net(nn.Module): 2 def __init__(self): 3 super(Net, self).__init__() 4 self.conv1 = nn.Conv2d(3, 64, 3, padding = 1) 5 self.conv7 = nn.Conv2d(64, 64, 3, padding = 1) 6 self.conv2 = nn.Conv2d(64, 128, 3, padding = 1) 7 self.conv6 = nn.Conv2d(128, 128, 3, padding = 1) 8 self.conv3 = nn.Conv2d(128, 256, 3, padding = 1) 9 self.conv5 = nn.Conv2d(256, 256, 3, padding = 1) 10 self.conv4 = nn.Conv2d(256, 16, 3, padding = 1) 11 self.pool = nn.MaxPool2d(2) 12 self.avgpool = nn.AvgPool2d(2) 13 self.fc1 = nn.Linear(16 * 4 * 4, 120) 14 self.fc2 = nn.Linear(120, 84) 15 self.fc3 = nn.Linear(84, 10) 16 self.sm = nn.Softmax(1) 17 18 # dropoutの定義 19 self.dropout1 = nn.Dropout2d(p=0.2) 20 self.dropout2 = nn.Dropout2d(p=0.25) 21 self.dropout3 = nn.Dropout(p=0.3) 22 self.dropout4 = nn.Dropout(p=0.35) 23 24 def forward(self, x): 25 x = F.relu(self.conv1(x)) 26 x = F.relu(self.conv7(x)) 27 x = self.dropout2(x) 28 x = self.pool(x) 29 30 x = F.relu(self.conv2(x)) 31 x = F.relu(self.conv6(x)) 32 x = self.dropout2(x) 33 x = self.pool(x) 34 35 x = F.relu(self.conv3(x)) 36 x = F.relu(self.conv5(x)) 37 x = F.relu(self.conv4(x)) 38 x = self.dropout2(x) 39 x = self.avgpool(x) 40 x = x.view(-1, 16 * 4 * 4) 41 42 x = F.relu(self.fc1(x)) 43 x = self.dropout3(x) 44 x = F.relu(self.fc2(x)) 45 x = self.fc3(x) 46 x = self.sm(x) 47 48 return x
このモデルの summery は
--- Layer (type) Output Shape Param # === Conv2d-1 [-1, 64, 32, 32] 1,792 Conv2d-2 [-1, 64, 32, 32] 36,928 Dropout2d-3 [-1, 64, 32, 32] 0 MaxPool2d-4 [-1, 64, 16, 16] 0 Conv2d-5 [-1, 128, 16, 16] 73,856 Conv2d-6 [-1, 128, 16, 16] 147,584 Dropout2d-7 [-1, 128, 16, 16] 0 MaxPool2d-8 [-1, 128, 8, 8] 0 Conv2d-9 [-1, 256, 8, 8] 295,168 Conv2d-10 [-1, 256, 8, 8] 590,080 Conv2d-11 [-1, 16, 8, 8] 36,880 Dropout2d-12 [-1, 16, 8, 8] 0 AvgPool2d-13 [-1, 16, 4, 4] 0 Linear-14 [-1, 120] 30,840 Dropout-15 [-1, 120] 0 Linear-16 [-1, 84] 10,164 Linear-17 [-1, 10] 850 Softmax-18 [-1, 10] 0 ==== ~ (文字数制限のため省略)
このモデルを訓練すると、loss が収束せず、10epoch ほど回してもaccuracyは10%(10種類の画像だから何も学習していない)でした。
ちなみにこのモデルは ここ のベンチマークモデル(Keras)↓を参考に作っています。
def create_bench_model(): inputs = Input(shape = (32,32,3)) x = Conv2D(64,(3,3),padding = "SAME",activation= "relu")(inputs) x = Conv2D(64,(3,3),padding = "SAME",activation= "relu")(x) x = Dropout(0.25)(x) x = MaxPooling2D()(x) x = Conv2D(128,(3,3),padding = "SAME",activation= "relu")(x) x = Conv2D(128,(3,3),padding = "SAME",activation= "relu")(x) x = Dropout(0.25)(x) x = MaxPooling2D()(x) x = Conv2D(256,(3,3),padding = "SAME",activation= "relu")(x) x = Conv2D(256,(3,3),padding = "SAME",activation= "relu")(x) x = GlobalAveragePooling2D()(x) x = Dense(1024,activation = "relu")(x) x = Dropout(0.25)(x) y = Dense(10,activation = "softmax")(x) return Model(input = inputs, output = y)
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。