Pytorchでテスト中にクラッシュ/out of memoryになってしまう

Pytorchでコードを回しているのですが、テスト中にクラッシュを起こすかCUDA:out of memoryを起こしてしまい動作を完了できません。

実行タスクはKagleの「Plant Pathology 2020 - FGVC7」です。
これは、約1800枚の葉っぱの画像を4種類にクラス分けするタスクです。

学習モデルのコード、検証のコードはそれぞれ下記の通りです。

Python
1class Net(nn.Module):
2    def __init__(self):
3        super(Net,self).__init__()
4        densenet = models.densenet121(pretrained=False)
5        self.Densenet = nn.Sequential(*list(densenet.children())[:-1])
6        self.fc1 = nn.Linear(1024*7*7,4096)
7        self.fc2 = nn.Linear(4096, 1024)
8        self.fc3 = nn.Linear(1024, 128)
9        self.fc4 = nn.Linear(128, 4)
10        
11        
12    
13    def forward(self, x):
14        x = self.Densenet(x)
15        x = x.view(-1,1024*7*7)
16        x = self.fc1(x)
17        x = self.fc2(x)
18        x = self.fc3(x)
19        x = self.fc4(x)
20        
21        return x

Python
1device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
2model = Net()
3model.to(device)
4optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
5criterion = nn.MSELoss()
6model.train()
7epochs = 3
8total_steps = 0
9# 学習ループ
10for i in range(epochs):
11    running_loss=0
12    for batch,labels in train_dl:
13        
14        labels=labels.float()
15        optimizer.zero_grad()
16        
17        batch = batch.to(device)
18        labels = labels.to(device)
19
20        preds = model(batch)
21        loss = criterion(preds, labels) 
22        loss.backward()
23        optimizer.step()
24        total_steps +=1
25        running_loss += loss.detach()
26        
27        print_training_loss_summary(running_loss, total_steps, i+1, epochs, len(train_dl))

python
1model.eval()
2x=torch.randn(1,4)
3x=x.to(device)
4for batch in test_dl:
5    optimizer.zero_grad()
6
7    batch = batch.to(device)
8
9    outputs = model(batch)
10    x=torch.cat((x,outputs),0)
11    print(x.size())
12x = x[1:,:]