Premise / what I want to achieve
I want to resolve an error that is raised during backward().
Problem / error message
```
trainning start!!
0
tensor(0.3558, device='cuda:0', grad_fn=<AddBackward0>) tensor(21.2084, device='cuda:0', grad_fn=<AddBackward0>)
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1024, 1]], which is output 0 of TBackward, is at version 4; expected version 3 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
```
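As the hint in the message suggests, anomaly detection makes the failing `backward()` print a second traceback pointing at the forward operation whose output was later modified in place. A minimal sketch (debugging only; it slows training considerably):

```python
import torch

# Enable once, before the training loop; the next failing backward()
# will also report where the offending tensor was created.
torch.autograd.set_detect_anomaly(True)
```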
Relevant source code
```python
class Generator(nn.Module):
    def __init__(self,image_size):
        super(Generator,self).__init__()
        self.image_size=image_size

        self.pre_layer=nn.Sequential(
            nn.Conv2d(3,64,kernel_size=9,stride=1,padding=4),
            nn.PReLU())

        self.residual_layer=nn.Sequential(
            ResidualBlock(64),
            ResidualBlock(64),
            ResidualBlock(64),
            ResidualBlock(64),
            ResidualBlock(64))

        self.middle_layer=nn.Sequential(
            nn.Conv2d(64,64,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(64))

        self.pixcelshuffer_layer=nn.Sequential(
            Pixcelshuffer(64,2),
            Pixcelshuffer(64,2),
            nn.Conv2d(64,3,kernel_size=9,stride=1,padding=4))

    def forward(self,input_image):
        pre=self.pre_layer(input_image)
        res=self.residual_layer(pre)
        middle=self.middle_layer(res)
        middle=middle+pre
        output=self.pixcelshuffer_layer(middle)

        return output


class ResidualBlock(nn.Module):
    def __init__(self,input_channel):
        super(ResidualBlock,self).__init__()

        self.residualblock=nn.Sequential(
            nn.Conv2d(input_channel,input_channel,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(input_channel),
            nn.PReLU(),
            nn.Conv2d(input_channel,input_channel,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(input_channel))

    def forward(self,x):
        residual=self.residualblock(x)

        return x+residual


class Pixcelshuffer(nn.Module):
    def __init__(self,input_channel,r):  # r=upscale_factor
        super(Pixcelshuffer,self).__init__()

        self.layer=nn.Sequential(
            nn.Conv2d(input_channel,256,kernel_size=3,stride=1,padding=1),
            nn.PixelShuffle(r),
            nn.PReLU())

    def forward(self,x):
        return self.layer(x)


class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator,self).__init__()

        self.conv_layers=nn.Sequential(
            nn.Conv2d(3,64,kernel_size=3,stride=1,padding=1),
            nn.LeakyReLU(0.2),

            nn.Conv2d(64,64,kernel_size=3,stride=2,padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2),

            nn.Conv2d(64,128,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),

            nn.Conv2d(128,128,kernel_size=3,stride=2,padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),

            nn.Conv2d(128,256,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2),

            nn.Conv2d(256,256,kernel_size=3,stride=2,padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2),

            nn.Conv2d(256,512,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2),

            nn.Conv2d(512,512,kernel_size=3,stride=2,padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2))  # (512,16,16)

        self.dense_layer=nn.Sequential(
            nn.Linear(16*16*512,1024),
            nn.LeakyReLU(0.2),
            nn.Linear(1024,1),
            nn.Sigmoid())

    def forward(self,input_image):
        batch_size=input_image.size()[0]

        conv=self.conv_layers(input_image)
        reshape=conv.view(batch_size,-1)
        output=self.dense_layer(reshape)

        return output


G=Generator(64)
D=Discriminator()

if cuda:
    G=G.cuda()
    D=D.cuda()

    g_param=torch.load("C:/SRGAN/asset/G_first_epoch100.pth")
    G.load_state_dict(g_param)
else:
    g_param=torch.load("C:/SRGAN/asset/G_first_epoch100.pth",map_location=lambda storage, loc:storage)
    G.load_state_dict(g_param)


G_optimizer=optim.Adam(G.parameters(),lr=0.0001,betas=(0.9,0.999))
D_optimizer=optim.Adam(D.parameters(),lr=0.0001,betas=(0.9,0.999))

d_loss=nn.BCELoss()


def train(epoch):
    D.train()
    G.train()

    y_real=torch.ones(batch_size,1)
    y_fake=torch.zeros(batch_size,1)

    if cuda:
        y_real=y_real.cuda()
        y_fake=y_fake.cuda()

    D_loss=0
    G_loss=0

    for batch_idx,(data_lr,data_hr) in enumerate(train_loader):
        if data_lr.size()[0]!=batch_size:
            break
        if cuda:
            data_lr=data_lr.cuda()
            data_hr=data_hr.cuda()
        print(batch_idx)
        D.zero_grad()

        D_real=D(data_hr)
        D_real_loss=d_loss(D_real,y_real)

        fake_image=G(data_lr)
        D_fake=D(fake_image)
        D_fake_loss=d_loss(D_fake,y_fake)

        D_loss=D_real_loss+D_fake_loss
        D_loss.backward(retain_graph=True)  # graph kept alive so D_fake can be reused below
        D_optimizer.step()
        D_loss+=D_loss.data.item()

        G.zero_grad()

        G_loss=generator_loss(fake_image,data_hr,D_fake,y_real)
        print(G_loss, D_loss)
        G_loss.backward()  # <- the RuntimeError is raised here (G_loss prints just before it)
        G_optimizer.step()
        G_loss+=G_loss.data.item()

    D_loss/=len(train_loader)
    G_loss/=len(train_loader)

    if batch_idx%1==0:
        g_image=fake_image.data.cpu()
        hr_image=data_hr.data.cpu()
        HR_image=torch.cat((hr_image,g_image),0)
        save_image(HR_image,"C:/SRGAN/save_image/epoch_cont_{}.png".format(epoch))
        print("save_image")

    return D_loss,G_loss

num_epoch=10

for epoch in range(1,num_epoch+1):
    if epoch==1:
        print("trainning start!!")
    d_loss_,g_loss_=train(epoch)

    if epoch%40==0:
        #generate_image(epoch)
        torch.save(G.state_dict(),"C:/SRGAN/asset/G_2nd_epoch{}.pth".format(epoch))
        torch.save(D.state_dict(),"C:/SRGAN/asset/D_2nd_epoch{}.pth".format(epoch))
```
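For reference, the shape `[1024, 1]` in the message matches the transposed weight of `nn.Linear(1024,1)` inside `D.dense_layer` (`TBackward` is the transpose that `F.linear` records in the graph), and `D_optimizer.step()` updates that weight in place, bumping its version from 3 to 4. A quick check (relies on the internal `_version` counter, so treat it as a debugging aid only):

```python
# dense_layer[2] is nn.Linear(1024, 1); its weight has shape (1, 1024),
# and the transpose used by F.linear is (1024, 1) -- the tensor named
# in the RuntimeError.
w = D.dense_layer[2].weight
print(w.t().shape)  # torch.Size([1024, 1])
print(w._version)   # increases after every D_optimizer.step()
```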
What I tried
The error occurred when I tried to run this code. I suspect it is raised by D_loss.backward() or G_loss.backward(), or possibly both. Strangely, though, it ran fine the first time.
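If the diagnosis above is right, the usual rearrangement is to detach `fake_image` for the discriminator step (so `retain_graph=True` is no longer needed) and to run a fresh `D(fake_image)` for the generator step, after the discriminator's weights have been updated. A sketch using the names from the question (`generator_loss` is assumed unchanged):

```python
for batch_idx, (data_lr, data_hr) in enumerate(train_loader):
    if cuda:
        data_lr, data_hr = data_lr.cuda(), data_hr.cuda()

    # Discriminator step: detach fake_image so D's backward never
    # reaches back into G's graph, and no retain_graph is required.
    D.zero_grad()
    fake_image = G(data_lr)
    d_total = d_loss(D(data_hr), y_real) + d_loss(D(fake_image.detach()), y_fake)
    d_total.backward()
    D_optimizer.step()

    # Generator step: run D again *after* its update, so the graph
    # matches the current (version-bumped) discriminator weights.
    G.zero_grad()
    g_total = generator_loss(fake_image, data_hr, D(fake_image), y_real)
    g_total.backward()
    G_optimizer.step()
```

Separately, `D_loss+=D_loss.data.item()` accumulates the running total into the loss tensor itself; keeping the totals in plain Python floats (e.g. `running_d += d_total.item()`) keeps graph tensors out of the bookkeeping.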
Supplementary information (tool versions, etc.)
Anaconda JupyterLab 2.1.5
PyTorch 1.7.0
CUDA Version 11.0