Background
I am implementing actor-critic in PyTorch.
The code is based on https://github.com/oreilly-japan/deep-learning-from-scratch-4/blob/master/pytorch/actor_critic.py, with only the environment part changed.
However, loss_pi.backward() raises an in-place error and the script does not run.

What I want to achieve
Run actor-critic without triggering the in-place error.

Problem / error message
```
File "ac.py", line 192, in <module>
simulation(device)
File "ac.py", line 173, in simulation
action, action_prob = agent.get_action(state, episode)
File "ac.py", line 65, in get_action
probs = self.pi(state)
File "/home/○○/.pyenv/versions/miniconda3-latest/envs/○○/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "ac.py", line 29, in forward
x = F.relu(self.l1(x))
File "/home/○○/.pyenv/versions/miniconda3-latest/envs/○○/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/○○/.pyenv/versions/miniconda3-latest/envs/○○/lib/python3.7/site-packages/torch/nn/modules/linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
File "/home/○○/.pyenv/versions/miniconda3-latest/envs/○○/lib/python3.7/site-packages/torch/nn/functional.py", line 1848, in linear
return torch._C._nn.linear(input, weight, bias)
(function _print_stack)
Traceback (most recent call last):
File "ac.py", line 192, in <module>
simulation(device)
File "ac.py", line 177, in simulation
agent.update(state, action_prob, reward, next_state, done)
File "ac.py", line 85, in update
loss_pi.backward()
File "/home/○○/.pyenv/versions/miniconda3-latest/envs/○○/lib/python3.7/site-packages/torch/_tensor.py", line 307, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/home/○○/.pyenv/versions/miniconda3-latest/envs/○○/lib/python3.7/site-packages/torch/autograd/init.py", line 156, in backward
allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 68340]] is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
```
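The stack frames printed above the Traceback (ending in `(function _print_stack)`) look like output from autograd's anomaly detection, which points at `self.pi(state)` / `F.linear` inside `get_action` as the operation whose saved input was later modified. For reference, a minimal sketch of how I understand that extra backtrace gets produced (assuming something equivalent is enabled near the top of ac.py):

```python
# Sketch: with anomaly detection enabled, backward() also prints the
# forward-pass stack of the operation whose saved tensor was modified
# in place (the "(function _print_stack)" block above).
import torch
torch.autograd.set_detect_anomaly(True)
```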
Relevant source code
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.distributions import Categorical


class PolicyNet(torch.nn.Module):
    def __init__(self, data, hidden_dim=128):
        super().__init__()
        self.action_size = data['word'].x.size()[0]
        self.emb_dim = data['word'].x.size()[1] + data['spot'].x.size()[1]
        self.hidden_dim = hidden_dim
        self.l1 = nn.Linear(self.action_size, self.hidden_dim)
        self.l2 = nn.Linear(self.hidden_dim, self.action_size)

    def forward(self, x):
        x = F.relu(self.l1(x))
        x = self.l2(x)
        x = F.softmax(x, dim=1)
        return x


class ValueNet(torch.nn.Module):
    def __init__(self, data, hidden_dim=128):
        super().__init__()
        self.action_size = data['word'].x.size()[0]
        self.hidden_dim = hidden_dim
        self.l1 = nn.Linear(self.action_size, self.hidden_dim)
        self.l2 = nn.Linear(self.hidden_dim, 1)

    def forward(self, x):
        x = F.relu(self.l1(x))
        x = self.l2(x)
        return x


class Agent:
    def __init__(self, data, device):
        self.gamma = 0.98
        self.lr_pi = 2e-4
        self.lr_v = 5e-4
        self.action_size = data['word'].x.size()[1]

        self.pi = PolicyNet(data).to(device)
        self.v = ValueNet(data).to(device)
        self.optimizer_pi = Adam(self.pi.parameters(), self.lr_pi)
        self.optimizer_v = Adam(self.v.parameters(), self.lr_v)
        self.data = data
        self.device = device

    def get_action(self, state):
        state = state.unsqueeze(0)
        probs = self.pi(state)
        probs = probs[0]
        m = Categorical(probs)
        action = m.sample().item()
        return action, probs[action]

    def update(self, state, action_prob, reward, next_state, done):
        state = state.unsqueeze(0)
        next_state = next_state.unsqueeze(0)
        target = reward + self.gamma * self.v(next_state)  # TD target
        target.detach()
        v = self.v(state)
        loss_fn = nn.MSELoss()
        loss_v = loss_fn(v, target)

        delta = target - v
        loss_pi = -torch.log(action_prob) * delta.item()
        self.optimizer_v.zero_grad()
        self.optimizer_pi.zero_grad()
        loss_v.backward()
        loss_pi.backward()
        self.optimizer_v.step()
        self.optimizer_pi.step()


class MyEnv:
    def __init__(self, data, device):
        # omitted
        self.device = device
        self.original_data = data
        self.data = data

    def reset(self):
        # omitted: returns a fresh state
        return self.state

    @torch.no_grad()
    def step(self, action):
        # omitted: uses a separate model to compute the next state and
        # reward for the given action
        return next_state, reward, done, info


def simulation(device):
    episodes = 1000000

    env = MyEnv(data, device)
    agent = Agent(data, device)
    reward_history = []
    step = 0
    for episode in range(episodes):
        state = env.reset()
        done = False
        total_reward = 0

        while not done:
            action, action_prob = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            agent.update(state, action_prob, reward, next_state, done)
            state = next_state
            total_reward += reward

        reward_history.append(total_reward)
```
What I've tried
The error occurs at loss_pi.backward(), and torch.cuda.FloatTensor [1, 68340] matches the size of state and probs (i.e. the action size), so I suspect the problem is in PolicyNet.forward, Agent.get_action, or Agent.update. However, this code is almost identical to the reference implementation, so I have not been able to pin down the cause. MyEnv also worked fine when I used it for a DQN implementation, so I do not think the environment itself is the problem.
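As a next step, one check I am considering (just a sketch I put together, not something from the reference code) is to watch the tensor's in-place version counter around env.step(), since the error says the saved input went from version 1 to version 2. Tensor._version is the internal counter that autograd compares against:

```python
# Hypothetical diagnostic inside the simulation loop: if env.step()
# bumps state._version, then the tensor that self.pi(state) saved for
# backward is being modified in place, which would match
# "is at version 2; expected version 1".
action, action_prob = agent.get_action(state)
version_before = state._version             # autograd's in-place counter
next_state, reward, done, info = env.step(action)
if state._version != version_before:
    print(f"state modified in place during step(): "
          f"{version_before} -> {state._version}")
agent.update(state, action_prob, reward, next_state, done)
```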
Sorry that this is a somewhat open-ended question, but I would appreciate any advice.
Supplementary information (framework/tool versions, etc.)
PyTorch 1.10.2+cu113
Python 3.7.13
