openAIgymのrenderについて終了時にエラーが出る

前提・実現したいこと

openAIgymを利用して強化学習を勉強中なのですが、env.render()での描画の終了時にエラーメッセージが表示されます
下のコードではエピソードを200としているのでコピペですぐに実行できると思うのですが、200エピソードが完了したあと、エラーメッセージが表示されます。おそらくrenderだけしてウィンドウを閉じていないことが原因だと考えていますが対応方法が分からないです。

発生している問題・エラーメッセージ

Exception ignored in: <bound method Viewer.__del__ of <gym.envs.classic_control.rendering.Viewer object at 0x0000016230960D68>>
Traceback (most recent call last):
  File "C:\Users\xxx\Anaconda3\envs\OpenAIGym_pipenv\lib\site-packages\gym\envs\classic_control\rendering.py", line 152, in __del__
  File "C:\Users\xxx\Anaconda3\envs\OpenAIGym_pipenv\lib\site-packages\gym\envs\classic_control\rendering.py", line 71, in close
  File "C:\Users\xxx\Anaconda3\envs\OpenAIGym_pipenv\lib\site-packages\pyglet\window\win32\__init__.py", line 305, in close
  File "C:\Users\xxx\Anaconda3\envs\OpenAIGym_pipenv\lib\site-packages\pyglet\window\__init__.py", line 770, in close
ImportError: sys.meta_path is None, Python is likely shutting down

該当のソースコード

python
1import gym
2import matplotlib.pyplot as plt
3import numpy as np
4
5NUM_DIGITIZE = 8
6NUM_ACTION = 16
7ACTION = np.linspace(-2, 2, NUM_ACTION)
8NUM_EPISODES = int(200)
9MAX_STEP = 100
10ETA = 0.5
11GAMMA = 0.9
12ENV = 'Pendulum-v0'
13
14
15class Agent:
16    def __init__(self, num_states, num_actions):
17        self.brain = Brain(num_states, num_actions)
18
19    def update_Q_function(self, observation, action, reward, observation_next):
20        self.brain.update_Q_table(observation, action, reward, observation_next)
21
22    def get_action(self, observation, step):
23        action = self.brain.decide_action(observation, step)
24        return action
25
26
27class Brain:
28    def __init__(self, num_states, num_actions):
29        self.num_actions = NUM_ACTION
30        self.q_table = np.random.uniform(0, 1, size=(NUM_DIGITIZE ** num_states, NUM_ACTION))
31
32    def bins(self, clip_min, clip_max, num):
33        return np.linspace(clip_min, clip_max, num + 1)[1:-1]
34
35    def digitize_state(self, observation):
36        cos, sin, w = observation
37        digitized = [
38            np.digitize(cos, bins=self.bins(-1.0, 1.0, NUM_DIGITIZE)),
39            np.digitize(sin, bins=self.bins(-1.0, 1.0, NUM_DIGITIZE)),
40            np.digitize(w, bins=self.bins(-8.0, 8.0, NUM_DIGITIZE))]
41        return sum([x * (NUM_DIGITIZE ** i) for i, x in enumerate(digitized)])
42
43    def digitize_action(self, action):
44        return np.digitize(action, bins=self.bins(-2, 2, NUM_ACTION))
45
46    def update_Q_table(self, observation, action, reward, observation_next):
47        state = self.digitize_state(observation)
48        state_next = self.digitize_state(observation_next)
49        Max_Q_next = max(self.q_table[state_next][:])
50        self.q_table[state, action] = self.q_table[state, action] + \
51                                      ETA * (reward + GAMMA * Max_Q_next - self.q_table[state, action])
52
53    def decide_action(self, observation, episode):
54        state = self.digitize_state(observation)
55        epsilon = 0.5 * (1 / (1 + episode))
56
57        if epsilon <= np.random.uniform(0, 1):
58            action = np.argmax(self.q_table[state][:])
59        else:
60            action = np.random.choice(self.num_actions)
61        Action = ACTION[action]
62
63        return (Action, action)
64
65
66class Logger:
67    def __init__(self):
68        self.log = []
69
70    def log_func(self, x):
71        self.log.append(x)
72
73
74class Environment:
75    def __init__(self):
76        self.env = gym.make(ENV)
77        self.Log = Logger()
78        num_states = self.env.observation_space.shape[0]
79        num_actions = self.env.action_space.shape[0]
80
81        self.agent = Agent(num_states, num_actions)
82
83    def run(self):
84        for episode in range(NUM_EPISODES):
85            print(f"{episode}episode start")
86            XXX=self.Log
87            print(XXX)
88            observation = self.env.reset()
89
90            for step in range(MAX_STEP):
91                if episode % 100 == 0:
92                    self.env.render()
93
94                action_index = self.agent.get_action(observation, step)[1]
95                Action = self.agent.get_action(observation, step)[0]
96                # print(Action)
97                observation_next, reward, done, _ = self.env.step([Action])
98                self.Log.log_func(reward)
99                if done:
100                    reward = -10
101                else:
102                    reward = reward
103                self.agent.update_Q_function(observation, action_index, reward, observation_next)
104                observation = observation_next
105                if done:
106                    break
107
108
109if __name__ == '__main__':
110    cartpole_env = Environment()
111    cartpole_env.run()