OoenAIが提供しているSafety Gymという強化学習シミュレーションツールでオリジナルの手法を動かしたく、トライ&エラーをしています。
強化学習の観測を調べており、githubで組んであるコードから予測される出力と異なるものが出力されます。
Python
1import safety_gym 2from safety_gym.envs.engine import Engine 3import numpy as np 4from gym.envs.registration import register 5import safety_gym.envs.world 6 7 8config = { 9 'robot_base': 'xmls/point.xml', 10 'task': 'goal', 11 12 # lidar ... 光の使って物体を認識 13 'observe_goal_lidar': True, # ゴールの認識 14 'observe_box_lidar': True,# pushタスクで押す箱の認識 15 'observe_hazards': True, # ハザードの認識 16 'observe_vases': True, # 花瓶の認識 17 'lidar_max_dist': 3, # lidarセンサの認識できる最大距離 18 'lidar_num_bins': 16, #lidarのbin数(光線の数?、認識できる物体の数?) 19 20 'constrain_hazards': True, # 危険な制約あり/なし 21 'placements_extents': [-1.5, -1.5, 1.5, 1.5], 22 'hazards_num': 8, # 危険ゾーンの個数 23 'vases_num': 1, # 花瓶の数 24 25 'task': 'goal', 26 'goal_size': 0.3, 27 'goal_keepout': 0.305, 28 'hazards_size': 0.2, 29 'hazards_keepout': 0.18, 30 31 '_seed':np.random.randint(2**16) # この様に変更しないとValueErrorが出る 32} 33 34register(id='SafexpTestEnvironment-v0', 35 entry_point='safety_gym.envs.mujoco:Engine', 36 kwargs={'config': config}) 37 38env = Engine(config) 39env.reset() 40env.obs()
上記のコードの出力が
Python
1array([ 0. , 0. , 9.81 , 0. , 0. , 2 0. , 0. , 0. , 0. , 0. , 3 0.32289485, 0.62832593, 0.30543109, 0. , 0. , 4 0. , 0. , 0. , 0. , 0. , 5 0. , 0. , 0. , 0. , 0. , 6 0. , 0. , 0. , 0. , 0.09979685, 7 0.25093581, 0.28346867, 0.31919212, 0.37231802, 0.55099259, 8 0.73966652, 0.18867394, 0. , -0.05594032, 0.49686083, 9 0. , 0. , 0. , 0. , 0. , 10 0. , 0. , 0. , 0. , 0. , 11 0. , 0.60760634, 0.78115352, 0.17354718, 0. , 12 0. , 0. , 0. , 0. , 0. ])
ですが、https://github.com/openai/safety-gym/blob/master/safety_gym/envs/engine.pyより
Python
1def obs(self): 2 ''' Return the observation of our agent ''' 3 self.sim.forward() # Needed to get sensordata correct 4 obs = {} 5 6 if self.observe_goal_dist: 7 obs['goal_dist'] = np.array([np.exp(-self.dist_goal())]) 8 if self.observe_goal_comp: 9 obs['goal_compass'] = self.obs_compass(self.goal_pos) 10 if self.observe_goal_lidar: 11 obs['goal_lidar'] = self.obs_lidar([self.goal_pos], GROUP_GOAL) 12 if self.task == 'push': 13 box_pos = self.box_pos 14 if self.observe_box_comp: 15 obs['box_compass'] = self.obs_compass(box_pos) 16 if self.observe_box_lidar: 17 obs['box_lidar'] = self.obs_lidar([box_pos], GROUP_BOX) 18 if self.task == 'circle' and self.observe_circle: 19 obs['circle_lidar'] = self.obs_lidar([self.goal_pos], GROUP_CIRCLE) 20 if self.observe_freejoint: 21 joint_id = self.model.joint_name2id('robot') 22 joint_qposadr = self.model.jnt_qposadr[joint_id] 23 assert joint_qposadr == 0 # Needs to be the first entry in qpos 24 obs['freejoint'] = self.data.qpos[:7] 25 if self.observe_com: 26 obs['com'] = self.world.robot_com() 27 if self.observe_sensors: 28 # Sensors which can be read directly, without processing 29 for sensor in self.sensors_obs: # Explicitly listed sensors 30 obs[sensor] = self.world.get_sensor(sensor) 31 for sensor in self.robot.hinge_vel_names: 32 obs[sensor] = self.world.get_sensor(sensor) 33 for sensor in self.robot.ballangvel_names: 34 obs[sensor] = self.world.get_sensor(sensor) 35 # Process angular position sensors 36 if self.sensors_angle_components: 37 for sensor in self.robot.hinge_pos_names: 38 theta = float(self.world.get_sensor(sensor)) # Ensure not 1D, 1-element array 39 obs[sensor] = np.array([np.sin(theta), np.cos(theta)]) 40 for sensor in self.robot.ballquat_names: 41 quat = self.world.get_sensor(sensor) 42 obs[sensor] = quat2mat(quat) 43 else: # Otherwise read sensors directly 44 for sensor in self.robot.hinge_pos_names: 45 obs[sensor] = self.world.get_sensor(sensor) 46 for sensor in self.robot.ballquat_names: 47 obs[sensor] = self.world.get_sensor(sensor) 48 if self.observe_remaining: 49 obs['remaining'] = np.array([self.steps / self.num_steps]) 50 assert 0.0 <= obs['remaining'][0] <= 1.0, 'bad remaining {}'.format(obs['remaining']) 51 if self.walls_num and self.observe_walls: 52 obs['walls_lidar'] = self.obs_lidar(self.walls_pos, GROUP_WALL) 53 if self.observe_hazards: 54 obs['hazards_lidar'] = self.obs_lidar(self.hazards_pos, GROUP_HAZARD) 55 if self.observe_vases: 56 obs['vases_lidar'] = self.obs_lidar(self.vases_pos, GROUP_VASE) 57 if self.gremlins_num and self.observe_gremlins: 58 obs['gremlins_lidar'] = self.obs_lidar(self.gremlins_obj_pos, GROUP_GREMLIN) 59 if self.pillars_num and self.observe_pillars: 60 obs['pillars_lidar'] = self.obs_lidar(self.pillars_pos, GROUP_PILLAR) 61 if self.buttons_num and self.observe_buttons: 62 # Buttons observation is zero while buttons are resetting 63 if self.buttons_timer == 0: 64 obs['buttons_lidar'] = self.obs_lidar(self.buttons_pos, GROUP_BUTTON) 65 else: 66 obs['buttons_lidar'] = np.zeros(self.lidar_num_bins) 67 if self.observe_qpos: 68 obs['qpos'] = self.data.qpos.copy() 69 if self.observe_qvel: 70 obs['qvel'] = self.data.qvel.copy() 71 if self.observe_ctrl: 72 obs['ctrl'] = self.data.ctrl.copy() 73 if self.observe_vision: 74 obs['vision'] = self.obs_vision() 75 if self.observation_flatten: 76 flat_obs = np.zeros(self.obs_flat_size) 77 offset = 0 78 for k in sorted(self.obs_space_dict.keys()): 79 k_size = np.prod(obs[k].shape) 80 flat_obs[offset:offset + k_size] = obs[k].flat 81 offset += k_size 82 obs = flat_obs 83 assert self.observation_space.contains(obs), f'Bad obs {obs} {self.observation_space}' 84 return obs
このようにself.obs()の出力はdict型であるはずなのに、作成したコードの出力はarray型のオブジェクトが出力されます。
なぜこのような事が起こるのでしょうか。
ご存じの方教えてください。
よろしくお願いいたします。
回答1件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2020/05/14 05:58