Module moog.env_wrappers.gym_wrapper
Wrapper to make object-oriented games conform to the OpenAI Gym interface.
Note: This wrapper does not inherit from abstract_wrapper.AbstractEnvironmentWrapper, because unlike other wrappers this one (intentionally) changes the API of the environment.
Classes
class GymWrapper (env)
-
Expand source code
class GymWrapper(object): """Wraps a object-oriented game environment into a Gym interface. Observations will be a dictionary, with the same keys as the 'observers' dict provided when constructing a object-oriented game environment. Rendering is always performed, so calling render() is a no-op. """ metadata = {'render.modes': ['rgb_array']} def __init__(self, env): self._env = env self._last_render = None self._action_space = None self._observation_space = None # Reset object-oriented to setup the observation_specs correctly self._env.reset() @property def observation_space(self): if self._observation_space is None: components = {} for key, value in self._env.observation_spec().items(): if value.dtype == np.uint8: components[key] = spaces.Box( 0, 255, value.shape, dtype=value.dtype) else: components[key] = spaces.Box( -np.inf, np.inf, value.shape, dtype=value.dtype) self._observation_space = spaces.Dict(components) return self._observation_space @property def action_space(self): if self._action_space is None: self._action_space = _spec_to_space(self._env.action_spec()) return self._action_space def _process_obs(self, obs): """Convert and processes observations.""" for k, v in obs.items(): obs[k] = np.asarray(v) if obs[k].dtype == bool: # Convert boolean 'success' into an float32 to predict it. obs[k] = obs[k].astype(np.float32) if k == 'image': self._last_render = obs[k] return obs def step(self, action): """Main step function for the environment. Args: action: Array R^4 Returns: obs: dict of observations. Follows from the 'renderers' configuration provided as parameters to object-oriented games. reward: scalar reward. done: True if terminal state. info: dict with extra information (e.g. discount factor). """ time_step = self._env.step(action) obs = self._process_obs(time_step.observation) reward = time_step.reward or 0 done = time_step.last() info = {'discount': time_step.discount} return obs, reward, done, info def reset(self): """Reset environment. 
Returns: obs: dict of observations. Follows from the 'renderers' configuration provided as parameters to object-oriented games. """ time_step = self._env.reset() return self._process_obs(time_step.observation) def render(self, mode='rgb_array'): """Render function, noop for compatibility. Args: mode: unused, always returns an RGB array. Returns: Last RGB observation (cached from last observation with key 'image'). """ del mode return self._last_render def close(self): """Unused.""" pass
Wraps an object-oriented game environment into a Gym interface.
Observations will be a dictionary, with the same keys as the 'observers' dict provided when constructing an object-oriented game environment. Rendering is always performed, so calling render() is a no-op.
Class variables
var metadata
Instance variables
prop action_space
-
Expand source code
@property
def action_space(self):
    """Gym action space, converted lazily from the wrapped env's action spec."""
    if self._action_space is not None:
        return self._action_space
    # First access: convert the spec once and memoize the result.
    self._action_space = _spec_to_space(self._env.action_spec())
    return self._action_space
prop observation_space
-
Expand source code
@property
def observation_space(self):
    """Gym observation space, built lazily from the env's observation spec."""
    if self._observation_space is None:
        boxes = {}
        for name, spec in self._env.observation_spec().items():
            # uint8 entries are image-like and bounded to the byte range;
            # everything else is unbounded.
            if spec.dtype == np.uint8:
                low, high = 0, 255
            else:
                low, high = -np.inf, np.inf
            boxes[name] = spaces.Box(low, high, spec.shape, dtype=spec.dtype)
        self._observation_space = spaces.Dict(boxes)
    return self._observation_space
Methods
def close(self)
-
Expand source code
def close(self): """Unused.""" pass
Unused.
def render(self, mode='rgb_array')
-
Expand source code
def render(self, mode='rgb_array'):
    """Render function, noop for compatibility.

    Args:
        mode: unused, always returns an RGB array.

    Returns:
        Last RGB observation (cached from last observation with key 'image').
    """
    # Rendering already happened while observations were processed; the
    # `mode` argument is deliberately ignored and only the cached frame
    # is returned.
    return self._last_render
Render function, noop for compatibility.
Args
mode
- unused, always returns an RGB array.
Returns
Last RGB observation (cached from last observation with key 'image').
def reset(self)
-
Expand source code
def reset(self): """Reset environment. Returns: obs: dict of observations. Follows from the 'renderers' configuration provided as parameters to object-oriented games. """ time_step = self._env.reset() return self._process_obs(time_step.observation)
Reset environment.
Returns
obs
- dict of observations. Follows from the 'renderers' configuration provided as parameters to object-oriented games.
def step(self, action)
-
Expand source code
def step(self, action):
    """Main step function for the environment.

    Args:
        action: Array R^4

    Returns:
        obs: dict of observations. Follows from the 'renderers' configuration
            provided as parameters to object-oriented games.
        reward: scalar reward.
        done: True if terminal state.
        info: dict with extra information (e.g. discount factor).
    """
    time_step = self._env.step(action)
    obs = self._process_obs(time_step.observation)
    # Explicit None check: `time_step.reward or 0` would coerce a
    # legitimate 0.0 reward to int 0, silently changing its type.
    reward = 0.0 if time_step.reward is None else time_step.reward
    done = time_step.last()
    info = {'discount': time_step.discount}
    return obs, reward, done, info
Main step function for the environment.
Args
action
- Array R^4
Returns
obs
- dict of observations. Follows from the 'renderers' configuration provided as parameters to object-oriented games.
reward
- scalar reward.
done
- True if terminal state.
info
- dict with extra information (e.g. discount factor).