Module moog.env_wrappers.gym_wrapper

Wrapper to make object-oriented games conform to the OpenAI Gym interface.

Note: This wrapper does not inherit from abstract_wrapper.AbstractEnvironmentWrapper, because unlike other wrappers this one (intentionally) changes the API of the environment.
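Concretely, the underlying environment's step() returns a TimeStep-style object (exposing .observation, .reward, .discount and .last()), while the wrapper converts this into the familiar Gym 0.x tuple. A sketch of the difference, using moog_env and gym_env as illustrative names:

time_step = moog_env.step(action)               # TimeStep-style result
obs, reward, done, info = gym_env.step(action)  # Gym-style result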

Classes

class GymWrapper (env)
import numpy as np
from gym import spaces

class GymWrapper(object):
    """Wraps a object-oriented game environment into a Gym interface.

    Observations will be a dictionary, with the same keys as the 'observers'
    dict provided when constructing a object-oriented game environment.
    Rendering is always performed, so calling render() is a no-op.
    """
    metadata = {'render.modes': ['rgb_array']}

    def __init__(self, env):
        self._env = env
        self._last_render = None
        self._action_space = None
        self._observation_space = None

        # Reset the environment so that observation_spec() is set up correctly.
        self._env.reset()

    @property
    def observation_space(self):
        if self._observation_space is None:
            components = {}
            for key, value in self._env.observation_spec().items():
                # uint8 observations (e.g. rendered images) are pixel-valued,
                # so bound them to [0, 255]; other dtypes get unbounded Boxes.
                if value.dtype == np.uint8:
                    components[key] = spaces.Box(
                        0, 255, value.shape, dtype=value.dtype)
                else:
                    components[key] = spaces.Box(
                        -np.inf, np.inf, value.shape, dtype=value.dtype)
            self._observation_space = spaces.Dict(components)
        return self._observation_space

    @property
    def action_space(self):
        if self._action_space is None:
            # _spec_to_space is a module-level helper that converts the
            # environment's action spec into the corresponding Gym space.
            self._action_space = _spec_to_space(self._env.action_spec())
        return self._action_space

    def _process_obs(self, obs):
        """Convert and processes observations."""
        for k, v in obs.items():
            obs[k] = np.asarray(v)
            if obs[k].dtype == bool:
                # Convert boolean observations (e.g. 'success') to float32 so
                # they can be predicted by models.
                obs[k] = obs[k].astype(np.float32)
            if k == 'image':
                self._last_render = obs[k]

        return obs

    def step(self, action):
        """Main step function for the environment.

        Args:
            action: Action array in R^4, conforming to the environment's
                action_spec().

        Returns:
            obs: dict of observations, with keys given by the 'observers'
                configuration provided when constructing the object-oriented
                game environment.
            reward: scalar reward.
            done: True if terminal state.
            info: dict with extra information (e.g. discount factor).
        """
        time_step = self._env.step(action)
        obs = self._process_obs(time_step.observation)
        # time_step.reward is None on the first TimeStep; coerce it to 0.
        reward = time_step.reward or 0
        done = time_step.last()
        info = {'discount': time_step.discount}
        return obs, reward, done, info

    def reset(self):
        """Reset environment.

        Returns:
            obs: dict of observations, with keys given by the 'observers'
                configuration provided when constructing the object-oriented
                game environment.
        """
        time_step = self._env.reset()
        return self._process_obs(time_step.observation)

    def render(self, mode='rgb_array'):
        """Render function, noop for compatibility.

        Args:
            mode: unused, always returns an RGB array.

        Returns:
            Last RGB observation (cached from last observation with key
                'image').
        """
        del mode
        return self._last_render

    def close(self):
        """Unused."""
        pass

Wraps an object-oriented game environment into a Gym interface.

Observations will be a dictionary with the same keys as the 'observers' dict provided when constructing an object-oriented game environment. Rendering is always performed, so calling render() is a no-op.
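A minimal usage sketch; moog_env stands in for an object-oriented game environment constructed elsewhere (its construction is outside the scope of this module):

from moog.env_wrappers import gym_wrapper

env = gym_wrapper.GymWrapper(moog_env)  # moog_env: hypothetical, built elsewhere
obs = env.reset()                       # dict keyed like the 'observers' config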

Class variables

var metadata
Gym metadata dict; 'rgb_array' is the only supported render mode.

Instance variables

prop action_space
The Gym action space, constructed lazily from the wrapped environment's action_spec() via the module-level _spec_to_space helper.

prop observation_space
The Gym observation space, constructed lazily as a spaces.Dict containing one spaces.Box per key in the wrapped environment's observation_spec(). uint8 specs (such as rendered images) map to Box(0, 255, ...); all other dtypes map to unbounded Boxes.
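For illustration, a wrapped environment whose observation_spec() contains a uint8 'image' entry of shape (64, 64, 3) and a float32 'state' entry of shape (4,) (names and shapes assumed for the example) exposes spaces along these lines:

print(env.observation_space)
# Dict('image': Box(0, 255, (64, 64, 3), uint8),
#      'state': Box(-inf, inf, (4,), float32))
print(env.action_space)  # built by _spec_to_space from env.action_spec()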

Methods

def close(self)

Unused.

def render(self, mode='rgb_array')

Render function; a no-op kept for Gym API compatibility.

Args

mode
unused, always returns an RGB array.

Returns

Last RGB observation (cached from last observation with key 'image').
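Since every step caches the 'image' observation, render() is effectively a frame getter; a short sketch:

obs, reward, done, info = env.step(env.action_space.sample())
frame = env.render()  # the cached obs['image'] array from the last step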

def reset(self)

Reset environment.

Returns

obs
dict of observations, with keys given by the 'observers' configuration provided when constructing the object-oriented game environment.

def step(self, action)

Main step function for the environment.

Args

action
Action array in R^4; must conform to the wrapped environment's action_spec().

Returns

obs
dict of observations, with keys given by the 'observers' configuration provided when constructing the object-oriented game environment.
reward
scalar reward.
done
True if terminal state.
info
dict with extra information (e.g. discount factor).
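
A minimal episode loop under this Gym 0.x step() contract, using a random policy purely for illustration:

obs = env.reset()
done = False
episode_return = 0.0
while not done:
    action = env.action_space.sample()  # random policy, for illustration
    obs, reward, done, info = env.step(action)
    episode_return += reward
print('return:', episode_return, 'final discount:', info['discount'])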