Module moog.env_wrappers.gym_wrapper
Wrapper to make object-oriented games conform to the OpenAI Gym interface.
Note: This wrapper does not inherit from abstract_wrapper.AbstractEnvironmentWrapper, because unlike other wrappers this one (intentionally) changes the API of the environment.
Expand source code
# Copyright 2019 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Wrapper to make object-oriented games conform to the OpenAI Gym interface.
Note: This wrapper does not inherit from
abstract_wrapper.AbstractEnvironmentWrapper, because unlike other wrappers this
one (intentionally) changes the API of the environment.
"""
from dm_env import specs
from gym import spaces
import numpy as np
def _spec_to_space(spec):
    """Convert dm_env.specs to gym.Spaces.

    Args:
        spec: A specs.DiscreteArray, a specs.BoundedArray, or a list, tuple
            or dict of such specs (containers may be nested).

    Returns:
        The matching gym space: Discrete for a DiscreteArray, Box for a
        BoundedArray, Tuple for a list/tuple and Dict for a dict.

    Raises:
        ValueError: If spec is of an unsupported type.
    """
    if isinstance(spec, (list, tuple)):
        return spaces.Tuple([_spec_to_space(s) for s in spec])
    if isinstance(spec, dict):
        # Convert every entry recursively instead of silently returning None.
        return spaces.Dict(
            {key: _spec_to_space(value) for key, value in spec.items()})
    # DiscreteArray is checked before BoundedArray, as in the original chain,
    # so that discrete specs never fall through to the Box branch.
    if isinstance(spec, specs.DiscreteArray):
        return spaces.Discrete(spec.num_values)
    if isinstance(spec, specs.BoundedArray):
        if spec.minimum.size == 1 and spec.maximum.size == 1:
            # Scalar bounds: let Box broadcast them over the whole shape.
            return spaces.Box(
                spec.minimum.item(),
                spec.maximum.item(),
                shape=spec.shape,
                dtype=spec.dtype)
        # Per-element bounds: .item() would raise here, so expand the bounds
        # to the full shape and let Box infer its shape from them.
        low = np.broadcast_to(spec.minimum, spec.shape).copy()
        high = np.broadcast_to(spec.maximum, spec.shape).copy()
        return spaces.Box(low, high, dtype=spec.dtype)
    raise ValueError('Unknown type for specs: {}'.format(spec))
class GymWrapper(object):
    """Wraps an object-oriented game environment into a Gym interface.

    Observations will be a dictionary, with the same keys as the 'observers'
    dict provided when constructing an object-oriented game environment.

    Rendering is always performed, so calling render() is a no-op.
    """

    metadata = {'render.modes': ['rgb_array']}

    def __init__(self, env):
        """Constructor.

        Args:
            env: Environment to wrap. The wrapper calls its reset(),
                step(action), action_spec() and observation_spec() methods.
        """
        self._env = env
        self._last_render = None
        self._action_space = None
        self._observation_space = None

        # Reset the wrapped environment to set up the observation specs
        # correctly.
        self._env.reset()

    @property
    def observation_space(self):
        """Gym Dict space with one unbounded Box per observation key.

        Built lazily from the wrapped environment's observation_spec() and
        cached afterwards.
        """
        if self._observation_space is None:
            components = {}
            for key, value in self._env.observation_spec().items():
                dtype = np.dtype(value.dtype)
                if dtype == bool:
                    # _process_obs() converts boolean observations to
                    # float32, so declare the dtype that is really emitted.
                    dtype = np.dtype(np.float32)
                components[key] = spaces.Box(
                    -np.inf, np.inf, value.shape, dtype=dtype)
            self._observation_space = spaces.Dict(components)
        return self._observation_space

    @property
    def action_space(self):
        """Gym space converted from the wrapped environment's action_spec().

        Built lazily and cached afterwards.
        """
        if self._action_space is None:
            self._action_space = _spec_to_space(self._env.action_spec())
        return self._action_space

    def _process_obs(self, obs):
        """Convert and process observations.

        Every value is converted to a numpy array, boolean arrays are cast to
        float32, and the entry under the key 'image' is cached for render().

        Args:
            obs: dict of raw observations from the wrapped environment.

        Returns:
            A new dict with the same keys and the converted values. The input
            dict is left untouched.
        """
        processed = {}
        for key, value in obs.items():
            array = np.asarray(value)
            if array.dtype == bool:
                # Convert boolean 'success' into a float32 to predict it.
                array = array.astype(np.float32)
            if key == 'image':
                self._last_render = array
            processed[key] = array
        return processed

    def step(self, action):
        """Main step function for the environment.

        Args:
            action: Action passed unchanged to the wrapped environment's
                step().

        Returns:
            obs: dict of processed observations.
            reward: reward of the time step, or 0 when the time step carries
                no reward (None).
            done: True if terminal state.
            info: dict with extra information (the discount factor, under the
                key 'discount').
        """
        time_step = self._env.step(action)
        obs = self._process_obs(time_step.observation)
        # Explicit None check: `reward or 0` raises on array-valued rewards.
        reward = 0 if time_step.reward is None else time_step.reward
        done = time_step.last()
        info = {'discount': time_step.discount}
        return obs, reward, done, info

    def reset(self):
        """Reset environment.

        Returns:
            obs: dict of processed observations from the first time step.
        """
        time_step = self._env.reset()
        return self._process_obs(time_step.observation)

    def render(self, mode='rgb_array'):
        """Render function, noop for compatibility.

        Args:
            mode: unused, always returns an RGB array.

        Returns:
            Last RGB observation (cached from last observation with key
            'image'), or None if no such observation has been processed yet.
        """
        del mode
        return self._last_render

    def close(self):
        """Unused."""
        pass
Classes
class GymWrapper (env)
-
Wraps an object-oriented game environment into a Gym interface.
Observations will be a dictionary, with the same keys as the 'observers' dict provided when constructing an object-oriented game environment. Rendering is always performed, so calling render() is a no-op.
Expand source code
class GymWrapper(object):
    """Wraps an object-oriented game environment into a Gym interface.

    Observations will be a dictionary, with the same keys as the 'observers'
    dict provided when constructing an object-oriented game environment.

    Rendering is always performed, so calling render() is a no-op.
    """

    metadata = {'render.modes': ['rgb_array']}

    def __init__(self, env):
        """Constructor.

        Args:
            env: Environment to wrap. The wrapper calls its reset(),
                step(action), action_spec() and observation_spec() methods.
        """
        self._env = env
        self._last_render = None
        self._action_space = None
        self._observation_space = None

        # Reset the wrapped environment to set up the observation specs
        # correctly.
        self._env.reset()

    @property
    def observation_space(self):
        """Gym Dict space with one unbounded Box per observation key.

        Built lazily from the wrapped environment's observation_spec() and
        cached afterwards.
        """
        if self._observation_space is None:
            components = {}
            for key, value in self._env.observation_spec().items():
                dtype = np.dtype(value.dtype)
                if dtype == bool:
                    # _process_obs() converts boolean observations to
                    # float32, so declare the dtype that is really emitted.
                    dtype = np.dtype(np.float32)
                components[key] = spaces.Box(
                    -np.inf, np.inf, value.shape, dtype=dtype)
            self._observation_space = spaces.Dict(components)
        return self._observation_space

    @property
    def action_space(self):
        """Gym space converted from the wrapped environment's action_spec().

        Built lazily and cached afterwards.
        """
        if self._action_space is None:
            self._action_space = _spec_to_space(self._env.action_spec())
        return self._action_space

    def _process_obs(self, obs):
        """Convert and process observations.

        Every value is converted to a numpy array, boolean arrays are cast to
        float32, and the entry under the key 'image' is cached for render().

        Args:
            obs: dict of raw observations from the wrapped environment.

        Returns:
            A new dict with the same keys and the converted values. The input
            dict is left untouched.
        """
        processed = {}
        for key, value in obs.items():
            array = np.asarray(value)
            if array.dtype == bool:
                # Convert boolean 'success' into a float32 to predict it.
                array = array.astype(np.float32)
            if key == 'image':
                self._last_render = array
            processed[key] = array
        return processed

    def step(self, action):
        """Main step function for the environment.

        Args:
            action: Action passed unchanged to the wrapped environment's
                step().

        Returns:
            obs: dict of processed observations.
            reward: reward of the time step, or 0 when the time step carries
                no reward (None).
            done: True if terminal state.
            info: dict with extra information (the discount factor, under the
                key 'discount').
        """
        time_step = self._env.step(action)
        obs = self._process_obs(time_step.observation)
        # Explicit None check: `reward or 0` raises on array-valued rewards.
        reward = 0 if time_step.reward is None else time_step.reward
        done = time_step.last()
        info = {'discount': time_step.discount}
        return obs, reward, done, info

    def reset(self):
        """Reset environment.

        Returns:
            obs: dict of processed observations from the first time step.
        """
        time_step = self._env.reset()
        return self._process_obs(time_step.observation)

    def render(self, mode='rgb_array'):
        """Render function, noop for compatibility.

        Args:
            mode: unused, always returns an RGB array.

        Returns:
            Last RGB observation (cached from last observation with key
            'image'), or None if no such observation has been processed yet.
        """
        del mode
        return self._last_render

    def close(self):
        """Unused."""
        pass
Class variables
var metadata
Instance variables
var action_space
-
Expand source code
@property
def action_space(self):
    """Gym action space of the wrapped environment.

    Converted from the wrapped environment's action_spec() on first access
    and served from the cache afterwards.
    """
    cached_space = self._action_space
    if cached_space is not None:
        return cached_space
    self._action_space = _spec_to_space(self._env.action_spec())
    return self._action_space
var observation_space
-
Expand source code
@property
def observation_space(self):
    """Gym Dict space with one unbounded Box per observation key.

    Built lazily from the wrapped environment's observation_spec() and
    cached afterwards.
    """
    if self._observation_space is None:
        components = {}
        for key, value in self._env.observation_spec().items():
            dtype = np.dtype(value.dtype)
            if dtype == bool:
                # _process_obs() converts boolean observations to float32,
                # so declare the dtype that is really emitted.
                dtype = np.dtype(np.float32)
            components[key] = spaces.Box(
                -np.inf, np.inf, value.shape, dtype=dtype)
        self._observation_space = spaces.Dict(components)
    return self._observation_space
Methods
def close(self)
-
Unused.
Expand source code
def close(self):
    """No-op: this method does nothing and returns None."""
    return None
def render(self, mode='rgb_array')
-
Render function, noop for compatibility.
Args
mode
- unused, always returns an RGB array.
Returns
Last RGB observation (cached from last observation with key 'image').
Expand source code
def render(self, mode='rgb_array'):
    """Return the most recently cached frame; no rendering happens here.

    Args:
        mode: ignored, accepted for compatibility only.

    Returns:
        The value stored in self._last_render (the last observation seen
        under the key 'image').
    """
    # The argument is dropped on purpose; only the cached frame matters.
    del mode
    last_frame = self._last_render
    return last_frame
def reset(self)
-
Reset environment.
Returns
obs
- dict of observations. Follows from the 'renderers' configuration provided as parameters to object-oriented games.
Expand source code
def reset(self):
    """Reset the wrapped environment and return its first observation.

    Returns:
        obs: dict of observations from the first time step, after being
            passed through self._process_obs().
    """
    first_step = self._env.reset()
    raw_observation = first_step.observation
    return self._process_obs(raw_observation)
def step(self, action)
-
Main step function for the environment.
Args
action
- Array R^4
Returns
obs
- dict of observations. Follows from the 'renderers' configuration provided as parameters to object-oriented games.
reward
- scalar reward.
done
- True if terminal state.
info
- dict with extra information (e.g. discount factor).
Expand source code
def step(self, action):
    """Main step function for the environment.

    Args:
        action: Action passed unchanged to the wrapped environment's step().

    Returns:
        obs: dict of observations, after being passed through
            self._process_obs().
        reward: reward of the time step, or 0 when the time step carries no
            reward (None).
        done: True if terminal state.
        info: dict with extra information (the discount factor, under the
            key 'discount').
    """
    time_step = self._env.step(action)
    obs = self._process_obs(time_step.observation)
    # Explicit None check: `reward or 0` raises on array-valued rewards.
    reward = 0 if time_step.reward is None else time_step.reward
    done = time_step.last()
    info = {'discount': time_step.discount}
    return obs, reward, done, info