Module moog.tasks.reset

Task for resetting environment upon a condition of the environment state.

Expand source code
"""Task for resetting environment upon a condition of the environment state."""

from . import abstract_task
import inspect
import numpy as np


class Reset(abstract_task.AbstractTask):
    """Reset task.

    This task resets the environment when (or some fixed number of steps after)
    the environment state meets a condition. It can be used for example to reset
    once all prey are gone.

    There is also an option to specify a reward (as a function of the state) to
    be computed when the condition is met.
    """

    def __init__(self, condition, reward_fn=None, steps_after_condition=np.inf):
        """Constructor.

        Args:
            condition: Function with one of the following signatures:
                    * state --> bool
                    * state, meta_state --> bool
                The bool is whether to reset.
            reward_fn: Reward function taking in state and returning scalar
                reward. Only called if condition(state) is True. If None, a
                function that always returns 0. is used.
            steps_after_condition: Int. Number of steps after condition is True
                to reset. With 0 the reset is signalled by the same reward()
                call in which the condition is met. With the default of
                infinity the countdown never reaches zero, so no reset is ever
                signalled and the condition (and reward_fn) is evaluated again
                on every reward() call.
        """
        # Normalise one-argument conditions to the (state, meta_state) form so
        # reward() can always call self._condition the same way.
        if len(inspect.signature(condition).parameters) == 1:
            self._condition = lambda state, meta_state: condition(state)
        else:
            self._condition = condition
        self._steps_after_condition = steps_after_condition

        if reward_fn is None:
            reward_fn = lambda _: 0.
        self._reward_fn = reward_fn

        # Start with the countdown disarmed so that reward() is usable even if
        # reset() has not been called yet; reset() restores this same value.
        self._steps_until_reset = np.inf

    def reset(self, state, meta_state):
        """Disarm the reset countdown.

        Args:
            state: Unused.
            meta_state: Unused.
        """
        # We reset to infinity, because self._steps_until_reset will be
        # decremented every time self.reward() is called, so is only set to a
        # finite value when the condition is met and reset is imminent.
        self._steps_until_reset = np.inf

    def reward(self, state, meta_state, step_count):
        """Compute reward.

        Args:
            state: Passed to the condition and to reward_fn.
            meta_state: Passed to the condition.
            step_count: Unused.

        Returns:
            reward: reward_fn(state) on a call where the countdown is disarmed
                and the condition holds, otherwise 0.
            should_reset: True once the countdown has dropped below zero.
        """
        del step_count
        # The condition is only consulted while the countdown is disarmed
        # (infinite); once armed, it just ticks down.
        if (self._steps_until_reset == np.inf and
                self._condition(state, meta_state)):
            reward = self._reward_fn(state)
            self._steps_until_reset = self._steps_after_condition
        else:
            reward = 0.

        # Decrementing infinity leaves it at infinity, so a disarmed countdown
        # never triggers a reset.
        self._steps_until_reset -= 1
        should_reset = self._steps_until_reset < 0

        return reward, should_reset

Classes

class Reset (condition, reward_fn=None, steps_after_condition=inf)

Reset task.

This task resets the environment when (or some fixed number of steps after) the environment state meets a condition. It can be used for example to reset once all prey are gone.

There is also an option to specify a reward (as a function of the state) to be computed when the condition is met.

Constructor.

Args

condition
Function with one of the following signatures: `state -> bool` or `state, meta_state -> bool`. The returned bool indicates whether to reset.
reward_fn
Reward function taking in state and returning scalar reward. Only called if condition(state) is True.
steps_after_condition
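Int. Number of steps to wait after the condition becomes True before resetting. A value of 0 resets on the same step the condition is met; with the default of infinity the task never signals a reset.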
Expand source code
class Reset(abstract_task.AbstractTask):
    """Reset task.

    This task resets the environment when (or some fixed number of steps after)
    the environment state meets a condition. It can be used for example to reset
    once all prey are gone.

    There is also an option to specify a reward (as a function of the state) to
    be computed when the condition is met.
    """

    def __init__(self, condition, reward_fn=None, steps_after_condition=np.inf):
        """Constructor.

        Args:
            condition: Function with one of the following signatures:
                    * state --> bool
                    * state, meta_state --> bool
                The bool is whether to reset.
            reward_fn: Reward function taking in state and returning scalar
                reward. Only called if condition(state) is True. If None, a
                function that always returns 0. is used.
            steps_after_condition: Int. Number of steps after condition is True
                to reset. With 0 the reset is signalled by the same reward()
                call in which the condition is met. With the default of
                infinity the countdown never reaches zero, so no reset is ever
                signalled and the condition (and reward_fn) is evaluated again
                on every reward() call.
        """
        # Normalise one-argument conditions to the (state, meta_state) form so
        # reward() can always call self._condition the same way.
        if len(inspect.signature(condition).parameters) == 1:
            self._condition = lambda state, meta_state: condition(state)
        else:
            self._condition = condition
        self._steps_after_condition = steps_after_condition

        if reward_fn is None:
            reward_fn = lambda _: 0.
        self._reward_fn = reward_fn

        # Start with the countdown disarmed so that reward() is usable even if
        # reset() has not been called yet; reset() restores this same value.
        self._steps_until_reset = np.inf

    def reset(self, state, meta_state):
        """Disarm the reset countdown.

        Args:
            state: Unused.
            meta_state: Unused.
        """
        # We reset to infinity, because self._steps_until_reset will be
        # decremented every time self.reward() is called, so is only set to a
        # finite value when the condition is met and reset is imminent.
        self._steps_until_reset = np.inf

    def reward(self, state, meta_state, step_count):
        """Compute reward.

        Args:
            state: Passed to the condition and to reward_fn.
            meta_state: Passed to the condition.
            step_count: Unused.

        Returns:
            reward: reward_fn(state) on a call where the countdown is disarmed
                and the condition holds, otherwise 0.
            should_reset: True once the countdown has dropped below zero.
        """
        del step_count
        # The condition is only consulted while the countdown is disarmed
        # (infinite); once armed, it just ticks down.
        if (self._steps_until_reset == np.inf and
                self._condition(state, meta_state)):
            reward = self._reward_fn(state)
            self._steps_until_reset = self._steps_after_condition
        else:
            reward = 0.

        # Decrementing infinity leaves it at infinity, so a disarmed countdown
        # never triggers a reset.
        self._steps_until_reset -= 1
        should_reset = self._steps_until_reset < 0

        return reward, should_reset

Ancestors

Methods

def reward(self, state, meta_state, step_count)

Compute reward.

Expand source code
def reward(self, state, meta_state, step_count):
    """Return the reward for this step and whether a reset is due.

    The condition is checked only while the countdown is still at infinity.
    When it holds, the reward function is evaluated on the state and the
    countdown is armed with the configured number of steps. The countdown is
    decremented on every call, and a reset is flagged once it is negative.
    """
    del step_count

    step_reward = 0.
    countdown_disarmed = self._steps_until_reset == np.inf
    # Short-circuit keeps the condition from being called once armed.
    if countdown_disarmed and self._condition(state, meta_state):
        step_reward = self._reward_fn(state)
        self._steps_until_reset = self._steps_after_condition

    self._steps_until_reset -= 1
    return step_reward, self._steps_until_reset < 0

Inherited members