reward_function_base

`BaseRewardFunction`

Bases: Registerable

Base RewardFunction class. Reward-specific reset and get_reward methods are implemented in subclasses.

Source code in omnigibson/reward_functions/reward_function_base.py

class BaseRewardFunction(Registerable, metaclass=ABCMeta):
    """
    Abstract template for reward functions.

    Subclasses provide the actual reward computation by overriding _step(). The output of the most recent
    step() call is cached on the instance and exposed through the reward and info properties; reset()
    clears that cache.
    """

    def __init__(self):
        # Cached outputs of the latest step(); both stay None until step() has been called
        self._reward = self._info = None

    @abstractmethod
    def _step(self, task, env, action):
        """
        Compute the reward for the current timestep. Must be overridden by subclasses.

        Args:
            task (BaseTask): Task instance
            env (Environment): Environment instance
            action (n-array): 1D flattened array of actions executed by all agents in the environment

        Returns:
            2-tuple:
                - float: computed reward
                - dict: any reward-related information for this specific reward
        """
        raise NotImplementedError()

    def step(self, task, env, action):
        """
        Run the subclass-specific _step(), cache its output, and return it.

        Args:
            task (BaseTask): Task instance
            env (Environment): Environment instance
            action (n-array): 1D flattened array of actions executed by all agents in the environment

        Returns:
            2-tuple:
                - float: computed reward
                - dict: deep copy of the reward-related information for this specific reward
        """
        # Delegate the computation to the subclass, then remember the result for the properties
        reward, info = self._step(task=task, env=env, action=action)
        self._reward = reward
        self._info = info

        # Hand back a deep copy of the info so the caller cannot mutate the cached dict
        return reward, deepcopy(info)

    def reset(self, task, env):
        """
        Clear the cached reward and info from the previous step() call.

        Args:
            task (BaseTask): Task instance (unused by this base implementation)
            env (Environment): Environment instance (unused by this base implementation)
        """
        self._reward, self._info = None, None

    @property
    def reward(self):
        """
        Returns:
            float: Reward cached by the most recent step() call
        """
        cached_reward = self._reward
        assert cached_reward is not None, "At least one step() must occur before reward can be calculated!"
        return cached_reward

    @property
    def info(self):
        """
        Returns:
            dict: Info cached by the most recent step() call (the stored object itself, not a copy)
        """
        cached_info = self._info
        assert cached_info is not None, "At least one step() must occur before info can be calculated!"
        return cached_info

    @classproperty
    def _do_not_register_classes(cls):
        # This class is only an abstract template, so add it to the parent's exclusion set
        excluded = super()._do_not_register_classes
        excluded.add("BaseRewardFunction")
        return excluded

    @classproperty
    def _cls_registry(cls):
        # Module-level registry shared by all reward functions (read-only lookup, so no `global` needed)
        return REGISTERED_REWARD_FUNCTIONS

`info` `property`

Returns:

Type	Description
`dict`	Current info for this reward function

`reward` `property`

Returns:

Type	Description
`float`	Current reward for this reward function

`reset(task, env)`

Reward function-specific reset

Parameters:

Name	Type	Description	Default
`task`	`BaseTask`	Task instance	required
`env`	`Environment`	Environment instance	required

Source code in omnigibson/reward_functions/reward_function_base.py

def reset(self, task, env):
    """
    Clear the cached reward and info from the previous step() call.

    Args:
        task (BaseTask): Task instance (unused by this base implementation)
        env (Environment): Environment instance (unused by this base implementation)
    """
    # Both cached values go back to their "nothing computed yet" state
    self._reward = self._info = None

`step(task, env, action)`

Step the reward function and compute the reward at the current timestep.

Parameters:

Name	Type	Description	Default
`task`	`BaseTask`	Task instance	required
`env`	`Environment`	Environment instance	required
`action`	`n-array`	1D flattened array of actions executed by all agents in the environment	required

Returns:

Type	Description
`2-tuple`	float: computed reward; dict: any reward-related information for this specific reward

Source code in omnigibson/reward_functions/reward_function_base.py

def step(self, task, env, action):
    """
    Run the subclass-specific _step(), cache its output, and return it.

    Args:
        task (BaseTask): Task instance
        env (Environment): Environment instance
        action (n-array): 1D flattened array of actions executed by all agents in the environment

    Returns:
        2-tuple:
            - float: computed reward
            - dict: deep copy of the reward-related information for this specific reward
    """
    # Delegate the computation, then remember the result on the instance
    reward, info = self._step(task=task, env=env, action=action)
    self._reward = reward
    self._info = info

    # Hand back a deep copy of the info so the caller cannot mutate the cached dict
    return reward, deepcopy(info)

reward_function_base

BaseRewardFunction

info property

reward property

reset(task, env)

step(task, env, action)

`BaseRewardFunction`

`info` `property`

`reward` `property`

`reset(task, env)`

`step(task, env, action)`