# Source code for grid2op.Reward.BaseReward
"""
This module implements some utilities to get rewards given an :class:`grid2op.BaseAction`, an :class:`grid2op.Environment`,
and some associated context (like has there been an error etc.)
It is possible to modify the reward to use to better suit a training scheme, or to better take into account
some phenomenon by simulating the effect of some :class:`grid2op.BaseAction` using :func:`grid2op.BaseObservation.simulate`.
Doing so only requires to derive the :class:`BaseReward`, and most notably the three abstract methods
:func:`BaseReward.__init__`, :func:`BaseReward.initialize` and :func:`BaseReward.__call__`
"""
from abc import ABC, abstractmethod
class BaseReward(ABC):
    """
    Base class from which all rewards used in the Grid2Op framework should be derived.

    In reinforcement learning, a reward is a signal sent by the :class:`grid2op.Environment` to the
    :class:`grid2op.BaseAgent` indicating how well this agent performs.

    One of the goals of Reinforcement Learning is to maximize the (discounted) sum of
    (expected) rewards over time.

    Attributes
    ----------
    reward_min: ``float``
        The minimum reward an :class:`grid2op.BaseAgent` can get performing the worst possible
        :class:`grid2op.BaseAction` in the worst possible scenario.

    reward_max: ``float``
        The maximum reward an :class:`grid2op.BaseAgent` can get performing the best possible
        :class:`grid2op.BaseAction` in the best possible scenario.
    """

    @abstractmethod
    def __init__(self):
        """
        Initializes :attr:`BaseReward.reward_min` and :attr:`BaseReward.reward_max`.

        Subclasses are expected to override these bounds with the actual range their
        reward can take.
        """
        self.reward_min = 0
        self.reward_max = 0

    def initialize(self, env):
        """
        If :attr:`BaseReward.reward_min`, :attr:`BaseReward.reward_max` or other custom attributes
        require a valid :class:`grid2op.Environment.Environment` to be initialized, this should be
        done in this method.

        The default implementation does nothing.

        Parameters
        ----------
        env: :class:`grid2op.Environment.Environment`
            An environment instance properly initialized.

        Returns
        -------
        ``None``
        """
        pass

    @abstractmethod
    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        """
        Method called to compute the reward.

        Parameters
        ----------
        action: :class:`grid2op.Action.Action`
            BaseAction that has been submitted by the :class:`grid2op.BaseAgent`

        env: :class:`grid2op.Environment.Environment`
            An environment instance properly initialized.

        has_error: ``bool``
            Has there been an error, for example a :class:`grid2op.DivergingPowerFlow` being thrown,
            when the action has been implemented in the environment.

        is_done: ``bool``
            Is the episode over (either because the agent has reached the end, or because there has
            been a game over).

        is_illegal: ``bool``
            Has the action submitted by the BaseAgent raised an
            :class:`grid2op.Exceptions.IllegalAction` exception. In this case it has been
            overridden by "do nothing" by the environment.

        is_ambiguous: ``bool``
            Has the action submitted by the BaseAgent raised an
            :class:`grid2op.Exceptions.AmbiguousAction` exception. In this case it has been
            overridden by "do nothing" by the environment.

        Returns
        -------
        res: ``float``
            The reward associated to the input parameters.
        """
        pass

    def get_range(self):
        """
        Shorthand to retrieve both the minimum and maximum possible rewards in one command.

        It is not recommended to override this function.

        Returns
        -------
        reward_min: ``float``
            The minimum reward, see :attr:`BaseReward.reward_min`

        reward_max: ``float``
            The maximum reward, see :attr:`BaseReward.reward_max`
        """
        return self.reward_min, self.reward_max