# Source code for grid2op.Reward.BaseReward

"""
This module implements some utilities to get rewards given an :class:`grid2op.BaseAction`, an :class:`grid2op.Environment`
and some associated context (like has there been an error etc.)

It is possible to modify the reward in use, either to better suit a training scheme or to better take into account
some phenomenon, by simulating the effect of some :class:`grid2op.BaseAction` using :func:`grid2op.BaseObservation.simulate`.
Doing so only requires deriving the :class:`BaseReward` class, most notably implementing the three methods
:func:`BaseReward.__init__`, :func:`BaseReward.initialize` and :func:`BaseReward.__call__`
(an illustrative sketch is given at the end of this file).

"""
from abc import ABC, abstractmethod


class BaseReward(ABC):
    """
    Base class from which all rewards used in the Grid2Op framework should be derived.

    In reinforcement learning, a reward is a signal sent by the :class:`grid2op.Environment` to the
    :class:`grid2op.BaseAgent` indicating how well this agent performs.

    One of the goals of Reinforcement Learning is to maximize the (discounted) sum of (expected) rewards
    over time.

    Attributes
    ----------
    reward_min: ``float``
        The minimum reward an :class:`grid2op.BaseAgent` can get performing the worst possible
        :class:`grid2op.BaseAction` in the worst possible scenario.

    reward_max: ``float``
        The maximum reward an :class:`grid2op.BaseAgent` can get performing the best possible
        :class:`grid2op.BaseAction` in the best possible scenario.

    """
    @abstractmethod
    def __init__(self):
        """
        Initializes :attr:`BaseReward.reward_min` and :attr:`BaseReward.reward_max`

        """
        self.reward_min = 0
        self.reward_max = 0
    def initialize(self, env):
        """
        If :attr:`BaseReward.reward_min`, :attr:`BaseReward.reward_max` or other custom attributes
        require a valid :class:`grid2op.Environment.Environment` to be initialized, this should be
        done in this method.

        Parameters
        ----------
        env: :class:`grid2op.Environment.Environment`
            An environment instance properly initialized.

        Returns
        -------
        ``None``

        """
        pass
    @abstractmethod
    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        """
        Method called to compute the reward.

        Parameters
        ----------
        action: :class:`grid2op.Action.Action`
            BaseAction that has been submitted by the :class:`grid2op.BaseAgent`

        env: :class:`grid2op.Environment.Environment`
            An environment instance properly initialized.

        has_error: ``bool``
            Has there been an error, for example has a :class:`grid2op.DivergingPowerFlow` been
            thrown when the action has been implemented in the environment.

        is_done: ``bool``
            Is the episode over (either because the agent has reached the end, or because there has
            been a game over)

        is_illegal: ``bool``
            Has the action submitted by the BaseAgent raised an
            :class:`grid2op.Exceptions.IllegalAction` exception. In this case it has been
            overridden by "do nothing" by the environment.

        is_ambiguous: ``bool``
            Has the action submitted by the BaseAgent raised an
            :class:`grid2op.Exceptions.AmbiguousAction` exception. In this case it has been
            overridden by "do nothing" by the environment.

        Returns
        -------
        res: ``float``
            The reward associated to the input parameters.

        """
        pass
    def get_range(self):
        """
        Shorthand to retrieve both the minimum and maximum possible rewards in one command.

        It is not recommended to override this function.

        Returns
        -------
        reward_min: ``float``
            The minimum reward, see :attr:`BaseReward.reward_min`

        reward_max: ``float``
            The maximum reward, see :attr:`BaseReward.reward_max`

        """
        return self.reward_min, self.reward_max
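

# The class below is an illustrative sketch only and is NOT part of the original grid2op module:
# it shows one way a concrete reward could be derived from BaseReward, implementing the three
# methods described in the module docstring. The class name and the chosen reward values are
# assumptions made for this example.
class _ExampleFlatReward(BaseReward):
    """Hypothetical reward: minimal penalty when the step failed, maximal bonus otherwise."""

    def __init__(self):
        BaseReward.__init__(self)
        # bounds reported by get_range()
        self.reward_min = -1.0
        self.reward_max = 1.0

    def initialize(self, env):
        # nothing environment-specific is needed for this simple reward
        pass

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        # penalize any step that ended in an error, or whose action was replaced by "do nothing"
        if has_error or is_illegal or is_ambiguous:
            return self.reward_min
        return self.reward_max


# Usage sketch (also hypothetical): once instantiated, the reward bounds can be queried with
#     reward = _ExampleFlatReward()
#     r_min, r_max = reward.get_range()  # (-1.0, 1.0)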