Source code for grid2op.Reward.EconomicReward
import numpy as np
from abc import ABC, abstractmethod
from grid2op.Exceptions import Grid2OpException
from grid2op.Reward.BaseReward import BaseReward
class EconomicReward(BaseReward):
    """
    Reward derived from the generation cost of the powergrid.

    As RL is about maximising a reward while we want to minimise the cost, the reward is computed as
    ``worst_cost - cost`` where:

    - ``cost`` is the sum over generators of ``prod_p * gen_cost_per_MW`` for the current observation,
    - ``worst_cost`` is the sum over generators of ``gen_cost_per_MW * gen_pmax``, computed once in
      :func:`EconomicReward.initialize`.

    Consequently:

    - the reward decreases linearly as the cost increases (the higher the reward, the lower the economic cost);
    - the reward is meant to be non-negative when there is no error (presumably this relies on each generator
      producing at most its ``gen_pmax`` and on costs being non-negative -- to be confirmed against the
      environment);
    - when the step has an error, or the action is illegal or ambiguous, the reward is ``reward_min * 0.5``.
    """

    def __init__(self):
        """Create the reward; the bounds and the worst-case cost stay ``None`` until :func:`initialize` is called."""
        BaseReward.__init__(self)
        self.reward_min = None
        self.reward_max = None
        self.worst_cost = None

    def initialize(self, env):
        """
        Compute the worst-case cost and the reward bounds from the environment.

        Parameters
        ----------
        env:
            The environment this reward is attached to. This method reads
            ``env.redispatching_unit_commitment_availble``, ``env.gen_cost_per_MW`` and ``env.gen_pmax``.

        Raises
        ------
        Grid2OpException
            If ``env.redispatching_unit_commitment_availble`` is falsy.
        """
        # NOTE: "availble" is the spelling of the attribute as it is read from the environment here;
        # it must not be "corrected" on this side only.
        if not env.redispatching_unit_commitment_availble:
            raise Grid2OpException("Impossible to use the EconomicReward reward with an environment without "
                                   "generators cost. Please make sure "
                                   "env.redispatching_unit_commitment_availble is available.")

        # cost obtained when every generator produces gen_pmax: used as the offset of the reward
        self.worst_cost = np.sum(env.gen_cost_per_MW * env.gen_pmax)
        self.reward_min = -1.
        self.reward_max = self.worst_cost

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        """
        Compute the reward for the current step.

        Parameters
        ----------
        action:
            The action performed (not used by this reward).
        env:
            The environment; ``env.current_obs.prod_p`` and ``env.gen_cost_per_MW`` are read.
        has_error:
            Whether the step ended in an error.
        is_done:
            Whether the episode is over (not used by this reward).
        is_illegal:
            Whether the action was illegal.
        is_ambiguous:
            Whether the action was ambiguous.

        Returns
        -------
        The value ``reward_min * 0.5`` if ``has_error``, ``is_illegal`` or ``is_ambiguous`` is true, otherwise
        ``worst_cost`` minus the current generation cost.
        """
        if has_error or is_illegal or is_ambiguous:
            return self.reward_min * 0.5

        # compute the cost of the grid
        cost = np.sum(env.current_obs.prod_p * env.gen_cost_per_MW)
        # we want to minimize the cost by maximizing the reward, so take the opposite of the cost and,
        # to keep the result from going negative, add the highest possible cost
        return self.worst_cost - cost