# Source code for ray.rllib.utils.schedules.polynomial_schedule

from typing import Optional

from ray.rllib.utils.annotations import OldAPIStack, override
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.schedules.schedule import Schedule
from ray.rllib.utils.typing import TensorType

tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()


@OldAPIStack
class PolynomialSchedule(Schedule):
    """Schedule that decays polynomially from `initial_p` to `final_p`.

    The decay happens over `schedule_timesteps` time steps. Once the time
    step reaches `schedule_timesteps`, the schedule evaluates to `final_p`.
    """

    def __init__(
        self,
        schedule_timesteps: int,
        final_p: float,
        framework: Optional[str],
        initial_p: float = 1.0,
        power: float = 2.0,
    ):
        """Initializes a PolynomialSchedule instance.

        Args:
            schedule_timesteps: Number of time steps over which the output
                is annealed from `initial_p` to `final_p`. Must be > 0
                (enforced with an `assert`).
            final_p: Final output value.
            framework: The framework descriptor string, e.g. "tf", "torch",
                or None.
            initial_p: Initial output value.
            power: The exponent applied to the remaining fraction of the
                schedule (default 2.0, i.e. quadratic).
        """
        super().__init__(framework=framework)

        # A zero-length schedule would divide by zero when evaluated.
        assert schedule_timesteps > 0

        self.power = power
        self.initial_p = initial_p
        self.final_p = final_p
        self.schedule_timesteps = schedule_timesteps

    @override(Schedule)
    def _value(self, t: TensorType) -> TensorType:
        """Evaluates the schedule at time step `t`.

        Computes
        final_p + (initial_p - final_p) * (1 - `t`/t_max) ** power
        where `t` is first capped at `schedule_timesteps` (t_max).
        """
        if self.framework == "torch" and torch and isinstance(t, torch.Tensor):
            # Presumably so the arithmetic below runs in floating point.
            t = t.float()

        # Cap the time step at the end of the schedule.
        t = min(t, self.schedule_timesteps)

        # Remaining fraction of the schedule, raised to `power`.
        decay = (1.0 - (t / self.schedule_timesteps)) ** self.power
        span = self.initial_p - self.final_p
        return self.final_p + span * decay

    @override(Schedule)
    def _tf_value_op(self, t: TensorType) -> TensorType:
        """Evaluates the schedule at `t`, capping it with `tf.math.minimum`.

        Computes the same expression as `_value`:
        final_p + (initial_p - final_p) * (1 - `t`/t_max) ** power
        """
        # Cap the time step at the end of the schedule.
        t = tf.math.minimum(t, self.schedule_timesteps)

        # Remaining fraction of the schedule, raised to `power`.
        decay = (1.0 - (t / self.schedule_timesteps)) ** self.power
        span = self.initial_p - self.final_p
        return self.final_p + span * decay