# mirror of https://github.com/vale981/ray (synced 2025-03-06 18:41:40 -05:00)
from ray.rllib.utils.schedules.schedule import Schedule
from ray.rllib.utils.framework import try_import_tf

tf = try_import_tf()


class PolynomialSchedule(Schedule):
    def __init__(self,
                 schedule_timesteps,
                 final_p,
                 initial_p=1.0,
                 power=2.0,
                 framework=None):
        """
        Polynomial interpolation between initial_p and final_p over
        schedule_timesteps. After this many time steps, `final_p` is always
        returned.

        Args:
            schedule_timesteps (int): Number of time steps over which to
                anneal initial_p to final_p.
            final_p (float): Final output value.
            initial_p (float): Initial output value.
            power (float): Exponent of the polynomial used for the
                interpolation (2.0 gives quadratic annealing).
            framework (Optional[str]): One of "tf", "torch", or None.
        """
        super().__init__(framework=framework)
        assert schedule_timesteps > 0
        self.schedule_timesteps = schedule_timesteps
        self.final_p = final_p
        self.initial_p = initial_p
        self.power = power

    def value(self, t):
        """
        Returns the result of:
        final_p + (initial_p - final_p) * (1 - `t`/t_max) ** power
        """
        if self.framework == "tf" and not tf.executing_eagerly():
            return tf.train.polynomial_decay(
                learning_rate=self.initial_p,
                global_step=t,
                decay_steps=self.schedule_timesteps,
                end_learning_rate=self.final_p,
                power=self.power)
        # Clamp `t` so that `final_p` is returned for all
        # t >= schedule_timesteps (matching the clamping that
        # tf.train.polynomial_decay applies to `global_step`).
        t = min(t, self.schedule_timesteps)
        return self.final_p + (self.initial_p - self.final_p) * (
            1.0 - (t / self.schedule_timesteps))**self.power
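

# The block below is not part of the original module: it is a minimal usage
# sketch (assuming only the class defined above) that prints the schedule at
# a few timesteps as a quick sanity check of the annealing formula.
if __name__ == "__main__":
    # Anneal from 1.0 down to 0.1 over 100 timesteps using the default
    # quadratic interpolation (power=2.0) and the pure-Python code path.
    schedule = PolynomialSchedule(
        schedule_timesteps=100, final_p=0.1, initial_p=1.0, framework=None)
    for t in [0, 25, 50, 100, 200]:
        # t=0 yields initial_p (1.0); any t >= 100 yields final_p (0.1).
        print("t={}: {:.5f}".format(t, schedule.value(t)))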