"""Note: Keep in sync with changes to VTraceTFPolicy."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import ray
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.explained_variance import explained_variance
from ray.rllib.evaluation.postprocessing import compute_advantages, \
    Postprocessing
from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.policy.tf_policy import LearningRateSchedule
from ray.rllib.utils import try_import_tf

tf = try_import_tf()


class A3CLoss(object):
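    """Builds the A3C objective: the policy-gradient loss plus a weighted
    value function loss, minus a weighted entropy bonus."""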
    def __init__(self,
                 action_dist,
                 actions,
                 advantages,
                 v_target,
                 vf,
                 vf_loss_coeff=0.5,
                 entropy_coeff=0.01):
        log_prob = action_dist.logp(actions)

        # The "policy gradients" loss
        self.pi_loss = -tf.reduce_sum(log_prob * advantages)

        delta = vf - v_target
        self.vf_loss = 0.5 * tf.reduce_sum(tf.square(delta))
        self.entropy = tf.reduce_sum(action_dist.entropy())
        self.total_loss = (self.pi_loss + self.vf_loss * vf_loss_coeff -
                           self.entropy * entropy_coeff)


def actor_critic_loss(policy, batch_tensors):
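    """Constructs the A3CLoss from the batch tensors and returns its total loss."""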
    policy.loss = A3CLoss(
        policy.action_dist, batch_tensors[SampleBatch.ACTIONS],
        batch_tensors[Postprocessing.ADVANTAGES],
        batch_tensors[Postprocessing.VALUE_TARGETS], policy.vf,
        policy.config["vf_loss_coeff"], policy.config["entropy_coeff"])
    return policy.loss.total_loss


def postprocess_advantages(policy,
                           sample_batch,
                           other_agent_batches=None,
                           episode=None):
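    """Adds advantage and value-target columns to the sample batch.

    If the rollout does not end in a terminal step, the return of the last
    observation is bootstrapped from the value network via policy._value()
    (passing along any RNN state outputs); otherwise the final return is 0.
    compute_advantages() then fills in the columns used by the loss, using
    the configured gamma and lambda.
    """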
    completed = sample_batch[SampleBatch.DONES][-1]
    if completed:
        last_r = 0.0
    else:
        next_state = []
        for i in range(len(policy.state_in)):
            next_state.append([sample_batch["state_out_{}".format(i)][-1]])
        last_r = policy._value(sample_batch[SampleBatch.NEXT_OBS][-1],
                               sample_batch[SampleBatch.ACTIONS][-1],
                               sample_batch[SampleBatch.REWARDS][-1],
                               *next_state)
    return compute_advantages(sample_batch, last_r, policy.config["gamma"],
                              policy.config["lambda"])


def add_value_function_fetch(policy):
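    """Fetches the value function output alongside each computed action, so it
    is stored in the sample batch as SampleBatch.VF_PREDS."""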
    return {SampleBatch.VF_PREDS: policy.vf}


class ValueNetworkMixin(object):
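    """Exposes the model's value function output and a helper to evaluate it
    for a single observation (used for bootstrapping in postprocessing)."""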
    def __init__(self):
        self.vf = self.model.value_function()

    def _value(self, ob, prev_action, prev_reward, *args):
        feed_dict = {
            self.get_placeholder(SampleBatch.CUR_OBS): [ob],
            self.get_placeholder(SampleBatch.PREV_ACTIONS): [prev_action],
            self.get_placeholder(SampleBatch.PREV_REWARDS): [prev_reward],
            self.seq_lens: [1]
        }
        assert len(args) == len(self.state_in), \
            (args, self.state_in)
        for k, v in zip(self.state_in, args):
            feed_dict[k] = v
        vf = self.get_session().run(self.vf, feed_dict)
        return vf[0]


def stats(policy, batch_tensors):
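    """Training statistics reported for this policy: current learning rate,
    policy and value losses, entropy, and the global norm of the trainable
    variables."""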
    return {
        "cur_lr": tf.cast(policy.cur_lr, tf.float64),
        "policy_loss": policy.loss.pi_loss,
        "policy_entropy": policy.loss.entropy,
        "var_gnorm": tf.global_norm([x for x in policy.var_list]),
        "vf_loss": policy.loss.vf_loss,
    }


def grad_stats(policy, batch_tensors, grads):
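    """Gradient statistics: global gradient norm and explained variance of the
    value function predictions against the value targets."""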
    return {
        "grad_gnorm": tf.global_norm(grads),
        "vf_explained_var": explained_variance(
            policy.get_placeholder(Postprocessing.VALUE_TARGETS), policy.vf),
    }


def clip_gradients(policy, optimizer, loss):
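    """Computes gradients of the loss and clips them by global norm to
    config["grad_clip"], returning (grad, var) pairs."""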
    grads = tf.gradients(loss, policy.var_list)
    grads, _ = tf.clip_by_global_norm(grads, policy.config["grad_clip"])
    clipped_grads = list(zip(grads, policy.var_list))
    return clipped_grads


def setup_mixins(policy, obs_space, action_space, config):
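    """Runs before loss initialization: sets up the value network and learning
    rate schedule mixins and records the policy's trainable variables."""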
    ValueNetworkMixin.__init__(policy)
    LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
    policy.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        tf.get_variable_scope().name)


A3CTFPolicy = build_tf_policy(
    name="A3CTFPolicy",
    get_default_config=lambda: ray.rllib.agents.a3c.a3c.DEFAULT_CONFIG,
    loss_fn=actor_critic_loss,
    stats_fn=stats,
    grad_stats_fn=grad_stats,
    gradients_fn=clip_gradients,
    postprocess_fn=postprocess_advantages,
    extra_action_fetches_fn=add_value_function_fetch,
    before_loss_init=setup_mixins,
    mixins=[ValueNetworkMixin, LearningRateSchedule])
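

# Usage sketch (illustrative only): A3CTFPolicy is normally consumed through
# the A3C trainer rather than instantiated directly. Assuming this version of
# RLlib exposes A3CTrainer in ray.rllib.agents.a3c (which uses this class as
# its default TF policy), training on CartPole would look roughly like:
#
#     import ray
#     from ray.rllib.agents.a3c import A3CTrainer
#
#     ray.init()
#     trainer = A3CTrainer(env="CartPole-v0", config={"num_workers": 2})
#     for _ in range(10):
#         print(trainer.train()["episode_reward_mean"])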