ray/rllib/agents/impala/impala.py

from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy
from ray.rllib.agents.impala.vtrace_policy import VTraceTFPolicy
from ray.rllib.agents.trainer import Trainer, with_common_config
from ray.rllib.agents.trainer_template import build_trainer
from ray.rllib.optimizers import AsyncSamplesOptimizer
from ray.rllib.optimizers.aso_tree_aggregator import TreeAggregator
from ray.rllib.utils.annotations import override
from ray.tune.trainable import Trainable
from ray.tune.resources import Resources

# yapf: disable
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
    # V-trace params (see vtrace.py).
    # `vtrace` also selects the policy class: `choose_policy` in this module
    # returns VTraceTFPolicy when it is truthy and A3CTFPolicy otherwise.
    "vtrace": True,
    "vtrace_clip_rho_threshold": 1.0,
    "vtrace_clip_pg_rho_threshold": 1.0,

    # System params.
    #
    # == Overview of data flow in IMPALA ==
    # 1. Policy evaluation in parallel across `num_workers` actors produces
    #    batches of size `sample_batch_size * num_envs_per_worker`.
    # 2. If enabled, the replay buffer stores and produces batches of size
    #    `sample_batch_size * num_envs_per_worker`.
    # 3. If enabled, the minibatch ring buffer stores and replays batches of
    #    size `train_batch_size` up to `num_sgd_iter` times per batch.
    # 4. The learner thread executes data parallel SGD across `num_gpus` GPUs
    #    on batches of size `train_batch_size`.
    #
    "sample_batch_size": 50,
    "train_batch_size": 500,
    "min_iter_time_s": 10,
    "num_workers": 2,
    # Number of GPUs the learner should use. This is also the GPU count
    # requested for the trainer in `default_resource_request`.
    "num_gpus": 1,
    # Set >1 to load data into GPUs in parallel. Increases GPU memory usage
    # proportionally with the number of buffers.
    "num_data_loader_buffers": 1,
    # How many train batches should be retained for minibatching. This
    # setting only has an effect if `num_sgd_iter > 1`.
    "minibatch_buffer_size": 1,
    # Number of passes to make over each train batch.
    "num_sgd_iter": 1,
    # Set >0 to enable experience replay. Saved samples will be replayed with
    # a p:1 proportion to new data samples.
    "replay_proportion": 0.0,
    # Number of sample batches to store for replay. The number of transitions
    # saved total will be (replay_buffer_num_slots * sample_batch_size).
    "replay_buffer_num_slots": 0,
    # Max queue size for train batches feeding into the learner.
    "learner_queue_size": 16,
    # Wait for train batches to be available in the minibatch buffer queue
    # for this many seconds. This may need to be increased, e.g. when
    # training with a slow environment.
    "learner_queue_timeout": 300,
    # Level of queuing for sampling.
    "max_sample_requests_in_flight_per_worker": 2,
    # Max number of workers to broadcast one set of weights to.
    "broadcast_interval": 1,
    # Use intermediate actors for multi-level aggregation. This can make sense
    # if ingesting >2GB/s of samples, or if the data requires decompression.
    # Each aggregation worker adds one CPU to the trainer's resource request
    # (see `default_resource_request`).
    "num_aggregation_workers": 0,

    # Learning params.
    "grad_clip": 40.0,
    # Either "adam" or "rmsprop".
    "opt_type": "adam",
    "lr": 0.0005,
    "lr_schedule": None,
    # RMSProp hyperparameters. Presumably only consulted when `opt_type` is
    # "rmsprop"; the consumer is not in this file -- TODO confirm.
    "decay": 0.99,
    "momentum": 0.0,
    "epsilon": 0.1,
    # Coefficients balancing the three losses. `entropy_coeff` must be >= 0;
    # `validate_config` rejects negative values.
    "vf_loss_coeff": 0.5,
    "entropy_coeff": 0.01,
    "entropy_coeff_schedule": None,

    # Use fake (infinite speed) sampler for testing.
    "_fake_sampler": False,
})
# __sphinx_doc_end__
# yapf: enable


def choose_policy(config):
    """Return the policy class matching the `vtrace` config flag.

    Args:
        config (dict): Trainer config; only the "vtrace" entry is read.

    Returns:
        VTraceTFPolicy if `config["vtrace"]` is truthy, else A3CTFPolicy.
    """
    return VTraceTFPolicy if config["vtrace"] else A3CTFPolicy


def validate_config(config):
    """Reject config combinations that IMPALA cannot run with.

    The PyTorch check runs first, so "entropy_coeff" is only read when
    "use_pytorch" is falsy.

    Args:
        config (dict): Trainer config; reads "use_pytorch" and
            "entropy_coeff".

    Raises:
        ValueError: If "use_pytorch" is truthy.
        DeprecationWarning: If "entropy_coeff" is negative. Note that the
            warning class is raised as an exception, not issued via
            `warnings.warn`.
    """
    # PyTorch check.
    torch_requested = config["use_pytorch"]
    if torch_requested:
        message = "IMPALA does not support PyTorch yet! Use tf instead."
        raise ValueError(message)

    # Entropy coefficient check.
    entropy_coeff = config["entropy_coeff"]
    if entropy_coeff < 0:
        raise DeprecationWarning("entropy_coeff must be >= 0")


def defer_make_workers(trainer, env_creator, policy, config):
    """Delegate to `trainer._make_workers`, passing 0 as the final argument.

    The original intent is to defer worker creation until after the
    optimizer has been created; `make_aggregators_and_optimizer` later calls
    `workers.add_workers(config["num_workers"])`.

    Args:
        trainer: Object exposing `_make_workers`.
        env_creator: Forwarded unchanged.
        policy: Forwarded unchanged.
        config: Forwarded unchanged.

    Returns:
        Whatever `trainer._make_workers` returns.
    """
    # Presumably the number of workers to create up front -- TODO confirm
    # against Trainer._make_workers.
    initial_count = 0
    make_workers = trainer._make_workers
    return make_workers(env_creator, policy, config, initial_count)


def make_aggregators_and_optimizer(workers, config):
    """Build the AsyncSamplesOptimizer, pre-creating aggregators if enabled.

    Steps run in this order: aggregator actors are pre-created (only when
    `num_aggregation_workers` > 0), `config["num_workers"]` workers are
    added to `workers`, the optimizer is constructed, and finally any
    pre-created aggregators are handed to `optimizer.aggregator.init`.

    Args:
        workers: Worker collection exposing `add_workers`; passed to the
            optimizer as its first argument.
        config (dict): Trainer config. `config["optimizer"]` is unpacked as
            extra keyword arguments to the optimizer.

    Returns:
        The constructed AsyncSamplesOptimizer.
    """
    num_aggregators = config["num_aggregation_workers"]
    aggregators = None
    if num_aggregators > 0:
        # Create co-located aggregator actors first for placement pref
        aggregators = TreeAggregator.precreate_aggregators(num_aggregators)

    workers.add_workers(config["num_workers"])

    # Each of these config entries is forwarded to the optimizer under a
    # keyword argument of the same name.
    forwarded_keys = (
        "lr",
        "num_gpus",
        "sample_batch_size",
        "train_batch_size",
        "replay_buffer_num_slots",
        "replay_proportion",
        "num_data_loader_buffers",
        "max_sample_requests_in_flight_per_worker",
        "broadcast_interval",
        "num_sgd_iter",
        "minibatch_buffer_size",
        "num_aggregation_workers",
        "learner_queue_size",
        "learner_queue_timeout",
    )
    forwarded = {key: config[key] for key in forwarded_keys}

    # Two separate unpackings (rather than one merged dict) so that a key
    # duplicated in config["optimizer"] still raises TypeError.
    optimizer = AsyncSamplesOptimizer(
        workers, **forwarded, **config["optimizer"])

    if aggregators:
        # Assign the pre-created aggregators to the optimizer
        optimizer.aggregator.init(aggregators)
    return optimizer


class OverrideDefaultResourceRequest:
    """Mixin supplying IMPALA's `default_resource_request` classmethod.

    Expects the class it is mixed into to provide `_default_config`.
    """

    @classmethod
    @override(Trainable)
    def default_resource_request(cls, config):
        """Compute the Resources to request for a trainer with `config`.

        Args:
            config (dict): User config; overlaid on `cls._default_config`.

        Returns:
            Resources: Driver cpu/gpu/memory plus "extra" per-worker totals.
            `extra_cpu` additionally includes one CPU per aggregation worker.
        """
        merged = dict(cls._default_config, **config)
        Trainer._validate_config(merged)

        def per_worker_total(key):
            # Per-worker quantity multiplied by the number of workers.
            return merged[key] * merged["num_workers"]

        return Resources(
            cpu=merged["num_cpus_for_driver"],
            gpu=merged["num_gpus"],
            memory=merged["memory"],
            object_store_memory=merged["object_store_memory"],
            extra_cpu=(per_worker_total("num_cpus_per_worker") +
                       merged["num_aggregation_workers"]),
            extra_gpu=per_worker_total("num_gpus_per_worker"),
            extra_memory=per_worker_total("memory_per_worker"),
            extra_object_store_memory=per_worker_total(
                "object_store_memory_per_worker"))


# Build `ImpalaTrainer` from the pieces defined in this module via the
# `build_trainer` template:
#   - `choose_policy` picks the policy class from the `vtrace` config flag.
#   - `defer_make_workers` passes 0 as the final argument to
#     `_make_workers`; `make_aggregators_and_optimizer` then adds
#     `num_workers` workers after any aggregators have been pre-created.
#   - `OverrideDefaultResourceRequest` supplies `default_resource_request`,
#     which adds `num_aggregation_workers` to the extra CPU request.
ImpalaTrainer = build_trainer(
    name="IMPALA",
    default_config=DEFAULT_CONFIG,
    default_policy=VTraceTFPolicy,
    validate_config=validate_config,
    get_policy_class=choose_policy,
    make_workers=defer_make_workers,
    make_policy_optimizer=make_aggregators_and_optimizer,
    mixins=[OverrideDefaultResourceRequest])
[rllib] Rename PolicyGraph => Policy, move from evaluation/ to policy/ (#4819) This implements some of the renames proposed in #4813 We leave behind backwards-compatibility aliases for *PolicyGraph and SampleBatch. 2019-05-20 16:46:05 -07:00			`from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy`
			`from ray.rllib.agents.impala.vtrace_policy import VTraceTFPolicy`
[rllib] Rename Agent to Trainer (#4556) 2019-04-07 00:36:18 -07:00			`from ray.rllib.agents.trainer import Trainer, with_common_config`
[rllib] Port remainder of algorithms to build_trainer() pattern (#4920) 2019-06-07 16:45:36 -07:00			`from ray.rllib.agents.trainer_template import build_trainer`
[rllib] Basic IMPALA implementation (using deepmind's reference vtrace.py) (#2504) Rename AsyncSamplesOptimizer -> AsyncReplayOptimizer Add AsyncSamplesOptimizer that implements the IMPALA architecture integrate V-trace with a3c policy graph audit V-trace integration benchmark compare vs A3C and with V-trace on/off PongNoFrameskip-v4 on IMPALA scaling from 16 to 128 workers, solving Pong in <10 min. For reference, solving this env takes ~40 minutes for Ape-X and several hours for A3C. 2018-08-01 20:53:53 -07:00			`from ray.rllib.optimizers import AsyncSamplesOptimizer`
[rllib] Improve datapath throughput of IMPALA / APPO (#4324) 2019-03-31 12:25:52 -07:00			`from ray.rllib.optimizers.aso_tree_aggregator import TreeAggregator`
[rllib] Better document which methods are abstract and which ones are overrides (#3480) 2018-12-08 16:28:58 -08:00			`from ray.rllib.utils.annotations import override`
[rllib] Improve datapath throughput of IMPALA / APPO (#4324) 2019-03-31 12:25:52 -07:00			`from ray.tune.trainable import Trainable`
[sgd] Replaced class Resources in sgd with `use_gpu` (#5252) 2019-08-01 01:03:10 -07:00			`from ray.tune.resources import Resources`
[rllib] Basic IMPALA implementation (using deepmind's reference vtrace.py) (#2504) Rename AsyncSamplesOptimizer -> AsyncReplayOptimizer Add AsyncSamplesOptimizer that implements the IMPALA architecture integrate V-trace with a3c policy graph audit V-trace integration benchmark compare vs A3C and with V-trace on/off PongNoFrameskip-v4 on IMPALA scaling from 16 to 128 workers, solving Pong in <10 min. For reference, solving this env takes ~40 minutes for Ape-X and several hours for A3C. 2018-08-01 20:53:53 -07:00
[rllib] switch to python logger (#3098) * logg * set rllib logger * comment * info * rlib * comment * add format * fix lint * add file info * update * add ts * lint * better docs * fix value error * soft log level 2018-10-21 23:43:57 -07:00			`# yapf: disable`
[rllib] Include config dicts in the sphinx docs (#3064) 2018-10-16 15:55:11 -07:00			`# __sphinx_doc_begin__`
[rllib] Basic IMPALA implementation (using deepmind's reference vtrace.py) (#2504) Rename AsyncSamplesOptimizer -> AsyncReplayOptimizer Add AsyncSamplesOptimizer that implements the IMPALA architecture integrate V-trace with a3c policy graph audit V-trace integration benchmark compare vs A3C and with V-trace on/off PongNoFrameskip-v4 on IMPALA scaling from 16 to 128 workers, solving Pong in <10 min. For reference, solving this env takes ~40 minutes for Ape-X and several hours for A3C. 2018-08-01 20:53:53 -07:00			`DEFAULT_CONFIG = with_common_config({`
			`# V-trace params (see vtrace.py).`
			`"vtrace": True,`
			`"vtrace_clip_rho_threshold": 1.0,`
			`"vtrace_clip_pg_rho_threshold": 1.0,`

			`# System params.`
[rllib] streaming minibatching for IMPALA (#3402) * mb impala * fix * paropt * update * cpu warn * on cpu * fix mb * doc * docs * comment * larger num * early release * remove grad clip * only check loader count in multi gpu mode * revert bad multigpu changes * num sgd iter * comment * reuse optimizer * add test * par load test * loosen test * Update run_multi_node_tests.sh * fix local mode * Update agent.py 2018-12-19 02:23:29 -08:00			`#`
			`# == Overview of data flow in IMPALA ==`
			# 1. Policy evaluation in parallel across `num_workers` actors produces
			# batches of size `sample_batch_size * num_envs_per_worker`.
			`# 2. If enabled, the replay buffer stores and produces batches of size`
			# `sample_batch_size * num_envs_per_worker`.
			`# 3. If enabled, the minibatch ring buffer stores and replays batches of`
			# size `train_batch_size` up to `num_sgd_iter` times per batch.
			# 4. The learner thread executes data parallel SGD across `num_gpus` GPUs
			# on batches of size `train_batch_size`.
			`#`
[rllib] Basic IMPALA implementation (using deepmind's reference vtrace.py) (#2504) Rename AsyncSamplesOptimizer -> AsyncReplayOptimizer Add AsyncSamplesOptimizer that implements the IMPALA architecture integrate V-trace with a3c policy graph audit V-trace integration benchmark compare vs A3C and with V-trace on/off PongNoFrameskip-v4 on IMPALA scaling from 16 to 128 workers, solving Pong in <10 min. For reference, solving this env takes ~40 minutes for Ape-X and several hours for A3C. 2018-08-01 20:53:53 -07:00			`"sample_batch_size": 50,`
			`"train_batch_size": 500,`
			`"min_iter_time_s": 10,`
			`"num_workers": 2,`
[rllib] Parallel-data loading and multi-gpu support for IMPALA (#2766) 2018-10-15 11:02:50 -07:00			`# number of GPUs the learner should use.`
			`"num_gpus": 1,`
			`# set >1 to load data into GPUs in parallel. Increases GPU memory usage`
[rllib] streaming minibatching for IMPALA (#3402) * mb impala * fix * paropt * update * cpu warn * on cpu * fix mb * doc * docs * comment * larger num * early release * remove grad clip * only check loader count in multi gpu mode * revert bad multigpu changes * num sgd iter * comment * reuse optimizer * add test * par load test * loosen test * Update run_multi_node_tests.sh * fix local mode * Update agent.py 2018-12-19 02:23:29 -08:00			`# proportionally with the number of buffers.`
			`"num_data_loader_buffers": 1,`
			`# how many train batches should be retained for minibatching. This conf`
			# only has an effect if `num_sgd_iter > 1`.
			`"minibatch_buffer_size": 1,`
			`# number of passes to make over each train batch`
			`"num_sgd_iter": 1,`
[rllib] Parallel-data loading and multi-gpu support for IMPALA (#2766) 2018-10-15 11:02:50 -07:00			`# set >0 to enable experience replay. Saved samples will be replayed with`
			`# a p:1 proportion to new data samples.`
			`"replay_proportion": 0.0,`
			`# number of sample batches to store for replay. The number of transitions`
			`# saved total will be (replay_buffer_num_slots * sample_batch_size).`
[rllib] Replay buffer for IMPALA should default to 0 slots. (#3971) * disable replay * make lq configurable * leak test * Update run_multi_node_tests.sh 2019-02-08 10:03:11 -08:00			`"replay_buffer_num_slots": 0,`
			`# max queue size for train batches feeding into the learner`
			`"learner_queue_size": 16,`
[rllib] Configure learner queue timeout (#5270) * configure learner queue timeout * lint * use config * fix method args order, add unit test * fix wrong param name 2019-07-26 06:18:05 +02:00			`# wait for train batches to be available in minibatch buffer queue`
			`# this many seconds. This may need to be increased e.g. when training`
			`# with a slow environment`
			`"learner_queue_timeout": 300,`
[rllib] streaming minibatching for IMPALA (#3402) * mb impala * fix * paropt * update * cpu warn * on cpu * fix mb * doc * docs * comment * larger num * early release * remove grad clip * only check loader count in multi gpu mode * revert bad multigpu changes * num sgd iter * comment * reuse optimizer * add test * par load test * loosen test * Update run_multi_node_tests.sh * fix local mode * Update agent.py 2018-12-19 02:23:29 -08:00			`# level of queuing for sampling.`
			`"max_sample_requests_in_flight_per_worker": 2,`
			`# max number of workers to broadcast one set of weights to`
			`"broadcast_interval": 1,`
[rllib] Improve datapath throughput of IMPALA / APPO (#4324) 2019-03-31 12:25:52 -07:00			`# use intermediate actors for multi-level aggregation. This can make sense`
			`# if ingesting >2GB/s of samples, or if the data requires decompression.`
			`"num_aggregation_workers": 0,`
[rllib] Basic IMPALA implementation (using deepmind's reference vtrace.py) (#2504) Rename AsyncSamplesOptimizer -> AsyncReplayOptimizer Add AsyncSamplesOptimizer that implements the IMPALA architecture integrate V-trace with a3c policy graph audit V-trace integration benchmark compare vs A3C and with V-trace on/off PongNoFrameskip-v4 on IMPALA scaling from 16 to 128 workers, solving Pong in <10 min. For reference, solving this env takes ~40 minutes for Ape-X and several hours for A3C. 2018-08-01 20:53:53 -07:00
			`# Learning params.`
			`"grad_clip": 40.0,`
Support older version TF and Support RMSProp in Impala (#2590) to support TF version < 1.5 to support rmsprop optimizer in Impala Before TF1.5, tf.reduce_sum() and tf.reduce_max() has an argument keep_dims which has been renamed as keepdims in later versions. In the original paper of Impala, they use rmsprop algorithm to optimize the model. We'd better also support it so that users can reproduce their experiments. Without any tuning, say that using the same hyper-parameters as AdamOptimizer, it reaches "episode_reward_mean": 19.083333333333332 in Pong after consume 3,610,350 samples. 2018-08-09 19:51:32 -07:00			`# either "adam" or "rmsprop"`
			`"opt_type": "adam",`
			`"lr": 0.0005,`
[rllib] Fix atari reward calculations, add LR annealing, explained var stat for A2C / impala (#2700) Changes needed to reproduce Atari plots in IMPALA / A2C: https://github.com/ray-project/rl-experiments 2018-08-23 17:49:10 -07:00			`"lr_schedule": None,`
Support older version TF and Support RMSProp in Impala (#2590) to support TF version < 1.5 to support rmsprop optimizer in Impala Before TF1.5, tf.reduce_sum() and tf.reduce_max() has an argument keep_dims which has been renamed as keepdims in later versions. In the original paper of Impala, they use rmsprop algorithm to optimize the model. We'd better also support it so that users can reproduce their experiments. Without any tuning, say that using the same hyper-parameters as AdamOptimizer, it reaches "episode_reward_mean": 19.083333333333332 in Pong after consume 3,610,350 samples. 2018-08-09 19:51:32 -07:00			`# rmsprop considered`
			`"decay": 0.99,`
			`"momentum": 0.0,`
			`"epsilon": 0.1,`
			`# balancing the three losses`
[rllib] Basic IMPALA implementation (using deepmind's reference vtrace.py) (#2504) Rename AsyncSamplesOptimizer -> AsyncReplayOptimizer Add AsyncSamplesOptimizer that implements the IMPALA architecture integrate V-trace with a3c policy graph audit V-trace integration benchmark compare vs A3C and with V-trace on/off PongNoFrameskip-v4 on IMPALA scaling from 16 to 128 workers, solving Pong in <10 min. For reference, solving this env takes ~40 minutes for Ape-X and several hours for A3C. 2018-08-01 20:53:53 -07:00			`"vf_loss_coeff": 0.5,`
[rllib] Flip sign of A2C, IMPALA entropy coefficient; raise DeprecationWarning if negative (#4374) 2019-03-17 18:07:37 -07:00			`"entropy_coeff": 0.01,`
[rllib]Add entropy coeff decay (#5043) 2019-07-09 03:30:32 +02:00			`"entropy_coeff_schedule": None,`
[rllib] Improve datapath throughput of IMPALA / APPO (#4324) 2019-03-31 12:25:52 -07:00
			`# use fake (infinite speed) sampler for testing`
			`"_fake_sampler": False,`
[rllib] Basic IMPALA implementation (using deepmind's reference vtrace.py) (#2504) Rename AsyncSamplesOptimizer -> AsyncReplayOptimizer Add AsyncSamplesOptimizer that implements the IMPALA architecture integrate V-trace with a3c policy graph audit V-trace integration benchmark compare vs A3C and with V-trace on/off PongNoFrameskip-v4 on IMPALA scaling from 16 to 128 workers, solving Pong in <10 min. For reference, solving this env takes ~40 minutes for Ape-X and several hours for A3C. 2018-08-01 20:53:53 -07:00			`})`
[rllib] Include config dicts in the sphinx docs (#3064) 2018-10-16 15:55:11 -07:00			`# __sphinx_doc_end__`
[rllib] switch to python logger (#3098) * logg * set rllib logger * comment * info * rlib * comment * add format * fix lint * add file info * update * add ts * lint * better docs * fix value error * soft log level 2018-10-21 23:43:57 -07:00			`# yapf: enable`
[rllib] Include config dicts in the sphinx docs (#3064) 2018-10-16 15:55:11 -07:00
[rllib] Basic IMPALA implementation (using deepmind's reference vtrace.py) (#2504) Rename AsyncSamplesOptimizer -> AsyncReplayOptimizer Add AsyncSamplesOptimizer that implements the IMPALA architecture integrate V-trace with a3c policy graph audit V-trace integration benchmark compare vs A3C and with V-trace on/off PongNoFrameskip-v4 on IMPALA scaling from 16 to 128 workers, solving Pong in <10 min. For reference, solving this env takes ~40 minutes for Ape-X and several hours for A3C. 2018-08-01 20:53:53 -07:00
[rllib] Port remainder of algorithms to build_trainer() pattern (#4920) 2019-06-07 16:45:36 -07:00			`def choose_policy(config):`
			`if config["vtrace"]:`
			`return VTraceTFPolicy`
			`else:`
			`return A3CTFPolicy`


			`def validate_config(config):`
[RLlib] Add `torch` flag to train.py (#6807) 2020-01-18 03:48:44 +01:00			`# PyTorch check.`
			`if config["use_pytorch"]:`
			`raise ValueError(`
			`"IMPALA does not support PyTorch yet! Use tf instead."`
			`)`
[rllib] Port remainder of algorithms to build_trainer() pattern (#4920) 2019-06-07 16:45:36 -07:00			`if config["entropy_coeff"] < 0:`
			`raise DeprecationWarning("entropy_coeff must be >= 0")`


			`def defer_make_workers(trainer, env_creator, policy, config):`
			`# Defer worker creation to after the optimizer has been created.`
			`return trainer._make_workers(env_creator, policy, config, 0)`


			`def make_aggregators_and_optimizer(workers, config):`
			`if config["num_aggregation_workers"] > 0:`
			`# Create co-located aggregator actors first for placement pref`
			`aggregators = TreeAggregator.precreate_aggregators(`
			`config["num_aggregation_workers"])`
			`else:`
			`aggregators = None`
			`workers.add_workers(config["num_workers"])`

			`optimizer = AsyncSamplesOptimizer(`
			`workers,`
			`lr=config["lr"],`
			`num_gpus=config["num_gpus"],`
			`sample_batch_size=config["sample_batch_size"],`
			`train_batch_size=config["train_batch_size"],`
			`replay_buffer_num_slots=config["replay_buffer_num_slots"],`
			`replay_proportion=config["replay_proportion"],`
			`num_data_loader_buffers=config["num_data_loader_buffers"],`
			`max_sample_requests_in_flight_per_worker=config[`
			`"max_sample_requests_in_flight_per_worker"],`
			`broadcast_interval=config["broadcast_interval"],`
			`num_sgd_iter=config["num_sgd_iter"],`
			`minibatch_buffer_size=config["minibatch_buffer_size"],`
			`num_aggregation_workers=config["num_aggregation_workers"],`
[rllib] Configure learner queue timeout (#5270) * configure learner queue timeout * lint * use config * fix method args order, add unit test * fix wrong param name 2019-07-26 06:18:05 +02:00			`learner_queue_size=config["learner_queue_size"],`
			`learner_queue_timeout=config["learner_queue_timeout"],`
[rllib] Port remainder of algorithms to build_trainer() pattern (#4920) 2019-06-07 16:45:36 -07:00			`**config["optimizer"])`

			`if aggregators:`
			`# Assign the pre-created aggregators to the optimizer`
			`optimizer.aggregator.init(aggregators)`
			`return optimizer`


Remove (object) from class declarations. (#6658) 2020-01-02 17:42:13 -08:00			`class OverrideDefaultResourceRequest:`
[rllib] Improve datapath throughput of IMPALA / APPO (#4324) 2019-03-31 12:25:52 -07:00			`@classmethod`
			`@override(Trainable)`
			`def default_resource_request(cls, config):`
			`cf = dict(cls._default_config, **config)`
[rllib] Rename Agent to Trainer (#4556) 2019-04-07 00:36:18 -07:00			`Trainer._validate_config(cf)`
[rllib] Improve datapath throughput of IMPALA / APPO (#4324) 2019-03-31 12:25:52 -07:00			`return Resources(`
			`cpu=cf["num_cpus_for_driver"],`
			`gpu=cf["num_gpus"],`
Ray, Tune, and RLlib support for memory, object_store_memory options (#5226) 2019-08-22 14:01:10 +08:00			`memory=cf["memory"],`
			`object_store_memory=cf["object_store_memory"],`
[rllib] Improve datapath throughput of IMPALA / APPO (#4324) 2019-03-31 12:25:52 -07:00			`extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"] +`
			`cf["num_aggregation_workers"],`
Ray, Tune, and RLlib support for memory, object_store_memory options (#5226) 2019-08-22 14:01:10 +08:00			`extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"],`
			`extra_memory=cf["memory_per_worker"] * cf["num_workers"],`
			`extra_object_store_memory=cf["object_store_memory_per_worker"] *`
			`cf["num_workers"])`
[rllib] Improve datapath throughput of IMPALA / APPO (#4324) 2019-03-31 12:25:52 -07:00
[rllib] Port remainder of algorithms to build_trainer() pattern (#4920) 2019-06-07 16:45:36 -07:00
			`ImpalaTrainer = build_trainer(`
			`name="IMPALA",`
			`default_config=DEFAULT_CONFIG,`
			`default_policy=VTraceTFPolicy,`
			`validate_config=validate_config,`
			`get_policy_class=choose_policy,`
			`make_workers=defer_make_workers,`
			`make_policy_optimizer=make_aggregators_and_optimizer,`
			`mixins=[OverrideDefaultResourceRequest])`