[RLlib] Some minor cleanups (replay buffer buffer_size -> capacity, and others). (#19623)

gjoliver 2021-10-25 00:42:39 -07:00 committed by GitHub
parent 9b0352f363
commit 89fbfc00f8
9 changed files with 10 additions and 10 deletions
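
The change repeated across most of the hunks below is a keyword rename on LocalReplayBuffer: call sites that passed buffer_size= now pass capacity=, while the trainer config keys themselves ("buffer_size", "replay_buffer_size") are unchanged. A minimal sketch of an updated call site, assuming the import path and remaining keyword arguments from RLlib at the time of this commit; the config values are placeholders, not taken from any of the changed files:

    from ray.rllib.execution.replay_buffer import LocalReplayBuffer

    # Hypothetical config values, for illustration only.
    config = {
        "learning_starts": 1000,
        "buffer_size": 50000,
        "train_batch_size": 32,
    }

    local_replay_buffer = LocalReplayBuffer(
        num_shards=1,
        learning_starts=config["learning_starts"],
        # Previously: buffer_size=config["buffer_size"]
        capacity=config["buffer_size"],
        replay_batch_size=config["train_batch_size"],
    )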

@@ -87,7 +87,7 @@ def execution_plan(workers, config):
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config.get("replay_sequence_length", 1),

@@ -170,7 +170,7 @@ def execution_plan(trainer: Trainer, workers: WorkerSet,
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config.get("replay_sequence_length", 1),

@@ -138,7 +138,7 @@ def execution_plan(trainer: Trainer, workers: WorkerSet,
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config["replay_sequence_length"])

@@ -106,7 +106,7 @@ def execution_plan(workers: WorkerSet,
     rollouts = ParallelRollouts(workers, mode="bulk_sync")
     replay_buffer = LocalReplayBuffer(
         learning_starts=config["learning_starts"],
-        buffer_size=config["replay_buffer_size"],
+        capacity=config["replay_buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_sequence_length=1,
     )

@@ -306,7 +306,7 @@ def stats(policy: Policy, train_batch: SampleBatch) -> Dict[str, TensorType]:
         "vf_explained_var": explained_variance(
             tf.reshape(policy._value_targets, [-1]),
             tf.reshape(values_batched, [-1])),
-        "entropy_coeff": policy.entropy_coeff,
+        "entropy_coeff": tf.cast(policy.entropy_coeff, tf.float64),
     }
     if policy.config["vtrace"]:
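
The stats hunk above wraps policy.entropy_coeff in tf.cast(..., tf.float64) before it enters the stats dict. A plausible reading is that the coefficient is backed by a float32 tensor (it can be driven by a schedule), while neighboring stats are reported as float64, so the cast keeps the dict's scalar dtypes uniform for metrics collection. A small sketch of that situation with made-up values; only the tf.cast call on the coefficient comes from the diff:

    import tensorflow as tf

    # Assumed: the scheduled coefficient lives in a float32 variable.
    entropy_coeff = tf.Variable(0.01, dtype=tf.float32)

    stats = {
        "cur_lr": tf.constant(5e-4, dtype=tf.float64),
        # Without the cast, this entry would be float32 while the others are float64.
        "entropy_coeff": tf.cast(entropy_coeff, tf.float64),
    }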

@@ -78,7 +78,7 @@ class TestAPPO(unittest.TestCase):
         config["train_batch_size"] = 20
         config["batch_mode"] = "truncate_episodes"
         config["rollout_fragment_length"] = 10
-        config["timesteps_per_iteration"] = 10
+        config["timesteps_per_iteration"] = 20
         # 0 metrics reporting delay, this makes sure timestep,
         # which entropy coeff depends on, is updated after each worker rollout.
         config["min_iter_time_s"] = 0
@@ -112,7 +112,7 @@ class TestAPPO(unittest.TestCase):
         self.assertLessEqual(coeff, 0.005)
         self.assertGreaterEqual(coeff, 0.0005)
-        coeff = _step_n_times(trainer, 6)  # 120 timesteps
+        coeff = _step_n_times(trainer, 3)  # 120 timesteps
         # PiecewiseSchedule does interpolation. So roughly 0.0001 here.
         self.assertLessEqual(coeff, 0.0005)
         self.assertGreaterEqual(coeff, 0.00005)
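
The two test hunks above double timesteps_per_iteration (10 -> 20) and halve the number of trainer.train() calls (6 -> 3), so the entropy-coefficient assertions are still made after roughly the same number of sampled timesteps. The coefficient itself follows a PiecewiseSchedule, which by default interpolates linearly between its endpoints, as the test comment notes. A rough sketch of that interpolation; the endpoint values below are hypothetical, not the ones the APPO test configures:

    # Linear interpolation between (timestep, value) endpoints, roughly what
    # RLlib's PiecewiseSchedule does with its default interpolation.
    def piecewise_value(t, endpoints):
        for (t0, v0), (t1, v1) in zip(endpoints, endpoints[1:]):
            if t0 <= t <= t1:
                frac = (t - t0) / (t1 - t0)
                return v0 + frac * (v1 - v0)
        return endpoints[-1][1]  # past the last endpoint, hold the final value

    # Made-up entropy_coeff_schedule, for illustration only.
    schedule = [(0, 0.01), (60, 0.0005), (120, 0.00005)]
    print(piecewise_value(90, schedule))  # midway between the last two endpoints -> 0.000275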

@@ -167,7 +167,7 @@ def execution_plan(workers: WorkerSet,
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config["replay_sequence_length"],

@@ -59,7 +59,7 @@ def custom_training_workflow(workers: WorkerSet, config: dict):
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=1000,
-        buffer_size=50000,
+        capacity=50000,
         replay_batch_size=64)

     def add_ppo_metrics(batch):

@@ -208,7 +208,7 @@ def test_store_to_replay_local(ray_start_regular_shared):
     buf = LocalReplayBuffer(
         num_shards=1,
         learning_starts=200,
-        buffer_size=1000,
+        capacity=1000,
         replay_batch_size=100,
         prioritized_replay_alpha=0.6,
         prioritized_replay_beta=0.4,