[RLlib] Some minor cleanups (buffer buffer_size -> capacity and others). (#19623)

gjoliver 2021-10-25 00:42:39 -07:00 committed by GitHub
parent 9b0352f363
commit 89fbfc00f8
9 changed files with 10 additions and 10 deletions
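
The common thread across these files is the rename of LocalReplayBuffer's `buffer_size` constructor argument to `capacity`; callers keep passing the same config values, just under the new keyword. As a hedged illustration only (not part of this commit's diff; the import path is an assumption based on RLlib's 1.x module layout), a caller would now construct the buffer like this:

# Hedged sketch, not part of the diff below. Keyword names are taken from the
# hunks in this commit; the import path is an assumption.
from ray.rllib.execution.replay_buffer import LocalReplayBuffer

local_replay_buffer = LocalReplayBuffer(
    num_shards=1,
    learning_starts=1000,
    capacity=50000,  # previously passed as buffer_size=50000
    replay_batch_size=64,
    replay_sequence_length=1,
)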

@@ -87,7 +87,7 @@ def execution_plan(workers, config):
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config.get("replay_sequence_length", 1),

@@ -170,7 +170,7 @@ def execution_plan(trainer: Trainer, workers: WorkerSet,
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config.get("replay_sequence_length", 1),

@@ -138,7 +138,7 @@ def execution_plan(trainer: Trainer, workers: WorkerSet,
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config["replay_sequence_length"])

@@ -106,7 +106,7 @@ def execution_plan(workers: WorkerSet,
     rollouts = ParallelRollouts(workers, mode="bulk_sync")
     replay_buffer = LocalReplayBuffer(
         learning_starts=config["learning_starts"],
-        buffer_size=config["replay_buffer_size"],
+        capacity=config["replay_buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_sequence_length=1,
     )

@@ -306,7 +306,7 @@ def stats(policy: Policy, train_batch: SampleBatch) -> Dict[str, TensorType]:
         "vf_explained_var": explained_variance(
             tf.reshape(policy._value_targets, [-1]),
             tf.reshape(values_batched, [-1])),
-        "entropy_coeff": policy.entropy_coeff,
+        "entropy_coeff": tf.cast(policy.entropy_coeff, tf.float64),
     }
     if policy.config["vtrace"]:
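
A hedged reading of the hunk above: `policy.entropy_coeff` may be a plain Python float or a tensor driven by a schedule, and the explicit cast gives the logged stat a single well-defined dtype. A minimal standalone TensorFlow illustration of the same cast (an assumption about intent, not RLlib code):

import tensorflow as tf

# Stand-in for policy.entropy_coeff; in RLlib it may be a float or a
# schedule-driven variable, which is why the dtype is made explicit here.
entropy_coeff = tf.Variable(0.01, dtype=tf.float32)
stats = {"entropy_coeff": tf.cast(entropy_coeff, tf.float64)}
print(stats["entropy_coeff"].dtype)  # <dtype: 'float64'>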

@@ -78,7 +78,7 @@ class TestAPPO(unittest.TestCase):
         config["train_batch_size"] = 20
         config["batch_mode"] = "truncate_episodes"
         config["rollout_fragment_length"] = 10
-        config["timesteps_per_iteration"] = 10
+        config["timesteps_per_iteration"] = 20
         # 0 metrics reporting delay, this makes sure timestep,
         # which entropy coeff depends on, is updated after each worker rollout.
         config["min_iter_time_s"] = 0
@@ -112,7 +112,7 @@ class TestAPPO(unittest.TestCase):
         self.assertLessEqual(coeff, 0.005)
         self.assertGreaterEqual(coeff, 0.0005)
-        coeff = _step_n_times(trainer, 6)  # 120 timesteps
+        coeff = _step_n_times(trainer, 3)  # 120 timesteps
         # PiecewiseSchedule does interpolation. So roughly 0.0001 here.
         self.assertLessEqual(coeff, 0.0005)
         self.assertGreaterEqual(coeff, 0.00005)
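
The `_step_n_times` helper used in the test above is not shown in this diff. For orientation only, here is a hypothetical sketch of such a helper; the exact nesting of the learner stats is an assumption, not the test's real code:

def _step_n_times(trainer, n: int):
    """Hypothetical helper (assumption, not the actual test code): run
    trainer.train() n times and return the entropy coefficient reported
    in the learner stats of the last result."""
    for _ in range(n):
        results = trainer.train()
    # The stats key path below is an assumption about the result layout.
    return results["info"]["learner"]["default_policy"]["entropy_coeff"]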

@@ -167,7 +167,7 @@ def execution_plan(workers: WorkerSet,
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config["replay_sequence_length"],

@@ -59,7 +59,7 @@ def custom_training_workflow(workers: WorkerSet, config: dict):
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=1000,
-        buffer_size=50000,
+        capacity=50000,
         replay_batch_size=64)

     def add_ppo_metrics(batch):

@@ -208,7 +208,7 @@ def test_store_to_replay_local(ray_start_regular_shared):
     buf = LocalReplayBuffer(
         num_shards=1,
         learning_starts=200,
-        buffer_size=1000,
+        capacity=1000,
         replay_batch_size=100,
         prioritized_replay_alpha=0.6,
         prioritized_replay_beta=0.4,