Mirror of https://github.com/vale981/ray (synced 2025-03-06 02:21:39 -05:00)
[RLlib] Some minor cleanups (buffer buffer_size -> capacity and others). (#19623)
This commit is contained in:
parent 9b0352f363
commit 89fbfc00f8
9 changed files with 10 additions and 10 deletions
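
For context, a minimal sketch (not part of this commit) of how an execution plan constructs RLlib's LocalReplayBuffer after the buffer_size -> capacity rename. The keyword arguments mirror the hunks below; the concrete config values are illustrative assumptions, and the trainer config key itself is still named "buffer_size".

# Minimal sketch, assuming the kwargs shown in the hunks below; only the
# LocalReplayBuffer keyword argument changed from buffer_size to capacity.
from ray.rllib.execution.replay_buffer import LocalReplayBuffer

# Illustrative config values (assumptions, not taken from this diff).
config = {
    "learning_starts": 1000,
    "buffer_size": 50000,
    "train_batch_size": 64,
    "replay_sequence_length": 1,
}

local_replay_buffer = LocalReplayBuffer(
    num_shards=1,
    learning_starts=config["learning_starts"],
    capacity=config["buffer_size"],  # was: buffer_size=config["buffer_size"]
    replay_batch_size=config["train_batch_size"],
    replay_sequence_length=config.get("replay_sequence_length", 1),
)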
@@ -87,7 +87,7 @@ def execution_plan(workers, config):
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config.get("replay_sequence_length", 1),
@@ -170,7 +170,7 @@ def execution_plan(trainer: Trainer, workers: WorkerSet,
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config.get("replay_sequence_length", 1),
@@ -138,7 +138,7 @@ def execution_plan(trainer: Trainer, workers: WorkerSet,
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config["replay_sequence_length"])
@@ -106,7 +106,7 @@ def execution_plan(workers: WorkerSet,
     rollouts = ParallelRollouts(workers, mode="bulk_sync")
     replay_buffer = LocalReplayBuffer(
         learning_starts=config["learning_starts"],
-        buffer_size=config["replay_buffer_size"],
+        capacity=config["replay_buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_sequence_length=1,
     )
@@ -306,7 +306,7 @@ def stats(policy: Policy, train_batch: SampleBatch) -> Dict[str, TensorType]:
         "vf_explained_var": explained_variance(
             tf.reshape(policy._value_targets, [-1]),
             tf.reshape(values_batched, [-1])),
-        "entropy_coeff": policy.entropy_coeff,
+        "entropy_coeff": tf.cast(policy.entropy_coeff, tf.float64),
     }

     if policy.config["vtrace"]:
@@ -78,7 +78,7 @@ class TestAPPO(unittest.TestCase):
         config["train_batch_size"] = 20
         config["batch_mode"] = "truncate_episodes"
         config["rollout_fragment_length"] = 10
-        config["timesteps_per_iteration"] = 10
+        config["timesteps_per_iteration"] = 20
         # 0 metrics reporting delay, this makes sure timestep,
         # which entropy coeff depends on, is updated after each worker rollout.
         config["min_iter_time_s"] = 0
@@ -112,7 +112,7 @@ class TestAPPO(unittest.TestCase):
             self.assertLessEqual(coeff, 0.005)
             self.assertGreaterEqual(coeff, 0.0005)

-            coeff = _step_n_times(trainer, 6)  # 120 timesteps
+            coeff = _step_n_times(trainer, 3)  # 120 timesteps
             # PiecewiseSchedule does interpolation. So roughly 0.0001 here.
             self.assertLessEqual(coeff, 0.0005)
             self.assertGreaterEqual(coeff, 0.00005)
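
The two test hunks above rely on a _step_n_times helper defined elsewhere in the test file, which this diff does not show. A plausible sketch of such a helper, assuming it reads the entropy coefficient back from the learner stats (the real implementation may differ):

def _step_n_times(trainer, n: int):
    """Call trainer.train() n times and return the last entropy coefficient.

    Sketch of the helper referenced above; its body is an assumption, since
    the diff only shows the call sites.
    """
    results = None
    for _ in range(n):
        results = trainer.train()
    # Assumed result layout; how the coefficient is read back may differ.
    return results["info"]["learner"]["default_policy"]["learner_stats"][
        "entropy_coeff"]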
@@ -167,7 +167,7 @@ def execution_plan(workers: WorkerSet,
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
-        buffer_size=config["buffer_size"],
+        capacity=config["buffer_size"],
         replay_batch_size=config["train_batch_size"],
         replay_mode=config["multiagent"]["replay_mode"],
         replay_sequence_length=config["replay_sequence_length"],
@@ -59,7 +59,7 @@ def custom_training_workflow(workers: WorkerSet, config: dict):
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=1000,
-        buffer_size=50000,
+        capacity=50000,
         replay_batch_size=64)

     def add_ppo_metrics(batch):
@@ -208,7 +208,7 @@ def test_store_to_replay_local(ray_start_regular_shared):
     buf = LocalReplayBuffer(
         num_shards=1,
         learning_starts=200,
-        buffer_size=1000,
+        capacity=1000,
         replay_batch_size=100,
         prioritized_replay_alpha=0.6,
         prioritized_replay_beta=0.4,