From baa053496a78bbb89279847277e49242f721af0e Mon Sep 17 00:00:00 2001 From: Sven Mika Date: Tue, 26 May 2020 11:10:27 +0200 Subject: [PATCH] [RLlib] Benchmark and regression test yaml cleanup and restructuring. (#8414) --- rllib/BUILD | 366 ++++++++++-------- rllib/agents/ddpg/ddpg_torch_policy.py | 3 +- rllib/agents/ddpg/tests/test_ddpg.py | 4 +- rllib/tests/run_regression_tests.py | 53 ++- rllib/tuned_examples/{ => a3c}/atari-a2c.yaml | 1 + .../cartpole-a2c-microbatch.yaml | 5 +- rllib/tuned_examples/a3c/cartpole-a2c.yaml | 11 + .../cartpole-a3c.yaml} | 7 +- rllib/tuned_examples/{ => a3c}/pong-a3c.yaml | 3 +- .../cartpole-ars.yaml} | 7 +- .../tuned_examples/{ => ars}/swimmer-ars.yaml | 2 + .../tuned_examples/cartpole-marwil-torch.yaml | 13 - rllib/tuned_examples/cleanup_experiment.py | 169 ++++++++ rllib/tuned_examples/create_plots.py | 5 + .../{ => ddpg}/halfcheetah-ddpg.yaml | 1 + .../{ => ddpg}/invertedpendulum-td3.yaml | 2 + .../mountaincarcontinuous-apex-ddpg.yaml | 2 + .../mountaincarcontinuous-ddpg.yaml | 2 + .../tuned_examples/{ => ddpg}/mujoco-td3.yaml | 2 + .../{ => ddpg}/pendulum-apex-ddpg.yaml | 2 + .../{ => ddpg}/pendulum-ddpg.yaml | 10 +- .../{ => ddpg}/pendulum-td3.yaml | 10 +- .../tuned_examples/{ => dqn}/atari-apex.yaml | 1 + .../{ => dqn}/atari-dist-dqn.yaml | 0 .../atari-dqn.yaml} | 1 + .../{ => dqn}/atari-duel-ddqn.yaml | 1 + .../cartpole-dqn-param-noise.yaml} | 3 +- .../cartpole-dqn.yaml} | 3 +- .../cartpole-simpleq.yaml} | 3 +- rllib/tuned_examples/{ => dqn}/pong-apex.yaml | 1 + rllib/tuned_examples/{ => dqn}/pong-dqn.yaml | 2 + .../{ => dqn}/pong-rainbow.yaml | 0 .../cartpole-es.yaml} | 3 +- rllib/tuned_examples/es/humanoid-es.yaml | 9 + rllib/tuned_examples/humanoid-es.yaml | 7 - .../{ => impala}/atari-impala-large.yaml | 0 .../{ => impala}/atari-impala.yaml | 0 .../cartpole-impala.yaml} | 1 + .../impala/pendulum-impala.yaml | 6 + .../{ => impala}/pong-impala-fast.yaml | 0 .../{ => impala}/pong-impala-vectorized.yaml | 0 .../{ => impala}/pong-impala.yaml | 0 .../cartpole-marwil.yaml} | 3 +- rllib/tuned_examples/pendulum-appo-torch.yaml | 21 - rllib/tuned_examples/pendulum-ppo.yaml | 18 - .../cartpole-pg.yaml} | 5 +- rllib/tuned_examples/pong-a3c-pytorch.yaml | 21 - .../tuned_examples/{ => ppo}/atari-ddppo.yaml | 1 + rllib/tuned_examples/{ => ppo}/atari-ppo.yaml | 1 + .../cartpole-appo-vtrace.yaml} | 3 +- .../cartpole-appo.yaml} | 3 +- .../cartpole-ddppo.yaml | 5 +- .../cartpole-grid-search-example.yaml | 1 + .../cartpole-ppo-hyperband.yaml} | 2 + .../cartpole-ppo.yaml} | 5 +- .../{ => ppo}/halfcheetah-appo.yaml | 7 +- .../{ => ppo}/halfcheetah-ppo.yaml | 47 +-- .../tuned_examples/{ => ppo}/hopper-ppo.yaml | 2 + .../{ => ppo}/humanoid-ppo-gae.yaml | 2 + .../{ => ppo}/humanoid-ppo.yaml | 2 + .../pendulum-appo.yaml} | 3 +- .../pendulum-ppo.yaml} | 4 +- rllib/tuned_examples/{ => ppo}/pong-appo.yaml | 60 +-- rllib/tuned_examples/{ => ppo}/pong-ppo.yaml | 4 +- .../{ => ppo}/walker2d-ppo.yaml | 2 + .../regression_tests/cartpole-a2c-torch.yaml | 9 - .../regression_tests/cartpole-appo-torch.yaml | 14 - .../cartpole-appo-vtrace-torch.yaml | 14 - .../regression_tests/cartpole-ars-tf.yaml | 17 - .../cartpole-dqn-param-noise-torch.yaml | 18 - .../regression_tests/cartpole-dqn-torch.yaml | 10 - .../regression_tests/cartpole-es-torch.yaml | 11 - .../cartpole-impala-torch.yaml | 9 - .../regression_tests/cartpole-pg-tf.yaml | 8 - .../regression_tests/cartpole-ppo-tf.yaml | 17 - .../regression_tests/cartpole-sac-torch.yaml | 17 - .../cartpole-simpleq-torch.yaml | 8 - 
.../regression_tests/pendulum-ddpg-tf.yaml | 10 - .../regression_tests/pendulum-ddpg-torch.yaml | 10 - .../regression_tests/pendulum-ppo-torch.yaml | 21 - .../regression_tests/pendulum-sac-tf.yaml | 13 - .../regression_tests/pendulum-sac-torch.yaml | 13 - .../regression_tests/pendulum-td3.yaml | 8 - rllib/tuned_examples/{ => sac}/atari-sac.yaml | 4 +- .../cartpole-sac.yaml} | 5 +- .../{ => sac}/halfcheetah-sac.yaml | 1 + .../{ => sac}/mspacman-sac.yaml | 1 + .../{ => sac}/pendulum-sac.yaml | 15 +- rllib/utils/schedules/piecewise_schedule.py | 2 +- 89 files changed, 614 insertions(+), 584 deletions(-) rename rllib/tuned_examples/{ => a3c}/atari-a2c.yaml (91%) rename rllib/tuned_examples/{regression_tests => a3c}/cartpole-a2c-microbatch.yaml (69%) create mode 100644 rllib/tuned_examples/a3c/cartpole-a2c.yaml rename rllib/tuned_examples/{regression_tests/cartpole-a3c-tf.yaml => a3c/cartpole-a3c.yaml} (51%) rename rllib/tuned_examples/{ => a3c}/pong-a3c.yaml (95%) rename rllib/tuned_examples/{regression_tests/cartpole-ars-torch.yaml => ars/cartpole-ars.yaml} (75%) rename rllib/tuned_examples/{ => ars}/swimmer-ars.yaml (87%) delete mode 100644 rllib/tuned_examples/cartpole-marwil-torch.yaml create mode 100644 rllib/tuned_examples/cleanup_experiment.py create mode 100644 rllib/tuned_examples/create_plots.py rename rllib/tuned_examples/{ => ddpg}/halfcheetah-ddpg.yaml (96%) rename rllib/tuned_examples/{ => ddpg}/invertedpendulum-td3.yaml (91%) rename rllib/tuned_examples/{ => ddpg}/mountaincarcontinuous-apex-ddpg.yaml (88%) rename rllib/tuned_examples/{ => ddpg}/mountaincarcontinuous-ddpg.yaml (95%) rename rllib/tuned_examples/{ => ddpg}/mujoco-td3.yaml (92%) rename rllib/tuned_examples/{ => ddpg}/pendulum-apex-ddpg.yaml (86%) rename rllib/tuned_examples/{ => ddpg}/pendulum-ddpg.yaml (89%) rename rllib/tuned_examples/{ => ddpg}/pendulum-td3.yaml (74%) rename rllib/tuned_examples/{ => dqn}/atari-apex.yaml (94%) rename rllib/tuned_examples/{ => dqn}/atari-dist-dqn.yaml (100%) rename rllib/tuned_examples/{atari-dqn-tf-and-torch.yaml => dqn/atari-dqn.yaml} (94%) rename rllib/tuned_examples/{ => dqn}/atari-duel-ddqn.yaml (94%) rename rllib/tuned_examples/{regression_tests/cartpole-dqn-param-noise-tf.yaml => dqn/cartpole-dqn-param-noise.yaml} (86%) rename rllib/tuned_examples/{regression_tests/cartpole-dqn-tf.yaml => dqn/cartpole-dqn.yaml} (77%) rename rllib/tuned_examples/{regression_tests/cartpole-simpleq-tf.yaml => dqn/cartpole-simpleq.yaml} (74%) rename rllib/tuned_examples/{ => dqn}/pong-apex.yaml (94%) rename rllib/tuned_examples/{ => dqn}/pong-dqn.yaml (89%) rename rllib/tuned_examples/{ => dqn}/pong-rainbow.yaml (100%) rename rllib/tuned_examples/{regression_tests/cartpole-es-tf.yaml => es/cartpole-es.yaml} (81%) create mode 100644 rllib/tuned_examples/es/humanoid-es.yaml delete mode 100644 rllib/tuned_examples/humanoid-es.yaml rename rllib/tuned_examples/{ => impala}/atari-impala-large.yaml (100%) rename rllib/tuned_examples/{ => impala}/atari-impala.yaml (100%) rename rllib/tuned_examples/{regression_tests/cartpole-impala-tf.yaml => impala/cartpole-impala.yaml} (83%) create mode 100644 rllib/tuned_examples/impala/pendulum-impala.yaml rename rllib/tuned_examples/{ => impala}/pong-impala-fast.yaml (100%) rename rllib/tuned_examples/{ => impala}/pong-impala-vectorized.yaml (100%) rename rllib/tuned_examples/{ => impala}/pong-impala.yaml (100%) rename rllib/tuned_examples/{cartpole-marwil-tf.yaml => marwil/cartpole-marwil.yaml} (85%) delete mode 100644 
rllib/tuned_examples/pendulum-appo-torch.yaml delete mode 100644 rllib/tuned_examples/pendulum-ppo.yaml rename rllib/tuned_examples/{regression_tests/cartpole-pg-torch.yaml => pg/cartpole-pg.yaml} (64%) delete mode 100644 rllib/tuned_examples/pong-a3c-pytorch.yaml rename rllib/tuned_examples/{ => ppo}/atari-ddppo.yaml (94%) rename rllib/tuned_examples/{ => ppo}/atari-ppo.yaml (93%) rename rllib/tuned_examples/{regression_tests/cartpole-appo-vtrace-tf.yaml => ppo/cartpole-appo-vtrace.yaml} (83%) rename rllib/tuned_examples/{regression_tests/cartpole-appo-tf.yaml => ppo/cartpole-appo.yaml} (85%) rename rllib/tuned_examples/{regression_tests => ppo}/cartpole-ddppo.yaml (59%) rename rllib/tuned_examples/{ => ppo}/cartpole-grid-search-example.yaml (83%) rename rllib/tuned_examples/{hyperband-cartpole.yaml => ppo/cartpole-ppo-hyperband.yaml} (83%) rename rllib/tuned_examples/{regression_tests/cartpole-ppo-torch.yaml => ppo/cartpole-ppo.yaml} (82%) rename rllib/tuned_examples/{ => ppo}/halfcheetah-appo.yaml (83%) rename rllib/tuned_examples/{ => ppo}/halfcheetah-ppo.yaml (82%) rename rllib/tuned_examples/{ => ppo}/hopper-ppo.yaml (83%) rename rllib/tuned_examples/{ => ppo}/humanoid-ppo-gae.yaml (88%) rename rllib/tuned_examples/{ => ppo}/humanoid-ppo.yaml (87%) rename rllib/tuned_examples/{pendulum-appo-tf.yaml => ppo/pendulum-appo.yaml} (89%) rename rllib/tuned_examples/{regression_tests/pendulum-ppo-tf.yaml => ppo/pendulum-ppo.yaml} (81%) rename rllib/tuned_examples/{ => ppo}/pong-appo.yaml (86%) rename rllib/tuned_examples/{ => ppo}/pong-ppo.yaml (84%) rename rllib/tuned_examples/{ => ppo}/walker2d-ppo.yaml (82%) delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-a2c-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-appo-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-appo-vtrace-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-ars-tf.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-dqn-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-es-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-impala-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-pg-tf.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-ppo-tf.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-sac-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/cartpole-simpleq-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/pendulum-ddpg-tf.yaml delete mode 100644 rllib/tuned_examples/regression_tests/pendulum-ddpg-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/pendulum-ppo-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/pendulum-sac-tf.yaml delete mode 100644 rllib/tuned_examples/regression_tests/pendulum-sac-torch.yaml delete mode 100644 rllib/tuned_examples/regression_tests/pendulum-td3.yaml rename rllib/tuned_examples/{ => sac}/atari-sac.yaml (94%) rename rllib/tuned_examples/{regression_tests/cartpole-sac-tf.yaml => sac/cartpole-sac.yaml} (85%) rename rllib/tuned_examples/{ => sac}/halfcheetah-sac.yaml (95%) rename rllib/tuned_examples/{ => sac}/mspacman-sac.yaml (97%) rename rllib/tuned_examples/{ => sac}/pendulum-sac.yaml (76%) diff --git a/rllib/BUILD b/rllib/BUILD index 9203e13aa..75b8d4d45 100644 --- a/rllib/BUILD +++ 
b/rllib/BUILD @@ -37,70 +37,120 @@ # Tag: learning_tests # # This will test all yaml files (via `rllib train`) -# inside rllib/tuned_examples/regression_tests for actual learning success. +# inside rllib/tuned_examples/[algo-name] for actual learning success. # -------------------------------------------------------------------- +# A2C/A3C py_test( - name = "run_regression_tests_cartpole_pg_a3c_tf", + name = "regression_test_a2c_cartpole_tf", main = "tests/run_regression_tests.py", tags = ["learning_tests_tf", "learning_tests_cartpole"], - size = "large", + size = "medium", srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-pg-tf.yaml", - "tuned_examples/regression_tests/cartpole-a3c-tf.yaml", - ], - args = ["BAZEL", "tuned_examples/regression_tests"] + data = ["tuned_examples/a3c/cartpole-a2c.yaml"], + args = ["--yaml-dir=tuned_examples/a3c"] ) +py_test( + name = "regression_test_a2c_cartpole_torch", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_torch", "learning_tests_cartpole"], + size = "medium", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/a3c/cartpole-a2c.yaml"], + args = ["--yaml-dir=tuned_examples/a3c", "--torch"] +) + +py_test( + name = "regression_test_a3c_cartpole_tf", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_tf", "learning_tests_cartpole"], + size = "medium", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/a3c/cartpole-a3c.yaml"], + args = ["--yaml-dir=tuned_examples/a3c"] +) + +py_test( + name = "regression_test_a3c_cartpole_torch", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_torch", "learning_tests_cartpole"], + size = "medium", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/a3c/cartpole-a3c.yaml"], + args = ["--yaml-dir=tuned_examples/a3c", "--torch"] +) + +# APPO py_test( name = "run_regression_tests_cartpole_appo_tf", main = "tests/run_regression_tests.py", tags = ["learning_tests_tf", "learning_tests_cartpole"], - size = "large", + size = "medium", srcs = ["tests/run_regression_tests.py"], data = [ - "tuned_examples/regression_tests/cartpole-appo-tf.yaml", + "tuned_examples/ppo/cartpole-appo.yaml", + "tuned_examples/ppo/cartpole-appo-vtrace.yaml" ], - args = ["BAZEL", "tuned_examples/regression_tests"] + args = ["--yaml-dir=tuned_examples/ppo"] ) py_test( - name = "run_regression_tests_cartpole_appo_vtrace_tf", + name = "run_regression_tests_cartpole_appo_torch", main = "tests/run_regression_tests.py", - tags = ["learning_tests_tf", "learning_tests_cartpole"], - size = "large", + tags = ["learning_tests_torch", "learning_tests_cartpole"], + size = "medium", srcs = ["tests/run_regression_tests.py"], data = [ - "tuned_examples/regression_tests/cartpole-appo-vtrace-tf.yaml", + "tuned_examples/ppo/cartpole-appo.yaml", + "tuned_examples/ppo/cartpole-appo-vtrace.yaml" ], - args = ["BAZEL", "tuned_examples/regression_tests"] -) - -py_test( - name = "run_regression_tests_cartpole_es_tf", - main = "tests/run_regression_tests.py", - tags = ["learning_tests_tf", "learning_tests_cartpole"], - size = "large", - srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-es-tf.yaml", - ], - args = ["BAZEL", "tuned_examples/regression_tests"] + args = ["--yaml-dir=tuned_examples/ppo", "--torch"] ) +# ARS py_test( name = "run_regression_tests_cartpole_ars_tf", main = "tests/run_regression_tests.py", tags = ["learning_tests_tf", "learning_tests_cartpole"], - size = 
"large", + size = "medium", srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-ars-tf.yaml", - ], - args = ["BAZEL", "tuned_examples/regression_tests"] + data = ["tuned_examples/ars/cartpole-ars.yaml"], + args = ["--yaml-dir=tuned_examples/ars"] ) +py_test( + name = "run_regression_tests_cartpole_ars_torch", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_torch", "learning_tests_cartpole"], + size = "medium", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/ars/cartpole-ars.yaml"], + args = ["--yaml-dir=tuned_examples/ars", "--torch"] +) + +# DDPG +py_test( + name = "run_regression_tests_pendulum_ddpg_tf", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_tf", "learning_tests_pendulum"], + size = "large", + srcs = ["tests/run_regression_tests.py"], + data = glob(["tuned_examples/ddpg/pendulum-ddpg.yaml"]), + args = ["--yaml-dir=tuned_examples/ddpg"] +) + +py_test( + name = "run_regression_tests_pendulum_ddpg_torch", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_torch", "learning_tests_pendulum"], + size = "large", + srcs = ["tests/run_regression_tests.py"], + data = glob(["tuned_examples/ddpg/pendulum-ddpg.yaml"]), + args = ["--torch", "--yaml-dir=tuned_examples/ddpg"] +) + +# DQN/Simple-Q py_test( name = "run_regression_tests_cartpole_dqn_tf", main = "tests/run_regression_tests.py", @@ -108,95 +158,11 @@ py_test( size = "large", srcs = ["tests/run_regression_tests.py"], data = [ - "tuned_examples/regression_tests/cartpole-simpleq-tf.yaml", - "tuned_examples/regression_tests/cartpole-dqn-tf.yaml", - "tuned_examples/regression_tests/cartpole-dqn-param-noise-tf.yaml", + "tuned_examples/dqn/cartpole-simpleq.yaml", + "tuned_examples/dqn/cartpole-dqn.yaml", + "tuned_examples/dqn/cartpole-dqn-param-noise.yaml", ], - args = ["BAZEL", "tuned_examples/regression_tests"] -) - -py_test( - name = "run_regression_tests_cartpole_impala_tf", - main = "tests/run_regression_tests.py", - tags = ["learning_tests_tf", "learning_tests_cartpole"], - size = "large", - srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-impala-tf.yaml", - ], - args = ["BAZEL", "tuned_examples/regression_tests"] -) - -py_test( - name = "run_regression_tests_cartpole_sac_tf", - main = "tests/run_regression_tests.py", - tags = ["learning_tests_tf", "learning_tests_cartpole"], - size = "large", - srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-sac-tf.yaml", - ], - args = ["BAZEL", "tuned_examples/regression_tests"] -) - -py_test( - name = "run_regression_tests_cartpole_ppo_tf", - main = "tests/run_regression_tests.py", - tags = ["learning_tests_tf", "learning_tests_cartpole"], - size = "large", - srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-ppo-tf.yaml", - ], - args = ["BAZEL", "tuned_examples/regression_tests"] -) - -py_test( - name = "run_regression_tests_cartpole_a2c_torch", - main = "tests/run_regression_tests.py", - tags = ["learning_tests_torch", "learning_tests_cartpole"], - size = "large", - srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-a2c-torch.yaml" - ], - args = ["BAZEL", "tuned_examples/regression_tests"] -) - -py_test( - name = "run_regression_tests_cartpole_appo_torch", - main = "tests/run_regression_tests.py", - tags = ["learning_tests_torch", "learning_tests_cartpole"], - size = "large", - srcs = 
["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-appo-torch.yaml" - ], - args = ["BAZEL", "tuned_examples/regression_tests"] -) - -py_test( - name = "run_regression_tests_cartpole_appo_vtrace_torch", - main = "tests/run_regression_tests.py", - tags = ["learning_tests_torch", "learning_tests_cartpole"], - size = "large", - srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-appo-vtrace-torch.yaml" - ], - args = ["BAZEL", "tuned_examples/regression_tests"] -) - -py_test( - name = "run_regression_tests_cartpole_ars_torch", - main = "tests/run_regression_tests.py", - tags = ["learning_tests_torch", "learning_tests_cartpole"], - size = "large", - srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-ars-torch.yaml" - ], - args = ["BAZEL", "tuned_examples/regression_tests"] + args = ["--yaml-dir=tuned_examples/dqn"] ) py_test( @@ -206,91 +172,177 @@ py_test( size = "large", srcs = ["tests/run_regression_tests.py"], data = [ - "tuned_examples/regression_tests/cartpole-dqn-param-noise-torch.yaml" + "tuned_examples/dqn/cartpole-simpleq.yaml", + "tuned_examples/dqn/cartpole-dqn.yaml", + "tuned_examples/dqn/cartpole-dqn-param-noise.yaml", ], - args = ["BAZEL", "tuned_examples/regression_tests"] + args = ["--yaml-dir=tuned_examples/dqn", "--torch"] +) + +# ES +py_test( + name = "run_regression_tests_cartpole_es_tf", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_tf", "learning_tests_cartpole"], + size = "medium", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/es/cartpole-es.yaml"], + args = ["--yaml-dir=tuned_examples/es"] ) py_test( name = "run_regression_tests_cartpole_es_torch", main = "tests/run_regression_tests.py", tags = ["learning_tests_torch", "learning_tests_cartpole"], - size = "large", + size = "medium", srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-es-torch.yaml" - ], - args = ["BAZEL", "tuned_examples/regression_tests"] + data = ["tuned_examples/es/cartpole-es.yaml"], + args = ["--yaml-dir=tuned_examples/es", "--torch"] +) + +# IMPALA +py_test( + name = "run_regression_tests_cartpole_impala_tf", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_tf", "learning_tests_cartpole"], + size = "medium", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/impala/cartpole-impala.yaml"], + args = ["--yaml-dir=tuned_examples/impala"] ) py_test( name = "run_regression_tests_cartpole_impala_torch", main = "tests/run_regression_tests.py", tags = ["learning_tests_torch", "learning_tests_cartpole"], - size = "large", + size = "medium", srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-impala-torch.yaml" - ], - args = ["BAZEL", "tuned_examples/regression_tests"] + data = ["tuned_examples/impala/cartpole-impala.yaml"], + args = ["--yaml-dir=tuned_examples/impala", "--torch"] +) + +# PG +py_test( + name = "run_regression_tests_cartpole_pg_tf", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_tf", "learning_tests_cartpole"], + size = "medium", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/pg/cartpole-pg.yaml"], + args = ["--yaml-dir=tuned_examples/pg"] ) py_test( name = "run_regression_tests_cartpole_pg_torch", main = "tests/run_regression_tests.py", tags = ["learning_tests_torch", "learning_tests_cartpole"], - size = "large", + size = "medium", srcs = 
["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-pg-torch.yaml" - ], - args = ["BAZEL", "tuned_examples/regression_tests"] + data = ["tuned_examples/pg/cartpole-pg.yaml"], + args = ["--yaml-dir=tuned_examples/pg", "--torch"] +) + +# PPO +py_test( + name = "run_regression_tests_cartpole_ppo_tf", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_tf", "learning_tests_cartpole"], + size = "medium", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/ppo/cartpole-ppo.yaml"], + args = ["--yaml-dir=tuned_examples/ppo"] ) py_test( name = "run_regression_tests_cartpole_ppo_torch", main = "tests/run_regression_tests.py", tags = ["learning_tests_torch", "learning_tests_cartpole"], + size = "medium", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/ppo/cartpole-ppo.yaml"], + args = ["--yaml-dir=tuned_examples/ppo", "--torch"] +) + +py_test( + name = "run_regression_tests_pendulum_ppo_tf", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_tf", "learning_tests_pendulum"], size = "large", srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-ppo-torch.yaml" - ], - args = ["BAZEL", "tuned_examples/regression_tests"] + data = ["tuned_examples/ppo/pendulum-ppo.yaml"], + args = ["--yaml-dir=tuned_examples/ppo"] +) + +py_test( + name = "run_regression_tests_pendulum_ppo_torch", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_torch", "learning_tests_pendulum"], + size = "large", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/ppo/pendulum-ppo.yaml"], + args = ["--torch", "--yaml-dir=tuned_examples/ppo"] +) + +# SAC +py_test( + name = "run_regression_tests_cartpole_sac_tf", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_tf", "learning_tests_cartpole"], + size = "medium", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/sac/cartpole-sac.yaml"], + args = ["--yaml-dir=tuned_examples/sac"] ) py_test( name = "run_regression_tests_cartpole_sac_torch", main = "tests/run_regression_tests.py", tags = ["learning_tests_torch", "learning_tests_cartpole"], - size = "large", + size = "medium", srcs = ["tests/run_regression_tests.py"], - data = [ - "tuned_examples/regression_tests/cartpole-sac-torch.yaml" - ], - args = ["BAZEL", "tuned_examples/regression_tests"] + data = ["tuned_examples/sac/cartpole-sac.yaml"], + args = ["--yaml-dir=tuned_examples/sac", "--torch"] ) py_test( - name = "run_regression_tests_pendulum_tf", + name = "run_regression_tests_pendulum_sac_tf", main = "tests/run_regression_tests.py", tags = ["learning_tests_tf", "learning_tests_pendulum"], - size = "enormous", # = 60min timeout + size = "large", srcs = ["tests/run_regression_tests.py"], - data = glob(["tuned_examples/regression_tests/pendulum-*-tf.yaml"]), - # Pass `BAZEL` option and the path to look for yaml regression files. 
- args = ["BAZEL", "tuned_examples/regression_tests"] + data = ["tuned_examples/sac/pendulum-sac.yaml"], + args = ["--yaml-dir=tuned_examples/sac"] ) py_test( - name = "run_regression_tests_pendulum_torch", + name = "run_regression_tests_pendulum_sac_torch", main = "tests/run_regression_tests.py", tags = ["learning_tests_torch", "learning_tests_pendulum"], - size = "enormous", # = 60min timeout + size = "large", srcs = ["tests/run_regression_tests.py"], - data = glob(["tuned_examples/regression_tests/pendulum-*-torch.yaml"]), - # Pass `BAZEL` option and the path to look for yaml regression files. - args = ["BAZEL", "tuned_examples/regression_tests"] + data = ["tuned_examples/sac/pendulum-sac.yaml"], + args = ["--yaml-dir=tuned_examples/sac", "--torch"] +) + +# TD3 +py_test( + name = "run_regression_tests_pendulum_td3_tf", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_tf", "learning_tests_pendulum"], + size = "large", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/ddpg/pendulum-td3.yaml"], + args = ["--yaml-dir=tuned_examples/ddpg"] +) + +py_test( + name = "run_regression_tests_pendulum_td3_torch", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_torch", "learning_tests_pendulum"], + size = "large", + srcs = ["tests/run_regression_tests.py"], + data = ["tuned_examples/ddpg/pendulum-td3.yaml"], + args = ["--yaml-dir=tuned_examples/ddpg", "--torch"] ) # -------------------------------------------------------------------- diff --git a/rllib/agents/ddpg/ddpg_torch_policy.py b/rllib/agents/ddpg/ddpg_torch_policy.py index aa57b59fb..1c8d1d7a9 100644 --- a/rllib/agents/ddpg/ddpg_torch_policy.py +++ b/rllib/agents/ddpg/ddpg_torch_policy.py @@ -200,7 +200,8 @@ def build_ddpg_stats(policy, batch): "mean_q": torch.mean(policy.q_t), "max_q": torch.max(policy.q_t), "min_q": torch.min(policy.q_t), - "td_error": policy.td_error + "mean_td_error": torch.mean(policy.td_error), + "td_error": policy.td_error, } return stats diff --git a/rllib/agents/ddpg/tests/test_ddpg.py b/rllib/agents/ddpg/tests/test_ddpg.py index a66949b9b..2b7e06aaf 100644 --- a/rllib/agents/ddpg/tests/test_ddpg.py +++ b/rllib/agents/ddpg/tests/test_ddpg.py @@ -23,7 +23,9 @@ class TestDDPG(unittest.TestCase): """Test whether a DDPGTrainer can be built with both frameworks.""" config = ddpg.DEFAULT_CONFIG.copy() config["num_workers"] = 0 # Run locally. - config["num_envs_per_worker"] = 2 # Run locally. + config["num_envs_per_worker"] = 2 + config["learning_starts"] = 0 + config["exploration_config"]["random_timesteps"] = 100 num_iterations = 2 diff --git a/rllib/tests/run_regression_tests.py b/rllib/tests/run_regression_tests.py index 6399282df..2c1f399af 100644 --- a/rllib/tests/run_regression_tests.py +++ b/rllib/tests/run_regression_tests.py @@ -9,13 +9,15 @@ # name = "run_regression_tests", # main = "tests/run_regression_tests.py", # tags = ["learning_tests"], -# size = "enormous", # = 60min timeout +# size = "medium", # 5min timeout # srcs = ["tests/run_regression_tests.py"], # data = glob(["tuned_examples/regression_tests/*.yaml"]), -# Pass `BAZEL` option and the path to look for yaml regression files. +# # Pass `BAZEL` option and the path to look for yaml regression files. 
# args = ["BAZEL", "tuned_examples/regression_tests"] # ) +import argparse +import os from pathlib import Path import sys import yaml @@ -24,30 +26,51 @@ import ray from ray.tune import run_experiments from ray.rllib import _register_all -if __name__ == "__main__": - # Bazel regression test mode: Get path to look for yaml files from argv[2]. - if sys.argv[1] == "BAZEL": - # Get the path to use. - rllib_dir = Path(__file__).parent.parent - print("rllib dir={}".format(rllib_dir)) - yaml_files = rllib_dir.rglob(sys.argv[2] + "/*.yaml") - yaml_files = sorted( - map(lambda path: str(path.absolute()), yaml_files), reverse=True) - # Normal mode: Get yaml files to run from command line. - else: - yaml_files = sys.argv[1:] +parser = argparse.ArgumentParser() +parser.add_argument( + "--torch", + action="store_true", + help="Runs all tests with PyTorch enabled.") +parser.add_argument( + "--yaml-dir", + type=str, + help="The directory in which to find all yamls to test.") - print("Will run the following regression files:") +if __name__ == "__main__": + args = parser.parse_args() + + # Bazel regression test mode: Get path to look for yaml files from argv[2]. + # Get the path or single file to use. + rllib_dir = Path(__file__).parent.parent + print("rllib dir={}".format(rllib_dir)) + + if not os.path.isdir(os.path.join(rllib_dir, args.yaml_dir)): + raise ValueError("yaml-dir ({}) not found!".format(args.yaml_dir)) + + yaml_files = rllib_dir.rglob(args.yaml_dir + "/*.yaml") + yaml_files = sorted( + map(lambda path: str(path.absolute()), yaml_files), reverse=True) + + print("Will run the following regression tests:") for yaml_file in yaml_files: print("->", yaml_file) # Loop through all collected files. for yaml_file in yaml_files: experiments = yaml.load(open(yaml_file).read()) + assert len(experiments) == 1,\ + "Error, can only run a single experiment per yaml file!" print("== Test config ==") print(yaml.dump(experiments)) + # Add torch option to exp configs. + for exp in experiments.values(): + if args.torch: + exp["config"]["use_pytorch"] = True + + # Try running each test 3 times and make sure it reaches the given + # reward. passed = False for i in range(3): try: diff --git a/rllib/tuned_examples/atari-a2c.yaml b/rllib/tuned_examples/a3c/atari-a2c.yaml similarity index 91% rename from rllib/tuned_examples/atari-a2c.yaml rename to rllib/tuned_examples/a3c/atari-a2c.yaml index d8290142a..276b05767 100644 --- a/rllib/tuned_examples/atari-a2c.yaml +++ b/rllib/tuned_examples/a3c/atari-a2c.yaml @@ -9,6 +9,7 @@ atari-a2c: - SpaceInvadersNoFrameskip-v4 run: A2C config: + use_pytorch: false # <- switch on/off torch rollout_fragment_length: 20 clip_rewards: True num_workers: 5 diff --git a/rllib/tuned_examples/regression_tests/cartpole-a2c-microbatch.yaml b/rllib/tuned_examples/a3c/cartpole-a2c-microbatch.yaml similarity index 69% rename from rllib/tuned_examples/regression_tests/cartpole-a2c-microbatch.yaml rename to rllib/tuned_examples/a3c/cartpole-a2c-microbatch.yaml index 7ddafc01d..bf7fc76fa 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-a2c-microbatch.yaml +++ b/rllib/tuned_examples/a3c/cartpole-a2c-microbatch.yaml @@ -1,10 +1,11 @@ -cartpole-a2c-microbatch-tf: +cartpole-a2c-microbatch: env: CartPole-v0 run: A2C stop: - episode_reward_mean: 100 + episode_reward_mean: 150 timesteps_total: 100000 config: + # Works for both torch and tf. 
         use_pytorch: false
         num_workers: 1
         gamma: 0.95
diff --git a/rllib/tuned_examples/a3c/cartpole-a2c.yaml b/rllib/tuned_examples/a3c/cartpole-a2c.yaml
new file mode 100644
index 000000000..06a129e8d
--- /dev/null
+++ b/rllib/tuned_examples/a3c/cartpole-a2c.yaml
@@ -0,0 +1,11 @@
+cartpole-a2c:
+    env: CartPole-v0
+    run: A2C
+    stop:
+        episode_reward_mean: 150
+        timesteps_total: 500000
+    config:
+        # Works for both torch and tf.
+        use_pytorch: false
+        num_workers: 0
+        lr: 0.001
diff --git a/rllib/tuned_examples/regression_tests/cartpole-a3c-tf.yaml b/rllib/tuned_examples/a3c/cartpole-a3c.yaml
similarity index 51%
rename from rllib/tuned_examples/regression_tests/cartpole-a3c-tf.yaml
rename to rllib/tuned_examples/a3c/cartpole-a3c.yaml
index 3713db3a9..77f9f4946 100644
--- a/rllib/tuned_examples/regression_tests/cartpole-a3c-tf.yaml
+++ b/rllib/tuned_examples/a3c/cartpole-a3c.yaml
@@ -1,10 +1,11 @@
-cartpole-a3c-tf:
+cartpole-a3c:
     env: CartPole-v0
     run: A3C
     stop:
-        episode_reward_mean: 100
-        timesteps_total: 100000
+        episode_reward_mean: 150
+        timesteps_total: 200000
     config:
+        # Works for both torch and tf.
         use_pytorch: false
         num_workers: 1
         gamma: 0.95
diff --git a/rllib/tuned_examples/pong-a3c.yaml b/rllib/tuned_examples/a3c/pong-a3c.yaml
similarity index 95%
rename from rllib/tuned_examples/pong-a3c.yaml
rename to rllib/tuned_examples/a3c/pong-a3c.yaml
index 0d0898e90..c62ee31e0 100644
--- a/rllib/tuned_examples/pong-a3c.yaml
+++ b/rllib/tuned_examples/a3c/pong-a3c.yaml
@@ -4,9 +4,10 @@ pong-a3c:
     env: PongDeterministic-v4
     run: A3C
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         num_workers: 16
         rollout_fragment_length: 20
-        use_pytorch: false
         vf_loss_coeff: 0.5
         entropy_coeff: 0.01
         gamma: 0.99
diff --git a/rllib/tuned_examples/regression_tests/cartpole-ars-torch.yaml b/rllib/tuned_examples/ars/cartpole-ars.yaml
similarity index 75%
rename from rllib/tuned_examples/regression_tests/cartpole-ars-torch.yaml
rename to rllib/tuned_examples/ars/cartpole-ars.yaml
index 6aca5a26c..034689437 100644
--- a/rllib/tuned_examples/regression_tests/cartpole-ars-torch.yaml
+++ b/rllib/tuned_examples/ars/cartpole-ars.yaml
@@ -1,11 +1,12 @@
-cartpole-ars-torch:
+cartpole-ars:
     env: CartPole-v0
     run: ARS
     stop:
         episode_reward_mean: 150
         timesteps_total: 500000
     config:
-        use_pytorch: true
+        # Works for both torch and tf.
+        use_pytorch: false
         noise_stdev: 0.02
         num_rollouts: 50
         rollouts_used: 25
@@ -13,5 +14,3 @@ cartpole-ars-torch:
         sgd_stepsize: 0.01
         noise_size: 25000000
         eval_prob: 0.5
-        model:
-            fcnet_hiddens: [64, 64]
diff --git a/rllib/tuned_examples/swimmer-ars.yaml b/rllib/tuned_examples/ars/swimmer-ars.yaml
similarity index 87%
rename from rllib/tuned_examples/swimmer-ars.yaml
rename to rllib/tuned_examples/ars/swimmer-ars.yaml
index effb4cfe1..fce949971 100644
--- a/rllib/tuned_examples/swimmer-ars.yaml
+++ b/rllib/tuned_examples/ars/swimmer-ars.yaml
@@ -3,6 +3,8 @@ swimmer-ars:
     env: Swimmer-v2
     run: ARS
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         noise_stdev: 0.01
         num_rollouts: 1
         rollouts_used: 1
diff --git a/rllib/tuned_examples/cartpole-marwil-torch.yaml b/rllib/tuned_examples/cartpole-marwil-torch.yaml
deleted file mode 100644
index 7b3d0fe99..000000000
--- a/rllib/tuned_examples/cartpole-marwil-torch.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-# To generate training data, first run:
-# $ ./train.py --run=PPO --env=CartPole-v0 \
-#     --stop='{"timesteps_total": 50000}' \
-#     --config='{"use_pytorch": true, "output": "/tmp/out", "batch_mode": "complete_episodes"}'
-cartpole-marwil-torch:
-    env: CartPole-v0
-    run: MARWIL
-    stop:
-        timesteps_total: 500000
-    config:
-        beta:
-            grid_search: [0, 1] # compare IL (beta=0) vs MARWIL
-        input: /tmp/out
diff --git a/rllib/tuned_examples/cleanup_experiment.py b/rllib/tuned_examples/cleanup_experiment.py
new file mode 100644
index 000000000..0a34f84cd
--- /dev/null
+++ b/rllib/tuned_examples/cleanup_experiment.py
@@ -0,0 +1,169 @@
+"""
+This script automates cleaning up a benchmark/experiment run of some algo
+against some config (with possibly more than one tune trial,
+e.g. torch=grid_search([True, False])).
+
+Run `python cleanup_experiment.py --help` for more information.
+
+Use on an input directory with trial contents e.g.:
+..
+IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_10-17-54topr3h9k
+IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_13-59-35dqaetxnf
+IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_17-21-28tbhedw72
+IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_10-17-54lv20cgn_
+IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_13-59-35kwzhax_y
+IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_17-21-28a5j0s7za
+
+Then run:
+>> python cleanup_experiment.py --experiment-dir [parent dir w/ trial sub-dirs]
+>> --output-dir [your out dir] --results-filter dumb_col_2,superfluous_col3
+>> --results-max-size [max results file size in kb before(!) zipping]
+
+The script will create one output sub-dir for each trial and only copy
+the configuration and the csv results (filtered and every nth row removed
+based on the given args).
+"""
+
+import argparse
+import json
+import os
+import re
+import shutil
+import yaml
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--experiment-dir",
+    type=str,
+    help="Experiment dir in which all sub-runs (seeds) are "
+    "located (as sub-dirs). Each sub-run dir must contain the files: "
+    "params.json and progress.csv.")
+parser.add_argument(
+    "--output-dir",
+    type=str,
+    help="The output dir, in which the cleaned up output will be placed.")
+parser.add_argument(
+    "--results-filter",
+    type=str,
+    help="comma-separated list of csv fields to exclude.",
+    default="experiment_id,pid,hostname,node_ip,trial_id,hist_stats/episode_"
+    "reward,hist_stats/episode_lengths,experiment_tag")
+parser.add_argument(
+    "--results-max-size",
+    type=int,
+    help="the max. size of the final results.csv file (in kb). Will erase "
+    "every nth line in the original input to reach that goal. "
+    "Use 0 for no limit (default=100).",
+    default=100)
+
+
+def process_single_run(in_dir, out_dir):
+    exp_dir = os.listdir(in_dir)
+
+    # Make sure trials dir is ok.
+    assert "params.json" in exp_dir and "progress.csv" in exp_dir, \
+        "params.json or progress.csv not found in {}!".format(in_dir)
+
+    os.makedirs(out_dir, exist_ok=True)
+
+    for file in exp_dir:
+        absfile = os.path.join(in_dir, file)
+        # Config file -> Convert to yaml and move to output dir.
+ if file == "params.json": + assert os.path.isfile(absfile), "{} not a file!".format(file) + with open(absfile) as fp: + contents = json.load(fp) + with open(os.path.join(out_dir, "config.yaml"), "w") as fp: + yaml.dump(contents, fp) + # Progress csv file -> Filter out some columns, cut, and write to + # output_dir. + elif file == "progress.csv": + assert os.path.isfile(absfile), "{} not a file!".format(file) + col_idx_to_filter = [] + with open(absfile) as fp: + # Get column names. + col_names_orig = fp.readline().strip().split(",") + # Split by comma (abiding to quotes), filter out + # unwanted columns, then write to disk. + cols_to_filter = args.results_filter.split(",") + for i, c in enumerate(col_names_orig): + if c in cols_to_filter: + col_idx_to_filter.insert(0, i) + col_names = col_names_orig.copy() + for idx in col_idx_to_filter: + col_names.pop(idx) + absfile_out = os.path.join(out_dir, "progress.csv") + with open(absfile_out, "w") as out_fp: + print(",".join(col_names), file=out_fp) + while True: + line = fp.readline().strip() + if not line: + break + line = re.sub( + "(,{2,})", + lambda m: ",None" * (len(m.group()) - 1) + ",", + line) + cols = re.findall('".+?"|[^,]+', line) + if len(cols) != len(col_names_orig): + continue + for idx in col_idx_to_filter: + cols.pop(idx) + print(",".join(cols), file=out_fp) + + # Reduce the size of the output file if necessary. + out_size = os.path.getsize(absfile_out) + max_size = args.results_max_size * 1024 + if 0 < max_size < out_size: + # Figure out roughly every which line we have to drop. + ratio = out_size / max_size + # If ratio > 2.0, we'll have to keep only every nth line. + if ratio > 2.0: + nth = out_size // max_size + os.system("awk 'NR==1||NR%{}==0' {} > {}.new".format( + nth, absfile_out, absfile_out)) + # If ratio < 2.0 (>1.0), we'll have to drop every nth line. + else: + nth = out_size // (out_size - max_size) + os.system("awk 'NR==1||NR%{}!=0' {} > {}.new".format( + nth, absfile_out, absfile_out)) + os.remove(absfile_out) + os.rename(absfile_out + ".new", absfile_out) + + # Zip progress.csv into results.zip. + zip_file = os.path.join(out_dir, "results.zip") + try: + os.remove(zip_file) + except FileNotFoundError: + pass + os.system("zip -j {} {}".format( + zip_file, os.path.join(out_dir, "progress.csv"))) + os.remove(os.path.join(out_dir, "progress.csv")) + + # TBX events file -> Move as is. + elif re.search("^(events\\.out\\.|params\\.pkl)", file): + assert os.path.isfile(absfile), "{} not a file!".format(file) + shutil.copyfile(absfile, os.path.join(out_dir, file)) + + +if __name__ == "__main__": + args = parser.parse_args() + exp_dir = os.listdir(args.experiment_dir) + # Loop through all sub-directories. + for i, sub_run in enumerate(sorted(exp_dir)): + abspath = os.path.join(args.experiment_dir, sub_run) + # This is a seed run. + if os.path.isdir(abspath) and \ + re.search("^(\\w+?)_(\\w+?-v\\d+)(_\\d+)", sub_run): + # Create meaningful output dir name: + # [algo]_[env]_[trial #]_[trial-config]_[date YYYY-MM-DD]. + cleaned_up_out = re.sub( + "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)(_.+)?(_\\d{4}-\\d{2}-\\d{2})" + "_\\d{2}-\\d{2}-\\w+", "{:02}_\\1_\\2\\4\\5".format(i), + sub_run) + # Remove superflous `env=` specifier (anv always included in name). + cleaned_up_out = re.sub("^(.+)env=\\w+?-v\\d+,?(.+)", "\\1\\2", + cleaned_up_out) + out_path = os.path.join(args.output_dir, cleaned_up_out) + process_single_run(abspath, out_path) + # Done. 
+ print("done") diff --git a/rllib/tuned_examples/create_plots.py b/rllib/tuned_examples/create_plots.py new file mode 100644 index 000000000..aae042bb5 --- /dev/null +++ b/rllib/tuned_examples/create_plots.py @@ -0,0 +1,5 @@ +# TODO(sven): +# Add a simple script that takes n csv input files and generates plot(s) +# from these with: x-axis=ts OR wall-time; y-axis=any metric(s) (up to 2). +# ability to merge any m csv files (e.g. tf vs torch; or n seeds) together +# in one plot. diff --git a/rllib/tuned_examples/halfcheetah-ddpg.yaml b/rllib/tuned_examples/ddpg/halfcheetah-ddpg.yaml similarity index 96% rename from rllib/tuned_examples/halfcheetah-ddpg.yaml rename to rllib/tuned_examples/ddpg/halfcheetah-ddpg.yaml index 66c35e67e..e2dd993aa 100644 --- a/rllib/tuned_examples/halfcheetah-ddpg.yaml +++ b/rllib/tuned_examples/ddpg/halfcheetah-ddpg.yaml @@ -6,6 +6,7 @@ halfcheetah-ddpg: episode_reward_mean: 2000 time_total_s: 5400 # 90 minutes config: + use_pytorch: false # <- switch on/off torch # === Model === actor_hiddens: [64, 64] critic_hiddens: [64, 64] diff --git a/rllib/tuned_examples/invertedpendulum-td3.yaml b/rllib/tuned_examples/ddpg/invertedpendulum-td3.yaml similarity index 91% rename from rllib/tuned_examples/invertedpendulum-td3.yaml rename to rllib/tuned_examples/ddpg/invertedpendulum-td3.yaml index aef61a3ef..ccf958c3c 100644 --- a/rllib/tuned_examples/invertedpendulum-td3.yaml +++ b/rllib/tuned_examples/ddpg/invertedpendulum-td3.yaml @@ -9,6 +9,8 @@ invertedpendulum-td3: time_total_s: 900 # 15 minutes timesteps_total: 1000000 config: + # Works for both torch and tf. + use_pytorch: false # === Model === actor_hiddens: [32, 32] critic_hiddens: [32, 32] diff --git a/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg.yaml b/rllib/tuned_examples/ddpg/mountaincarcontinuous-apex-ddpg.yaml similarity index 88% rename from rllib/tuned_examples/mountaincarcontinuous-apex-ddpg.yaml rename to rllib/tuned_examples/ddpg/mountaincarcontinuous-apex-ddpg.yaml index 753d8c51e..aae8ffa65 100644 --- a/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg.yaml +++ b/rllib/tuned_examples/ddpg/mountaincarcontinuous-apex-ddpg.yaml @@ -5,6 +5,8 @@ mountaincarcontinuous-apex-ddpg: stop: episode_reward_mean: 90 config: + # Works for both torch and tf. + use_pytorch: false clip_rewards: False num_workers: 16 exploration_config: diff --git a/rllib/tuned_examples/mountaincarcontinuous-ddpg.yaml b/rllib/tuned_examples/ddpg/mountaincarcontinuous-ddpg.yaml similarity index 95% rename from rllib/tuned_examples/mountaincarcontinuous-ddpg.yaml rename to rllib/tuned_examples/ddpg/mountaincarcontinuous-ddpg.yaml index 2da195273..76b88c1a9 100644 --- a/rllib/tuned_examples/mountaincarcontinuous-ddpg.yaml +++ b/rllib/tuned_examples/ddpg/mountaincarcontinuous-ddpg.yaml @@ -6,6 +6,8 @@ mountaincarcontinuous-ddpg: episode_reward_mean: 90 time_total_s: 600 # 10 minutes config: + # Works for both torch and tf. + use_pytorch: false # === Model === actor_hiddens: [32, 64] critic_hiddens: [64, 64] diff --git a/rllib/tuned_examples/mujoco-td3.yaml b/rllib/tuned_examples/ddpg/mujoco-td3.yaml similarity index 92% rename from rllib/tuned_examples/mujoco-td3.yaml rename to rllib/tuned_examples/ddpg/mujoco-td3.yaml index e328043f4..fb64d2834 100644 --- a/rllib/tuned_examples/mujoco-td3.yaml +++ b/rllib/tuned_examples/ddpg/mujoco-td3.yaml @@ -15,6 +15,8 @@ mujoco-td3: stop: timesteps_total: 1000000 config: + # Works for both torch and tf. 
+ use_pytorch: false # === Exploration === learning_starts: 10000 exploration_config: diff --git a/rllib/tuned_examples/pendulum-apex-ddpg.yaml b/rllib/tuned_examples/ddpg/pendulum-apex-ddpg.yaml similarity index 86% rename from rllib/tuned_examples/pendulum-apex-ddpg.yaml rename to rllib/tuned_examples/ddpg/pendulum-apex-ddpg.yaml index 7122b577e..a21ac3798 100644 --- a/rllib/tuned_examples/pendulum-apex-ddpg.yaml +++ b/rllib/tuned_examples/ddpg/pendulum-apex-ddpg.yaml @@ -5,6 +5,8 @@ pendulum-apex-ddpg: stop: episode_reward_mean: -160 config: + # Works for both torch and tf. + use_pytorch: false use_huber: True clip_rewards: False num_workers: 16 diff --git a/rllib/tuned_examples/pendulum-ddpg.yaml b/rllib/tuned_examples/ddpg/pendulum-ddpg.yaml similarity index 89% rename from rllib/tuned_examples/pendulum-ddpg.yaml rename to rllib/tuned_examples/ddpg/pendulum-ddpg.yaml index dc8064b88..aeb873273 100644 --- a/rllib/tuned_examples/pendulum-ddpg.yaml +++ b/rllib/tuned_examples/ddpg/pendulum-ddpg.yaml @@ -1,11 +1,13 @@ -# This configuration can expect to reach -160 reward in 10k-20k timesteps +# This configuration can expect to reach -160 reward in 10k-20k timesteps. pendulum-ddpg: env: Pendulum-v0 run: DDPG stop: - episode_reward_mean: -160 - timesteps_total: 100000 + episode_reward_mean: -900 + timesteps_total: 20000 config: + # Works for both torch and tf. + use_pytorch: false # === Model === actor_hiddens: [64, 64] critic_hiddens: [64, 64] @@ -18,7 +20,7 @@ pendulum-ddpg: exploration_config: type: "OrnsteinUhlenbeckNoise" scale_timesteps: 10000 - initial_scale: 1.0, + initial_scale: 1.0 final_scale: 0.02 ou_base_scale: 0.1 ou_theta: 0.15 diff --git a/rllib/tuned_examples/pendulum-td3.yaml b/rllib/tuned_examples/ddpg/pendulum-td3.yaml similarity index 74% rename from rllib/tuned_examples/pendulum-td3.yaml rename to rllib/tuned_examples/ddpg/pendulum-td3.yaml index 6632d2ff2..9fa7e4ce4 100644 --- a/rllib/tuned_examples/pendulum-td3.yaml +++ b/rllib/tuned_examples/ddpg/pendulum-td3.yaml @@ -1,20 +1,20 @@ # This configuration can expect to reach -160 reward in 10k-20k timesteps -pendulum-ddpg: +pendulum-td3: env: Pendulum-v0 run: TD3 stop: - episode_reward_mean: -130 - time_total_s: 900 # 10 minutes + episode_reward_mean: -900 + timesteps_total: 100000 config: + # Works for both torch and tf. 
+ use_pytorch: false # === Model === actor_hiddens: [64, 64] critic_hiddens: [64, 64] - # === Exploration === learning_starts: 5000 exploration_config: random_timesteps: 5000 - # === Evaluation === evaluation_interval: 1 evaluation_num_episodes: 5 diff --git a/rllib/tuned_examples/atari-apex.yaml b/rllib/tuned_examples/dqn/atari-apex.yaml similarity index 94% rename from rllib/tuned_examples/atari-apex.yaml rename to rllib/tuned_examples/dqn/atari-apex.yaml index 779b677b0..3d0e4a45b 100644 --- a/rllib/tuned_examples/atari-apex.yaml +++ b/rllib/tuned_examples/dqn/atari-apex.yaml @@ -8,6 +8,7 @@ apex: - SpaceInvadersNoFrameskip-v4 run: APEX config: + use_pytorch: false # <- switch on/off torch double_q: false dueling: false num_atoms: 1 diff --git a/rllib/tuned_examples/atari-dist-dqn.yaml b/rllib/tuned_examples/dqn/atari-dist-dqn.yaml similarity index 100% rename from rllib/tuned_examples/atari-dist-dqn.yaml rename to rllib/tuned_examples/dqn/atari-dist-dqn.yaml diff --git a/rllib/tuned_examples/atari-dqn-tf-and-torch.yaml b/rllib/tuned_examples/dqn/atari-dqn.yaml similarity index 94% rename from rllib/tuned_examples/atari-dqn-tf-and-torch.yaml rename to rllib/tuned_examples/dqn/atari-dqn.yaml index 0a3c1397c..f56b0c2eb 100644 --- a/rllib/tuned_examples/atari-dqn-tf-and-torch.yaml +++ b/rllib/tuned_examples/dqn/atari-dqn.yaml @@ -9,6 +9,7 @@ atari-basic-dqn: - SpaceInvadersNoFrameskip-v4 run: DQN config: + use_pytorch: false # <- switch on/off torch double_q: false dueling: false num_atoms: 1 diff --git a/rllib/tuned_examples/atari-duel-ddqn.yaml b/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml similarity index 94% rename from rllib/tuned_examples/atari-duel-ddqn.yaml rename to rllib/tuned_examples/dqn/atari-duel-ddqn.yaml index b1f70a26a..cd7792048 100644 --- a/rllib/tuned_examples/atari-duel-ddqn.yaml +++ b/rllib/tuned_examples/dqn/atari-duel-ddqn.yaml @@ -9,6 +9,7 @@ dueling-ddqn: - SpaceInvadersNoFrameskip-v4 run: DQN config: + use_pytorch: false # <- switch on/off torch double_q: true dueling: true num_atoms: 1 diff --git a/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-tf.yaml b/rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml similarity index 86% rename from rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-tf.yaml rename to rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml index 50d3075f5..f737e29f7 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-tf.yaml +++ b/rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml @@ -1,10 +1,11 @@ -cartpole-dqn-tf-w-param-noise: +cartpole-dqn-w-param-noise: env: CartPole-v0 run: DQN stop: episode_reward_mean: 150 timesteps_total: 300000 config: + # Works for both torch and tf. use_pytorch: false exploration_config: type: ParameterNoise diff --git a/rllib/tuned_examples/regression_tests/cartpole-dqn-tf.yaml b/rllib/tuned_examples/dqn/cartpole-dqn.yaml similarity index 77% rename from rllib/tuned_examples/regression_tests/cartpole-dqn-tf.yaml rename to rllib/tuned_examples/dqn/cartpole-dqn.yaml index 410fafb87..63e67256c 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-dqn-tf.yaml +++ b/rllib/tuned_examples/dqn/cartpole-dqn.yaml @@ -1,10 +1,11 @@ -cartpole-dqn-tf: +cartpole-dqn: env: CartPole-v0 run: DQN stop: episode_reward_mean: 150 timesteps_total: 50000 config: + # Works for both torch and tf. 
use_pytorch: false n_step: 3 gamma: 0.95 diff --git a/rllib/tuned_examples/regression_tests/cartpole-simpleq-tf.yaml b/rllib/tuned_examples/dqn/cartpole-simpleq.yaml similarity index 74% rename from rllib/tuned_examples/regression_tests/cartpole-simpleq-tf.yaml rename to rllib/tuned_examples/dqn/cartpole-simpleq.yaml index 16da7921f..bc27751c2 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-simpleq-tf.yaml +++ b/rllib/tuned_examples/dqn/cartpole-simpleq.yaml @@ -1,8 +1,9 @@ -cartpole-dqn-tf: +cartpole-dqn: env: CartPole-v0 run: SimpleQ stop: episode_reward_mean: 150 timesteps_total: 50000 config: + # Works for both torch and tf. use_pytorch: false diff --git a/rllib/tuned_examples/pong-apex.yaml b/rllib/tuned_examples/dqn/pong-apex.yaml similarity index 94% rename from rllib/tuned_examples/pong-apex.yaml rename to rllib/tuned_examples/dqn/pong-apex.yaml index 28097900c..9db78aa23 100644 --- a/rllib/tuned_examples/pong-apex.yaml +++ b/rllib/tuned_examples/dqn/pong-apex.yaml @@ -6,6 +6,7 @@ pong-apex: env: PongNoFrameskip-v4 run: APEX config: + use_pytorch: false target_network_update_freq: 20000 num_workers: 4 num_envs_per_worker: 8 diff --git a/rllib/tuned_examples/pong-dqn.yaml b/rllib/tuned_examples/dqn/pong-dqn.yaml similarity index 89% rename from rllib/tuned_examples/pong-dqn.yaml rename to rllib/tuned_examples/dqn/pong-dqn.yaml index 91ff37486..58100550f 100644 --- a/rllib/tuned_examples/pong-dqn.yaml +++ b/rllib/tuned_examples/dqn/pong-dqn.yaml @@ -6,6 +6,8 @@ pong-deterministic-dqn: episode_reward_mean: 20 time_total_s: 7200 config: + # Works for both torch and tf. + use_pytorch: false num_gpus: 1 gamma: 0.99 lr: .0001 diff --git a/rllib/tuned_examples/pong-rainbow.yaml b/rllib/tuned_examples/dqn/pong-rainbow.yaml similarity index 100% rename from rllib/tuned_examples/pong-rainbow.yaml rename to rllib/tuned_examples/dqn/pong-rainbow.yaml diff --git a/rllib/tuned_examples/regression_tests/cartpole-es-tf.yaml b/rllib/tuned_examples/es/cartpole-es.yaml similarity index 81% rename from rllib/tuned_examples/regression_tests/cartpole-es-tf.yaml rename to rllib/tuned_examples/es/cartpole-es.yaml index 62e827e75..6f7c2a67c 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-es-tf.yaml +++ b/rllib/tuned_examples/es/cartpole-es.yaml @@ -1,10 +1,11 @@ -cartpole-es-tf: +cartpole-es: env: CartPole-v0 run: ES stop: episode_reward_mean: 150 timesteps_total: 500000 config: + # Works for both torch and tf. use_pytorch: false num_workers: 2 noise_size: 25000000 diff --git a/rllib/tuned_examples/es/humanoid-es.yaml b/rllib/tuned_examples/es/humanoid-es.yaml new file mode 100644 index 000000000..356111062 --- /dev/null +++ b/rllib/tuned_examples/es/humanoid-es.yaml @@ -0,0 +1,9 @@ +humanoid-v2-es: + env: Humanoid-v2 + run: ES + stop: + episode_reward_mean: 6000 + config: + # Works for both torch and tf. 
+ use_pytorch: false + num_workers: 100 diff --git a/rllib/tuned_examples/humanoid-es.yaml b/rllib/tuned_examples/humanoid-es.yaml deleted file mode 100644 index a61510a9d..000000000 --- a/rllib/tuned_examples/humanoid-es.yaml +++ /dev/null @@ -1,7 +0,0 @@ -humanoid-es: - env: Humanoid-v1 - run: ES - stop: - episode_reward_mean: 6000 - config: - num_workers: 100 diff --git a/rllib/tuned_examples/atari-impala-large.yaml b/rllib/tuned_examples/impala/atari-impala-large.yaml similarity index 100% rename from rllib/tuned_examples/atari-impala-large.yaml rename to rllib/tuned_examples/impala/atari-impala-large.yaml diff --git a/rllib/tuned_examples/atari-impala.yaml b/rllib/tuned_examples/impala/atari-impala.yaml similarity index 100% rename from rllib/tuned_examples/atari-impala.yaml rename to rllib/tuned_examples/impala/atari-impala.yaml diff --git a/rllib/tuned_examples/regression_tests/cartpole-impala-tf.yaml b/rllib/tuned_examples/impala/cartpole-impala.yaml similarity index 83% rename from rllib/tuned_examples/regression_tests/cartpole-impala-tf.yaml rename to rllib/tuned_examples/impala/cartpole-impala.yaml index 8233220fb..efadeff69 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-impala-tf.yaml +++ b/rllib/tuned_examples/impala/cartpole-impala.yaml @@ -5,5 +5,6 @@ cartpole-impala-tf: episode_reward_mean: 150 timesteps_total: 500000 config: + # Works for both torch and tf. use_pytorch: false num_gpus: 0 diff --git a/rllib/tuned_examples/impala/pendulum-impala.yaml b/rllib/tuned_examples/impala/pendulum-impala.yaml new file mode 100644 index 000000000..d5fc7d091 --- /dev/null +++ b/rllib/tuned_examples/impala/pendulum-impala.yaml @@ -0,0 +1,6 @@ +pendulum-impala-tf: + env: Pendulum-v0 + run: IMPALA + stop: + episode_reward_mean: -700 + timesteps_total: 500000 diff --git a/rllib/tuned_examples/pong-impala-fast.yaml b/rllib/tuned_examples/impala/pong-impala-fast.yaml similarity index 100% rename from rllib/tuned_examples/pong-impala-fast.yaml rename to rllib/tuned_examples/impala/pong-impala-fast.yaml diff --git a/rllib/tuned_examples/pong-impala-vectorized.yaml b/rllib/tuned_examples/impala/pong-impala-vectorized.yaml similarity index 100% rename from rllib/tuned_examples/pong-impala-vectorized.yaml rename to rllib/tuned_examples/impala/pong-impala-vectorized.yaml diff --git a/rllib/tuned_examples/pong-impala.yaml b/rllib/tuned_examples/impala/pong-impala.yaml similarity index 100% rename from rllib/tuned_examples/pong-impala.yaml rename to rllib/tuned_examples/impala/pong-impala.yaml diff --git a/rllib/tuned_examples/cartpole-marwil-tf.yaml b/rllib/tuned_examples/marwil/cartpole-marwil.yaml similarity index 85% rename from rllib/tuned_examples/cartpole-marwil-tf.yaml rename to rllib/tuned_examples/marwil/cartpole-marwil.yaml index e566f731a..314d94c0c 100644 --- a/rllib/tuned_examples/cartpole-marwil-tf.yaml +++ b/rllib/tuned_examples/marwil/cartpole-marwil.yaml @@ -2,12 +2,13 @@ # $ ./train.py --run=PPO --env=CartPole-v0 \ # --stop='{"timesteps_total": 50000}' \ # --config='{"output": "/tmp/out", "batch_mode": "complete_episodes"}' -cartpole-marwil-tf: +cartpole-marwil: env: CartPole-v0 run: MARWIL stop: timesteps_total: 500000 config: + use_pytorch: false # <- switch on/off torch beta: grid_search: [0, 1] # compare IL (beta=0) vs MARWIL input: /tmp/out diff --git a/rllib/tuned_examples/pendulum-appo-torch.yaml b/rllib/tuned_examples/pendulum-appo-torch.yaml deleted file mode 100644 index 73f4b6348..000000000 --- a/rllib/tuned_examples/pendulum-appo-torch.yaml +++ /dev/null 
@@ -1,21 +0,0 @@ -pendulum-appo-vtrace-torch: - env: Pendulum-v0 - run: APPO - stop: - episode_reward_mean: -1000 # just check it learns a bit - timesteps_total: 500000 - config: - use_pytorch: true - vtrace: true - num_gpus: 0 - num_workers: 1 - lambda: 0.1 - gamma: 0.95 - lr: 0.0003 - train_batch_size: 100 - minibatch_buffer_size: 16 - num_sgd_iter: 10 - model: - fcnet_hiddens: [256, 256] - batch_mode: truncate_episodes - observation_filter: MeanStdFilter diff --git a/rllib/tuned_examples/pendulum-ppo.yaml b/rllib/tuned_examples/pendulum-ppo.yaml deleted file mode 100644 index 65142f667..000000000 --- a/rllib/tuned_examples/pendulum-ppo.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# can expect improvement to -140 reward in ~300-500k timesteps -pendulum-ppo: - env: Pendulum-v0 - run: PPO - config: - train_batch_size: 2048 - vf_clip_param: 10.0 - num_workers: 0 - num_envs_per_worker: 10 - lambda: 0.1 - gamma: 0.95 - lr: 0.0003 - sgd_minibatch_size: 64 - num_sgd_iter: 10 - model: - fcnet_hiddens: [64, 64] - batch_mode: complete_episodes - observation_filter: MeanStdFilter diff --git a/rllib/tuned_examples/regression_tests/cartpole-pg-torch.yaml b/rllib/tuned_examples/pg/cartpole-pg.yaml similarity index 64% rename from rllib/tuned_examples/regression_tests/cartpole-pg-torch.yaml rename to rllib/tuned_examples/pg/cartpole-pg.yaml index baaec5660..27df47a00 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-pg-torch.yaml +++ b/rllib/tuned_examples/pg/cartpole-pg.yaml @@ -1,9 +1,10 @@ -cartpole-pg-torch: +cartpole-pg: env: CartPole-v0 run: PG stop: episode_reward_mean: 150 timesteps_total: 100000 config: + # Works for both torch and tf. + use_pytorch: false num_workers: 0 - use_pytorch: true diff --git a/rllib/tuned_examples/pong-a3c-pytorch.yaml b/rllib/tuned_examples/pong-a3c-pytorch.yaml deleted file mode 100644 index 854e22123..000000000 --- a/rllib/tuned_examples/pong-a3c-pytorch.yaml +++ /dev/null @@ -1,21 +0,0 @@ -pong-a3c-pytorch-cnn: - env: PongDeterministic-v4 - run: A3C - config: - num_workers: 16 - rollout_fragment_length: 20 - use_pytorch: true - vf_loss_coeff: 0.5 - entropy_coeff: 0.01 - gamma: 0.99 - grad_clip: 40.0 - lambda: 1.0 - lr: 0.0001 - observation_filter: NoFilter - model: - use_lstm: false - dim: 84 - grayscale: true - zero_mean: false - optimizer: - grads_per_step: 1000 diff --git a/rllib/tuned_examples/atari-ddppo.yaml b/rllib/tuned_examples/ppo/atari-ddppo.yaml similarity index 94% rename from rllib/tuned_examples/atari-ddppo.yaml rename to rllib/tuned_examples/ppo/atari-ddppo.yaml index ab71bf1ce..9d0cc1653 100644 --- a/rllib/tuned_examples/atari-ddppo.yaml +++ b/rllib/tuned_examples/ppo/atari-ddppo.yaml @@ -7,6 +7,7 @@ atari-ddppo: - BreakoutNoFrameskip-v4 run: DDPPO config: + use_pytorch: true # DDPPO only supports PyTorch so far # Worker config: 10 workers, each of which requires a GPU. 
num_workers: 10 num_gpus_per_worker: 1 diff --git a/rllib/tuned_examples/atari-ppo.yaml b/rllib/tuned_examples/ppo/atari-ppo.yaml similarity index 93% rename from rllib/tuned_examples/atari-ppo.yaml rename to rllib/tuned_examples/ppo/atari-ppo.yaml index 1d0dc62f6..205b6f918 100644 --- a/rllib/tuned_examples/atari-ppo.yaml +++ b/rllib/tuned_examples/ppo/atari-ppo.yaml @@ -9,6 +9,7 @@ atari-ppo: - SpaceInvadersNoFrameskip-v4 run: PPO config: + use_pytorch: false # <- switch on/off torch lambda: 0.95 kl_coeff: 0.5 clip_rewards: True diff --git a/rllib/tuned_examples/regression_tests/cartpole-appo-vtrace-tf.yaml b/rllib/tuned_examples/ppo/cartpole-appo-vtrace.yaml similarity index 83% rename from rllib/tuned_examples/regression_tests/cartpole-appo-vtrace-tf.yaml rename to rllib/tuned_examples/ppo/cartpole-appo-vtrace.yaml index 023dc56a4..e0561b359 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-appo-vtrace-tf.yaml +++ b/rllib/tuned_examples/ppo/cartpole-appo-vtrace.yaml @@ -1,10 +1,11 @@ -cartpole-appo-vtrace-tf: +cartpole-appo-vtrace: env: CartPole-v0 run: APPO stop: episode_reward_mean: 150 timesteps_total: 200000 config: + # Works for both torch and tf. use_pytorch: false rollout_fragment_length: 10 train_batch_size: 10 diff --git a/rllib/tuned_examples/regression_tests/cartpole-appo-tf.yaml b/rllib/tuned_examples/ppo/cartpole-appo.yaml similarity index 85% rename from rllib/tuned_examples/regression_tests/cartpole-appo-tf.yaml rename to rllib/tuned_examples/ppo/cartpole-appo.yaml index 183dcb05d..79c0078a2 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-appo-tf.yaml +++ b/rllib/tuned_examples/ppo/cartpole-appo.yaml @@ -1,10 +1,11 @@ -cartpole-appo-tf: +cartpole-appo: env: CartPole-v0 run: APPO stop: episode_reward_mean: 150 timesteps_total: 200000 config: + # Works for both torch and tf. 
use_pytorch: false rollout_fragment_length: 10 train_batch_size: 10 diff --git a/rllib/tuned_examples/regression_tests/cartpole-ddppo.yaml b/rllib/tuned_examples/ppo/cartpole-ddppo.yaml similarity index 59% rename from rllib/tuned_examples/regression_tests/cartpole-ddppo.yaml rename to rllib/tuned_examples/ppo/cartpole-ddppo.yaml index 5a438a15e..3cf08bc5b 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-ddppo.yaml +++ b/rllib/tuned_examples/ppo/cartpole-ddppo.yaml @@ -1,9 +1,8 @@ -cartpole-ddppo-torch: +cartpole-ddppo: env: CartPole-v0 run: DDPPO stop: - episode_reward_mean: 100 + episode_reward_mean: 150 timesteps_total: 100000 config: - use_pytorch: true num_gpus_per_worker: 0 diff --git a/rllib/tuned_examples/cartpole-grid-search-example.yaml b/rllib/tuned_examples/ppo/cartpole-grid-search-example.yaml similarity index 83% rename from rllib/tuned_examples/cartpole-grid-search-example.yaml rename to rllib/tuned_examples/ppo/cartpole-grid-search-example.yaml index 8f01a6284..c2a98a940 100644 --- a/rllib/tuned_examples/cartpole-grid-search-example.yaml +++ b/rllib/tuned_examples/ppo/cartpole-grid-search-example.yaml @@ -5,6 +5,7 @@ cartpole-ppo: episode_reward_mean: 200 time_total_s: 180 config: + use_pytorch: false # <- switch on/off torch num_workers: 2 num_sgd_iter: grid_search: [1, 4] diff --git a/rllib/tuned_examples/hyperband-cartpole.yaml b/rllib/tuned_examples/ppo/cartpole-ppo-hyperband.yaml similarity index 83% rename from rllib/tuned_examples/hyperband-cartpole.yaml rename to rllib/tuned_examples/ppo/cartpole-ppo-hyperband.yaml index dee9c0fc0..eb70ddfdf 100644 --- a/rllib/tuned_examples/hyperband-cartpole.yaml +++ b/rllib/tuned_examples/ppo/cartpole-ppo-hyperband.yaml @@ -6,6 +6,8 @@ cartpole-ppo: episode_reward_mean: 200 time_total_s: 180 config: + # Works for both torch and tf. + use_pytorch: false num_workers: 1 num_sgd_iter: grid_search: [1, 4] diff --git a/rllib/tuned_examples/regression_tests/cartpole-ppo-torch.yaml b/rllib/tuned_examples/ppo/cartpole-ppo.yaml similarity index 82% rename from rllib/tuned_examples/regression_tests/cartpole-ppo-torch.yaml rename to rllib/tuned_examples/ppo/cartpole-ppo.yaml index ec953a653..021e52603 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-ppo-torch.yaml +++ b/rllib/tuned_examples/ppo/cartpole-ppo.yaml @@ -1,11 +1,12 @@ -cartpole-ppo-torch: +cartpole-ppo: env: CartPole-v0 run: PPO stop: episode_reward_mean: 150 timesteps_total: 100000 config: - use_pytorch: true + # Works for both torch and tf. + use_pytorch: false gamma: 0.99 lr: 0.0003 num_workers: 1 diff --git a/rllib/tuned_examples/halfcheetah-appo.yaml b/rllib/tuned_examples/ppo/halfcheetah-appo.yaml similarity index 83% rename from rllib/tuned_examples/halfcheetah-appo.yaml rename to rllib/tuned_examples/ppo/halfcheetah-appo.yaml index bb2edfe34..f6697bdfb 100644 --- a/rllib/tuned_examples/halfcheetah-appo.yaml +++ b/rllib/tuned_examples/ppo/halfcheetah-appo.yaml @@ -1,11 +1,12 @@ -# This can reach 9k reward in 2 hours on a Titan XP GPU +# This can reach 9k reward in 2 hours on a Titan XP GPU # with 16 workers and 8 envs per worker. 
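The hyperband rename above keeps the spec itself scheduler-free; the HyperBand logic is supplied at launch time rather than inside the YAML. A hypothetical sketch pairing the file's values with Tune's `HyperBandScheduler` (the `max_t` cap is illustrative, not taken from the YAML):

```python
from ray import tune
from ray.tune.schedulers import HyperBandScheduler

# HyperBand early-stops the weaker of the grid-searched trials based on
# their running episode_reward_mean.
hyperband = HyperBandScheduler(
    time_attr="training_iteration",
    metric="episode_reward_mean",
    mode="max",
    max_t=100,
)

tune.run(
    "PPO",
    scheduler=hyperband,
    stop={"episode_reward_mean": 200, "time_total_s": 180},
    config={
        "env": "CartPole-v0",
        "use_pytorch": False,  # works for both torch and tf
        "num_workers": 1,
        "num_sgd_iter": tune.grid_search([1, 4]),
    },
)
```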
halfcheetah-appo: env: HalfCheetah-v2 run: APPO stop: - time_total_s: 10800 + time_total_s: 10800 config: + use_pytorch: false # <- switch on/off torch vtrace: True gamma: 0.99 lambda: 0.95 @@ -30,6 +31,6 @@ halfcheetah-appo: batch_mode: truncate_episodes use_kl_loss: True kl_coeff: 1.0 - kl_target: 0.04 + kl_target: 0.04 observation_filter: MeanStdFilter diff --git a/rllib/tuned_examples/halfcheetah-ppo.yaml b/rllib/tuned_examples/ppo/halfcheetah-ppo.yaml similarity index 82% rename from rllib/tuned_examples/halfcheetah-ppo.yaml rename to rllib/tuned_examples/ppo/halfcheetah-ppo.yaml index 60cbd03dc..2b4300c25 100644 --- a/rllib/tuned_examples/halfcheetah-ppo.yaml +++ b/rllib/tuned_examples/ppo/halfcheetah-ppo.yaml @@ -1,23 +1,24 @@ -halfcheetah-ppo: - env: HalfCheetah-v2 - run: PPO - stop: - episode_reward_mean: 9800 - time_total_s: 10800 - config: - gamma: 0.99 - lambda: 0.95 - kl_coeff: 1.0 - num_sgd_iter: 32 - lr: .0003 - vf_loss_coeff: 0.5 - clip_param: 0.2 - sgd_minibatch_size: 4096 - train_batch_size: 65536 - num_workers: 16 - num_gpus: 1 - grad_clip: 0.5 - num_envs_per_worker: - grid_search: [16, 32] - batch_mode: truncate_episodes - observation_filter: MeanStdFilter +halfcheetah-ppo: + env: HalfCheetah-v2 + run: PPO + stop: + episode_reward_mean: 9800 + time_total_s: 10800 + config: + use_pytorch: false # <- switch on/off torch + gamma: 0.99 + lambda: 0.95 + kl_coeff: 1.0 + num_sgd_iter: 32 + lr: .0003 + vf_loss_coeff: 0.5 + clip_param: 0.2 + sgd_minibatch_size: 4096 + train_batch_size: 65536 + num_workers: 16 + num_gpus: 1 + grad_clip: 0.5 + num_envs_per_worker: + grid_search: [16, 32] + batch_mode: truncate_episodes + observation_filter: MeanStdFilter diff --git a/rllib/tuned_examples/hopper-ppo.yaml b/rllib/tuned_examples/ppo/hopper-ppo.yaml similarity index 83% rename from rllib/tuned_examples/hopper-ppo.yaml rename to rllib/tuned_examples/ppo/hopper-ppo.yaml index c73d4480e..6a0398cd2 100644 --- a/rllib/tuned_examples/hopper-ppo.yaml +++ b/rllib/tuned_examples/ppo/hopper-ppo.yaml @@ -2,6 +2,8 @@ hopper-ppo: env: Hopper-v1 run: PPO config: + # Works for both torch and tf. + use_pytorch: false gamma: 0.995 kl_coeff: 1.0 num_sgd_iter: 20 diff --git a/rllib/tuned_examples/humanoid-ppo-gae.yaml b/rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml similarity index 88% rename from rllib/tuned_examples/humanoid-ppo-gae.yaml rename to rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml index c9ddbd017..362d8cf09 100644 --- a/rllib/tuned_examples/humanoid-ppo-gae.yaml +++ b/rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml @@ -4,6 +4,8 @@ humanoid-ppo-gae: stop: episode_reward_mean: 6000 config: + # Works for both torch and tf. + use_pytorch: false gamma: 0.995 lambda: 0.95 clip_param: 0.2 diff --git a/rllib/tuned_examples/humanoid-ppo.yaml b/rllib/tuned_examples/ppo/humanoid-ppo.yaml similarity index 87% rename from rllib/tuned_examples/humanoid-ppo.yaml rename to rllib/tuned_examples/ppo/humanoid-ppo.yaml index b531531e9..94f33dd13 100644 --- a/rllib/tuned_examples/humanoid-ppo.yaml +++ b/rllib/tuned_examples/ppo/humanoid-ppo.yaml @@ -4,6 +4,8 @@ humanoid-ppo: stop: episode_reward_mean: 6000 config: + # Works for both torch and tf. 
+ use_pytorch: false gamma: 0.995 kl_coeff: 1.0 num_sgd_iter: 20 diff --git a/rllib/tuned_examples/pendulum-appo-tf.yaml b/rllib/tuned_examples/ppo/pendulum-appo.yaml similarity index 89% rename from rllib/tuned_examples/pendulum-appo-tf.yaml rename to rllib/tuned_examples/ppo/pendulum-appo.yaml index 29f6d505a..ada828c6a 100644 --- a/rllib/tuned_examples/pendulum-appo-tf.yaml +++ b/rllib/tuned_examples/ppo/pendulum-appo.yaml @@ -1,10 +1,11 @@ -pendulum-appo-vtrace-tf: +pendulum-appo-vtrace: env: Pendulum-v0 run: APPO stop: episode_reward_mean: -1000 # just check it learns a bit timesteps_total: 500000 config: + # Works for both torch and tf. use_pytorch: false vtrace: true num_gpus: 0 diff --git a/rllib/tuned_examples/regression_tests/pendulum-ppo-tf.yaml b/rllib/tuned_examples/ppo/pendulum-ppo.yaml similarity index 81% rename from rllib/tuned_examples/regression_tests/pendulum-ppo-tf.yaml rename to rllib/tuned_examples/ppo/pendulum-ppo.yaml index ac3afac06..d8b1234b8 100644 --- a/rllib/tuned_examples/regression_tests/pendulum-ppo-tf.yaml +++ b/rllib/tuned_examples/ppo/pendulum-ppo.yaml @@ -1,10 +1,12 @@ -pendulum-ppo-tf: +# Can expect improvement to -140 reward in ~300-500k timesteps. +pendulum-ppo: env: Pendulum-v0 run: PPO stop: episode_reward_mean: -500 timesteps_total: 400000 config: + # Works for both torch and tf. use_pytorch: false train_batch_size: 2048 vf_clip_param: 10.0 diff --git a/rllib/tuned_examples/pong-appo.yaml b/rllib/tuned_examples/ppo/pong-appo.yaml similarity index 86% rename from rllib/tuned_examples/pong-appo.yaml rename to rllib/tuned_examples/ppo/pong-appo.yaml index 666852545..4564f129b 100644 --- a/rllib/tuned_examples/pong-appo.yaml +++ b/rllib/tuned_examples/ppo/pong-appo.yaml @@ -1,29 +1,31 @@ -# This can reach 18-19 reward in ~5-7 minutes on a Titan XP GPU -# with 32 workers and 8 envs per worker. IMPALA, when ran with -# similar configurations, solved Pong in 10-12 minutes. -# APPO can also solve Pong in 2.5 million timesteps, which is -# 2x more efficient than that of IMPALA. -pong-appo: - env: PongNoFrameskip-v4 - run: APPO - stop: - episode_reward_mean: 18.0 - timesteps_total: 5000000 - config: - vtrace: True - use_kl_loss: False - rollout_fragment_length: 50 - train_batch_size: 750 - num_workers: 32 - broadcast_interval: 1 - max_sample_requests_in_flight_per_worker: 1 - num_data_loader_buffers: 1 - num_envs_per_worker: 8 - minibatch_buffer_size: 4 - num_sgd_iter: 2 - vf_loss_coeff: 1.0 - clip_param: 0.3 - num_gpus: 1 - grad_clip: 10 - model: - dim: 42 +# This can reach 18-19 reward in ~5-7 minutes on a Titan XP GPU +# with 32 workers and 8 envs per worker. IMPALA, when run with +# a similar configuration, solved Pong in 10-12 minutes. +# APPO can also solve Pong in 2.5 million timesteps, which is +# about 2x more sample-efficient than IMPALA. +pong-appo: + env: PongNoFrameskip-v4 + run: APPO + stop: + episode_reward_mean: 18.0 + timesteps_total: 5000000 + config: + # Works for both torch and tf.
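A note on the `stop` blocks being added to these files: the conditions are OR-ed, so Tune ends a trial as soon as any one metric crosses its threshold. For pendulum-ppo above, that means either reaching -500 mean reward or exhausting the 400k-timestep budget, whichever happens first; a minimal sketch with the same values:

```python
from ray import tune

tune.run(
    "PPO",
    # Either condition ends the trial: target reward reached, or the
    # timestep budget spent. Regression-style specs therefore always pair
    # a reward threshold with a hard timestep cap.
    stop={"episode_reward_mean": -500, "timesteps_total": 400000},
    config={
        "env": "Pendulum-v0",
        "use_pytorch": False,
        "train_batch_size": 2048,
        "vf_clip_param": 10.0,
    },
)
```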
+ use_pytorch: false + vtrace: True + use_kl_loss: False + rollout_fragment_length: 50 + train_batch_size: 750 + num_workers: 32 + broadcast_interval: 1 + max_sample_requests_in_flight_per_worker: 1 + num_data_loader_buffers: 1 + num_envs_per_worker: 8 + minibatch_buffer_size: 4 + num_sgd_iter: 2 + vf_loss_coeff: 1.0 + clip_param: 0.3 + num_gpus: 1 + grad_clip: 10 + model: + dim: 42 diff --git a/rllib/tuned_examples/pong-ppo.yaml b/rllib/tuned_examples/ppo/pong-ppo.yaml similarity index 84% rename from rllib/tuned_examples/pong-ppo.yaml rename to rllib/tuned_examples/ppo/pong-ppo.yaml index e9d3d9ecd..6dfca95f0 100644 --- a/rllib/tuned_examples/pong-ppo.yaml +++ b/rllib/tuned_examples/ppo/pong-ppo.yaml @@ -1,11 +1,13 @@ # On a single GPU, this achieves maximum reward in ~15-20 minutes. # -# $ python train.py -f tuned_examples/pong-ppo.yaml +# $ python train.py -f tuned_examples/ppo/pong-ppo.yaml # pong-ppo: env: PongNoFrameskip-v4 run: PPO config: + # Works for both torch and tf. + use_pytorch: false lambda: 0.95 kl_coeff: 0.5 clip_rewards: True diff --git a/rllib/tuned_examples/walker2d-ppo.yaml b/rllib/tuned_examples/ppo/walker2d-ppo.yaml similarity index 82% rename from rllib/tuned_examples/walker2d-ppo.yaml rename to rllib/tuned_examples/ppo/walker2d-ppo.yaml index a88589ebf..c50a3858d 100644 --- a/rllib/tuned_examples/walker2d-ppo.yaml +++ b/rllib/tuned_examples/ppo/walker2d-ppo.yaml @@ -2,6 +2,8 @@ walker2d-v1-ppo: env: Walker2d-v1 run: PPO config: + # Works for both torch and tf. + use_pytorch: false kl_coeff: 1.0 num_sgd_iter: 20 lr: .0001 diff --git a/rllib/tuned_examples/regression_tests/cartpole-a2c-torch.yaml b/rllib/tuned_examples/regression_tests/cartpole-a2c-torch.yaml deleted file mode 100644 index e76975e5f..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-a2c-torch.yaml +++ /dev/null @@ -1,9 +0,0 @@ -cartpole-a2c-torch: - env: CartPole-v0 - run: A2C - stop: - episode_reward_mean: 100 - timesteps_total: 100000 - config: - num_workers: 0 - use_pytorch: true diff --git a/rllib/tuned_examples/regression_tests/cartpole-appo-torch.yaml b/rllib/tuned_examples/regression_tests/cartpole-appo-torch.yaml deleted file mode 100644 index a1db540a7..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-appo-torch.yaml +++ /dev/null @@ -1,14 +0,0 @@ -cartpole-appo-torch: - env: CartPole-v0 - run: APPO - stop: - episode_reward_mean: 150 - timesteps_total: 200000 - config: - use_pytorch: true - rollout_fragment_length: 10 - train_batch_size: 10 - num_envs_per_worker: 5 - num_workers: 1 - num_gpus: 0 - vtrace: false diff --git a/rllib/tuned_examples/regression_tests/cartpole-appo-vtrace-torch.yaml b/rllib/tuned_examples/regression_tests/cartpole-appo-vtrace-torch.yaml deleted file mode 100644 index 4466621e0..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-appo-vtrace-torch.yaml +++ /dev/null @@ -1,14 +0,0 @@ -cartpole-appo-vtrace-torch: - env: CartPole-v0 - run: APPO - stop: - episode_reward_mean: 150 - timesteps_total: 200000 - config: - use_pytorch: true - rollout_fragment_length: 10 - train_batch_size: 10 - num_envs_per_worker: 5 - num_workers: 1 - num_gpus: 0 - vtrace: true diff --git a/rllib/tuned_examples/regression_tests/cartpole-ars-tf.yaml b/rllib/tuned_examples/regression_tests/cartpole-ars-tf.yaml deleted file mode 100644 index 24e6e51ed..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-ars-tf.yaml +++ /dev/null @@ -1,17 +0,0 @@ -cartpole-ars-tf: - env: CartPole-v0 - run: ARS - stop: - episode_reward_mean: 50 - timesteps_total:
500000 - config: - use_pytorch: false - noise_stdev: 0.02 - num_rollouts: 50 - rollouts_used: 25 - num_workers: 2 - sgd_stepsize: 0.01 - noise_size: 25000000 - eval_prob: 0.5 - model: - fcnet_hiddens: [] # a linear policy diff --git a/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-torch.yaml b/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-torch.yaml deleted file mode 100644 index 1e5fcdf10..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-torch.yaml +++ /dev/null @@ -1,18 +0,0 @@ -cartpole-dqn-torch-w-param-noise: - env: CartPole-v0 - run: DQN - stop: - episode_reward_mean: 150 - timesteps_total: 300000 - config: - use_pytorch: true - exploration_config: - type: ParameterNoise - random_timesteps: 10000 - initial_stddev: 1.0 - batch_mode: complete_episodes - lr: 0.0008 - num_workers: 0 - model: - fcnet_hiddens: [32, 32] - fcnet_activation: tanh diff --git a/rllib/tuned_examples/regression_tests/cartpole-dqn-torch.yaml b/rllib/tuned_examples/regression_tests/cartpole-dqn-torch.yaml deleted file mode 100644 index ff0d48620..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-dqn-torch.yaml +++ /dev/null @@ -1,10 +0,0 @@ -cartpole-dqn-torch: - env: CartPole-v0 - run: DQN - stop: - episode_reward_mean: 150 - timesteps_total: 50000 - config: - use_pytorch: true - n_step: 3 - gamma: 0.95 diff --git a/rllib/tuned_examples/regression_tests/cartpole-es-torch.yaml b/rllib/tuned_examples/regression_tests/cartpole-es-torch.yaml deleted file mode 100644 index 147d0638a..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-es-torch.yaml +++ /dev/null @@ -1,11 +0,0 @@ -cartpole-es-torch: - env: CartPole-v0 - run: ES - stop: - episode_reward_mean: 150 - timesteps_total: 500000 - config: - use_pytorch: true - num_workers: 2 - noise_size: 25000000 - episodes_per_batch: 50 diff --git a/rllib/tuned_examples/regression_tests/cartpole-impala-torch.yaml b/rllib/tuned_examples/regression_tests/cartpole-impala-torch.yaml deleted file mode 100644 index e29d2caad..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-impala-torch.yaml +++ /dev/null @@ -1,9 +0,0 @@ -cartpole-impala-torch: - env: CartPole-v0 - run: IMPALA - stop: - episode_reward_mean: 150 - timesteps_total: 500000 - config: - use_pytorch: true - num_gpus: 0 diff --git a/rllib/tuned_examples/regression_tests/cartpole-pg-tf.yaml b/rllib/tuned_examples/regression_tests/cartpole-pg-tf.yaml deleted file mode 100644 index 016b0b553..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-pg-tf.yaml +++ /dev/null @@ -1,8 +0,0 @@ -cartpole-pg-tf: - env: CartPole-v0 - run: PG - stop: - episode_reward_mean: 100 - timesteps_total: 100000 - config: - num_workers: 0 diff --git a/rllib/tuned_examples/regression_tests/cartpole-ppo-tf.yaml b/rllib/tuned_examples/regression_tests/cartpole-ppo-tf.yaml deleted file mode 100644 index 315b7dd44..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-ppo-tf.yaml +++ /dev/null @@ -1,17 +0,0 @@ -cartpole-ppo-tf: - env: CartPole-v0 - run: PPO - stop: - episode_reward_mean: 150 - timesteps_total: 100000 - config: - gamma: 0.99 - lr: 0.0003 - num_workers: 1 - observation_filter: MeanStdFilter - num_sgd_iter: 6 - vf_share_layers: true - vf_loss_coeff: 0.01 - model: - fcnet_hiddens: [32] - fcnet_activation: linear diff --git a/rllib/tuned_examples/regression_tests/cartpole-sac-torch.yaml b/rllib/tuned_examples/regression_tests/cartpole-sac-torch.yaml deleted file mode 100644 index 1f7259bf8..000000000 --- 
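The parameter-noise file deleted here survives as dqn/cartpole-dqn-param-noise.yaml (renamed earlier in this patch); its `exploration_config` block is the interesting part. A sketch of the equivalent Tune call, values copied from the deleted YAML:

```python
from ray import tune

tune.run(
    "DQN",
    stop={"episode_reward_mean": 150, "timesteps_total": 300000},
    config={
        "env": "CartPole-v0",
        "use_pytorch": False,  # the consolidated file covers tf and torch
        # Explore via parameter-space noise instead of epsilon-greedy:
        "exploration_config": {
            "type": "ParameterNoise",
            "random_timesteps": 10000,
            "initial_stddev": 1.0,
        },
        "batch_mode": "complete_episodes",
        "lr": 0.0008,
        "num_workers": 0,
        "model": {"fcnet_hiddens": [32, 32], "fcnet_activation": "tanh"},
    },
)
```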
a/rllib/tuned_examples/regression_tests/cartpole-sac-torch.yaml +++ /dev/null @@ -1,17 +0,0 @@ -cartpole-sac-torch: - env: CartPole-v0 - run: SAC - stop: - episode_reward_mean: 150 - timesteps_total: 50000 - config: - use_pytorch: true - gamma: 0.95 - no_done_at_end: false - target_network_update_freq: 32 - tau: 1.0 - train_batch_size: 32 - optimization: - actor_learning_rate: 0.005 - critic_learning_rate: 0.005 - entropy_learning_rate: 0.0001 diff --git a/rllib/tuned_examples/regression_tests/cartpole-simpleq-torch.yaml b/rllib/tuned_examples/regression_tests/cartpole-simpleq-torch.yaml deleted file mode 100644 index dcb9128b0..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-simpleq-torch.yaml +++ /dev/null @@ -1,8 +0,0 @@ -cartpole-dqn-torch: - env: CartPole-v0 - run: SimpleQ - stop: - episode_reward_mean: 150 - timesteps_total: 50000 - config: - use_pytorch: true diff --git a/rllib/tuned_examples/regression_tests/pendulum-ddpg-tf.yaml b/rllib/tuned_examples/regression_tests/pendulum-ddpg-tf.yaml deleted file mode 100644 index 76147e7b7..000000000 --- a/rllib/tuned_examples/regression_tests/pendulum-ddpg-tf.yaml +++ /dev/null @@ -1,10 +0,0 @@ -pendulum-ddpg-tf: - env: Pendulum-v0 - run: DDPG - stop: - episode_reward_mean: -700 - timesteps_total: 100000 - config: - use_pytorch: false - use_huber: true - clip_rewards: false diff --git a/rllib/tuned_examples/regression_tests/pendulum-ddpg-torch.yaml b/rllib/tuned_examples/regression_tests/pendulum-ddpg-torch.yaml deleted file mode 100644 index d1ac7a56e..000000000 --- a/rllib/tuned_examples/regression_tests/pendulum-ddpg-torch.yaml +++ /dev/null @@ -1,10 +0,0 @@ -pendulum-ddpg-torch: - env: Pendulum-v0 - run: DDPG - stop: - episode_reward_mean: -700 - timesteps_total: 100000 - config: - use_pytorch: true - use_huber: true - clip_rewards: false diff --git a/rllib/tuned_examples/regression_tests/pendulum-ppo-torch.yaml b/rllib/tuned_examples/regression_tests/pendulum-ppo-torch.yaml deleted file mode 100644 index 17132f5ce..000000000 --- a/rllib/tuned_examples/regression_tests/pendulum-ppo-torch.yaml +++ /dev/null @@ -1,21 +0,0 @@ -pendulum-ppo-torch: - env: Pendulum-v0 - run: PPO - stop: - episode_reward_mean: -500 - timesteps_total: 400000 - config: - use_pytorch: true - train_batch_size: 2048 - vf_clip_param: 10.0 - num_workers: 0 - num_envs_per_worker: 10 - lambda: 0.1 - gamma: 0.95 - lr: 0.0003 - sgd_minibatch_size: 64 - num_sgd_iter: 10 - model: - fcnet_hiddens: [64, 64] - batch_mode: complete_episodes - observation_filter: MeanStdFilter diff --git a/rllib/tuned_examples/regression_tests/pendulum-sac-tf.yaml b/rllib/tuned_examples/regression_tests/pendulum-sac-tf.yaml deleted file mode 100644 index 14230cf53..000000000 --- a/rllib/tuned_examples/regression_tests/pendulum-sac-tf.yaml +++ /dev/null @@ -1,13 +0,0 @@ -pendulum-sac-tf: - env: Pendulum-v0 - run: SAC - stop: - episode_reward_mean: -300 # note that evaluation perf is higher - timesteps_total: 10000 - config: - use_pytorch: false - soft_horizon: true - clip_actions: false - normalize_actions: true - metrics_smoothing_episodes: 5 - no_done_at_end: true diff --git a/rllib/tuned_examples/regression_tests/pendulum-sac-torch.yaml b/rllib/tuned_examples/regression_tests/pendulum-sac-torch.yaml deleted file mode 100644 index eddb06357..000000000 --- a/rllib/tuned_examples/regression_tests/pendulum-sac-torch.yaml +++ /dev/null @@ -1,13 +0,0 @@ -pendulum-sac-torch: - env: Pendulum-v0 - run: SAC - stop: - episode_reward_mean: -300 # note that evaluation perf is higher - 
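With the per-framework regression YAMLs removed, one framework-agnostic file per algorithm covers both stacks, and a test runner can simply flip `use_pytorch` per pass. A speculative sketch of sweeping such a folder; the patched rllib/tests/run_regression_tests.py works along these lines, but its actual arguments and pass/fail checks differ:

```python
import glob
import yaml
import ray
from ray import tune

ray.init()
for path in sorted(glob.glob("rllib/tuned_examples/ppo/cartpole-*.yaml")):
    with open(path) as f:
        experiments = yaml.safe_load(f)
    # Torch pass; a second sweep with False here covers the tf stack.
    for spec in experiments.values():
        spec.setdefault("config", {})["use_pytorch"] = True
    tune.run_experiments(experiments)
```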
timesteps_total: 10000 - config: - use_pytorch: true - soft_horizon: true - clip_actions: false - normalize_actions: true - metrics_smoothing_episodes: 5 - no_done_at_end: true diff --git a/rllib/tuned_examples/regression_tests/pendulum-td3.yaml b/rllib/tuned_examples/regression_tests/pendulum-td3.yaml deleted file mode 100644 index 20b84b99e..000000000 --- a/rllib/tuned_examples/regression_tests/pendulum-td3.yaml +++ /dev/null @@ -1,8 +0,0 @@ -pendulum-td3-tf: - env: Pendulum-v0 - run: TD3 - config: - use_pytorch: false - stop: - episode_reward_mean: -900 - timesteps_total: 100000 diff --git a/rllib/tuned_examples/atari-sac.yaml b/rllib/tuned_examples/sac/atari-sac.yaml similarity index 94% rename from rllib/tuned_examples/atari-sac.yaml rename to rllib/tuned_examples/sac/atari-sac.yaml index e87594f7b..67f247665 100644 --- a/rllib/tuned_examples/atari-sac.yaml +++ b/rllib/tuned_examples/sac/atari-sac.yaml @@ -10,9 +10,7 @@ atari-sac-tf-and-torch: stop: timesteps_total: 20000000 config: - # Works for both torch and tf. - use_pytorch: - grid_search: [false, true] + use_pytorch: false # <- switch on/off torch gamma: 0.99 # state-preprocessor=Our default Atari Conv2D-net. use_state_preprocessor: true diff --git a/rllib/tuned_examples/regression_tests/cartpole-sac-tf.yaml b/rllib/tuned_examples/sac/cartpole-sac.yaml similarity index 85% rename from rllib/tuned_examples/regression_tests/cartpole-sac-tf.yaml rename to rllib/tuned_examples/sac/cartpole-sac.yaml index 8a9c030d4..f3010c189 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-sac-tf.yaml +++ b/rllib/tuned_examples/sac/cartpole-sac.yaml @@ -1,10 +1,11 @@ -cartpole-sac-tf: +cartpole-sac: env: CartPole-v0 run: SAC stop: episode_reward_mean: 150 - timesteps_total: 50000 + timesteps_total: 100000 config: + # Works for both torch and tf. use_pytorch: false gamma: 0.95 no_done_at_end: false diff --git a/rllib/tuned_examples/halfcheetah-sac.yaml b/rllib/tuned_examples/sac/halfcheetah-sac.yaml similarity index 95% rename from rllib/tuned_examples/halfcheetah-sac.yaml rename to rllib/tuned_examples/sac/halfcheetah-sac.yaml index 56824baf4..dd8f888d2 100644 --- a/rllib/tuned_examples/halfcheetah-sac.yaml +++ b/rllib/tuned_examples/sac/halfcheetah-sac.yaml @@ -5,6 +5,7 @@ halfcheetah_sac: stop: episode_reward_mean: 9000 config: + use_pytorch: false # <- switch on/off torch horizon: 1000 soft_horizon: false Q_model: diff --git a/rllib/tuned_examples/mspacman-sac.yaml b/rllib/tuned_examples/sac/mspacman-sac.yaml similarity index 97% rename from rllib/tuned_examples/mspacman-sac.yaml rename to rllib/tuned_examples/sac/mspacman-sac.yaml index 4b70aa4f5..2b77b4625 100644 --- a/rllib/tuned_examples/mspacman-sac.yaml +++ b/rllib/tuned_examples/sac/mspacman-sac.yaml @@ -8,6 +8,7 @@ mspacman-sac-tf: episode_reward_mean: 800 timesteps_total: 100000 config: + # Works for both torch and tf. use_pytorch: false gamma: 0.99 # state-preprocessor=Our default Atari Conv2D-net. 
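Dropping the `grid_search: [false, true]` from atari-sac above makes the default run single-framework; when both stacks should be exercised in one go, the sweep is easy to restore at call time. A sketch using the cartpole-sac values from the hunk above:

```python
from ray import tune

tune.run(
    "SAC",
    stop={"episode_reward_mean": 150, "timesteps_total": 100000},
    config={
        "env": "CartPole-v0",
        # One trial per framework, as the old atari-sac.yaml sweep did:
        "use_pytorch": tune.grid_search([False, True]),
        "gamma": 0.95,
    },
)
```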
diff --git a/rllib/tuned_examples/pendulum-sac.yaml b/rllib/tuned_examples/sac/pendulum-sac.yaml similarity index 76% rename from rllib/tuned_examples/pendulum-sac.yaml rename to rllib/tuned_examples/sac/pendulum-sac.yaml index 6d2d7296c..159249ecc 100644 --- a/rllib/tuned_examples/pendulum-sac.yaml +++ b/rllib/tuned_examples/sac/pendulum-sac.yaml @@ -1,13 +1,16 @@ # Pendulum SAC can attain -150+ reward in 6-7k -# Configurations are the similar to original softlearning/sac codebase -pendulum_sac: +# Configurations are similar to the original softlearning/sac codebase +pendulum-sac: env: Pendulum-v0 run: SAC stop: - episode_reward_mean: -150 + episode_reward_mean: -300 + timesteps_total: 10000 config: + # Works for both torch and tf. + use_pytorch: false horizon: 200 - soft_horizon: False + soft_horizon: true Q_model: fcnet_activation: relu fcnet_hiddens: [256, 256] @@ -16,10 +19,10 @@ pendulum_sac: fcnet_hiddens: [256, 256] tau: 0.005 target_entropy: auto - no_done_at_end: True + no_done_at_end: true n_step: 1 rollout_fragment_length: 1 - prioritized_replay: False + prioritized_replay: true train_batch_size: 256 target_network_update_freq: 1 timesteps_per_iteration: 1000 @@ -31,6 +34,6 @@ pendulum_sac: num_workers: 0 num_gpus: 0 clip_actions: False - normalize_actions: True + normalize_actions: true evaluation_interval: 1 metrics_smoothing_episodes: 5 diff --git a/rllib/utils/schedules/piecewise_schedule.py b/rllib/utils/schedules/piecewise_schedule.py index 9acd1d513..6c82c30a0 100644 --- a/rllib/utils/schedules/piecewise_schedule.py +++ b/rllib/utils/schedules/piecewise_schedule.py @@ -43,7 +43,7 @@ class PiecewiseSchedule(Schedule): assert idxes == sorted(idxes) self.interpolation = interpolation self.outside_value = outside_value - self.endpoints = endpoints + self.endpoints = [(int(e[0]), float(e[1])) for e in endpoints] @override(Schedule) def _value(self, t):
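The piecewise_schedule.py change at the end normalizes every endpoint pair to a plain `(int, float)` tuple, so timesteps arriving as numpy integers or values parsed from YAML as ints cannot leak odd types into the interpolation math. An illustrative standalone re-implementation (a sketch of the schedule's behavior, not the class itself):

```python
import numpy as np

def normalize_endpoints(endpoints):
    # Mirror of the patched line: force (int timestep, float value) pairs.
    return [(int(t), float(v)) for t, v in endpoints]

def piecewise_value(t, endpoints, outside_value=None):
    # Linearly interpolate between the two endpoints bracketing t; outside
    # the defined range, fall back to outside_value.
    for (l_t, l_v), (r_t, r_v) in zip(endpoints[:-1], endpoints[1:]):
        if l_t <= t < r_t:
            alpha = float(t - l_t) / (r_t - l_t)
            return l_v + alpha * (r_v - l_v)
    return outside_value

raw = [(np.int64(0), 1.0), (np.int64(10000), np.float64(0.1))]
eps = normalize_endpoints(raw)     # -> [(0, 1.0), (10000, 0.1)]
print(piecewise_value(5000, eps))  # -> 0.55
```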