#!/usr/bin/env bash
# Cause the script to exit if a single command fails.
set -e
# Print each command before it is executed.
set -x
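# Resolve the directory containing this script.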
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
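# Build the test Docker image (without cache) and capture its SHA so every
# test below runs against the same image.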
DOCKER_SHA=$($ROOT_DIR/../../build-docker.sh --output-sha --no-cache)
echo "Using Docker image $DOCKER_SHA"
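
# RLlib training smoke tests: each command runs a short (two-iteration)
# training job for one algorithm/environment pair inside a fresh container.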
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env PongDeterministic-v0 \
    --run A3C \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 2}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env Pong-ram-v4 \
    --run A3C \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 2}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env PongDeterministic-v0 \
    --run A2C \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 2}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v1 \
    --run PPO \
    --stop '{"training_iteration": 2}' \
    --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "model": {"free_log_std": true}}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v1 \
    --run PPO \
    --stop '{"training_iteration": 2}' \
    --config '{"simple_optimizer": false, "num_sgd_iter": 2, "model": {"use_lstm": true}}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v1 \
    --run PPO \
    --stop '{"training_iteration": 2}' \
    --config '{"simple_optimizer": true, "num_sgd_iter": 2, "model": {"use_lstm": true}}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v1 \
    --run PPO \
    --stop '{"training_iteration": 2}' \
    --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "use_gae": false, "batch_mode": "complete_episodes"}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env Pendulum-v0 \
    --run ES \
    --stop '{"training_iteration": 2}' \
    --config '{"stepsize": 0.01, "episodes_per_batch": 20, "train_batch_size": 100, "num_workers": 2}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env Pong-v0 \
    --run ES \
    --stop '{"training_iteration": 2}' \
    --config '{"stepsize": 0.01, "episodes_per_batch": 20, "train_batch_size": 100, "num_workers": 2}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run A3C \
    --stop '{"training_iteration": 2}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run DQN \
    --stop '{"training_iteration": 2}' \
    --config '{"lr": 1e-3, "schedule_max_timesteps": 100000, "exploration_fraction": 0.1, "exploration_final_eps": 0.02, "dueling": false, "hiddens": [], "model": {"fcnet_hiddens": [64], "fcnet_activation": "relu"}}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run DQN \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 2}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run APEX \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 2, "timesteps_per_iteration": 1000, "num_gpus": 0, "min_iter_time_s": 1}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env FrozenLake-v0 \
    --run DQN \
    --stop '{"training_iteration": 2}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env FrozenLake-v0 \
    --run PPO \
    --stop '{"training_iteration": 2}' \
    --config '{"num_sgd_iter": 10, "sgd_minibatch_size": 64, "train_batch_size": 1000, "num_workers": 1}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env PongDeterministic-v4 \
    --run DQN \
    --stop '{"training_iteration": 2}' \
    --config '{"lr": 1e-4, "schedule_max_timesteps": 2000000, "buffer_size": 10000, "exploration_fraction": 0.1, "exploration_final_eps": 0.01, "sample_batch_size": 4, "learning_starts": 10000, "target_network_update_freq": 1000, "gamma": 0.99, "prioritized_replay": true}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env MontezumaRevenge-v0 \
    --run PPO \
    --stop '{"training_iteration": 2}' \
    --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "model": {"dim": 40, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v1 \
    --run A3C \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 2, "model": {"use_lstm": true}}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run DQN \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 2}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run PG \
    --stop '{"training_iteration": 2}' \
    --config '{"sample_batch_size": 500, "num_workers": 1}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run PG \
    --stop '{"training_iteration": 2}' \
    --config '{"sample_batch_size": 500, "num_workers": 1, "model": {"use_lstm": true, "max_seq_len": 100}}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run PG \
    --stop '{"training_iteration": 2}' \
    --config '{"sample_batch_size": 500, "num_workers": 1, "num_envs_per_worker": 10}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env Pong-v0 \
    --run PG \
    --stop '{"training_iteration": 2}' \
    --config '{"sample_batch_size": 500, "num_workers": 1}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env FrozenLake-v0 \
    --run PG \
    --stop '{"training_iteration": 2}' \
    --config '{"sample_batch_size": 500, "num_workers": 1}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env Pendulum-v0 \
    --run DDPG \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 1}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run IMPALA \
    --stop '{"training_iteration": 2}' \
    --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run IMPALA \
    --stop '{"training_iteration": 2}' \
    --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "model": {"use_lstm": true}}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run IMPALA \
    --stop '{"training_iteration": 2}' \
    --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_parallel_data_loaders": 2, "replay_proportion": 1.0}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v0 \
    --run IMPALA \
    --stop '{"training_iteration": 2}' \
    --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_parallel_data_loaders": 2, "replay_proportion": 1.0, "model": {"use_lstm": true}}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env MountainCarContinuous-v0 \
    --run DDPG \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 1}'
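
# The same DDPG test, invoked through the `rllib train` CLI entry point
# instead of calling train.py directly.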
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    rllib train \
    --env MountainCarContinuous-v0 \
    --run DDPG \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 1}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env Pendulum-v0 \
    --run APEX_DDPG \
    --ray-num-cpus 8 \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100, "min_iter_time_s": 1}'
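
# RLlib unit and regression tests.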
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    sh /ray/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/test/test_local.py

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/test/test_checkpoint_restore.py

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/test/test_policy_evaluator.py

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/test/test_nested_spaces.py

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/test/test_external_env.py

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/test/test_lstm.py

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/test/test_multi_agent_env.py

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/test/test_supported_spaces.py

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    /ray/python/ray/rllib/test/test_rollout.sh
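
# Tune example smoke tests: --smoke-test makes each example exit quickly.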
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/tune_mnist_ray.py \
    --smoke-test

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/pbt_example.py \
    --smoke-test

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/hyperband_example.py \
    --smoke-test

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/async_hyperband_example.py \
    --smoke-test

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/tune_mnist_ray_hyperband.py \
    --smoke-test

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/tune_mnist_async_hyperband.py \
    --smoke-test

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/hyperopt_example.py \
    --smoke-test

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/tune_mnist_keras.py \
    --smoke-test

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/mnist_pytorch.py \
    --smoke-test

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/mnist_pytorch_trainable.py \
    --smoke-test

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/genetic_example.py \
    --smoke-test
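
# RLlib example scripts.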
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/examples/multiagent_cartpole.py --num-iters=2

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/examples/multiagent_two_trainers.py --num-iters=2

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/examples/cartpole_lstm.py --run=PPO --stop=200

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/examples/cartpole_lstm.py --run=IMPALA --stop=100

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/examples/cartpole_lstm.py --stop=200 --use-prev-action-reward

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/examples/custom_metrics_and_callbacks.py --num-iters=2
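
# Distributed SGD tests (ray.experimental.sgd).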
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/experimental/sgd/test_sgd.py --num-iters=2 \
    --batch-size=1 --strategy=simple

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/experimental/sgd/test_sgd.py --num-iters=2 \
    --batch-size=1 --strategy=ps

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/experimental/sgd/mnist_example.py --num-iters=1 \
    --num-workers=1 --devices-per-worker=1 --strategy=ps

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/experimental/sgd/mnist_example.py --num-iters=1 \
    --num-workers=1 --devices-per-worker=1 --strategy=ps --tune
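
# A3C smoke tests with the PyTorch backend ("use_pytorch": true).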
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env PongDeterministic-v4 \
    --run A3C \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 2, "use_pytorch": true, "model": {"use_lstm": false, "grayscale": true, "zero_mean": false, "dim": 84, "channel_major": true}, "preprocessor_pref": "rllib"}'

docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
    --env CartPole-v1 \
    --run A3C \
    --stop '{"training_iteration": 2}' \
    --config '{"num_workers": 2, "use_pytorch": true}'
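
# Multi-node tests: multi_node_docker_test.py launches a cluster of Docker
# containers and runs the given --test-script against it.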
python3 $ROOT_DIR/multi_node_docker_test.py \
    --docker-image=$DOCKER_SHA \
    --num-nodes=5 \
    --num-redis-shards=10 \
    --test-script=/ray/test/jenkins_tests/multi_node_tests/test_0.py

python3 $ROOT_DIR/multi_node_docker_test.py \
    --docker-image=$DOCKER_SHA \
    --num-nodes=5 \
    --num-redis-shards=5 \
    --num-gpus=0,1,2,3,4 \
    --num-drivers=7 \
    --driver-locations=0,1,0,1,2,3,4 \
    --test-script=/ray/test/jenkins_tests/multi_node_tests/remove_driver_test.py

python3 $ROOT_DIR/multi_node_docker_test.py \
    --docker-image=$DOCKER_SHA \
    --num-nodes=5 \
    --num-redis-shards=2 \
    --num-gpus=0,0,5,6,50 \
    --num-drivers=100 \
    --test-script=/ray/test/jenkins_tests/multi_node_tests/many_drivers_test.py

python3 $ROOT_DIR/multi_node_docker_test.py \
    --docker-image=$DOCKER_SHA \
    --num-nodes=1 \
    --mem-size=60G \
    --shm-size=60G \
    --test-script=/ray/test/jenkins_tests/multi_node_tests/large_memory_test.py