[rllib] Move repeat field to asv script (#2367)

Eric Liang 2018-07-07 12:10:06 -07:00 committed by Richard Liaw
parent e32aed8717
commit 9a6e329325
10 changed files with 3 additions and 8 deletions
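The net effect of this commit: each regression-test YAML drops its per-experiment repeat: 3 line, and the asv script sets the same value once after loading the configs. A minimal sketch of the new flow, mirroring the script hunk at the end of this diff (the filename is illustrative, not taken from the diff):

import yaml
import ray
from ray import tune

# After this change the tuned-example YAML no longer carries a repeat field;
# the regression script injects the same value for every experiment it loads.
with open("cartpole-pg.yaml") as f:    # illustrative filename
    experiments = yaml.load(f)         # e.g. {"cartpole-pg": {"env": "CartPole-v0", "run": "PG", ...}}
for _, config in experiments.items():
    config["repeat"] = 3               # every regression test is run 3 times
ray.init()
trials = tune.run_experiments(experiments)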

.gitignore

@@ -29,6 +29,7 @@
# Python byte code files
*.pyc
python/.eggs
# Backup files
*.bak


@@ -52,7 +52,6 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
        self.update_weights_timer = TimerStat()
        print("LocalMultiGPUOptimizer devices", self.devices)
        print("LocalMultiGPUOptimizer batch size", self.batch_size)
        assert set(self.local_evaluator.policy_map.keys()) == {"default"}, \
            "Multi-agent is not supported"


@@ -1,7 +1,6 @@
 cartpole-a3c:
     env: CartPole-v0
     run: A3C
-    repeat: 3
     stop:
         episode_reward_mean: 200
         time_total_s: 600


@@ -1,7 +1,6 @@
 cartpole-a3c:
     env: CartPole-v0
     run: A3C
-    repeat: 3
     stop:
         episode_reward_mean: 200
         time_total_s: 600


@@ -1,7 +1,6 @@
 cartpole-dqn:
     env: CartPole-v0
     run: DQN
-    repeat: 3
     stop:
         episode_reward_mean: 200
         time_total_s: 600


@@ -1,7 +1,6 @@
 cartpole-pg:
     env: CartPole-v0
     run: PG
-    repeat: 3
     stop:
         episode_reward_mean: 200
         time_total_s: 300


@@ -1,7 +1,6 @@
 cartpole-ppo:
     env: CartPole-v0
     run: PPO
-    repeat: 3
     stop:
         episode_reward_mean: 200
         time_total_s: 300


@@ -1,7 +1,6 @@
 pendulum-ddpg:
     env: Pendulum-v0
     run: DDPG
-    repeat: 3
     stop:
         episode_reward_mean: -160
         time_total_s: 900


@@ -1,7 +1,6 @@
 pendulum-ppo:
     env: Pendulum-v0
     run: PPO
-    repeat: 3
     stop:
         episode_reward_mean: -160
         # expect -140 within 300-500k steps


@@ -22,6 +22,8 @@ CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))
 def _evaulate_config(filename):
     with open(os.path.join(CONFIG_DIR, filename)) as f:
         experiments = yaml.load(f)
+    for _, config in experiments.items():
+        config["repeat"] = 3
     ray.init()
     trials = tune.run_experiments(experiments)
     results = defaultdict(list)
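With the field set in one place, every regression test gets the same number of repeats without each YAML duplicating it. A hedged sketch of how the repeated trials could then be folded into the results dict the script builds (the trial attribute names below are assumptions, not shown in this diff):

# Group the three repeats of each experiment so the benchmark can report an
# averaged metric; experiment_tag and last_result are assumed attribute names.
for trial in trials:
    results[trial.experiment_tag].append(trial.last_result)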