# Experiment spec that trains the TD3 algorithm on the Pendulum-v0 environment.
# This configuration can expect to reach -160 reward in 10k-20k timesteps.
#
# NOTE(review): the experiment key below says "ddpg" while `run` selects TD3.
# The key is kept unchanged so anything that refers to the experiment by name
# keeps working -- confirm whether a rename to "pendulum-td3" is wanted.
pendulum-ddpg:
  env: Pendulum-v0
  run: TD3
  # Stopping criteria; presumably the run ends as soon as either one is met
  # (TODO confirm against the consuming tool's semantics).
  # NOTE(review): -130 is a stricter target than the -160 quoted in the header.
  stop:
    episode_reward_mean: -130
    time_total_s: 900  # 10 minutes
  config:
    # === Model ===
    # Hidden-layer sizes for the actor and critic networks (two layers of 64).
    actor_hiddens: [64, 64]
    critic_hiddens: [64, 64]

    # === Exploration ===
    # Both thresholds are set to the same 5000 steps.
    learning_starts: 5000
    pure_exploration_steps: 5000

    # === Evaluation ===
    evaluation_interval: 1
    evaluation_num_episodes: 5