# Experiment "pendulum-ddpg-fake-gpus": runs DDPG on Pendulum-v1 with
# `num_gpus: 2` and `_fake_gpus: true`.
pendulum-ddpg-fake-gpus:
  env: Pendulum-v1
  run: DDPG
  stop:
    episode_reward_mean: -1000
    timesteps_total: 40000
  config:
    # Works for both torch and tf.
    seed: 42
    framework: tf
    actor_hiddens: [64, 64]
    critic_hiddens: [64, 64]
    n_step: 1
    model: {}
    gamma: 0.99
    exploration_config:
      final_scale: 0.02
    min_sample_timesteps_per_reporting: 600
    # NOTE(review): only `capacity` is nested under replay_buffer_config here;
    # the keys that follow are kept at the `config` level. Confirm against the
    # DDPG config schema of the RLlib version in use.
    replay_buffer_config:
      capacity: 10000
    clip_rewards: false
    use_huber: true
    learning_starts: 500
    train_batch_size: 64
    num_workers: 0
    worker_side_prioritization: false
    actor_lr: 0.0001
    critic_lr: 0.0001

    # Fake 2 GPUs.
    num_gpus: 2
    _fake_gpus: true