---
# Experiment `cartpole_crr`: runs the CRR algorithm on the CartPole-v0 env.
# NOTE(review): the nesting below was reconstructed from a single-line form
# of this file; confirm that `evaluation_config` and `replay_buffer_config`
# contain exactly the keys shown under them.
cartpole_crr:
  env: 'CartPole-v0'
  run: CRR
  # Stopping criteria for the run.
  stop:
    evaluation/episode_reward_mean: 200
    training_iteration: 100
  config:
    # Training input: list of data file paths.
    input:
      - 'tests/data/cartpole/large.json'
    framework: torch
    gamma: 0.99
    train_batch_size: 2048
    # Critic network settings.
    critic_hidden_activation: 'tanh'
    critic_hiddens: [128, 128, 128]
    critic_lr: 0.0003
    # Actor network settings (same sizes and learning rate as the critic).
    actor_hidden_activation: 'tanh'
    actor_hiddens: [128, 128, 128]
    actor_lr: 0.0003
    actions_in_input_normalized: true
    clip_actions: true
    # Q function update setting
    twin_q: true
    target_update_grad_intervals: 1
    tau: 0.0005
    # evaluation
    # Evaluation overrides: exploration off, `input` set to `sampler`
    # instead of the data file used for training.
    evaluation_config:
      explore: false
      input: sampler
    evaluation_duration: 10
    evaluation_duration_unit: episodes
    evaluation_interval: 1
    evaluation_num_workers: 1
    evaluation_parallel_to_training: true
    # replay buffer
    replay_buffer_config:
      type: ray.rllib.utils.replay_buffers.MultiAgentReplayBuffer
      learning_starts: 0
      capacity: 100000
    # specific to CRR
    weight_type: bin
    advantage_type: max
    max_weight: 20.0
    n_action_sample: 4