# Experiment definition: runs the CRR algorithm on CartPole-v0, reading
# training data from a recorded JSON file and evaluating with `input: sampler`.
cartpole_crr:
  env: 'CartPole-v0'
  run: CRR
  # Stopping criteria for the experiment.
  stop:
    evaluation/episode_reward_mean: 200
    training_iteration: 100
  config:
    # Training input: recorded data file(s) rather than the live environment.
    # NOTE(review): the path is relative; confirm the base directory it is
    # resolved against in the consuming tool.
    input:
      - 'tests/data/cartpole/large.json'
    num_workers: 3
    framework: torch
    gamma: 0.99
    train_batch_size: 2048
    # Critic and actor use the same layout: three hidden layers of 128 units
    # with tanh activation, and the same learning rate.
    critic_hidden_activation: 'tanh'
    critic_hiddens: [128, 128, 128]
    critic_lr: 0.0003
    actor_hidden_activation: 'tanh'
    actor_hiddens: [128, 128, 128]
    actor_lr: 0.0003
    actions_in_input_normalized: true
    clip_actions: true
    # Q function update setting
    twin_q: true
    target_network_update_freq: 1
    tau: 0.0005
    # Evaluation: these overrides apply to evaluation only. Exploration is
    # turned off and `input` is switched from the JSON file to `sampler`
    # (presumably live environment rollouts -- confirm against the consumer).
    evaluation_config:
      explore: false
      input: sampler
    evaluation_duration: 10
    evaluation_duration_unit: episodes
    evaluation_interval: 1
    evaluation_num_workers: 1
    evaluation_parallel_to_training: true
    # specific to CRR
    temperature: 1.0
    weight_type: bin
    advantage_type: mean
    max_weight: 20.0
    n_action_sample: 4