# Experiment spec: runs MARWIL against HalfCheetahBulletEnv-v0, reading its
# training input from a pre-recorded file and evaluating with a live sampler.
#
# NOTE(review): the block structure below was reconstructed from a source in
# which the whole document sat on a single line. Confirm the nesting against
# the consumer's schema -- in particular that `always_attach_evaluation_results`
# belongs directly under `config` and not under `evaluation_config`.
marwil-halfcheetahbulletenv-v0:
  env: HalfCheetahBulletEnv-v0
  run: MARWIL

  # Thresholds the run is checked against.
  # NOTE(review): presumably a minimum evaluation reward and a timestep
  # budget -- confirm how the test harness interprets these.
  pass_criteria:
    evaluation/episode_reward_mean: 400.0
    timesteps_total: 10000000

  # Stop condition for the run (wall-clock seconds).
  stop:
    time_total_s: 3600

  config:
    # Use input produced by expert SAC algo.
    input: ["~/halfcheetah_expert_sac.zip"]
    actions_in_input_normalized: true
    # Switch off input evaluation (data does not contain action probs).
    input_evaluation: []

    num_gpus: 1

    model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]

    evaluation_num_workers: 1
    evaluation_parallel_to_training: true
    evaluation_interval: 3
    # Evaluation overrides `input` so it samples from the environment
    # instead of reading the recorded file.
    evaluation_config:
      input: sampler
    always_attach_evaluation_results: true