# ray/release/rllib_tests/learning_tests/yaml_files/marwil/marwil-halfcheetahbulletenv-v0.yaml

# Learning test: MARWIL trained from offline HalfCheetahBulletEnv-v0 data and
# evaluated by sampling from the environment.
marwil-halfcheetahbulletenv-v0:
    env: HalfCheetahBulletEnv-v0
    run: MARWIL
    # Reward threshold the evaluation results are checked against.
    pass_criteria:
        evaluation/episode_reward_mean: 400.0
        # Cannot check throughput for offline methods.
        # timesteps_total: 10000000
    # Wall-clock budget for the run: 3600 s (1 hour).
    stop:
        time_total_s: 3600
    config:
        # Use input produced by expert SAC algo.
        input: ["~/halfcheetah_expert_sac.zip"]
        actions_in_input_normalized: true
        # Switch off input evaluation (data does not contain action probs).
        off_policy_estimation_methods: {}

        num_gpus: 1

        # Three fully connected hidden layers of 256 units with ReLU.
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256, 256]

        # Evaluation runs on its own worker, in parallel to training.
        evaluation_num_workers: 1
        evaluation_parallel_to_training: true
        evaluation_interval: 3
        # Overrides applied only to evaluation: read from `sampler` instead of
        # the offline file used for training.
        evaluation_config:
            input: sampler
            # NOTE(review): `null` here vs `{}` in the training config above;
            # confirm the consumer treats both as "no off-policy estimation".
            off_policy_estimation_methods: null
        always_attach_evaluation_results: true