# Source path: ray/release/rllib_tests/learning_tests/yaml_files/marwil-halfcheetahbulletenv-v0.yaml
# File-viewer metadata (not part of the config): 27 lines, 814 B, YAML.

# Learning test: run MARWIL on HalfCheetahBulletEnv-v0 using recorded input
# data, with evaluation running on a separate worker.
# NOTE(review): the nesting below was restored from a copy that had lost its
# indentation; confirm key placement against the test runner's schema.
marwil-halfcheetahbulletenv-v0:
  env: HalfCheetahBulletEnv-v0
  run: MARWIL
  # Thresholds for this test (presumably checked by the release-test harness).
  pass_criteria:
    evaluation/episode_reward_mean: 400.0
    timesteps_total: 10000000
  # Stopping condition: 3600 seconds of total run time.
  stop:
    time_total_s: 3600
  config:
    # Use input produced by expert SAC algo.
    input: ["~/halfcheetah_expert_sac.zip"]
    actions_in_input_normalized: true
    # Switch off input evaluation (data does not contain action probs).
    off_policy_estimation_methods: {}
    num_gpus: 1
    model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]
    evaluation_num_workers: 1
    evaluation_parallel_to_training: true
    evaluation_interval: 3
    # Evaluation overrides `input`: it uses `sampler` instead of the recorded
    # dataset configured above.
    evaluation_config:
      input: sampler
    # NOTE(review): placed at the `config` level (sibling of
    # `evaluation_config`) — verify this is where the consumer reads it.
    always_attach_evaluation_results: true