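# Tuned RLlib experiments, one entry per algorithm/environment pair. Each
# entry names the environment (`env`), the algorithm to run (`run`), the
# frameworks to test it under (`frameworks`), a wall-clock stopping criterion
# (`stop.time_total_s`), and the algorithm config itself.

# A3C on deterministic Pong: 16 CPU workers feeding an LSTM-wrapped conv net
# on downscaled (42x42) grayscale frames. Torch is excluded until the A3C
# torch path is fixed (see TODO below).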
a3c-pongdeterministic-v4:
    env: PongDeterministic-v4
    run: A3C
    # TODO(sven, jungong): fix A3C on torch and re-enable.
    frameworks: [ "tf", "tf2" ]
    stop:
        time_total_s: 3600
    config:
        num_gpus: 0
        num_workers: 16
        rollout_fragment_length: 20
        vf_loss_coeff: 0.5
        entropy_coeff: 0.01
        gamma: 0.99
        grad_clip: 40.0
        lambda: 1.0
        lr: 0.0001
        observation_filter: NoFilter
        preprocessor_pref: rllib
        model:
            use_lstm: true
            conv_activation: elu
            dim: 42
            grayscale: true
            zero_mean: false
            # Reduced channel depth and kernel size from default.
            conv_filters: [
                [32, [3, 3], 2],
                [32, [3, 3], 2],
                [32, [3, 3], 2],
                [32, [3, 3], 2],
            ]
        # TODO(jungong): flip to True after we have collected some data.
        _disable_execution_plan_api: false
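
# Ape-X (distributed prioritized-replay DQN) on Breakout. Rainbow-style
# extras are off (no double-Q, no dueling, no noisy nets, num_atoms: 1);
# exploration is plain epsilon-greedy, annealed over the first 200k timesteps.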
apex-breakoutnoframeskip-v4:
    env: BreakoutNoFrameskip-v4
    run: APEX
    frameworks: [ "tf", "tf2", "torch" ]
    stop:
        time_total_s: 3600
    config:
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        n_step: 3
        lr: 0.0001
        adam_epsilon: 0.00015
        hiddens: [512]
        buffer_size: 1000000
        exploration_config:
            epsilon_timesteps: 200000
            final_epsilon: 0.01
        prioritized_replay_alpha: 0.5
        final_prioritized_replay_beta: 1.0
        prioritized_replay_beta_annealing_timesteps: 2000000
        num_gpus: 1
        num_workers: 8
        num_envs_per_worker: 8
        rollout_fragment_length: 20
        train_batch_size: 512
        target_network_update_freq: 50000
        timesteps_per_iteration: 25000
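
# APPO (asynchronous PPO) on Pong with V-trace off-policy correction enabled
# and the KL loss disabled; 31 workers with 8 envs each, PPO-style clipping
# (clip_param: 0.3), and a short 2000s time budget.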
appo-pongnoframeskip-v4:
    env: PongNoFrameskip-v4
    run: APPO
    frameworks: [ "tf", "tf2", "torch" ]
    stop:
        time_total_s: 2000
    config:
        vtrace: true
        use_kl_loss: false
        rollout_fragment_length: 50
        train_batch_size: 750
        num_workers: 31
        broadcast_interval: 1
        max_sample_requests_in_flight_per_worker: 1
        num_multi_gpu_tower_stacks: 1
        num_envs_per_worker: 8
        num_sgd_iter: 2
        vf_loss_coeff: 1.0
        clip_param: 0.3
        num_gpus: 1
        grad_clip: 10
        model:
            dim: 42
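
# CQL trained fully offline from a pre-recorded expert-SAC dataset (the
# `input` file below); online rollouts are used only for periodic evaluation
# (evaluation_interval: 3, sampler input).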
cql-halfcheetahbulletenv-v0:
    env: HalfCheetahBulletEnv-v0
    run: CQL
    frameworks: [ "tf", "tf2", "torch" ]
    stop:
        time_total_s: 3600
    config:
        # Use input produced by expert SAC algo.
        input: ["~/halfcheetah_expert_sac.zip"]
        actions_in_input_normalized: true

        soft_horizon: false
        horizon: 1000
        Q_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256, 256]
        policy_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256, 256]
        tau: 0.005
        target_entropy: auto
        no_done_at_end: false
        n_step: 3
        rollout_fragment_length: 1
        prioritized_replay: false
        train_batch_size: 256
        target_network_update_freq: 0
        timesteps_per_iteration: 1000
        learning_starts: 256
        optimization:
            actor_learning_rate: 0.0001
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0001
        num_workers: 0
        num_gpus: 1
        metrics_smoothing_episodes: 5

        # CQL Configs
        min_q_weight: 5.0
        bc_iters: 20000
        temperature: 1.0
        num_actions: 10
        lagrangian: false

        # Switch on online evaluation.
        evaluation_interval: 3
        evaluation_config:
            input: sampler
        always_attach_evaluation_results: true
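
# Online SAC reference on the same HalfCheetah PyBullet env as the CQL test
# above, but learning from its own samples with prioritized replay enabled.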
sac-halfcheetahbulletenv-v0:
    env: HalfCheetahBulletEnv-v0
    run: SAC
    frameworks: [ "tf", "tf2", "torch" ]
    stop:
        time_total_s: 3600
    config:
        horizon: 1000
        soft_horizon: false
        Q_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        policy_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        tau: 0.005
        target_entropy: auto
        no_done_at_end: false
        n_step: 3
        rollout_fragment_length: 1
        prioritized_replay: true
        train_batch_size: 256
        target_network_update_freq: 1
        timesteps_per_iteration: 1000
        learning_starts: 10000
        optimization:
            actor_learning_rate: 0.0003
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0003
        num_workers: 0
        num_gpus: 1
        metrics_smoothing_episodes: 5