recomm-sys001-ppo:
    env: ray.rllib.examples.env.recommender_system_envs.RecommSys001
    run: PPO
    stop:
        #evaluation/episode_reward_mean: 48.0
        timesteps_total: 200000
    config:
        framework: tf

        metrics_num_episodes_for_smoothing: 1000

        # Env c'tor kwargs:
        env_config:
            # Number of different categories a doc can have and a user can
            # have a preference for.
            num_categories: 2
            # Number of docs to choose (a slate) from at each timestep.
            num_docs_to_select_from: 10
            # Slate size.
            slate_size: 1
            # Re-sample docs each timestep.
            num_docs_in_db: 100
            # Re-sample users each episode.
            num_users_in_db: 100
            # User time budget (determines the lengths of episodes).
            user_time_budget: 60.0
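            # (Added note, not part of the original config: RLlib passes this
            # env_config dict to the env's constructor, so the keys above are
            # assumed to match the parameters RecommSys001 expects.)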

        # Larger networks seem to help (large obs/action spaces).
        model:
            fcnet_hiddens: [256, 256]

        # Larger batch sizes seem to help (more stability, even with higher lr).
        #train_batch_size: 32

        #num_workers: 2
        #num_gpus: 0

        #lr_choice_model: 0.002
        #lr_q_model: 0.002

        #target_network_update_freq: 500
        #tau: 1.0

        # Evaluation settings.
        evaluation_interval: 1
        evaluation_num_workers: 4
        evaluation_duration: 200
        evaluation_duration_unit: episodes
        evaluation_parallel_to_training: true
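
# --- Usage sketch (added note, not part of the original tuned example) ---
# A minimal, hedged example of launching this experiment; it assumes the file is
# saved locally as recomm-sys001-ppo.yaml and uses the long-standing Tune API
# (exact entry points can differ between Ray versions):
#
#   import yaml
#   from ray import tune
#
#   # Load the experiment spec defined above.
#   exp = yaml.safe_load(open("recomm-sys001-ppo.yaml"))["recomm-sys001-ppo"]
#
#   # "run" names the algorithm (PPO); the "env" class path goes into the config.
#   tune.run(
#       exp["run"],
#       config=dict(exp["config"], env=exp["env"]),
#       stop=exp["stop"],
#   )
#
# The RLlib CLI can also consume the YAML directly:
#   rllib train -f recomm-sys001-ppo.yaml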