parametric-item-reco-env-slateq:
    env: ray.rllib.examples.env.bandit_envs_recommender_system.ParametricItemRecoEnv
    run: SlateQ
    stop:
        #evaluation/episode_reward_mean: 48.0
        timesteps_total: 200000
    config:
        # SlateQ only supported for torch so far.
        framework: torch

        metrics_num_episodes_for_smoothing: 200

        exploration_config:
            temperature: 0.7

        # Env c'tor kwargs:
        env_config:
            config:
                slate_q: true
                num_users: 50
                num_items: 1000
                num_candidates: 50
                slate_size: 1
                feature_dim: 16

        grad_clip: 10.0
        #double_q: false
        #slateq_strategy: MYOP

        # Larger networks seem to help (large obs/action spaces).
        hiddens: [512, 512]

        # Larger batch sizes seem to help (more stability, even with higher lr).
        train_batch_size: 64

        num_workers: 0
        num_gpus: 0

        lr_choice_model: 0.01
        lr_q_model: 0.01

        target_network_update_freq: 500
        tau: 1.0

        # Evaluation settings.
        evaluation_interval: 1
        evaluation_num_workers: 4
        evaluation_duration: 200
        evaluation_duration_unit: episodes
        evaluation_parallel_to_training: true
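
# Usage sketch (not part of the original file): a tuned-example YAML like this
# is typically launched with RLlib's CLI, e.g.
#
#   rllib train -f parametric-item-reco-env-slateq.yaml
#
# The filename above is a placeholder for wherever this file is saved, and the
# exact CLI syntax varies between Ray versions (newer releases use
# `rllib train file <path>` instead of the `-f` flag).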