interest-evolution-recsim-env-slateq:
    env: ray.rllib.examples.env.recsim_recommender_system_envs.InterestEvolutionRecSimEnv
    run: SlateQ
    stop:
        episode_reward_mean: 200.0
        timesteps_total: 60000
    config:
        # SlateQ is only supported for torch so far.
        framework: torch

        # RLlib/RecSim wrapper-specific settings:
        env_config:
            config:
                # Let the env-internal document sampler model (a logic that
                # creates n documents to select the slate from) generate
                # `num_candidates` documents. With `resample_documents: false`,
                # this candidate set is sampled once (at reset) and not
                # re-sampled on each step.
                resample_documents: false
                num_candidates: 10
                # How many documents to recommend (out of `num_candidates`)
                # each timestep?
                slate_size: 2
                # Should the action space be purely Discrete? Useful for
                # algos that don't support MultiDiscrete action spaces
                # (e.g. DQN). SlateQ handles MultiDiscrete, so no
                # conversion is needed here.
                convert_to_discrete_action_space: false
                seed: 42

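        # Layer sizes for the dense Q-network stack(s). (Hedged: the exact
        # role of `hiddens` in SlateQ is assumed here from its DQN-family
        # usage in RLlib.)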
        hiddens: [256, 256]

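        # Run everything on the local worker only, without GPUs.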
        num_workers: 0
        num_gpus: 0

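        # Separate learning rates for the user-choice model and the Q-model.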
        lr_choice_model: 0.003
        lr_q_model: 0.003
        rollout_fragment_length: 4
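        # Anneal the exploration epsilon over the first 50k sampled timesteps.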
        exploration_config:
            epsilon_timesteps: 50000

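        # Update the target Q-network every 800 timesteps.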
        target_network_update_freq: 800
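
# A minimal sketch of how a tuned-example file like this can be run
# (assuming the `rllib` CLI that ships with Ray, and that this file is
# saved under the name below, which is an assumption):
#
#   rllib train -f interest-evolution-recsim-env-slateq.yaml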