# File: ray/rllib/tuned_examples/slateq/long-term-satisfaction-recsim-env-slateq.yaml
long-term-satisfaction-recsim-env-slateq:
  env: ray.rllib.examples.env.recsim_recommender_system_envs.LongTermSatisfactionRecSimEnv
  run: SlateQ
  stop:
    episode_reward_mean: 2000.0
    timesteps_total: 100000
  config:
    # SlateQ only supported for torch so far.
    framework: torch
    # RLlib/RecSim wrapper specific settings:
    env_config:
      config:
        # Each step, sample `num_candidates` documents using the env-internal
        # document sampler model (a logic that creates n documents to select
        # the slate from).
        resample_documents: true
        num_candidates: 25
        # How many documents to recommend (out of `num_candidates`) each
        # timestep?
        slate_size: 2
        # Should the action space be purely Discrete? Useful for algos that
        # don't support MultiDiscrete (e.g. DQN).
        # SlateQ handles MultiDiscrete action spaces.
        convert_to_discrete_action_space: false
        # NOTE(review): `seed` nested under the RecSim env config to match the
        # sibling tuned example (interest-evolution-recsim-env-slateq.yaml);
        # confirm it was not meant as the top-level RLlib `seed`.
        seed: 42
    hiddens: [256, 256]
    num_workers: 0
    num_gpus: 0
    lr_choice_model: 0.003
    lr_q_model: 0.003
    rollout_fragment_length: 4
    # batch_mode: complete_episodes
    exploration_config:
      epsilon_timesteps: 50000
    target_network_update_freq: 800