ray/rllib/tuned_examples/slateq/interest-evolution-10-candidates-recsim-env-slateq-fake-gpus.yaml

interest-evolution-recsim-env-slateq:
    env: ray.rllib.examples.env.recommender_system_envs_with_recsim.InterestEvolutionRecSimEnv
    run: SlateQ
    stop:
        episode_reward_mean: 160.0
        timesteps_total: 100000
    config:
        framework: tf

        # RLlib/RecSim wrapper-specific settings:
        env_config:
            # The env class specified above takes one `config` arg in its c'tor:
            config:
                # Each step, sample `num_candidates` documents using the env-internal
                # document sampler model (the logic that creates the n documents to
                # select the slate from).
                resample_documents: true
                num_candidates: 10
                # How many documents to recommend (out of `num_candidates`) each
                # timestep?
                slate_size: 2
                # Should the action space be purely Discrete? Useful for algos that
                # don't support MultiDiscrete (e.g. DQN or Bandits).
                # SlateQ handles MultiDiscrete action spaces.
                convert_to_discrete_action_space: false
                seed: 0

        # Fake 2 GPUs.
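        # (With `_fake_gpus: true`, RLlib's multi-GPU code path runs on CPU while
        # pretending `num_gpus` devices exist; useful for testing the multi-GPU
        # setup without real GPUs.)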
        num_gpus: 2
        _fake_gpus: true
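
        # SlateQ's default (slate) epsilon-greedy exploration: epsilon is held at
        # its initial value for `warmup_timesteps` sampled steps, then annealed
        # toward its final value over `epsilon_timesteps`.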
        exploration_config:
            warmup_timesteps: 10000
            epsilon_timesteps: 25000
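
        # Replay buffer capacity (in sampled env timesteps).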
        replay_buffer_config:
            capacity: 100000

        # Double the learning rate and train batch size (matching the 2 fake GPUs).
        lr: 0.002
        train_batch_size: 64
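
        # Do not start learning/updating until this many timesteps have been
        # sampled (matches the exploration warm-up above).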
        learning_starts: 10000
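        # How often (in sampled timesteps) to sync the target Q-network with the
        # main network.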
        target_network_update_freq: 3200
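        # Average reported episode metrics (e.g. `episode_reward_mean`) over the
        # last 200 episodes.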
        metrics_num_episodes_for_smoothing: 200
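
# Usage sketch (exact CLI syntax may differ between Ray versions; requires RLlib
# and Google's RecSim, e.g. `pip install "ray[rllib]" recsim`):
#   rllib train -f interest-evolution-10-candidates-recsim-env-slateq-fake-gpus.yaml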