interest-evolution-recsim-env-slateq:
    env: ray.rllib.examples.env.recommender_system_envs_with_recsim.InterestEvolutionRecSimEnv
    run: SlateQ
    stop:
        episode_reward_mean: 160.0
        timesteps_total: 100000
    config:
        framework: tf

        # RLlib/RecSim wrapper specific settings:
        env_config:
            # Env class specified above takes one `config` arg in its c'tor:
            config:
                # Each step, sample `num_candidates` documents using the env-internal
                # document sampler model (the logic that creates n documents to select
                # the slate from).
                resample_documents: true
                num_candidates: 10
                # How many documents to recommend (out of `num_candidates`) each
                # timestep?
                slate_size: 2
                # Should the action space be purely Discrete? Useful for algos that
                # don't support MultiDiscrete (e.g. DQN or Bandits).
                # SlateQ handles MultiDiscrete action spaces.
                convert_to_discrete_action_space: false
                seed: 0

        # Fake 2 GPUs.
        num_gpus: 2
        _fake_gpus: true

        exploration_config:
            warmup_timesteps: 10000
            epsilon_timesteps: 25000

        replay_buffer_config:
            capacity: 100000

        # Double learning rate and batch size.
        lr: 0.002
        train_batch_size: 64

        learning_starts: 10000
        target_network_update_freq: 3200

        metrics_num_episodes_for_smoothing: 200
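
# Usage note (an assumption added for illustration, not part of the original tuned
# example): experiment files in this format are typically launched through the RLlib
# CLI, e.g. `rllib train -f <path-to-this-file>` on Ray versions whose `rllib train`
# command accepts a YAML experiment file via `-f/--config-file`. Replace
# `<path-to-this-file>` with the actual location of this file.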