# Tune experiment: SlateQ on the RecSim "interest evolution" environment.
# NOTE(review): the nesting below was reconstructed from a
# whitespace-collapsed source; confirm it against the consumer's schema.
interest-evolution-recsim-env-slateq:
  env: ray.rllib.examples.env.recsim_recommender_system_envs.InterestEvolutionRecSimEnv
  run: SlateQ

  # Stopping criteria for the experiment.
  stop:
    episode_reward_mean: 200.0
    timesteps_total: 60000

  config:
    # SlateQ only supported for torch so far.
    framework: torch

    # RLlib/RecSim wrapper specific settings:
    env_config:
      config:
        # Whether to re-sample the `num_candidates` candidate documents at
        # each step using the env-internal document sampler model (a logic
        # that creates n documents to select the slate from).
        resample_documents: false
        num_candidates: 10
        # How many documents to recommend (out of `num_candidates`) each
        # timestep?
        slate_size: 2
        # Should the action space be purely Discrete? Useful for algos that
        # don't support MultiDiscrete (e.g. DQN).
        # SlateQ handles MultiDiscrete action spaces.
        convert_to_discrete_action_space: false
        # NOTE(review): assumed to be the environment's seed (placed here
        # because it directly follows the env settings) - confirm it is not
        # meant to be the algorithm-level `seed`.
        seed: 42

    hiddens: [256, 256]

    num_workers: 0
    num_gpus: 0

    lr_choice_model: 0.003
    lr_q_model: 0.003
    rollout_fragment_length: 4
    exploration_config:
      epsilon_timesteps: 50000
    target_network_update_freq: 800