interest-evolution-recsim-env-slateq:
    env: ray.rllib.examples.env.recsim_recommender_system_envs.InterestEvolutionRecSimEnv
    run: SlateQ
    stop:
        episode_reward_mean: 200.0
        timesteps_total: 60000
    config:
        # SlateQ is only supported for torch so far.
        framework: torch

        # RLlib/RecSim wrapper-specific settings:
        env_config:
            config:
                # Let the env-internal document sampler model (a logic that
                # creates n documents to select the slate from) generate
                # `num_candidates` documents. With `resample_documents: false`,
                # this candidate set is sampled once (at reset) and not
                # re-sampled on each step.
                resample_documents: false
                num_candidates: 10
                # How many documents to recommend (out of `num_candidates`)
                # each timestep?
                slate_size: 2
                # Should the action space be purely Discrete? Useful for
                # algos that don't support MultiDiscrete action spaces
                # (e.g. DQN). SlateQ handles MultiDiscrete, so no
                # conversion is needed here.
                convert_to_discrete_action_space: false
                seed: 42

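        # Layer sizes for the dense Q-network stack(s). (Hedged: the exact
        # role of `hiddens` in SlateQ is assumed here from its DQN-family
        # usage in RLlib.)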
        hiddens: [256, 256]

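        # Run everything on the local worker only, without GPUs.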
        num_workers: 0
        num_gpus: 0

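        # Separate learning rates for the user-choice model and the Q-model.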
        lr_choice_model: 0.003
        lr_q_model: 0.003
        rollout_fragment_length: 4
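        # Anneal the exploration epsilon over the first 50k sampled timesteps.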
        exploration_config:
            epsilon_timesteps: 50000

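        # Update the target Q-network every 800 timesteps.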
        target_network_update_freq: 800
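
# A minimal sketch of how a tuned-example file like this can be run
# (assuming the `rllib` CLI that ships with Ray, and that this file is
# saved under the name below, which is an assumption):
#
#   rllib train -f interest-evolution-recsim-env-slateq.yaml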