ray/rllib/examples/bare_metal_policy_with_custom_view_reqs.py


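"""Example of a "bare metal" Policy that defines its own view requirements.

Builds a custom Trainer around BareMetalPolicyWithCustomViewReqs and trains
it on CartPole-v0 via Tune. Run e.g. with:
    python bare_metal_policy_with_custom_view_reqs.py --local-mode
"""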
import argparse
import os

import ray
from ray.rllib.agents.trainer import Trainer
from ray.rllib.examples.policy.bare_metal_policy_with_custom_view_reqs import (
    BareMetalPolicyWithCustomViewReqs,
)
from ray import tune


def get_cli_args():
"""Create CLI parser and return parsed arguments"""
parser = argparse.ArgumentParser()
# general args
parser.add_argument(
"--run", default="PPO", help="The RLlib-registered algorithm to use."
)
parser.add_argument("--num-cpus", type=int, default=3)
parser.add_argument(
"--stop-iters", type=int, default=200, help="Number of iterations to train."
)
parser.add_argument(
"--stop-timesteps",
type=int,
default=100000,
help="Number of timesteps to train.",
)
parser.add_argument(
"--stop-reward",
type=float,
default=80.0,
help="Reward at which we stop training.",
)
parser.add_argument(
"--local-mode",
action="store_true",
help="Init Ray in local mode for easier debugging.",
)
args = parser.parse_args()
print(f"Running with following CLI args: {args}")
return args
if __name__ == "__main__":
    args = get_cli_args()

    ray.init(num_cpus=args.num_cpus or None, local_mode=args.local_mode)

    # Create a custom Trainer class using our custom Policy.
    class BareMetalPolicyTrainer(Trainer):
        def get_default_policy_class(self, config):
            return BareMetalPolicyWithCustomViewReqs

    config = {
        "env": "CartPole-v0",
        # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
        "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
        "model": {
            # Necessary to get the whole trajectory of `state_in_0` in the
            # sample batch.
            "max_seq_len": 1,
        },
        "num_workers": 1,
        # NOTE: Use `framework=None` to avoid loading tensorflow/pytorch;
        # the bare-metal policy does not need a deep-learning framework.
        "framework": None,
        "log_level": "DEBUG",
        "create_env_on_driver": True,
    }

    # Stopping criteria for the Tune run.
    stop = {
        "training_iteration": args.stop_iters,
        "timesteps_total": args.stop_timesteps,
        "episode_reward_mean": args.stop_reward,
    }

    # Train the Trainer with our policy.
    results = tune.run(BareMetalPolicyTrainer, config=config, stop=stop)
    print(results)