"""Example of testing, whether RLlib can still learn with a certain config.

Can be used with git bisect to find the faulty commit responsible for a
learning failure. The script raises an error (and thus exits with a non-zero
status) if the given reward is not reached within the stopping criteria
(training iterations or timesteps), which lets git bisect classify each
commit as good or bad and find the faulty one.

Run as follows using a simple command line config:
$ python debug_learning_failure_git_bisect.py --config '{...}'
    --env CartPole-v0 --run PPO --stop-reward=180 --stop-iters=100

With a yaml file:
$ python debug_learning_failure_git_bisect.py -f [yaml file] --stop-reward=180
    --stop-iters=100

Within git bisect:
$ git bisect start
$ git bisect bad
$ git bisect good [some previous commit we know was good]
$ git bisect run python debug_learning_failure_git_bisect.py [... options]
"""
import argparse
import json
import yaml

import ray
from ray import tune
from ray.rllib.utils.test_utils import check_learning_achieved

# Command line interface of the script. The experiment is described either
# by a yaml file (-f) or by a JSON config string (--config) plus --env.
parser = argparse.ArgumentParser(
    description="Train an RLlib algorithm and fail if a given mean episode "
    "reward is not reached (usable with `git bisect run`).")
parser.add_argument(
    "--run",
    type=str,
    default=None,
    help="Algorithm to train, e.g. PPO. Overrides the `run` entry of the "
    "yaml file given via -f.")
parser.add_argument(
    "--torch",
    action="store_true",
    help="Set the config's `framework` to `torch`.")
parser.add_argument(
    "--stop-iters",
    type=int,
    default=None,
    help="Stop criterion: number of training iterations.")
parser.add_argument(
    "--stop-timesteps",
    type=int,
    default=None,
    help="Stop criterion: total number of timesteps.")
parser.add_argument(
    "--stop-reward",
    type=float,
    default=None,
    help="Stop criterion and success threshold: the mean episode reward "
    "that has to be reached.")
parser.add_argument(
    "-f",
    type=str,
    default=None,
    help="Path to a yaml experiment file. Only the first experiment in the "
    "file is used. Takes precedence over --config/--env.")
parser.add_argument(
    "--config",
    type=str,
    default=None,
    help="Algorithm config as a JSON string. Only used if -f is not given.")
parser.add_argument(
    "--env",
    type=str,
    default=None,
    help="Environment to train on, e.g. CartPole-v0. Needed together with "
    "--config; ignored if -f is given (the yaml file's `env` is used).")

def load_experiment_from_yaml(path):
    """Read the first experiment of a yaml experiment file.

    Args:
        path: Path to a yaml file mapping experiment names to experiment
            specs (dicts with optional `run`, `env` and `config` entries).

    Returns:
        Tuple `(run, config)`: the spec's `run` entry (None if absent) and
        its `config` dict, with `config["env"]` set to the spec's `env`.

    Raises:
        ValueError: If the file does not contain a non-empty mapping.
    """
    with open(path, "r") as fp:
        # `yaml.load()` without an explicit Loader is deprecated and rejected
        # by recent PyYAML releases. `safe_load` works across versions and
        # does not construct arbitrary Python objects from the file.
        experiments = yaml.safe_load(fp)
    if not isinstance(experiments, dict) or not experiments:
        raise ValueError(
            "yaml file {!r} does not define any experiment".format(path))
    # Only the first experiment listed in the file is used.
    experiment = experiments[next(iter(experiments))]
    # `or {}` also covers an explicitly empty `config:` entry.
    config = experiment.get("config") or {}
    config["env"] = experiment.get("env")
    return experiment.get("run"), config


def config_from_json(config_json, env):
    """Build the algorithm config from the --config/--env options.

    Args:
        config_json: JSON string describing the config dict, or None/empty
            for an empty config.
        env: Name of the environment to train on (mandatory).

    Returns:
        The decoded config dict with `config["env"]` set to `env`.

    Raises:
        ValueError: If `env` is missing, or if `config_json` is not valid
            JSON (json.JSONDecodeError) or does not decode to a dict.
    """
    if not env:
        raise ValueError("--env is required if no yaml file (-f) is given")
    config = json.loads(config_json) if config_json else {}
    if not isinstance(config, dict):
        raise ValueError("--config must be a JSON object (dict)")
    config["env"] = env
    return config


def build_stop_criteria(stop_iters=None, stop_timesteps=None,
                        stop_reward=None):
    """Translate the --stop-* options into a stop dict for `tune.run`.

    Values are compared against None (not tested for truthiness), so that
    legitimate zero values such as `--stop-reward=0` are not dropped.

    Returns:
        Dict containing only the criteria that were actually given.
    """
    criteria = {
        "training_iteration": stop_iters,
        "timesteps_total": stop_timesteps,
        "episode_reward_mean": stop_reward,
    }
    return {key: val for key, val in criteria.items() if val is not None}


def main():
    """Run the experiment and fail if --stop-reward was not reached."""
    args = parser.parse_args()

    # The pass/fail verdict is defined by --stop-reward, so reject a missing
    # value up front rather than after a full training run.
    if args.stop_reward is None:
        parser.error("--stop-reward is required")

    run = None
    try:
        if args.f:
            # Explicit yaml config file.
            run, config = load_experiment_from_yaml(args.f)
        else:
            # JSON string on command line.
            config = config_from_json(args.config, args.env)
    except ValueError as err:
        # Bad user input: report it as a usage error (exits the script).
        parser.error(str(err))

    # Explicit run (overrides the yaml file's `run`).
    if args.run:
        run = args.run
    if not run:
        parser.error(
            "no algorithm given: use --run or a `run` entry in the yaml file")

    # Explicit --torch framework; otherwise keep the config's framework and
    # fall back to "tf" if none is specified.
    if args.torch:
        config["framework"] = "torch"
    else:
        config.setdefault("framework", "tf")

    stop = build_stop_criteria(args.stop_iters, args.stop_timesteps,
                               args.stop_reward)

    ray.init()
    try:
        results = tune.run(run, stop=stop, config=config)
        # Expected to raise if the reward was not reached; the error
        # propagates so that the script exits with a non-zero status.
        check_learning_achieved(results, args.stop_reward)
    finally:
        # Shut ray down even if training or the check failed.
        ray.shutdown()


if __name__ == "__main__":
    main()