[rllib] Added missing action clipping for rollout example script (#4413)
* Added action clipping for rollout example script
* Used action clipping from sampler
* Fixed and improved naming
commit b21c20c9a6
parent 59d74d5e92
2 changed files with 32 additions and 27 deletions
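In short: the sampler's module-private _clip_actions helper becomes the public clip_action, and the rollout example script reuses it, so example rollouts clip continuous actions to the environment's action-space bounds the same way the sampler does during training. A minimal sketch of the helper's behavior (assuming gym and numpy are installed; the actual definition is in the first hunk below):

import gym
import numpy as np

from ray.rllib.evaluation.sampler import clip_action

# Box actions are clipped elementwise into [low, high].
box = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,))
print(clip_action(np.array([2.5, -3.0]), box))  # [ 1. -1.]

# Tuple spaces are clipped recursively; non-Box members (e.g. Discrete)
# pass through unchanged.
tup = gym.spaces.Tuple([box, gym.spaces.Discrete(4)])
print(clip_action([np.array([9.0, 0.5]), 3], tup))  # [array([1. , 0.5]), 3]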
ray/rllib/evaluation/sampler.py

@@ -200,6 +200,31 @@ class AsyncSampler(threading.Thread, SamplerInput):
         return extra
 
 
+def clip_action(action, space):
+    """Called to clip actions to the specified range of this policy.
+
+    Arguments:
+        action: Single action.
+        space: Action space the actions should be present in.
+
+    Returns:
+        Clipped batch of actions.
+    """
+
+    if isinstance(space, gym.spaces.Box):
+        return np.clip(action, space.low, space.high)
+    elif isinstance(space, gym.spaces.Tuple):
+        if type(action) not in (tuple, list):
+            raise ValueError("Expected tuple space for actions {}: {}".format(
+                action, space))
+        out = []
+        for a, s in zip(action, space.spaces):
+            out.append(clip_action(a, s))
+        return out
+    else:
+        return action
+
+
 def _env_runner(base_env,
                 extra_batch_callback,
                 policies,
@@ -526,7 +551,7 @@ def _process_policy_eval_results(to_eval, eval_results, active_episodes,
         env_id = eval_data[i].env_id
         agent_id = eval_data[i].agent_id
         if clip_actions:
-            actions_to_send[env_id][agent_id] = _clip_actions(
+            actions_to_send[env_id][agent_id] = clip_action(
                 action, policy.action_space)
         else:
             actions_to_send[env_id][agent_id] = action
@@ -563,31 +588,6 @@ def _fetch_atari_metrics(base_env):
     return atari_out
 
 
-def _clip_actions(actions, space):
-    """Called to clip actions to the specified range of this policy.
-
-    Arguments:
-        actions: Single action.
-        space: Action space the actions should be present in.
-
-    Returns:
-        Clipped batch of actions.
-    """
-
-    if isinstance(space, gym.spaces.Box):
-        return np.clip(actions, space.low, space.high)
-    elif isinstance(space, gym.spaces.Tuple):
-        if type(actions) not in (tuple, list):
-            raise ValueError("Expected tuple space for actions {}: {}".format(
-                actions, space))
-        out = []
-        for a, s in zip(actions, space.spaces):
-            out.append(_clip_actions(a, s))
-        return out
-    else:
-        return actions
-
-
 def _unbatch_tuple_actions(action_batch):
     # convert list of batches -> batch of lists
     if isinstance(action_batch, TupleActions):
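One behavioral detail the renamed helper preserves: for a Tuple space, passing anything other than a tuple or list raises a ValueError rather than silently passing the action through. A quick illustration (a sketch, assuming gym and numpy):

import gym
import numpy as np

from ray.rllib.evaluation.sampler import clip_action

space = gym.spaces.Tuple([gym.spaces.Box(low=-1.0, high=1.0, shape=(1,))])
try:
    clip_action(np.array([5.0]), space)  # an ndarray, not a tuple/list
except ValueError as err:
    print(err)  # Expected tuple space for actions [5.]: ...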
ray/rllib/rollout.py

@@ -12,6 +12,7 @@ import pickle
 import gym
 import ray
 from ray.rllib.agents.registry import get_agent_class
+from ray.rllib.evaluation.sampler import clip_action
 from ray.tune.util import merge_dicts
 
 EXAMPLE_USAGE = """
@@ -153,7 +154,11 @@ def rollout(agent, env_name, num_steps, out=None, no_render=True):
         else:
             action = agent.compute_action(state)
 
-        next_state, reward, done, _ = env.step(action)
+        if agent.config["clip_actions"]:
+            clipped_action = clip_action(action, env.action_space)
+            next_state, reward, done, _ = env.step(clipped_action)
+        else:
+            next_state, reward, done, _ = env.step(action)
 
         if multiagent:
             done = done["__all__"]
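Why the rollout change matters: a continuous-control policy (e.g. a Gaussian action head) can emit actions outside the Box bounds an environment expects, and env.step may then error or misbehave. With clip_actions enabled in the agent's config, the script now clips before stepping, matching training-time behavior. A standalone illustration (a sketch assuming the classic gym Pendulum-v0 of this era, whose torque is bounded to [-2, 2]):

import gym
import numpy as np

env = gym.make("Pendulum-v0")
raw_action = np.array([3.7], dtype=np.float32)  # e.g. sampled from an unbounded Gaussian

# Same elementwise clipping that clip_action applies for Box spaces.
clipped = np.clip(raw_action, env.action_space.low, env.action_space.high)
print(raw_action, "->", clipped)  # [3.7] -> [2.]

env.reset()
next_state, reward, done, _ = env.step(clipped)  # now within the env's bounds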