# ray/rllib/examples/env/tests/test_wrappers.py

import random
import numpy as np
from ray.rllib.examples.env.coin_game_non_vectorized_env import \
    CoinGame, AsymCoinGame
from ray.rllib.env.wrappers.uncertainty_wrappers import \
    add_RewardUncertaintyEnvClassWrapper
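

# Illustrative sketch (an assumption about behavior, not the actual RLlib
# implementation): the assertions in the test below only make sense if
# add_RewardUncertaintyEnvClassWrapper returns a subclass of the given env
# class whose step() adds i.i.d. Gaussian noise
# N(reward_uncertainty_mean, reward_uncertainty_std) to each player's reward.
# Such a wrapper could look roughly like this (unused helper, for reference):
def _add_reward_noise_sketch(env_class, reward_std, reward_mean=0.0):
    class NoisyRewardEnv(env_class):
        def step(self, actions):
            obs, rewards, done, info = super().step(actions)
            # Perturb each player's reward with Gaussian noise.
            noisy = {
                player_id: r + np.random.normal(loc=reward_mean, scale=reward_std)
                for player_id, r in rewards.items()
            }
            return obs, noisy, done, info

    return NoisyRewardEnv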


def init_env(max_steps, env_class, seed=None, grid_size=3):
    config = {
        "max_steps": max_steps,
        "grid_size": grid_size,
    }
    env = env_class(config)
    env.seed(seed)
    return env


def test_add_RewardUncertaintyEnvClassWrapper():
    max_steps, grid_size = 20, 3
    n_steps = int(max_steps * 8.25)
    reward_uncertainty_mean, reward_uncertainty_std = 10, 1
    MyCoinGame = add_RewardUncertaintyEnvClassWrapper(
        CoinGame, reward_uncertainty_std, reward_uncertainty_mean)
    MyAsymCoinGame = add_RewardUncertaintyEnvClassWrapper(
        AsymCoinGame, reward_uncertainty_std, reward_uncertainty_mean)
    # Pass grid_size by keyword: the third positional parameter of init_env is
    # seed, not grid_size.
    coin_game = init_env(max_steps, MyCoinGame, grid_size=grid_size)
    asymm_coin_game = init_env(max_steps, MyAsymCoinGame, grid_size=grid_size)
    all_rewards = []
    for env in [coin_game, asymm_coin_game]:
        _ = env.reset()
        step_i = 0
        for _ in range(n_steps):
            step_i += 1
            # Sample a random action for each player.
            actions = {
                policy_id: random.randint(0, env.NUM_ACTIONS - 1)
                for policy_id in env.players_ids
            }
            obs, reward, done, info = env.step(actions)
            print("reward", reward)
            all_rewards.append(reward[env.player_red_id])
            all_rewards.append(reward[env.player_blue_id])
            if done["__all__"]:
                _ = env.reset()
                step_i = 0
    # With many random steps, the empirical reward statistics should land near
    # the configured noise parameters (assuming the unwrapped rewards are small
    # on average under random play).
    assert np.array(all_rewards).mean() > reward_uncertainty_mean - 1.0
    assert np.array(all_rewards).mean() < reward_uncertainty_mean + 1.0
    assert np.array(all_rewards).std() > reward_uncertainty_std - 0.1
    assert np.array(all_rewards).std() < reward_uncertainty_std + 0.1
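

if __name__ == "__main__":
    # Convenience entry point so the file can be run directly as a script; a
    # common pattern in RLlib test files (assumes pytest is installed).
    import sys

    import pytest

    sys.exit(pytest.main(["-v", __file__]))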