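"""Smoke test for add_RewardUncertaintyEnvClassWrapper.

The wrapper takes an env class and returns a subclass whose rewards have
random noise with the given std and mean added to them. This test wraps
CoinGame and AsymCoinGame, rolls out random actions, and checks that the
empirical reward statistics match the configured noise parameters.
"""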
import random

import numpy as np

from ray.rllib.examples.env.coin_game_non_vectorized_env import CoinGame, AsymCoinGame
from ray.rllib.env.wrappers.uncertainty_wrappers import (
    add_RewardUncertaintyEnvClassWrapper,
)

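# Build and seed an env instance from a minimal {max_steps, grid_size} config.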
def init_env(max_steps, env_class, seed=None, grid_size=3):
    config = {
        "max_steps": max_steps,
        "grid_size": grid_size,
    }
    env = env_class(config)
    env.seed(seed)
    return env


def test_add_RewardUncertaintyEnvClassWrapper():
    max_steps, grid_size = 20, 3
    n_steps = int(max_steps * 8.25)
    reward_uncertainty_mean, reward_uncertainty_std = 10, 1
    MyCoinGame = add_RewardUncertaintyEnvClassWrapper(
        CoinGame, reward_uncertainty_std, reward_uncertainty_mean
    )
    MyAsymCoinGame = add_RewardUncertaintyEnvClassWrapper(
        AsymCoinGame, reward_uncertainty_std, reward_uncertainty_mean
    )
    # Pass grid_size as a keyword argument; passed positionally it would be
    # consumed by init_env's seed parameter instead.
    coin_game = init_env(max_steps, MyCoinGame, grid_size=grid_size)
    asymm_coin_game = init_env(max_steps, MyAsymCoinGame, grid_size=grid_size)
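
    # Roll out random actions in both wrapped envs, collecting every player's
    # reward at each step.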
    all_rewards = []
    for env in [coin_game, asymm_coin_game]:
        _ = env.reset()

        step_i = 0
        for _ in range(n_steps):
            step_i += 1
            actions = {
                policy_id: random.randint(0, env.NUM_ACTIONS - 1)
                for policy_id in env.players_ids
            }
            obs, reward, done, info = env.step(actions)
            print("reward", reward)
            all_rewards.append(reward[env.player_red_id])
            all_rewards.append(reward[env.player_blue_id])

            if done["__all__"]:
                _ = env.reset()
                step_i = 0
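
    # The wrapper adds noise with the configured mean and std to every reward,
    # so the empirical statistics of the collected rewards should stay close
    # to those parameters.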
    assert np.array(all_rewards).mean() > reward_uncertainty_mean - 1.0
    assert np.array(all_rewards).mean() < reward_uncertainty_mean + 1.0

    assert np.array(all_rewards).std() > reward_uncertainty_std - 0.1
    assert np.array(all_rewards).std() < reward_uncertainty_std + 0.1
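

if __name__ == "__main__":
    # Allow running this file as a standalone script, outside of pytest.
    test_add_RewardUncertaintyEnvClassWrapper()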