##########
# Contribution by the Center on Long-Term Risk:
# https://github.com/longtermrisk/marltoolbox
##########
import numpy as np


def add_RewardUncertaintyEnvClassWrapper(
    EnvClass, reward_uncertainty_std, reward_uncertainty_mean=0.0
):
    """Return a subclass of EnvClass whose per-agent rewards are perturbed
    with i.i.d. Gaussian noise of the given mean and standard deviation."""

    class RewardUncertaintyEnvClassWrapper(EnvClass):
        def step(self, action):
            # Step the wrapped environment, then add noise to every reward.
            observations, rewards, done, info = super().step(action)
            return observations, self.reward_wrapper(rewards), done, info

        def reward_wrapper(self, reward_dict):
            # Add Gaussian noise to each agent's reward in place.
            for k in reward_dict.keys():
                reward_dict[k] += np.random.normal(
                    loc=reward_uncertainty_mean, scale=reward_uncertainty_std, size=()
                )
            return reward_dict

    return RewardUncertaintyEnvClassWrapper
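

# A minimal usage sketch, not part of the original file: DummyMultiAgentEnv
# and the agent ids below are hypothetical, chosen only to show how the
# factory is applied. Guarded so importing this module has no side effects.
if __name__ == "__main__":

    class DummyMultiAgentEnv:
        # Hypothetical two-agent environment with deterministic rewards.
        def step(self, action):
            return {"obs": 0}, {"agent_0": 1.0, "agent_1": -1.0}, False, {}

    NoisyEnv = add_RewardUncertaintyEnvClassWrapper(
        DummyMultiAgentEnv, reward_uncertainty_std=0.1
    )
    rewards = NoisyEnv().step(action=None)[1]
    # Each reward now equals its deterministic value plus N(0.0, 0.1) noise.
    print(rewards)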