[RLlib] CQL: Bug fixes and OPE example added to test and offline_rl.py example. (#15761)

This commit is contained in:
Sven Mika 2021-05-13 09:17:23 +02:00 committed by GitHub
parent 78d0ed3503
commit c4a3e1589b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 149 additions and 90 deletions

View file

@ -2224,7 +2224,7 @@ py_test(
name = "examples/parametric_actions_cartpole_pg_torch", name = "examples/parametric_actions_cartpole_pg_torch",
main = "examples/parametric_actions_cartpole.py", main = "examples/parametric_actions_cartpole.py",
tags = ["examples", "examples_P"], tags = ["examples", "examples_P"],
size = "small", size = "medium",
srcs = ["examples/parametric_actions_cartpole.py"], srcs = ["examples/parametric_actions_cartpole.py"],
args = ["--as-test", "--torch", "--stop-reward=60.0", "--run=PG"] args = ["--as-test", "--torch", "--stop-reward=60.0", "--run=PG"]
) )

View file

@ -14,6 +14,7 @@ from ray.rllib.agents.sac.sac_torch_policy import _get_dist_class, stats, \
build_sac_model_and_action_dist, optimizer_fn, ComputeTDErrorMixin, \ build_sac_model_and_action_dist, optimizer_fn, ComputeTDErrorMixin, \
TargetNetworkMixin, setup_late_mixins, action_distribution_fn TargetNetworkMixin, setup_late_mixins, action_distribution_fn
from ray.rllib.models.torch.torch_action_dist import TorchDistributionWrapper from ray.rllib.models.torch.torch_action_dist import TorchDistributionWrapper
from ray.rllib.policy.policy import LEARNER_STATS_KEY
from ray.rllib.policy.policy_template import build_policy_class from ray.rllib.policy.policy_template import build_policy_class
from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.utils.numpy import SMALL_NUMBER, MIN_LOG_NN_OUTPUT, \ from ray.rllib.utils.numpy import SMALL_NUMBER, MIN_LOG_NN_OUTPUT, \
@ -335,7 +336,11 @@ def compute_gradients_fn(policy, postprocessed_batch):
batches = [policy._lazy_tensor_dict(postprocessed_batch)] batches = [policy._lazy_tensor_dict(postprocessed_batch)]
model = policy.model model = policy.model
policy._loss(policy, model, policy.dist_class, batches[0]) policy._loss(policy, model, policy.dist_class, batches[0])
return [None, dict()] stats = {
LEARNER_STATS_KEY: policy._convert_to_non_torch_type(
cql_stats(policy, batches[0]))
}
return [None, stats]
def apply_gradients_fn(policy, gradients): def apply_gradients_fn(policy, gradients):

View file

@ -4,9 +4,12 @@ import unittest
import ray import ray
import ray.rllib.agents.cql as cql import ray.rllib.agents.cql as cql
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.test_utils import check_compute_single_action, \ from ray.rllib.utils.test_utils import check_compute_single_action, \
framework_iterator framework_iterator
torch, _ = try_import_torch()
class TestCQL(unittest.TestCase): class TestCQL(unittest.TestCase):
@classmethod @classmethod
@ -43,14 +46,43 @@ class TestCQL(unittest.TestCase):
config["rollout_fragment_length"] = 1 config["rollout_fragment_length"] = 1
config["train_batch_size"] = 10 config["train_batch_size"] = 10
# Switch on off-policy evaluation.
config["input_evaluation"] = ["is"]
num_iterations = 2 num_iterations = 2
# Test for torch framework (tf not implemented yet). # Test for torch framework (tf not implemented yet).
for _ in framework_iterator(config, frameworks=("torch")): for _ in framework_iterator(config, frameworks=("torch")):
trainer = cql.CQLTrainer(config=config) trainer = cql.CQLTrainer(config=config)
for i in range(num_iterations): for i in range(num_iterations):
trainer.train() print(trainer.train())
check_compute_single_action(trainer) check_compute_single_action(trainer)
# Get policy, model, and replay-buffer.
pol = trainer.get_policy()
cql_model = pol.model
from ray.rllib.agents.cql.cql import replay_buffer
# Example on how to do evaluation on the trained Trainer
# using the data from our buffer.
# Get a sample (MultiAgentBatch -> SampleBatch).
batch = replay_buffer.replay().policy_batches["default_policy"]
obs = torch.from_numpy(batch["obs"])
# Pass the observations through our model to get the
# features, which are then passed through the Q-head.
model_out, _ = cql_model({"obs": obs})
# The estimated Q-values from the (historic) actions in the batch.
q_values_old = cql_model.get_q_values(
model_out, torch.from_numpy(batch["actions"]))
# The estimated Q-values for the new actions computed
# by our trainer policy.
actions_new = pol.compute_actions_from_input_dict({"obs": obs})[0]
q_values_new = cql_model.get_q_values(
model_out, torch.from_numpy(actions_new))
print(f"Q-val batch={q_values_old}")
print(f"Q-val policy={q_values_new}")
trainer.stop() trainer.stop()

View file

@ -13,7 +13,7 @@ from ray.rllib.execution.metric_ops import StandardMetricsReporting
DEFAULT_CONFIG = with_common_config({ DEFAULT_CONFIG = with_common_config({
# You should override this to point to an offline dataset (see agent.py). # You should override this to point to an offline dataset (see agent.py).
"input": "sampler", "input": "sampler",
# Use importance sampling estimators for reward # Use importance sampling estimators for reward.
"input_evaluation": ["is", "wis"], "input_evaluation": ["is", "wis"],
# If true, use the Generalized Advantage Estimator (GAE) # If true, use the Generalized Advantage Estimator (GAE)

View file

@ -57,7 +57,7 @@ class TestMARWIL(unittest.TestCase):
for i in range(num_iterations): for i in range(num_iterations):
eval_results = trainer.train().get("evaluation") eval_results = trainer.train().get("evaluation")
if eval_results: if eval_results:
print("iter={} R={}".format( print("iter={} R={} ".format(
i, eval_results["episode_reward_mean"])) i, eval_results["episode_reward_mean"]))
# Learn until some reward is reached on an actual live env. # Learn until some reward is reached on an actual live env.
if eval_results["episode_reward_mean"] > min_reward: if eval_results["episode_reward_mean"] > min_reward:

View file

@ -80,7 +80,7 @@ if __name__ == "__main__":
# Check, whether we can learn from the given file in `num_iterations` # Check, whether we can learn from the given file in `num_iterations`
# iterations, up to a reward of `min_reward`. # iterations, up to a reward of `min_reward`.
num_iterations = 50 num_iterations = 5
min_reward = -300 min_reward = -300
# Test for torch framework (tf not implemented yet). # Test for torch framework (tf not implemented yet).
@ -99,17 +99,39 @@ if __name__ == "__main__":
raise ValueError("CQLTrainer did not reach {} reward from expert " raise ValueError("CQLTrainer did not reach {} reward from expert "
"offline data!".format(min_reward)) "offline data!".format(min_reward))
# Get policy, model, and replay-buffer.
pol = trainer.get_policy()
cql_model = pol.model
from ray.rllib.agents.cql.cql import replay_buffer
# If you would like to query CQL's learnt Q-function for arbitrary # If you would like to query CQL's learnt Q-function for arbitrary
# (cont.) actions, do the following: # (cont.) actions, do the following:
obs_batch = torch.from_numpy(np.random.random(size=(5, 3))) obs_batch = torch.from_numpy(np.random.random(size=(5, 3)))
action_batch = torch.from_numpy(np.random.random(size=(5, 1))) action_batch = torch.from_numpy(np.random.random(size=(5, 1)))
q_values = cql_model.get_q_values(obs_batch, action_batch)
cql_model = trainer.get_policy().model
q_values = cql_model.get_q_values([obs_batch], [action_batch])
# If you are using the "twin_q", there'll be 2 Q-networks and # If you are using the "twin_q", there'll be 2 Q-networks and
# we usually consider the min of the 2 outputs, like so: # we usually consider the min of the 2 outputs, like so:
twin_q_values = cql_model.get_twin_q_values([obs_batch], [action_batch]) twin_q_values = cql_model.get_twin_q_values(obs_batch, action_batch)
final_q_values = torch.min(q_values, twin_q_values) final_q_values = torch.min(q_values, twin_q_values)
print(final_q_values) print(final_q_values)
# Example on how to do evaluation on the trained Trainer
# using the data from our buffer.
# Get a sample (MultiAgentBatch -> SampleBatch).
batch = replay_buffer.replay().policy_batches["default_policy"]
obs = torch.from_numpy(batch["obs"])
# Pass the observations through our model to get the
# features, which are then passed through the Q-head.
model_out, _ = cql_model({"obs": obs})
# The estimated Q-values from the (historic) actions in the batch.
q_values_old = cql_model.get_q_values(model_out,
torch.from_numpy(batch["actions"]))
# The estimated Q-values for the new actions computed
# by our trainer policy.
actions_new = pol.compute_actions_from_input_dict({"obs": obs})[0]
q_values_new = cql_model.get_q_values(model_out,
torch.from_numpy(actions_new))
print(f"Q-val batch={q_values_old}")
print(f"Q-val policy={q_values_new}")
trainer.stop() trainer.stop()

View file

@ -1,40 +1,40 @@
from ray.rllib.offline.off_policy_estimator import OffPolicyEstimator, \ from ray.rllib.offline.off_policy_estimator import OffPolicyEstimator, \
OffPolicyEstimate OffPolicyEstimate
from ray.rllib.utils.annotations import override from ray.rllib.utils.annotations import override
from ray.rllib.utils.typing import SampleBatchType from ray.rllib.utils.typing import SampleBatchType
class ImportanceSamplingEstimator(OffPolicyEstimator): class ImportanceSamplingEstimator(OffPolicyEstimator):
"""The step-wise IS estimator. """The step-wise IS estimator.
Step-wise IS estimator described in https://arxiv.org/pdf/1511.03722.pdf""" Step-wise IS estimator described in https://arxiv.org/pdf/1511.03722.pdf"""
@override(OffPolicyEstimator) @override(OffPolicyEstimator)
def estimate(self, batch: SampleBatchType) -> OffPolicyEstimate: def estimate(self, batch: SampleBatchType) -> OffPolicyEstimate:
self.check_can_estimate_for(batch) self.check_can_estimate_for(batch)
rewards, old_prob = batch["rewards"], batch["action_prob"] rewards, old_prob = batch["rewards"], batch["action_prob"]
new_prob = self.action_prob(batch) new_prob = self.action_prob(batch)
# calculate importance ratios # calculate importance ratios
p = [] p = []
for t in range(batch.count): for t in range(batch.count):
if t == 0: if t == 0:
pt_prev = 1.0 pt_prev = 1.0
else: else:
pt_prev = p[t - 1] pt_prev = p[t - 1]
p.append(pt_prev * new_prob[t] / old_prob[t]) p.append(pt_prev * new_prob[t] / old_prob[t])
# calculate stepwise IS estimate # calculate stepwise IS estimate
V_prev, V_step_IS = 0.0, 0.0 V_prev, V_step_IS = 0.0, 0.0
for t in range(batch.count): for t in range(batch.count):
V_prev += rewards[t] * self.gamma**t V_prev += rewards[t] * self.gamma**t
V_step_IS += p[t] * rewards[t] * self.gamma**t V_step_IS += p[t] * rewards[t] * self.gamma**t
estimation = OffPolicyEstimate( estimation = OffPolicyEstimate(
"is", { "is", {
"V_prev": V_prev, "V_prev": V_prev,
"V_step_IS": V_step_IS, "V_step_IS": V_step_IS,
"V_gain_est": V_step_IS / max(1e-8, V_prev), "V_gain_est": V_step_IS / max(1e-8, V_prev),
}) })
return estimation return estimation

View file

@ -1,40 +1,40 @@
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDBaDi/lZAxP2QkX76UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQA6NTq/dq0vP22RUD6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.3287242650985718]], "rewards": [-5.646285057067871], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [0], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDBaDi/lZAxP2QkX76UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQA6NTq/dq0vP22RUD6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.3287242650985718]], "rewards": [-5.646285057067871], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [0], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQA6NTq/dq0vP22RUD6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCankC/fJ8oPzirPj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.08813309669494629]], "rewards": [-5.693763256072998], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [1], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQA6NTq/dq0vP22RUD6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCankC/fJ8oPzirPj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.08813309669494629]], "rewards": [-5.693763256072998], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [1], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCankC/fJ8oPzirPj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAkpEy/wc8ZPzjOvj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.8395185470581055]], "rewards": [-5.9269185066223145], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [2], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCankC/fJ8oPzirPj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAkpEy/wc8ZPzjOvj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.8395185470581055]], "rewards": [-5.9269185066223145], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [2], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAkpEy/wc8ZPzjOvj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCvD1q/9hsGP/J47j+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.2607402801513672]], "rewards": [-6.457780838012695], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [3], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAkpEy/wc8ZPzjOvj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCvD1q/9hsGP/J47j+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.2607402801513672]], "rewards": [-6.457780838012695], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [3], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCvD1q/9hsGP/J47j+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAT92i/xUHUPi2dHkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.7412639856338501]], "rewards": [-7.058495044708252], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [4], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCvD1q/9hsGP/J47j+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAT92i/xUHUPi2dHkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.7412639856338501]], "rewards": [-7.058495044708252], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [4], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAT92i/xUHUPi2dHkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDZJna/WKWMPrT1PkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.6482666730880737]], "rewards": [-7.982394218444824], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [5], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAT92i/xUHUPi2dHkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDZJna/WKWMPrT1PkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.6482666730880737]], "rewards": [-7.982394218444824], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [5], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDZJna/WKWMPrT1PkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBnuX2/aDwIPodkOUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.9767004251480103]], "rewards": [-9.092668533325195], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [6], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDZJna/WKWMPrT1PkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBnuX2/aDwIPodkOUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.9767004251480103]], "rewards": [-9.092668533325195], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [6], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBnuX2/aDwIPodkOUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCQ/n+/HgvZu5tHM0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.6509912014007568]], "rewards": [-9.889808654785156], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [7], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBnuX2/aDwIPodkOUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCQ/n+/HgvZu5tHM0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.6509912014007568]], "rewards": [-9.889808654785156], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [7], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCQ/n+/HgvZu5tHM0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAR63y/EWkevkdVPkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.5922571420669556]], "rewards": [-10.614130973815918], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [8], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCQ/n+/HgvZu5tHM0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAR63y/EWkevkdVPkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.5922571420669556]], "rewards": [-10.614130973815918], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [8], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAR63y/EWkevkdVPkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBDYnW/K+qRvmAaK0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.6148225665092468]], "rewards": [-9.803768157958984], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [9], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAR63y/EWkevkdVPkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBDYnW/K+qRvmAaK0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.6148225665092468]], "rewards": [-9.803768157958984], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [9], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBDYnW/K+qRvmAaK0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDczmu/8E3HvuDfDUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.8098440170288086]], "rewards": [-8.854684829711914], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [10], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBDYnW/K+qRvmAaK0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDczmu/8E3HvuDfDUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.8098440170288086]], "rewards": [-8.854684829711914], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [10], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDczmu/8E3HvuDfDUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBvLmG/N43zvjqH9T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.022167325019836426]], "rewards": [-8.008651733398438], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [11], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDczmu/8E3HvuDfDUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBvLmG/N43zvjqH9T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.022167325019836426]], "rewards": [-8.008651733398438], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [11], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBvLmG/N43zvjqH9T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB/HFi/uDoJvyU5sz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.5374762415885925]], "rewards": [-7.369612216949463], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [12], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBvLmG/N43zvjqH9T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB/HFi/uDoJvyU5sz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.5374762415885925]], "rewards": [-7.369612216949463], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [12], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB/HFi/uDoJvyU5sz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+RFK/oQYSv606Uz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.576760470867157]], "rewards": [-6.832327842712402], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [13], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB/HFi/uDoJvyU5sz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+RFK/oQYSv606Uz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.576760470867157]], "rewards": [-6.832327842712402], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [13], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+RFK/oQYSv606Uz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+OlG/xYITv0L9ED6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.8523757457733154]], "rewards": [-6.495100021362305], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [14], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+RFK/oQYSv606Uz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+OlG/xYITv0L9ED6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.8523757457733154]], "rewards": [-6.495100021362305], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [14], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+OlG/xYITv0L9ED6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCTNlG/VIgTv60DCDuUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.9754830598831177]], "rewards": [-6.3940863609313965], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [15], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+OlG/xYITv0L9ED6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCTNlG/VIgTv60DCDuUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.9754830598831177]], "rewards": [-6.3940863609313965], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [15], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCTNlG/VIgTv60DCDuUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDNWFK/XekRvzI/Hr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.9187028408050537]], "rewards": [-6.391127109527588], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [16], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCTNlG/VIgTv60DCDuUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDNWFK/XekRvzI/Hr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.9187028408050537]], "rewards": [-6.391127109527588], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [16], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDNWFK/XekRvzI/Hr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAAU1W/54UNv3oh1L6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.5589845180511475]], "rewards": [-6.430506229400635], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [17], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDNWFK/XekRvzI/Hr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAAU1W/54UNv3oh1L6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.5589845180511475]], "rewards": [-6.430506229400635], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [17], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAAU1W/54UNv3oh1L6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAYClu/bYEEv3iJVb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.01730865240097046]], "rewards": [-6.549499034881592], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [18], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAAU1W/54UNv3oh1L6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAYClu/bYEEv3iJVb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.01730865240097046]], "rewards": [-6.549499034881592], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [18], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAYClu/bYEEv3iJVb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAFcWS/sRbnvpX5wb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.9770012497901917]], "rewards": [-6.820656776428223], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [19], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAYClu/bYEEv3iJVb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAFcWS/sRbnvpX5wb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.9770012497901917]], "rewards": [-6.820656776428223], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [19], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAFcWS/sRbnvpX5wb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDHO2+/xT62vnmPBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.776479959487915]], "rewards": [-7.378707408905029], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [20], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAFcWS/sRbnvpX5wb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDHO2+/xT62vnmPBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.776479959487915]], "rewards": [-7.378707408905029], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [20], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDHO2+/xT62vnmPBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDFcHe/9UmDvtXlBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.8722989559173584]], "rewards": [-8.153972625732422], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [21], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDHO2+/xT62vnmPBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDFcHe/9UmDvtXlBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.8722989559173584]], "rewards": [-8.153972625732422], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [21], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDFcHe/9UmDvtXlBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+0Xy/GvEgvkPqAcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.8484913110733032]], "rewards": [-8.74808406829834], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [22], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDFcHe/9UmDvtXlBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+0Xy/GvEgvkPqAcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.8484913110733032]], "rewards": [-8.74808406829834], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [22], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+0Xy/GvEgvkPqAcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCPzH+/H0Iivb9LF8CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.7206584811210632]], "rewards": [-9.317010879516602], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [23], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+0Xy/GvEgvkPqAcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCPzH+/H0Iivb9LF8CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.7206584811210632]], "rewards": [-9.317010879516602], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [23], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCPzH+/H0Iivb9LF8CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDmSH+/2fuYPUlsEsCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.3528306484222412]], "rewards": [-10.181554794311523], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [24], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCPzH+/H0Iivb9LF8CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDmSH+/2fuYPUlsEsCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.3528306484222412]], "rewards": [-10.181554794311523], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [24], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDmSH+/2fuYPUlsEsCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCYEHu/LRlIPuoJHMCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.6875670552253723]], "rewards": [-9.930729866027832], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [25], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDmSH+/2fuYPUlsEsCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCYEHu/LRlIPuoJHMCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.6875670552253723]], "rewards": [-9.930729866027832], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [25], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCYEHu/LRlIPuoJHMCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC8gHS//LaXPoVTBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.6944198608398438]], "rewards": [-9.26891040802002], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [26], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCYEHu/LRlIPuoJHMCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC8gHS//LaXPoVTBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.6944198608398438]], "rewards": [-9.26891040802002], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [26], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC8gHS//LaXPoVTBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAjG2y/bePFPh7C9b+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.19667410850524902]], "rewards": [-8.504039764404297], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [27], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC8gHS//LaXPoVTBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAjG2y/bePFPh7C9b+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.19667410850524902]], "rewards": [-8.504039764404297], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [27], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAjG2y/bePFPh7C9b+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB8xGS/f8vlPuOir7+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.8598417043685913]], "rewards": [-7.905289649963379], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [28], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAjG2y/bePFPh7C9b+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB8xGS/f8vlPuOir7+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.8598417043685913]], "rewards": [-7.905289649963379], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [28], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB8xGS/f8vlPuOir7+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD3vFy/7qcBPyELqL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.9243161082267761]], "rewards": [-7.353479385375977], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [29], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB8xGS/f8vlPuOir7+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD3vFy/7qcBPyELqL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.9243161082267761]], "rewards": [-7.353479385375977], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [29], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD3vFy/7qcBPyELqL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAZdli/N60IPw1sJL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.969048023223877]], "rewards": [-6.9908647537231445], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [30], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD3vFy/7qcBPyELqL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAZdli/N60IPw1sJL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.969048023223877]], "rewards": [-6.9908647537231445], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [30], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAZdli/N60IPw1sJL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDqrlW/+foMPxjnzL6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.5278222560882568]], "rewards": [-6.690484523773193], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [31], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAZdli/N60IPw1sJL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDqrlW/+foMPxjnzL6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.5278222560882568]], "rewards": [-6.690484523773193], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [31], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDqrlW/+foMPxjnzL6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDMlVS/f6EOP/uXHr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.5590147972106934]], "rewards": [-6.562597751617432], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [32], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDqrlW/+foMPxjnzL6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDMlVS/f6EOP/uXHr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.5590147972106934]], "rewards": [-6.562597751617432], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [32], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDMlVS/f6EOP/uXHr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD8hFW/cjoNP0vSBj6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.43775200843811035]], "rewards": [-6.5089335441589355], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [33], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDMlVS/f6EOP/uXHr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD8hFW/cjoNP0vSBj6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.43775200843811035]], "rewards": [-6.5089335441589355], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [33], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD8hFW/cjoNP0vSBj6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDru1m/y6MGPwVuHD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.21879124641418457]], "rewards": [-6.541318893432617], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [34], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD8hFW/cjoNP0vSBj6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDru1m/y6MGPwVuHD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.21879124641418457]], "rewards": [-6.541318893432617], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [34], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDru1m/y6MGPwVuHD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBAnV+/nUL5PqUzaD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.3282276391983032]], "rewards": [-6.734356880187988], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [35], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDru1m/y6MGPwVuHD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBAnV+/nUL5PqUzaD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.3282276391983032]], "rewards": [-6.734356880187988], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [35], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBAnV+/nUL5PqUzaD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAoXGi/0uTWPo3awD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.7816812992095947]], "rewards": [-7.018081188201904], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [36], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBAnV+/nUL5PqUzaD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAoXGi/0uTWPo3awD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.7816812992095947]], "rewards": [-7.018081188201904], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [36], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAoXGi/0uTWPo3awD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDscHC//8WvPsGy0z+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.5585429072380066]], "rewards": [-7.564019203186035], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [37], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAoXGi/0uTWPo3awD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDscHC//8WvPsGy0z+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.5585429072380066]], "rewards": [-7.564019203186035], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [37], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDscHC//8WvPsGy0z+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB1dXe/mCaDPpj46T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.2782477140426636]], "rewards": [-8.064400672912598], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [38], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDscHC//8WvPsGy0z+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB1dXe/mCaDPpj46T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.2782477140426636]], "rewards": [-8.064400672912598], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [38], "weights": [1.0]}
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB1dXe/mCaDPpj46T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC+Qn2/S2YVPn4nEECUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.774653434753418]], "rewards": [-8.645625114440918], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [39], "weights": [1.0]} {"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB1dXe/mCaDPpj46T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC+Qn2/S2YVPn4nEECUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.774653434753418]], "rewards": [-8.645625114440918], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [39], "weights": [1.0]}