mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
[RLlib] CQL: Bug fixes and OPE example added to test and offline_rl.py example. (#15761)
This commit is contained in:
parent
78d0ed3503
commit
c4a3e1589b
8 changed files with 149 additions and 90 deletions
|
@ -2224,7 +2224,7 @@ py_test(
|
||||||
name = "examples/parametric_actions_cartpole_pg_torch",
|
name = "examples/parametric_actions_cartpole_pg_torch",
|
||||||
main = "examples/parametric_actions_cartpole.py",
|
main = "examples/parametric_actions_cartpole.py",
|
||||||
tags = ["examples", "examples_P"],
|
tags = ["examples", "examples_P"],
|
||||||
size = "small",
|
size = "medium",
|
||||||
srcs = ["examples/parametric_actions_cartpole.py"],
|
srcs = ["examples/parametric_actions_cartpole.py"],
|
||||||
args = ["--as-test", "--torch", "--stop-reward=60.0", "--run=PG"]
|
args = ["--as-test", "--torch", "--stop-reward=60.0", "--run=PG"]
|
||||||
)
|
)
|
||||||
|
|
|
@ -14,6 +14,7 @@ from ray.rllib.agents.sac.sac_torch_policy import _get_dist_class, stats, \
|
||||||
build_sac_model_and_action_dist, optimizer_fn, ComputeTDErrorMixin, \
|
build_sac_model_and_action_dist, optimizer_fn, ComputeTDErrorMixin, \
|
||||||
TargetNetworkMixin, setup_late_mixins, action_distribution_fn
|
TargetNetworkMixin, setup_late_mixins, action_distribution_fn
|
||||||
from ray.rllib.models.torch.torch_action_dist import TorchDistributionWrapper
|
from ray.rllib.models.torch.torch_action_dist import TorchDistributionWrapper
|
||||||
|
from ray.rllib.policy.policy import LEARNER_STATS_KEY
|
||||||
from ray.rllib.policy.policy_template import build_policy_class
|
from ray.rllib.policy.policy_template import build_policy_class
|
||||||
from ray.rllib.models.modelv2 import ModelV2
|
from ray.rllib.models.modelv2 import ModelV2
|
||||||
from ray.rllib.utils.numpy import SMALL_NUMBER, MIN_LOG_NN_OUTPUT, \
|
from ray.rllib.utils.numpy import SMALL_NUMBER, MIN_LOG_NN_OUTPUT, \
|
||||||
|
@ -335,7 +336,11 @@ def compute_gradients_fn(policy, postprocessed_batch):
|
||||||
batches = [policy._lazy_tensor_dict(postprocessed_batch)]
|
batches = [policy._lazy_tensor_dict(postprocessed_batch)]
|
||||||
model = policy.model
|
model = policy.model
|
||||||
policy._loss(policy, model, policy.dist_class, batches[0])
|
policy._loss(policy, model, policy.dist_class, batches[0])
|
||||||
return [None, dict()]
|
stats = {
|
||||||
|
LEARNER_STATS_KEY: policy._convert_to_non_torch_type(
|
||||||
|
cql_stats(policy, batches[0]))
|
||||||
|
}
|
||||||
|
return [None, stats]
|
||||||
|
|
||||||
|
|
||||||
def apply_gradients_fn(policy, gradients):
|
def apply_gradients_fn(policy, gradients):
|
||||||
|
|
|
@ -4,9 +4,12 @@ import unittest
|
||||||
|
|
||||||
import ray
|
import ray
|
||||||
import ray.rllib.agents.cql as cql
|
import ray.rllib.agents.cql as cql
|
||||||
|
from ray.rllib.utils.framework import try_import_torch
|
||||||
from ray.rllib.utils.test_utils import check_compute_single_action, \
|
from ray.rllib.utils.test_utils import check_compute_single_action, \
|
||||||
framework_iterator
|
framework_iterator
|
||||||
|
|
||||||
|
torch, _ = try_import_torch()
|
||||||
|
|
||||||
|
|
||||||
class TestCQL(unittest.TestCase):
|
class TestCQL(unittest.TestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -43,14 +46,43 @@ class TestCQL(unittest.TestCase):
|
||||||
config["rollout_fragment_length"] = 1
|
config["rollout_fragment_length"] = 1
|
||||||
config["train_batch_size"] = 10
|
config["train_batch_size"] = 10
|
||||||
|
|
||||||
|
# Switch on off-policy evaluation.
|
||||||
|
config["input_evaluation"] = ["is"]
|
||||||
|
|
||||||
num_iterations = 2
|
num_iterations = 2
|
||||||
|
|
||||||
# Test for torch framework (tf not implemented yet).
|
# Test for torch framework (tf not implemented yet).
|
||||||
for _ in framework_iterator(config, frameworks=("torch")):
|
for _ in framework_iterator(config, frameworks=("torch")):
|
||||||
trainer = cql.CQLTrainer(config=config)
|
trainer = cql.CQLTrainer(config=config)
|
||||||
for i in range(num_iterations):
|
for i in range(num_iterations):
|
||||||
trainer.train()
|
print(trainer.train())
|
||||||
|
|
||||||
check_compute_single_action(trainer)
|
check_compute_single_action(trainer)
|
||||||
|
|
||||||
|
# Get policy, model, and replay-buffer.
|
||||||
|
pol = trainer.get_policy()
|
||||||
|
cql_model = pol.model
|
||||||
|
from ray.rllib.agents.cql.cql import replay_buffer
|
||||||
|
|
||||||
|
# Example on how to do evaluation on the trained Trainer
|
||||||
|
# using the data from our buffer.
|
||||||
|
# Get a sample (MultiAgentBatch -> SampleBatch).
|
||||||
|
batch = replay_buffer.replay().policy_batches["default_policy"]
|
||||||
|
obs = torch.from_numpy(batch["obs"])
|
||||||
|
# Pass the observations through our model to get the
|
||||||
|
# features, which are then passed through the Q-head.
|
||||||
|
model_out, _ = cql_model({"obs": obs})
|
||||||
|
# The estimated Q-values from the (historic) actions in the batch.
|
||||||
|
q_values_old = cql_model.get_q_values(
|
||||||
|
model_out, torch.from_numpy(batch["actions"]))
|
||||||
|
# The estimated Q-values for the new actions computed
|
||||||
|
# by our trainer policy.
|
||||||
|
actions_new = pol.compute_actions_from_input_dict({"obs": obs})[0]
|
||||||
|
q_values_new = cql_model.get_q_values(
|
||||||
|
model_out, torch.from_numpy(actions_new))
|
||||||
|
print(f"Q-val batch={q_values_old}")
|
||||||
|
print(f"Q-val policy={q_values_new}")
|
||||||
|
|
||||||
trainer.stop()
|
trainer.stop()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,7 @@ from ray.rllib.execution.metric_ops import StandardMetricsReporting
|
||||||
DEFAULT_CONFIG = with_common_config({
|
DEFAULT_CONFIG = with_common_config({
|
||||||
# You should override this to point to an offline dataset (see agent.py).
|
# You should override this to point to an offline dataset (see agent.py).
|
||||||
"input": "sampler",
|
"input": "sampler",
|
||||||
# Use importance sampling estimators for reward
|
# Use importance sampling estimators for reward.
|
||||||
"input_evaluation": ["is", "wis"],
|
"input_evaluation": ["is", "wis"],
|
||||||
|
|
||||||
# If true, use the Generalized Advantage Estimator (GAE)
|
# If true, use the Generalized Advantage Estimator (GAE)
|
||||||
|
|
|
@ -57,7 +57,7 @@ class TestMARWIL(unittest.TestCase):
|
||||||
for i in range(num_iterations):
|
for i in range(num_iterations):
|
||||||
eval_results = trainer.train().get("evaluation")
|
eval_results = trainer.train().get("evaluation")
|
||||||
if eval_results:
|
if eval_results:
|
||||||
print("iter={} R={}".format(
|
print("iter={} R={} ".format(
|
||||||
i, eval_results["episode_reward_mean"]))
|
i, eval_results["episode_reward_mean"]))
|
||||||
# Learn until some reward is reached on an actual live env.
|
# Learn until some reward is reached on an actual live env.
|
||||||
if eval_results["episode_reward_mean"] > min_reward:
|
if eval_results["episode_reward_mean"] > min_reward:
|
||||||
|
|
|
@ -80,7 +80,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
# Check, whether we can learn from the given file in `num_iterations`
|
# Check, whether we can learn from the given file in `num_iterations`
|
||||||
# iterations, up to a reward of `min_reward`.
|
# iterations, up to a reward of `min_reward`.
|
||||||
num_iterations = 50
|
num_iterations = 5
|
||||||
min_reward = -300
|
min_reward = -300
|
||||||
|
|
||||||
# Test for torch framework (tf not implemented yet).
|
# Test for torch framework (tf not implemented yet).
|
||||||
|
@ -99,17 +99,39 @@ if __name__ == "__main__":
|
||||||
raise ValueError("CQLTrainer did not reach {} reward from expert "
|
raise ValueError("CQLTrainer did not reach {} reward from expert "
|
||||||
"offline data!".format(min_reward))
|
"offline data!".format(min_reward))
|
||||||
|
|
||||||
|
# Get policy, model, and replay-buffer.
|
||||||
|
pol = trainer.get_policy()
|
||||||
|
cql_model = pol.model
|
||||||
|
from ray.rllib.agents.cql.cql import replay_buffer
|
||||||
|
|
||||||
# If you would like to query CQL's learnt Q-function for arbitrary
|
# If you would like to query CQL's learnt Q-function for arbitrary
|
||||||
# (cont.) actions, do the following:
|
# (cont.) actions, do the following:
|
||||||
obs_batch = torch.from_numpy(np.random.random(size=(5, 3)))
|
obs_batch = torch.from_numpy(np.random.random(size=(5, 3)))
|
||||||
action_batch = torch.from_numpy(np.random.random(size=(5, 1)))
|
action_batch = torch.from_numpy(np.random.random(size=(5, 1)))
|
||||||
|
q_values = cql_model.get_q_values(obs_batch, action_batch)
|
||||||
cql_model = trainer.get_policy().model
|
|
||||||
q_values = cql_model.get_q_values([obs_batch], [action_batch])
|
|
||||||
# If you are using the "twin_q", there'll be 2 Q-networks and
|
# If you are using the "twin_q", there'll be 2 Q-networks and
|
||||||
# we usually consider the min of the 2 outputs, like so:
|
# we usually consider the min of the 2 outputs, like so:
|
||||||
twin_q_values = cql_model.get_twin_q_values([obs_batch], [action_batch])
|
twin_q_values = cql_model.get_twin_q_values(obs_batch, action_batch)
|
||||||
final_q_values = torch.min(q_values, twin_q_values)
|
final_q_values = torch.min(q_values, twin_q_values)
|
||||||
print(final_q_values)
|
print(final_q_values)
|
||||||
|
|
||||||
|
# Example on how to do evaluation on the trained Trainer
|
||||||
|
# using the data from our buffer.
|
||||||
|
# Get a sample (MultiAgentBatch -> SampleBatch).
|
||||||
|
batch = replay_buffer.replay().policy_batches["default_policy"]
|
||||||
|
obs = torch.from_numpy(batch["obs"])
|
||||||
|
# Pass the observations through our model to get the
|
||||||
|
# features, which are then passed through the Q-head.
|
||||||
|
model_out, _ = cql_model({"obs": obs})
|
||||||
|
# The estimated Q-values from the (historic) actions in the batch.
|
||||||
|
q_values_old = cql_model.get_q_values(model_out,
|
||||||
|
torch.from_numpy(batch["actions"]))
|
||||||
|
# The estimated Q-values for the new actions computed
|
||||||
|
# by our trainer policy.
|
||||||
|
actions_new = pol.compute_actions_from_input_dict({"obs": obs})[0]
|
||||||
|
q_values_new = cql_model.get_q_values(model_out,
|
||||||
|
torch.from_numpy(actions_new))
|
||||||
|
print(f"Q-val batch={q_values_old}")
|
||||||
|
print(f"Q-val policy={q_values_new}")
|
||||||
|
|
||||||
trainer.stop()
|
trainer.stop()
|
||||||
|
|
|
@ -1,40 +1,40 @@
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDBaDi/lZAxP2QkX76UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQA6NTq/dq0vP22RUD6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.3287242650985718]], "rewards": [-5.646285057067871], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [0], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDBaDi/lZAxP2QkX76UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQA6NTq/dq0vP22RUD6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.3287242650985718]], "rewards": [-5.646285057067871], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [0], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQA6NTq/dq0vP22RUD6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCankC/fJ8oPzirPj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.08813309669494629]], "rewards": [-5.693763256072998], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [1], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQA6NTq/dq0vP22RUD6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCankC/fJ8oPzirPj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.08813309669494629]], "rewards": [-5.693763256072998], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [1], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCankC/fJ8oPzirPj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAkpEy/wc8ZPzjOvj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.8395185470581055]], "rewards": [-5.9269185066223145], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [2], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCankC/fJ8oPzirPj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAkpEy/wc8ZPzjOvj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.8395185470581055]], "rewards": [-5.9269185066223145], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [2], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAkpEy/wc8ZPzjOvj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCvD1q/9hsGP/J47j+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.2607402801513672]], "rewards": [-6.457780838012695], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [3], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAkpEy/wc8ZPzjOvj+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCvD1q/9hsGP/J47j+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.2607402801513672]], "rewards": [-6.457780838012695], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [3], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCvD1q/9hsGP/J47j+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAT92i/xUHUPi2dHkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.7412639856338501]], "rewards": [-7.058495044708252], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [4], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCvD1q/9hsGP/J47j+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAT92i/xUHUPi2dHkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.7412639856338501]], "rewards": [-7.058495044708252], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [4], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAT92i/xUHUPi2dHkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDZJna/WKWMPrT1PkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.6482666730880737]], "rewards": [-7.982394218444824], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [5], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAT92i/xUHUPi2dHkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDZJna/WKWMPrT1PkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.6482666730880737]], "rewards": [-7.982394218444824], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [5], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDZJna/WKWMPrT1PkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBnuX2/aDwIPodkOUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.9767004251480103]], "rewards": [-9.092668533325195], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [6], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDZJna/WKWMPrT1PkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBnuX2/aDwIPodkOUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.9767004251480103]], "rewards": [-9.092668533325195], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [6], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBnuX2/aDwIPodkOUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCQ/n+/HgvZu5tHM0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.6509912014007568]], "rewards": [-9.889808654785156], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [7], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBnuX2/aDwIPodkOUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCQ/n+/HgvZu5tHM0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.6509912014007568]], "rewards": [-9.889808654785156], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [7], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCQ/n+/HgvZu5tHM0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAR63y/EWkevkdVPkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.5922571420669556]], "rewards": [-10.614130973815918], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [8], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCQ/n+/HgvZu5tHM0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAR63y/EWkevkdVPkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.5922571420669556]], "rewards": [-10.614130973815918], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [8], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAR63y/EWkevkdVPkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBDYnW/K+qRvmAaK0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.6148225665092468]], "rewards": [-9.803768157958984], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [9], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAR63y/EWkevkdVPkCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBDYnW/K+qRvmAaK0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.6148225665092468]], "rewards": [-9.803768157958984], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [9], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBDYnW/K+qRvmAaK0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDczmu/8E3HvuDfDUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.8098440170288086]], "rewards": [-8.854684829711914], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [10], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBDYnW/K+qRvmAaK0CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDczmu/8E3HvuDfDUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.8098440170288086]], "rewards": [-8.854684829711914], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [10], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDczmu/8E3HvuDfDUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBvLmG/N43zvjqH9T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.022167325019836426]], "rewards": [-8.008651733398438], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [11], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDczmu/8E3HvuDfDUCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBvLmG/N43zvjqH9T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.022167325019836426]], "rewards": [-8.008651733398438], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [11], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBvLmG/N43zvjqH9T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB/HFi/uDoJvyU5sz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.5374762415885925]], "rewards": [-7.369612216949463], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [12], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBvLmG/N43zvjqH9T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB/HFi/uDoJvyU5sz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.5374762415885925]], "rewards": [-7.369612216949463], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [12], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB/HFi/uDoJvyU5sz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+RFK/oQYSv606Uz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.576760470867157]], "rewards": [-6.832327842712402], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [13], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB/HFi/uDoJvyU5sz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+RFK/oQYSv606Uz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.576760470867157]], "rewards": [-6.832327842712402], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [13], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+RFK/oQYSv606Uz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+OlG/xYITv0L9ED6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.8523757457733154]], "rewards": [-6.495100021362305], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [14], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+RFK/oQYSv606Uz+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+OlG/xYITv0L9ED6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.8523757457733154]], "rewards": [-6.495100021362305], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [14], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+OlG/xYITv0L9ED6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCTNlG/VIgTv60DCDuUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.9754830598831177]], "rewards": [-6.3940863609313965], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [15], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+OlG/xYITv0L9ED6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCTNlG/VIgTv60DCDuUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.9754830598831177]], "rewards": [-6.3940863609313965], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [15], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCTNlG/VIgTv60DCDuUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDNWFK/XekRvzI/Hr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.9187028408050537]], "rewards": [-6.391127109527588], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [16], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCTNlG/VIgTv60DCDuUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDNWFK/XekRvzI/Hr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.9187028408050537]], "rewards": [-6.391127109527588], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [16], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDNWFK/XekRvzI/Hr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAAU1W/54UNv3oh1L6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.5589845180511475]], "rewards": [-6.430506229400635], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [17], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDNWFK/XekRvzI/Hr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAAU1W/54UNv3oh1L6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.5589845180511475]], "rewards": [-6.430506229400635], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [17], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAAU1W/54UNv3oh1L6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAYClu/bYEEv3iJVb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.01730865240097046]], "rewards": [-6.549499034881592], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [18], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAAU1W/54UNv3oh1L6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAYClu/bYEEv3iJVb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.01730865240097046]], "rewards": [-6.549499034881592], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [18], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAYClu/bYEEv3iJVb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAFcWS/sRbnvpX5wb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.9770012497901917]], "rewards": [-6.820656776428223], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [19], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAYClu/bYEEv3iJVb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAFcWS/sRbnvpX5wb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.9770012497901917]], "rewards": [-6.820656776428223], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [19], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAFcWS/sRbnvpX5wb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDHO2+/xT62vnmPBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.776479959487915]], "rewards": [-7.378707408905029], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [20], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAFcWS/sRbnvpX5wb+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDHO2+/xT62vnmPBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.776479959487915]], "rewards": [-7.378707408905029], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [20], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDHO2+/xT62vnmPBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDFcHe/9UmDvtXlBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.8722989559173584]], "rewards": [-8.153972625732422], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [21], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDHO2+/xT62vnmPBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDFcHe/9UmDvtXlBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.8722989559173584]], "rewards": [-8.153972625732422], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [21], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDFcHe/9UmDvtXlBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+0Xy/GvEgvkPqAcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.8484913110733032]], "rewards": [-8.74808406829834], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [22], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDFcHe/9UmDvtXlBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+0Xy/GvEgvkPqAcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.8484913110733032]], "rewards": [-8.74808406829834], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [22], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+0Xy/GvEgvkPqAcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCPzH+/H0Iivb9LF8CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.7206584811210632]], "rewards": [-9.317010879516602], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [23], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB+0Xy/GvEgvkPqAcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCPzH+/H0Iivb9LF8CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.7206584811210632]], "rewards": [-9.317010879516602], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [23], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCPzH+/H0Iivb9LF8CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDmSH+/2fuYPUlsEsCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.3528306484222412]], "rewards": [-10.181554794311523], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [24], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCPzH+/H0Iivb9LF8CUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDmSH+/2fuYPUlsEsCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.3528306484222412]], "rewards": [-10.181554794311523], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [24], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDmSH+/2fuYPUlsEsCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCYEHu/LRlIPuoJHMCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.6875670552253723]], "rewards": [-9.930729866027832], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [25], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDmSH+/2fuYPUlsEsCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCYEHu/LRlIPuoJHMCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.6875670552253723]], "rewards": [-9.930729866027832], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [25], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCYEHu/LRlIPuoJHMCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC8gHS//LaXPoVTBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.6944198608398438]], "rewards": [-9.26891040802002], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [26], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQCYEHu/LRlIPuoJHMCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC8gHS//LaXPoVTBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.6944198608398438]], "rewards": [-9.26891040802002], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [26], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC8gHS//LaXPoVTBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAjG2y/bePFPh7C9b+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.19667410850524902]], "rewards": [-8.504039764404297], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [27], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC8gHS//LaXPoVTBcCUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAjG2y/bePFPh7C9b+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.19667410850524902]], "rewards": [-8.504039764404297], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [27], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAjG2y/bePFPh7C9b+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB8xGS/f8vlPuOir7+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.8598417043685913]], "rewards": [-7.905289649963379], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [28], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAjG2y/bePFPh7C9b+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB8xGS/f8vlPuOir7+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.8598417043685913]], "rewards": [-7.905289649963379], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [28], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB8xGS/f8vlPuOir7+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD3vFy/7qcBPyELqL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.9243161082267761]], "rewards": [-7.353479385375977], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [29], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB8xGS/f8vlPuOir7+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD3vFy/7qcBPyELqL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.9243161082267761]], "rewards": [-7.353479385375977], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [29], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD3vFy/7qcBPyELqL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAZdli/N60IPw1sJL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.969048023223877]], "rewards": [-6.9908647537231445], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [30], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD3vFy/7qcBPyELqL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAZdli/N60IPw1sJL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.969048023223877]], "rewards": [-6.9908647537231445], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [30], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAZdli/N60IPw1sJL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDqrlW/+foMPxjnzL6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.5278222560882568]], "rewards": [-6.690484523773193], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [31], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAZdli/N60IPw1sJL+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDqrlW/+foMPxjnzL6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.5278222560882568]], "rewards": [-6.690484523773193], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [31], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDqrlW/+foMPxjnzL6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDMlVS/f6EOP/uXHr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.5590147972106934]], "rewards": [-6.562597751617432], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [32], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDqrlW/+foMPxjnzL6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDMlVS/f6EOP/uXHr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.5590147972106934]], "rewards": [-6.562597751617432], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [32], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDMlVS/f6EOP/uXHr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD8hFW/cjoNP0vSBj6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.43775200843811035]], "rewards": [-6.5089335441589355], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [33], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDMlVS/f6EOP/uXHr6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD8hFW/cjoNP0vSBj6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.43775200843811035]], "rewards": [-6.5089335441589355], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [33], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD8hFW/cjoNP0vSBj6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDru1m/y6MGPwVuHD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.21879124641418457]], "rewards": [-6.541318893432617], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [34], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQD8hFW/cjoNP0vSBj6UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDru1m/y6MGPwVuHD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.21879124641418457]], "rewards": [-6.541318893432617], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [34], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDru1m/y6MGPwVuHD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBAnV+/nUL5PqUzaD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.3282276391983032]], "rewards": [-6.734356880187988], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [35], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDru1m/y6MGPwVuHD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBAnV+/nUL5PqUzaD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.3282276391983032]], "rewards": [-6.734356880187988], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [35], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBAnV+/nUL5PqUzaD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAoXGi/0uTWPo3awD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.7816812992095947]], "rewards": [-7.018081188201904], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [36], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQBAnV+/nUL5PqUzaD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAoXGi/0uTWPo3awD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.7816812992095947]], "rewards": [-7.018081188201904], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [36], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAoXGi/0uTWPo3awD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDscHC//8WvPsGy0z+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.5585429072380066]], "rewards": [-7.564019203186035], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [37], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQAoXGi/0uTWPo3awD+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDscHC//8WvPsGy0z+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.5585429072380066]], "rewards": [-7.564019203186035], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [37], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDscHC//8WvPsGy0z+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB1dXe/mCaDPpj46T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[-0.2782477140426636]], "rewards": [-8.064400672912598], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [38], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQDscHC//8WvPsGy0z+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB1dXe/mCaDPpj46T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[-0.2782477140426636]], "rewards": [-8.064400672912598], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [38], "weights": [1.0]}
|
||||||
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB1dXe/mCaDPpj46T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC+Qn2/S2YVPn4nEECUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "actions": [[0.774653434753418]], "rewards": [-8.645625114440918], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [39], "weights": [1.0]}
|
{"type": "SampleBatch", "obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQB1dXe/mCaDPpj46T+UjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "new_obs": "BCJNGGhAjgAAAAAAAABxiQAAAFKABZWDAAEA8hmMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMLgDxAQC+Qn2/S2YVPn4nEECUjAU8APEYlIwFZHR5cGWUk5SMAmY0lEsASwGHlFKUKEsDjAE8lE5OTkr/////BQDwBUsAdJRiSwFLA4aUjAFDlHSUUpQuAAAAAA==", "action_prob": [0.1], "actions": [[0.774653434753418]], "rewards": [-8.645625114440918], "dones": [false], "infos": [{}], "agent_index": [0], "eps_id": [895625640], "unroll_id": [39], "weights": [1.0]}
|
||||||
|
|
Loading…
Add table
Reference in a new issue