[RLlib] Fix combination of lockstep and multiple agents controlled by the same policy. (#9521)

* Change aggregation when lockstep is activated. Modify MultiAgentBatch.timeslices to support the combination of lockstep and multiple agents controlled by the same policy. Fixes ray-project/ray#9295.
* Line too long.
2025-03-05 18:11:42 -05:00 · 2020-07-20 08:03:12 +02:00 · 2020-07-20 08:03:12 +02:00 · 440c9c42be
commit 440c9c42be
parent 0cee75c86a
1 changed files with 3 additions and 3 deletions
--- a/rllib/policy/sample_batch.py
+++ b/rllib/policy/sample_batch.py
@ -438,8 +438,8 @@ class MultiAgentBatch:
        steps = []
        for policy_id, batch in self.policy_batches.items():
            for row in batch.rows():
-                steps.append((row[SampleBatch.EPS_ID], row["t"], policy_id,
-                              row))
+                steps.append((row[SampleBatch.EPS_ID], row["t"],
+                              row["agent_index"], policy_id, row))
        steps.sort()

        finished_slices = []
@ -458,7 +458,7 @@ class MultiAgentBatch:
        # For each unique env timestep.
        for _, group in itertools.groupby(steps, lambda x: x[:2]):
            # Accumulate into the current slice.
-            for _, _, policy_id, row in group:
+            for _, _, _, policy_id, row in group:
                cur_slice[policy_id].add_values(**row)
            cur_slice_size += 1
            # Slice has reached target number of env steps.