mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
[RLlib] Fix combination of lockstep and multiple agents controlled by the same policy. (#9521)
* Change aggregation when lockstep is activated. Modification of MultiAgentBatch.timeslices to support the combination of lockstep and multiple agents controlled by the same policy. fix ray-project/ray#9295 * Line too long.
This commit is contained in:
parent
0cee75c86a
commit
440c9c42be
1 changed file with 3 additions and 3 deletions
|
@@ -438,8 +438,8 @@ class MultiAgentBatch:
|
|||
steps = []
|
||||
for policy_id, batch in self.policy_batches.items():
|
||||
for row in batch.rows():
|
||||
steps.append((row[SampleBatch.EPS_ID], row["t"], policy_id,
|
||||
row))
|
||||
steps.append((row[SampleBatch.EPS_ID], row["t"],
|
||||
row["agent_index"], policy_id, row))
|
||||
steps.sort()
|
||||
|
||||
finished_slices = []
|
||||
|
@@ -458,7 +458,7 @@ class MultiAgentBatch:
|
|||
# For each unique env timestep.
|
||||
for _, group in itertools.groupby(steps, lambda x: x[:2]):
|
||||
# Accumulate into the current slice.
|
||||
for _, _, policy_id, row in group:
|
||||
for _, _, _, policy_id, row in group:
|
||||
cur_slice[policy_id].add_values(**row)
|
||||
cur_slice_size += 1
|
||||
# Slice has reached target number of env steps.
|
||||
|
|
Loading…
Add table
Reference in a new issue