2022-06-14 01:57:27 -07:00
|
|
|
from typing import Optional
|
2020-04-10 00:56:08 -07:00
|
|
|
import random
|
|
|
|
|
2022-05-17 13:43:49 +02:00
|
|
|
from ray.rllib.utils.replay_buffers.replay_buffer import warn_replay_capacity
|
2020-08-15 13:24:22 +02:00
|
|
|
from ray.rllib.utils.typing import SampleBatchType
|
2020-04-10 00:56:08 -07:00
|
|
|
|
|
|
|
|
2020-06-12 20:17:27 -07:00
|
|
|
# TODO(ekl): Deprecate SimpleReplayBuffer in favor of the replay_sequence_length option.
|
2020-05-12 13:07:19 -07:00
|
|
|
class SimpleReplayBuffer:
    """Simple replay buffer that operates over batches.

    Holds at most ``num_slots`` whole sample batches. While there is free
    capacity, new batches are appended; once the buffer is full, each new
    batch overwrites an existing slot, cycling through the slots in order.
    """

    def __init__(self, num_slots: int, replay_proportion: Optional[float] = None):
        """Initialize SimpleReplayBuffer.

        Args:
            num_slots: Number of batches to store in total. If this is not
                positive, `add_batch()` stores nothing.
            replay_proportion: Currently ignored; this class never reads it.
        """
        self.num_slots = num_slots
        # Stored batches, in slot order.
        self.replay_batches = []
        # Slot that the next overwrite will target once the buffer is full.
        self.replay_index = 0

    def add_batch(self, sample_batch: SampleBatchType) -> None:
        """Store `sample_batch`, overwriting a slot if the buffer is full.

        Args:
            sample_batch: The batch to store as a single unit.
        """
        warn_replay_capacity(item=sample_batch, num_items=self.num_slots)

        # A buffer without capacity silently drops every batch.
        if self.num_slots <= 0:
            return

        # Still filling up: just grow the list.
        if len(self.replay_batches) < self.num_slots:
            self.replay_batches.append(sample_batch)
            return

        # Full: overwrite the current slot and advance the cursor, wrapping
        # around at the end of the buffer.
        self.replay_batches[self.replay_index] = sample_batch
        self.replay_index = (self.replay_index + 1) % self.num_slots

    def replay(self) -> SampleBatchType:
        """Return one stored batch picked at random.

        Returns:
            A batch previously passed to `add_batch()`.

        Raises:
            IndexError: If the buffer holds no batches (nothing was added
                yet, or `num_slots` is not positive).
        """
        if not self.replay_batches:
            # Same exception type `random.choice` would raise on an empty
            # list, but with a message that points at the actual cause.
            raise IndexError(
                "Cannot replay from an empty SimpleReplayBuffer "
                "(num_slots={}); call add_batch() first.".format(self.num_slots)
            )
        return random.choice(self.replay_batches)

    def __len__(self) -> int:
        """Return the number of batches currently stored."""
        return len(self.replay_batches)
|