mirror of
https://github.com/vale981/ray
synced 2025-03-10 21:36:39 -04:00

The DDPG/TD3 algorithms currently do not have a PyTorch implementation. This PR adds PyTorch support for DDPG/TD3 to RLlib. This PR: - Depends on the re-factor PR for DDPG (Functional Algorithm API). - Adds learning regression tests for the PyTorch versions of DDPG and TD3. - Updates the documentation to reflect that DDPG and TD3 now support PyTorch. * Learns Pendulum-v0 on the torch version (same config as tf); wall time is slightly slower (~20%) than tf. * Fixes the GPU target-model problem.
26 lines
818 B
Python
26 lines
818 B
Python
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
|
|
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
|
|
from ray.rllib.utils.annotations import override
|
|
from ray.rllib.utils import try_import_tf
|
|
|
|
tf = try_import_tf()
|
|
|
|
|
|
class NoopModel(TFModelV2):
    """Pass-through model that returns the flattened observation as-is.

    This is the model used if use_state_preprocessor=False."""

    @override(TFModelV2)
    def forward(self, input_dict, state, seq_lens):
        # Cast to float32 so downstream ops receive a consistent dtype.
        flat_obs = input_dict["obs_flat"]
        return tf.cast(flat_obs, tf.float32), state
|
|
|
|
|
|
class TorchNoopModel(TorchModelV2):
    """Pass-through torch model that returns the flattened observation as-is.

    This is the model used if use_state_preprocessor=False."""

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        # Convert to float32 so downstream layers receive a consistent dtype.
        flat_obs = input_dict["obs_flat"]
        return flat_obs.float(), state
|