[RLlib] Issue 16117: DQN/APEX torch not working on GPU. (#16118)
parent 86feccf9c7
commit c74c5038d2

2 changed files with 9 additions and 11 deletions
rllib/agents/dqn/dqn_torch_policy.py

@@ -180,8 +180,6 @@ def build_q_model_and_distribution(
         # generically into ModelCatalog.
         add_layer_norm=add_layer_norm)

-    policy.q_func_vars = model.variables()
-
     policy.target_q_model = ModelCatalog.get_model_v2(
         obs_space=obs_space,
         action_space=action_space,
@@ -201,8 +199,6 @@ def build_q_model_and_distribution(
         # generically into ModelCatalog.
         add_layer_norm=add_layer_norm)

-    policy.target_q_func_vars = policy.target_q_model.variables()
-
     return model, TorchCategorical


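The two removals above take the variable capture out of model construction, where it ran while the models still lived on the CPU. Below is a minimal PyTorch sketch (not RLlib code; the QNet class and the deepcopy step are stand-ins) of why a parameter list snapshotted that early can go stale if the framework later materializes a separate copy of the model on the training device:

import copy

import torch
import torch.nn as nn


# Toy stand-in for an RLlib Q-model; illustrative only.
class QNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)


cpu_model = QNet()
early_vars = list(cpu_model.parameters())  # snapshot taken before any device move

# Simulate a framework that materializes a separate copy for the training device.
device = "cuda" if torch.cuda.is_available() else "cpu"
train_model = copy.deepcopy(cpu_model).to(device)

# The early snapshot does NOT alias the copy's parameters ...
print(early_vars[0] is next(train_model.parameters()))  # False

# ... so an optimizer built over `early_vars` would never see the gradients
# produced by `train_model`. Collecting the parameters after the move avoids this:
late_vars = list(train_model.parameters())
optim = torch.optim.Adam(late_vars, lr=5e-4, eps=1e-8)

loss = train_model(torch.randn(8, 4, device=device)).pow(2).mean()
loss.backward()
optim.step()  # updates the parameters that actually received gradients

Whether RLlib copies models exactly this way is not shown in this diff; the point is only that collecting the parameter lists after any device placement is the safe ordering, which is what the addition to adam_optimizer further down does.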
@@ -237,6 +233,7 @@ def build_q_losses(policy: Policy, model, _,
     Returns:
         TensorType: A single loss tensor.
     """
+
     config = policy.config
     # Q-network evaluation.
     q_t, q_logits_t, q_probs_t, _ = compute_q_values(
@@ -302,6 +299,13 @@ def build_q_losses(policy: Policy, model, _,

 def adam_optimizer(policy: Policy,
                    config: TrainerConfigDict) -> "torch.optim.Optimizer":
+
+    # By this time, the models have been moved to the GPU - if any - and we
+    # can define our optimizers using the correct CUDA variables.
+    if not hasattr(policy, "q_func_vars"):
+        policy.q_func_vars = policy.model.variables()
+        policy.target_q_func_vars = policy.target_q_model.variables()
+
     return torch.optim.Adam(
         policy.q_func_vars, lr=policy.cur_lr, eps=config["adam_epsilon"])

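The added guard is the core of the fix: the q_func_vars and target_q_func_vars lists are now collected lazily, the first time the optimizer is built, which in the torch policy happens after the models have been placed on the training device. Below is a self-contained sketch of the same pattern, assuming nothing beyond plain PyTorch (DummyPolicy, its attributes, and the variables() helper are illustrative stand-ins, not RLlib classes):

import torch
import torch.nn as nn


class DummyPolicy:
    """Stand-in for a torch policy; only mimics the attributes the diff touches."""

    def __init__(self, device):
        # Models are built first ...
        self.model = nn.Linear(4, 2)
        self.target_q_model = nn.Linear(4, 2)
        # ... and then moved to the training device, as the policy would do.
        self.model.to(device)
        self.target_q_model.to(device)
        self.cur_lr = 5e-4


def variables(module):
    # Rough equivalent of ModelV2.variables() for this sketch.
    return list(module.parameters())


def adam_optimizer(policy, config):
    # Same shape as the patched helper: collect the variable lists lazily,
    # only once, and only after the models already live on their device.
    if not hasattr(policy, "q_func_vars"):
        policy.q_func_vars = variables(policy.model)
        policy.target_q_func_vars = variables(policy.target_q_model)
    return torch.optim.Adam(
        policy.q_func_vars, lr=policy.cur_lr, eps=config["adam_epsilon"])


device = "cuda" if torch.cuda.is_available() else "cpu"
policy = DummyPolicy(device)
opt = adam_optimizer(policy, {"adam_epsilon": 1e-8})
cached = policy.q_func_vars
adam_optimizer(policy, {"adam_epsilon": 1e-8})
assert policy.q_func_vars is cached           # second call reuses the cached list
assert policy.q_func_vars[0].device.type == device.split(":")[0]

Because the collection is guarded by hasattr, repeated optimizer construction reuses the same cached parameter lists instead of re-collecting them.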
rllib/agents/dqn/r2d2_torch_policy.py

@@ -6,7 +6,7 @@ import gym
 import ray
 from ray.rllib.agents.dqn.dqn_tf_policy import (PRIO_WEIGHTS,
                                                 postprocess_nstep_and_prio)
-from ray.rllib.agents.dqn.dqn_torch_policy import \
+from ray.rllib.agents.dqn.dqn_torch_policy import adam_optimizer, \
     build_q_model_and_distribution, compute_q_values
 from ray.rllib.agents.dqn.r2d2_tf_policy import \
     get_distribution_inputs_and_class
@@ -232,12 +232,6 @@ class ComputeTDErrorMixin:
         self.compute_td_error = compute_td_error


-def adam_optimizer(policy: Policy,
-                   config: TrainerConfigDict) -> "torch.optim.Optimizer":
-    return torch.optim.Adam(
-        policy.q_func_vars, lr=policy.cur_lr, eps=config["adam_epsilon"])
-
-
 def build_q_stats(policy: Policy, batch) -> Dict[str, TensorType]:
     return dict({
         "cur_lr": policy.cur_lr,
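R2D2's own adam_optimizer is deleted because it read policy.q_func_vars unconditionally, an attribute that, after this commit, only appears once the shared helper has run; the torch policy now imports that shared, guarded helper instead. Below is a small stand-in example (FakePolicy is illustrative, not an RLlib class) of why the unguarded copy would no longer work:

import torch
import torch.nn as nn


class FakePolicy:
    """Stand-in policy: models exist, but q_func_vars has not been cached yet."""

    def __init__(self):
        self.model = nn.Linear(4, 2)
        self.cur_lr = 5e-4


def old_r2d2_adam_optimizer(policy, config):
    # Shape of the removed local helper: it assumed q_func_vars already existed.
    return torch.optim.Adam(
        policy.q_func_vars, lr=policy.cur_lr, eps=config["adam_epsilon"])


try:
    old_r2d2_adam_optimizer(FakePolicy(), {"adam_epsilon": 1e-8})
except AttributeError as err:
    # Fails once q_func_vars is only populated lazily by the shared helper.
    print(err)

The imported adam_optimizer, sketched after the first file's diff, populates q_func_vars itself before building the Adam optimizer, so R2D2 can simply reuse it.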