mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00
[RLlib] Fixes CRR flakeyness (#26770)
This commit is contained in:
parent
5433c11650
commit
aec79afda1
3 changed files with 3 additions and 3 deletions
|
@@ -248,7 +248,7 @@ class CRRTorchPolicy(TorchPolicyV2, TargetNetworkMixin):
|
|||
q_vals = torch.minimum(q_vals, q_twins)
|
||||
|
||||
probs = pi_s_t.dist.probs
|
||||
- v_t = (q_t * probs).sum(-1, keepdims=True)
|
||||
+ v_t = (q_vals * probs).sum(-1, keepdims=True)
|
||||
else:
|
||||
policy_actions = pi_s_t.dist.sample((n_action_sample,)) # samples
|
||||
|
||||
|
|
|
@@ -36,7 +36,7 @@ cartpole_crr:
|
|||
evaluation_parallel_to_training: True
|
||||
# specific to CRR
|
||||
temperature: 1.0
|
||||
- weight_type: bin
|
||||
+ weight_type: exp
|
||||
advantage_type: mean
|
||||
max_weight: 20.0
|
||||
n_action_sample: 4
|
||||
|
|
|
@@ -36,7 +36,7 @@ cartpole_crr:
|
|||
evaluation_parallel_to_training: True
|
||||
# specific to CRR
|
||||
temperature: 1.0
|
||||
- weight_type: bin
|
||||
+ weight_type: exp
|
||||
advantage_type: expectation
|
||||
max_weight: 20.0
|
||||
n_action_sample: 4
|
||||
|
|
Loading…
Add table
Reference in a new issue