[RLlib] Fixes CRR flakiness (#26770)

2025-03-05 10:01:43 -05:00 · 2022-07-20 12:08:57 -07:00 · 2022-07-20 12:08:57 -07:00 · aec79afda1
commit aec79afda1
parent 5433c11650
3 changed files with 3 additions and 3 deletions
--- a/rllib/algorithms/crr/torch/crr_torch_policy.py
+++ b/rllib/algorithms/crr/torch/crr_torch_policy.py
@@ -248,7 +248,7 @@ class CRRTorchPolicy(TorchPolicyV2, TargetNetworkMixin):
                q_vals = torch.minimum(q_vals, q_twins)

            probs = pi_s_t.dist.probs
-            v_t = (q_t * probs).sum(-1, keepdims=True)
+            v_t = (q_vals * probs).sum(-1, keepdims=True)
        else:
            policy_actions = pi_s_t.dist.sample((n_action_sample,))  # samples

--- a/rllib/tuned_examples/crr/cartpole-v0-crr.yaml
+++ b/rllib/tuned_examples/crr/cartpole-v0-crr.yaml
@@ -36,7 +36,7 @@ cartpole_crr:
        evaluation_parallel_to_training: True
        # specific to CRR
        temperature: 1.0
-        weight_type: bin
+        weight_type: exp
        advantage_type: mean
        max_weight: 20.0
        n_action_sample: 4
--- a/rllib/tuned_examples/crr/cartpole-v0-crr_expectation.yaml
+++ b/rllib/tuned_examples/crr/cartpole-v0-crr_expectation.yaml
@@ -36,7 +36,7 @@ cartpole_crr:
        evaluation_parallel_to_training: True
        # specific to CRR
        temperature: 1.0
-        weight_type: bin
+        weight_type: exp
        advantage_type: expectation
        max_weight: 20.0
        n_action_sample: 4