[RLlib] Fixes CRR flakiness (#26770)

This commit is contained in:
kourosh hakhamaneshi 2022-07-20 12:08:57 -07:00 committed by GitHub
parent 5433c11650
commit aec79afda1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 3 additions and 3 deletions

View file

@@ -248,7 +248,7 @@ class CRRTorchPolicy(TorchPolicyV2, TargetNetworkMixin):
 q_vals = torch.minimum(q_vals, q_twins)
 probs = pi_s_t.dist.probs
-v_t = (q_t * probs).sum(-1, keepdims=True)
+v_t = (q_vals * probs).sum(-1, keepdims=True)
 else:
 policy_actions = pi_s_t.dist.sample((n_action_sample,)) # samples

View file

@@ -36,7 +36,7 @@ cartpole_crr:
 evaluation_parallel_to_training: True
 # specific to CRR
 temperature: 1.0
-weight_type: bin
+weight_type: exp
 advantage_type: mean
 max_weight: 20.0
 n_action_sample: 4

View file

@@ -36,7 +36,7 @@ cartpole_crr:
 evaluation_parallel_to_training: True
 # specific to CRR
 temperature: 1.0
-weight_type: bin
+weight_type: exp
 advantage_type: expectation
 max_weight: 20.0
 n_action_sample: 4