2017-03-07 23:42:44 -08:00
|
|
|
from __future__ import absolute_import
|
|
|
|
from __future__ import division
|
|
|
|
from __future__ import print_function
|
|
|
|
|
2019-07-27 02:08:16 -07:00
|
|
|
from ray.rllib.utils.annotations import DeveloperAPI
|
2018-08-09 19:51:32 -07:00
|
|
|
|
2017-03-07 23:42:44 -08:00
|
|
|
|
2019-01-23 21:27:26 -08:00
|
|
|
@DeveloperAPI
class ActionDistribution(object):
    """Base interface for the action distribution of an agent's policy.

    ``sample``, ``logp``, ``kl`` and ``entropy`` are not implemented on this
    class: each one raises ``NotImplementedError``. ``multi_kl`` and
    ``multi_entropy`` simply forward to ``kl`` and ``entropy``.

    Args:
        inputs (Tensor): The input vector to compute samples from.
    """

    @DeveloperAPI
    def __init__(self, inputs):
        # Kept as-is on the instance; no validation or copying happens here.
        self.inputs = inputs

    @DeveloperAPI
    def sample(self):
        """Draw a sample from the action distribution.

        Raises:
            NotImplementedError: Always, on this base class.
        """
        raise NotImplementedError()

    @DeveloperAPI
    def logp(self, x):
        """The log-likelihood of the action distribution.

        Args:
            x: The value to score. Presumably a sampled action; confirm
                against the concrete subclasses.

        Raises:
            NotImplementedError: Always, on this base class.
        """
        raise NotImplementedError()

    @DeveloperAPI
    def kl(self, other):
        """The KL-divergence between two action distributions.

        Args:
            other: The distribution to compare this one against.

        Raises:
            NotImplementedError: Always, on this base class.
        """
        raise NotImplementedError()

    @DeveloperAPI
    def entropy(self):
        """The entropy of the action distribution.

        Raises:
            NotImplementedError: Always, on this base class.
        """
        raise NotImplementedError()

    def multi_kl(self, other):
        """The KL-divergence between two action distributions.

        Unlike kl(), this variant can return an array for MultiDiscrete.
        On this class it just forwards to kl().
        TODO(ekl) consider removing this.

        Args:
            other: The distribution to compare this one against.

        Returns:
            Whatever ``self.kl(other)`` returns.
        """
        divergence = self.kl(other)
        return divergence

    def multi_entropy(self):
        """The entropy of the action distribution.

        Unlike entropy(), this variant can return an array for
        MultiDiscrete. On this class it just forwards to entropy().
        TODO(ekl) consider removing this.

        Returns:
            Whatever ``self.entropy()`` returns.
        """
        result = self.entropy()
        return result
|