diff --git a/dashboard/client/src/pages/dashboard/Dashboard.tsx b/dashboard/client/src/pages/dashboard/Dashboard.tsx
index 018bcad13..b3e631989 100644
--- a/dashboard/client/src/pages/dashboard/Dashboard.tsx
+++ b/dashboard/client/src/pages/dashboard/Dashboard.tsx
@@ -147,11 +147,11 @@ const Dashboard: React.FC = () => {
run the following command: `ray disable-usage-stats` before starting
the cluster. See{" "}
- https://github.com/ray-project/ray/issues/20857
+ https://docs.ray.io/en/master/cluster/usage-stats.html
{" "}
for more details.
diff --git a/doc/source/_toc.yml b/doc/source/_toc.yml
index 911fe3eb2..6827c0e11 100644
--- a/doc/source/_toc.yml
+++ b/doc/source/_toc.yml
@@ -132,6 +132,7 @@ parts:
- file: cluster/cloud
- file: cluster/deploy
- file: cluster/api
+ - file: cluster/usage-stats
- caption: References
chapters:
diff --git a/doc/source/cluster/usage-stats-data-ref.rst b/doc/source/cluster/usage-stats-data-ref.rst
new file mode 100644
index 000000000..f7d0bd949
--- /dev/null
+++ b/doc/source/cluster/usage-stats-data-ref.rst
@@ -0,0 +1,10 @@
+Usage Stats Data API
+====================
+
+.. _ray-usage-stats-data-ref:
+
+UsageStatsToReport
+~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: ray._private.usage.usage_lib.UsageStatsToReport
+ :members:
diff --git a/doc/source/cluster/usage-stats.rst b/doc/source/cluster/usage-stats.rst
new file mode 100644
index 000000000..0a79bb2b2
--- /dev/null
+++ b/doc/source/cluster/usage-stats.rst
@@ -0,0 +1,84 @@
+.. _ref-usage-stats:
+
+Usage Stats Collection
+======================
+
+Starting in Ray 1.13, Ray collects usage stats data by default (guarded by an opt-out prompt).
+This data will be used by the open-source Ray engineering team to better understand how to improve our libraries and core APIs, and how to prioritize bug fixes and enhancements.
+
+Here are the guiding principles of our collection policy:
+
+- **No surprises** — you will be notified before we begin collecting data. You will be notified of any changes to the data being collected or how it is used.
+- **Easy opt-out** — you will be able to easily opt out of data collection.
+- **Transparency** — you will be able to review all data that is sent to us
+- **Control** — you will have control over your data, and we will honor requests to delete your data.
+- We will **not** collect any personally identifiable data or proprietary code/data
+- We will **not** sell data or buy data about you.
+
+You will always be able to :ref:`disable the usage stats collection <usage-disable>`.
+
+For more context, please refer to this `RFC <https://github.com/ray-project/ray/issues/20857>`_.
+
+What data is collected?
+-----------------------
+
+We collect non-sensitive data that helps us understand how Ray is used (e.g., which Ray libraries are used).
+**Personally identifiable data will never be collected.** Please check :ref:`UsageStatsToReport <ray-usage-stats-data-ref>` to see the data we collect.
+
+.. _usage-disable:
+
+How to disable it
+-----------------
+There are multiple ways to disable usage stats collection before starting a cluster:
+
+#. Add ``--disable-usage-stats`` option to the command that starts the Ray cluster (e.g., ``ray start --head --disable-usage-stats`` :ref:`command <ray-start-doc>`).
+
+#. Run :ref:`ray disable-usage-stats <ray-disable-usage-stats-doc>` to disable collection for all future clusters. This won't affect currently running clusters. Under the hood, this command writes ``{"usage_stats": false}`` to the global config file ``~/.ray/config.json``.
+
+#. Set the environment variable ``RAY_USAGE_STATS_ENABLED`` to 0 (e.g., ``RAY_USAGE_STATS_ENABLED=0 ray start --head`` :ref:`command <ray-start-doc>`).
+
+Currently there is no way to enable or disable collection for a running cluster; you have to stop and restart the cluster.
+
+
+How does it work?
+-----------------
+
+When a Ray cluster is started via :ref:`ray start --head <ray-start-doc>`, :ref:`ray up <ray-up-doc>`, :ref:`ray submit --start <ray-submit-doc>` or :ref:`ray exec --start <ray-exec-doc>`,
+Ray will decide whether usage stats collection should be enabled or not by considering the following factors in order:
+
+#. It checks whether the environment variable ``RAY_USAGE_STATS_ENABLED`` is set: 1 means enabled and 0 means disabled.
+
+#. If the environment variable is not set, it reads the value of key ``usage_stats`` in the global config file ``~/.ray/config.json``: true means enabled and false means disabled.
+
+#. If neither is set and the console is interactive, then the user will be prompted to enable or disable the collection. If the console is non-interactive, usage stats collection will be enabled by default. The decision will be saved to ``~/.ray/config.json``, so the prompt is only shown once.
+
+Note: usage stats collection is not enabled when using local dev clusters started via ``ray.init()``. This means that Ray will never collect data from third-party library users not using Ray directly.
+
+If usage stats collection is enabled, a background process on the head node will collect the usage stats
+and report to ``https://usage-stats.ray.io/`` every hour. The reported usage stats will also be saved to
+``/tmp/ray/session_xxx/usage_stats.json`` on the head node for inspection. You can check the existence of this file to see if collection is enabled.
+
+Usage stats collection is very lightweight and should have no impact on your workload in any way.
+
+Requesting removal of collected data
+------------------------------------
+
+To request removal of collected data, please email us at ``usage_stats@ray.io`` with the ``session_id`` that you can find in ``/tmp/ray/session_xxx/usage_stats.json``.
+
+Frequently Asked Questions (FAQ)
+--------------------------------
+
+**Does the session_id map to personal data?**
+
+No, the ``session_id`` is a random ID specific to a Ray session/job that cannot be used to identify a specific person or machine. It will not live beyond the lifetime of your Ray session, and it is primarily captured to enable us to honor deletion requests.
+
+The session_id is logged so that deletion requests can be honored.
+
+**Could an enterprise easily configure an additional endpoint or substitute a different endpoint?**
+
+We definitely see this use case and would love to chat with you to make this work -- email ``usage_stats@ray.io``.
+
+
+Contact us
+----------
+If you have any feedback regarding usage stats collection, please email us at ``usage_stats@ray.io``.
diff --git a/doc/source/ray-core/package-ref.rst b/doc/source/ray-core/package-ref.rst
index a28e3ce84..27f6ae7b0 100644
--- a/doc/source/ray-core/package-ref.rst
+++ b/doc/source/ray-core/package-ref.rst
@@ -314,3 +314,15 @@ The Ray Command Line API
.. click:: ray.scripts.scripts:debug
:prog: ray debug
:show-nested:
+
+.. _ray-disable-usage-stats-doc:
+
+.. click:: ray.scripts.scripts:disable_usage_stats
+ :prog: ray disable-usage-stats
+ :show-nested:
+
+.. _ray-enable-usage-stats-doc:
+
+.. click:: ray.scripts.scripts:enable_usage_stats
+ :prog: ray enable-usage-stats
+ :show-nested:
diff --git a/doc/source/ray-references/api.rst b/doc/source/ray-references/api.rst
index aa1df60eb..a47568ad3 100644
--- a/doc/source/ray-references/api.rst
+++ b/doc/source/ray-references/api.rst
@@ -13,4 +13,5 @@ API References
../workflows/package-ref.rst
../ray-core/package-ref.rst
../cluster/reference.rst
- ../cluster/jobs-package-ref.rst
\ No newline at end of file
+ ../cluster/jobs-package-ref.rst
+ ../cluster/usage-stats-data-ref.rst
\ No newline at end of file
diff --git a/python/ray/_private/usage/usage_constants.py b/python/ray/_private/usage/usage_constants.py
index 9147d29f6..729c8ccf4 100644
--- a/python/ray/_private/usage/usage_constants.py
+++ b/python/ray/_private/usage/usage_constants.py
@@ -12,7 +12,7 @@ USAGE_STATS_ENABLED_MESSAGE = (
"Usage stats collection is enabled. To disable this, add `--disable-usage-stats` "
"to the command that starts the cluster, or run the following command:"
" `ray disable-usage-stats` before starting the cluster. "
- "See https://github.com/ray-project/ray/issues/20857 for more details."
+ "See https://docs.ray.io/en/master/cluster/usage-stats.html for more details."
)
USAGE_STATS_DISABLED_MESSAGE = "Usage stats collection is disabled."
@@ -23,7 +23,7 @@ USAGE_STATS_ENABLED_BY_DEFAULT_MESSAGE = (
"To disable this, add `--disable-usage-stats` to the command that starts "
"the cluster, or run the following command:"
" `ray disable-usage-stats` before starting the cluster. "
- "See https://github.com/ray-project/ray/issues/20857 for more details."
+ "See https://docs.ray.io/en/master/cluster/usage-stats.html for more details."
)
USAGE_STATS_CONFIRMATION_MESSAGE = (
diff --git a/python/ray/_private/usage/usage_lib.py b/python/ray/_private/usage/usage_lib.py
index 06a0351ab..adda162bf 100644
--- a/python/ray/_private/usage/usage_lib.py
+++ b/python/ray/_private/usage/usage_lib.py
@@ -92,30 +92,49 @@ class ClusterStatusToReport:
class UsageStatsToReport:
"""Usage stats to report"""
+ #: The Ray version in use.
ray_version: str
+ #: The Python version in use.
python_version: str
+ #: The schema version of the report.
schema_version: str
+ #: The source of the data (i.e. OSS).
source: str
+ #: A random id of the cluster session.
session_id: str
+ #: The git commit hash of Ray (i.e. ray.__commit__).
git_commit: str
+ #: The operating system in use.
os: str
+ #: When the data is collected and reported.
collect_timestamp_ms: int
+ #: When the cluster is started.
session_start_timestamp_ms: int
+ #: The cloud provider found in the cluster.yaml file (e.g., aws).
cloud_provider: Optional[str]
+ #: The min_workers found in the cluster.yaml file.
min_workers: Optional[int]
+ #: The max_workers found in the cluster.yaml file.
max_workers: Optional[int]
+ #: The head node instance type found in the cluster.yaml file (e.g., i3.8xlarge).
head_node_instance_type: Optional[str]
+ #: The worker node instance types found in the cluster.yaml file (e.g., i3.8xlarge).
worker_node_instance_types: Optional[List[str]]
+ #: The total num of cpus in the cluster.
total_num_cpus: Optional[int]
+ #: The total num of gpus in the cluster.
total_num_gpus: Optional[int]
+ #: The total size of memory in the cluster.
total_memory_gb: Optional[float]
+ #: The total size of object store memory in the cluster.
total_object_store_memory_gb: Optional[float]
+ #: The Ray libraries that are used (e.g., rllib).
library_usages: Optional[List[str]]
- # The total number of successful reports for the lifetime of the cluster.
+ #: The total number of successful reports for the lifetime of the cluster.
total_success: int
- # The total number of failed reports for the lifetime of the cluster.
+ #: The total number of failed reports for the lifetime of the cluster.
total_failed: int
- # The sequence number of the report.
+ #: The sequence number of the report.
seq_number: int