mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
[runtime env] [Doc] Runtime env doc and messaging improvements (#17547)
This commit is contained in:
parent
e3c09b0af1
commit
63708468df
4 changed files with 34 additions and 25 deletions
|
@ -464,6 +464,7 @@ The ``runtime_env`` is a Python dictionary including one or more of the followin
|
|||
- ``working_dir`` (Path): Specifies the working directory for your job. This must be an existing local directory.
|
||||
It will be cached on the cluster, so the next time you connect with Ray Client you will be able to skip uploading the directory contents.
|
||||
Furthermore, if you locally make a small change to your directory, the next time you connect only the updated part will be uploaded.
|
||||
All Ray workers for your job will be started in their node's copy of this working directory.
|
||||
|
||||
- Examples
|
||||
|
||||
|
@ -473,6 +474,13 @@ The ``runtime_env`` is a Python dictionary including one or more of the followin
|
|||
|
||||
Note: Setting this option per-task or per-actor is currently unsupported.
|
||||
|
||||
Note: If your working directory contains a ``.gitignore`` file, the files and paths specified therein will not be uploaded to the cluster.
|
||||
|
||||
- ``excludes`` (List[str]): When used with ``working_dir``, specifies a list of files or paths to exclude from being uploaded to the cluster.
|
||||
This field also supports the pattern-matching syntax used by ``.gitignore`` files: see `<https://git-scm.com/docs/gitignore>`_ for details.
|
||||
|
||||
- Example: ``["my_file.txt", "path/to/dir", "*.log"]``
|
||||
|
||||
- ``pip`` (List[str] | str): Either a list of pip packages, or a string containing the path to a pip
|
||||
`“requirements.txt” <https://pip.pypa.io/en/stable/user_guide/#requirements-files>`_ file. The path may be an absolute path or a relative path. (Note: A relative path will be interpreted relative to ``working_dir`` if ``working_dir`` is specified.)
|
||||
This will be dynamically installed in the ``runtime_env``.
|
||||
|
|
|
@ -443,7 +443,7 @@ class DataServicerProxy(ray_client_pb2_grpc.RayletDataStreamerServicer):
|
|||
f"using JobConfig: {job_config}!")
|
||||
raise RuntimeError(
|
||||
"Starting up Server Failed! Check "
|
||||
"`ray_client_server.err` on the cluster.")
|
||||
"`ray_client_server_[port].err` on the cluster.")
|
||||
channel = self.proxy_manager.get_channel(client_id)
|
||||
if channel is None:
|
||||
logger.error(f"Channel not found for {client_id}")
|
||||
|
|
|
@ -231,9 +231,8 @@ def current_ray_pip_specifier() -> Optional[str]:
|
|||
built from source locally (likely if you are developing Ray).
|
||||
|
||||
Examples:
|
||||
Returns "ray[all]==1.4.0" if running the stable release
|
||||
Returns "https://s3-us-west-2.amazonaws.com/ray-wheels/master/[..].whl"
|
||||
if running the nightly or a specific commit
|
||||
Returns "https://s3-us-west-2.amazonaws.com/ray-wheels/[..].whl"
|
||||
if running a stable release, a nightly or a specific commit
|
||||
"""
|
||||
logger = get_hook_logger()
|
||||
if os.environ.get("RAY_CI_POST_WHEEL_TESTS"):
|
||||
|
@ -245,12 +244,12 @@ def current_ray_pip_specifier() -> Optional[str]:
|
|||
Path(__file__).resolve().parents[3], ".whl", get_wheel_filename())
|
||||
elif ray.__commit__ == "{{RAY_COMMIT_SHA}}":
|
||||
# Running on a version built from source locally.
|
||||
if os.environ.get("RAY_RUNTIME_ENV_LOCAL_DEV_MODE") != "1":
|
||||
logger.warning(
|
||||
"Current Ray version could not be detected, most likely "
|
||||
"because you are using a version of Ray "
|
||||
"built from source. If you wish to use runtime_env, "
|
||||
"you can try building a wheel and including the wheel "
|
||||
"explicitly as a pip dependency.")
|
||||
"because you have manually built Ray from source. To use "
|
||||
"runtime_env in this case, set the environment variable "
|
||||
"RAY_RUNTIME_ENV_LOCAL_DEV_MODE=1.")
|
||||
return None
|
||||
elif "dev" in ray.__version__:
|
||||
# Running on a nightly wheel.
|
||||
|
|
|
@ -757,16 +757,18 @@ void NodeManager::WarnResourceDeadlock() {
|
|||
std::ostringstream error_message;
|
||||
error_message
|
||||
<< "The actor or task with ID " << exemplar.GetTaskSpecification().TaskId()
|
||||
<< " cannot be scheduled right now. It requires "
|
||||
<< " cannot be scheduled right now. You can ignore this message if this "
|
||||
<< "Ray cluster is expected to auto-scale or if you specified a "
|
||||
<< "runtime_env for this actor or task, which may take time to install. "
|
||||
<< "Otherwise, this is likely due to all cluster resources being claimed "
|
||||
<< "by actors. To resolve the issue, consider creating fewer actors or "
|
||||
<< "increasing the resources available to this Ray cluster.\n"
|
||||
<< "Required resources for this actor or task: "
|
||||
<< exemplar.GetTaskSpecification().GetRequiredPlacementResources().ToString()
|
||||
<< " for placement, but this node only has remaining " << available_resources
|
||||
<< ". In total there are " << pending_tasks << " pending tasks and "
|
||||
<< pending_actor_creations << " pending actors on this node. "
|
||||
<< "This is likely due to all cluster resources being claimed by actors. "
|
||||
<< "To resolve the issue, consider creating fewer actors or increase the "
|
||||
<< "resources available to this Ray cluster. You can ignore this message "
|
||||
<< "if this Ray cluster is expected to auto-scale or if you specified a "
|
||||
<< "runtime_env for this task or actor because it takes time to install.";
|
||||
<< "\n"
|
||||
<< "Available resources on this node: " << available_resources
|
||||
<< "In total there are " << pending_tasks << " pending tasks and "
|
||||
<< pending_actor_creations << " pending actors on this node.";
|
||||
|
||||
std::string error_message_str = error_message.str();
|
||||
RAY_LOG(WARNING) << error_message_str;
|
||||
|
|
Loading…
Add table
Reference in a new issue