diff --git a/python/ray/tests/test_client_proxy.py b/python/ray/tests/test_client_proxy.py
index f886534b6..767b33719 100644
--- a/python/ray/tests/test_client_proxy.py
+++ b/python/ray/tests/test_client_proxy.py
@@ -211,6 +211,30 @@ def test_startup_error_yields_clean_result(shutdown_only):
     server.stop(0)
 
 
+@pytest.mark.skipif(
+    sys.platform == "win32",
+    reason="PSUtil does not work the same on windows.")
+@pytest.mark.parametrize(
+    "call_ray_start", [
+        "ray start --head --ray-client-server-port 25031 "
+        "--port 0 --redis-password=password"
+    ],
+    indirect=True)
+def test_runtime_install_error_message(call_ray_start):
+    """
+    Check that an error while preparing the runtime environment for the client
+    server yields an actionable, clear error on the *client side*.
+    """
+    with pytest.raises(ConnectionAbortedError) as excinfo:
+        ray.client("localhost:25031").env({
+            "pip": ["ray-this-doesnt-exist"]
+        }).connect()
+    assert ("No matching distribution found for ray-this-doesnt-exist" in str(
+        excinfo.value))
+
+    ray.util.disconnect()
+
+
 def test_prepare_runtime_init_req_fails():
     """
     Check that a connection that is initiated with a non-Init request
diff --git a/python/ray/util/client/server/proxier.py b/python/ray/util/client/server/proxier.py
index 6494c7eef..cbc6a29ed 100644
--- a/python/ray/util/client/server/proxier.py
+++ b/python/ray/util/client/server/proxier.py
@@ -534,17 +534,33 @@ class DataServicerProxy(ray_client_pb2_grpc.RayletDataStreamerServicer):
                     logger.error(
                         f"Server startup failed for client: {client_id}, "
                         f"using JobConfig: {job_config}!")
+                    # TODO(architkulkarni): Once the client server runtime env
+                    # setup is moved into the runtime env agent, revisit this
+                    # and double check where the error logs end up being saved.
+                    try:
+                        with open("/tmp/ray/session_latest/logs/"
+                                  f"ray_client_server_{server.port}.err") as f:
+                            runtime_env_error_str = f.read()
+                    except OSError as e:
+                        # Never let a log-read failure (missing file, bad
+                        # permissions, ...) mask the startup error below.
+                        runtime_env_error_str = f"(Could not read file: {e})"
                     raise RuntimeError(
                         "Starting Ray client server failed. This is most "
                         "likely because the runtime_env failed to be "
-                        "installed. See ray_client_server_[port].err on the "
-                        "head node of the cluster for the relevant logs.")
+                        "installed. Printing the contents of "
+                        f"ray_client_server_{server.port}.err below: \n"
+                        f"{runtime_env_error_str}")
                 channel = self.proxy_manager.get_channel(client_id)
                 if channel is None:
                     logger.error(f"Channel not found for {client_id}")
                     raise RuntimeError(
                         "Proxy failed to Connect to backend! Check "
-                        "`ray_client_server.err` on the cluster.")
+                        "`ray_client_server.err` and "
+                        f"`ray_client_server_{server.port}.err` on the head "
+                        "node of the cluster for the relevant logs. "
+                        "By default these are located at "
+                        "/tmp/ray/session_latest/logs.")
                 stub = ray_client_pb2_grpc.RayletDataStreamerStub(channel)
             except Exception:
                 init_resp = ray_client_pb2.DataResponse(