mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00

- Move the URI reference logic from raylet to agent. - Redefine the runtime env agent RPCs as `CreateRuntimeEnvOrGet` and `DeleteRuntimeEnvIfPossible`. - More details: https://github.com/ray-project/ray/issues/21695#issuecomment-1032161528 Future work: - We don't remove `RuntimeEnvUris` from the `RuntimeEnv` protobuf in the current PR because GCS also uses those URIs to do GC via runtime_env_manager. We should clean this up as well. - The Ray client server shouldn't interact with the agent directly; otherwise, the Ray client server should also decrease the reference count. - Currently, `WorkerPool::HandleJobStarted` is called multiple times for one job, so we should make sure this function is idempotent. Can we change this logic so that the function is called only once?
38 lines
1.5 KiB
Python
38 lines
1.5 KiB
Python
from ray.ray_constants import env_integer
|
|
|
|
# Dashboard and dashboard-agent constants.
# NOTE(review): the per-constant meanings below are read off the names and
# values only; confirm against the call sites before relying on them.

# Log file names for the dashboard and the dashboard agent.
DASHBOARD_LOG_FILENAME = "dashboard.log"
DASHBOARD_AGENT_PORT_PREFIX = "DASHBOARD_AGENT_PORT_PREFIX:"
DASHBOARD_AGENT_LOG_FILENAME = "dashboard_agent.log"
DASHBOARD_AGENT_CHECK_PARENT_INTERVAL_SECONDS = 2

# Redis connection retry settings.
RETRY_REDIS_CONNECTION_TIMES = 10
# NOTE(review): "INTERNAL" looks like a typo for "INTERVAL"; the name is kept
# unchanged because other modules may reference it.
CONNECT_REDIS_INTERNAL_SECONDS = 2

# Data maintenance intervals.
PURGE_DATA_INTERVAL_SECONDS = 60 * 10  # evaluates to 600
ORGANIZE_DATA_INTERVAL_SECONDS = 2

# Address-related string keys.
DASHBOARD_RPC_ADDRESS = "dashboard_rpc"
GCS_SERVER_ADDRESS = "GcsServerAddress"
# GCS check alive
# Each value is obtained by calling env_integer with a key equal to the
# constant's own name and an integer literal as the second argument.
# NOTE(review): presumably the key is an environment variable name and the
# literal is the fallback default — confirm against
# ray.ray_constants.env_integer.
GCS_CHECK_ALIVE_MAX_COUNT_OF_RPC_ERROR = env_integer(
    "GCS_CHECK_ALIVE_MAX_COUNT_OF_RPC_ERROR", 10
)
GCS_CHECK_ALIVE_INTERVAL_SECONDS = env_integer("GCS_CHECK_ALIVE_INTERVAL_SECONDS", 5)
GCS_CHECK_ALIVE_RPC_TIMEOUT = env_integer("GCS_CHECK_ALIVE_RPC_TIMEOUT", 10)
GCS_RETRY_CONNECT_INTERVAL_SECONDS = env_integer(
    "GCS_RETRY_CONNECT_INTERVAL_SECONDS", 2
)
# aiohttp_cache
AIOHTTP_CACHE_TTL_SECONDS = 2
AIOHTTP_CACHE_MAX_SIZE = 128
# NOTE(review): presumably the name of the environment variable that turns the
# cache off — confirm against the code that reads this key.
AIOHTTP_CACHE_DISABLE_ENVIRONMENT_KEY = "RAY_DASHBOARD_NO_CACHE"
# Named signals
# String identifiers, one per signal; each value is the lower-case form of the
# constant's name without the SIGNAL_ prefix.
SIGNAL_NODE_INFO_FETCHED = "node_info_fetched"
SIGNAL_NODE_SUMMARY_FETCHED = "node_summary_fetched"
SIGNAL_JOB_INFO_FETCHED = "job_info_fetched"
SIGNAL_WORKER_INFO_FETCHED = "worker_info_fetched"
# Default value for datacenter (the default value in protobuf)
# NOTE(review): the constants below are a language and a job id rather than a
# datacenter, so the comment above may be stale — confirm against the protobuf
# definitions.
DEFAULT_LANGUAGE = "PYTHON"
DEFAULT_JOB_ID = "ffff"
# Cache TTL for a bad runtime env. After this time, delete the cache entry and
# retry creating the runtime env if needed.
# The second argument passed to env_integer evaluates to 600 (60 * 10).
# NOTE(review): presumably that is the fallback when the environment variable
# is unset — confirm against ray.ray_constants.env_integer.
BAD_RUNTIME_ENV_CACHE_TTL_SECONDS = env_integer(
    "BAD_RUNTIME_ENV_CACHE_TTL_SECONDS", 60 * 10
)