diff --git a/src/ray/rpc/grpc_server.cc b/src/ray/rpc/grpc_server.cc index c55422569..fdeafa3a8 100644 --- a/src/ray/rpc/grpc_server.cc +++ b/src/ray/rpc/grpc_server.cc @@ -59,6 +59,15 @@ void GrpcServer::Run() { RayConfig::instance().grpc_keepalive_timeout_ms()); builder.AddChannelArgument(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 0); + // NOTE(rickyyx): This argument changes how often the gRPC server expects a keepalive + // ping from the client. See https://github.com/grpc/grpc/blob/HEAD/doc/keepalive.md#faq + // We set this to 1min because GCS gRPC client currently sends keepalive every 1min: + // https://github.com/ray-project/ray/blob/releases/2.0.0/python/ray/_private/gcs_utils.py#L72 + // Setting this larger than the client's ping interval makes the gRPC server send GOAWAY + // so the client backs off its pings. (https://github.com/ray-project/ray/issues/25367) + builder.AddChannelArgument(GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS, + 60000); + if (RayConfig::instance().USE_TLS()) { // Create credentials from locations specified in config std::string rootcert = ReadCert(RayConfig::instance().TLS_CA_CERT());