fix comments and typo (#23008)

Fix comments and typos for scheduler code.
This commit is contained in:
Chen Shen 2022-03-10 11:40:31 -08:00 committed by GitHub
parent 11c40e363d
commit 3ebc4ae289
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 20 additions and 33 deletions

View file

@ -47,14 +47,14 @@ class ClusterResourceManager {
/// Update node resources. This happens when a node's resource usage is updated. /// Update node resources. This happens when a node's resource usage is updated.
/// ///
/// \param node_id_string ID of the node whose resources need to be updated. /// \param node_id ID of the node whose resources need to be updated.
/// \param resource_data The node resource data. /// \param resource_data The node resource data.
bool UpdateNode(scheduling::NodeID node_id, const rpc::ResourcesData &resource_data); bool UpdateNode(scheduling::NodeID node_id, const rpc::ResourcesData &resource_data);
/// Remove node from the cluster data structure. This happens /// Remove node from the cluster data structure. This happens
/// when a node fails or it is removed from the cluster. /// when a node fails or it is removed from the cluster.
/// ///
/// \param node_id_string ID of the node to be removed. /// \param node_id ID of the node to be removed.
bool RemoveNode(scheduling::NodeID node_id); bool RemoveNode(scheduling::NodeID node_id);
/// Get number of nodes in the cluster. /// Get number of nodes in the cluster.
@ -62,16 +62,16 @@ class ClusterResourceManager {
/// Update total capacity of a given resource of a given node. /// Update total capacity of a given resource of a given node.
/// ///
/// \param node_name: Node whose resource we want to update. /// \param node_id: Node whose resource we want to update.
/// \param resource_name: Resource which we want to update. /// \param resource_id: Resource which we want to update.
/// \param resource_total: New capacity of the resource. /// \param resource_total: New capacity of the resource.
void UpdateResourceCapacity(scheduling::NodeID node_id, void UpdateResourceCapacity(scheduling::NodeID node_id,
scheduling::ResourceID resource_id, double resource_total); scheduling::ResourceID resource_id, double resource_total);
/// Delete a given resource from a given node. /// Delete a given resource from a given node.
/// ///
/// \param node_name: Node whose resource we want to delete. /// \param node_id: Node whose resource we want to delete.
/// \param resource_name: Resource we want to delete /// \param resource_id: Resource we want to delete
void DeleteResource(scheduling::NodeID node_id, scheduling::ResourceID resource_id); void DeleteResource(scheduling::NodeID node_id, scheduling::ResourceID resource_id);
/// Return local resources in human-readable string form. /// Return local resources in human-readable string form.

View file

@ -90,7 +90,7 @@ class ClusterResourceScheduler {
/// False otherwise. /// False otherwise.
bool AllocateRemoteTaskResources( bool AllocateRemoteTaskResources(
scheduling::NodeID node_id, scheduling::NodeID node_id,
const absl::flat_hash_map<std::string, double> &task_resources); const absl::flat_hash_map<std::string, double> &resource_request);
/// Return human-readable string for this scheduler state. /// Return human-readable string for this scheduler state.
std::string DebugString() const; std::string DebugString() const;

View file

@ -49,22 +49,11 @@ class ClusterTaskManager : public ClusterTaskManagerInterface {
/// \param self_node_id: ID of local node. /// \param self_node_id: ID of local node.
/// \param cluster_resource_scheduler: The resource scheduler which contains /// \param cluster_resource_scheduler: The resource scheduler which contains
/// the state of the cluster. /// the state of the cluster.
/// \param task_dependency_manager_ Used to fetch task's dependencies.
/// \param is_owner_alive: A callback which returns if the owner process is alive
/// (according to our ownership model).
/// \param get_node_info: Function that returns the node info for a node. /// \param get_node_info: Function that returns the node info for a node.
/// \param announce_infeasible_task: Callback that informs the user if a task /// \param announce_infeasible_task: Callback that informs the user if a task
/// is infeasible. /// is infeasible.
/// \param worker_pool: A reference to the worker pool. /// \param local_task_manager: Manages local tasks.
/// \param leased_workers: A reference to the leased workers map.
/// \param get_task_arguments: A callback for getting a tasks' arguments by
/// their ids.
/// \param max_pinned_task_arguments_bytes: The cap on pinned arguments.
/// \param get_time_ms: A callback which returns the current time in milliseconds. /// \param get_time_ms: A callback which returns the current time in milliseconds.
/// \param sched_cls_cap_interval_ms: The time before we increase the cap
/// on the number of tasks that can run per
/// scheduling class. If set to 0, there is no
/// cap. If it's a large number, the cap is hard.
ClusterTaskManager( ClusterTaskManager(
const NodeID &self_node_id, const NodeID &self_node_id,
std::shared_ptr<ClusterResourceScheduler> cluster_resource_scheduler, std::shared_ptr<ClusterResourceScheduler> cluster_resource_scheduler,
@ -75,12 +64,12 @@ class ClusterTaskManager : public ClusterTaskManagerInterface {
return (int64_t)(absl::GetCurrentTimeNanos() / 1e6); return (int64_t)(absl::GetCurrentTimeNanos() / 1e6);
}); });
/// (Step 1) Queue tasks and schedule.
/// Queue task and schedule. This happens when processing the worker lease request. /// Queue task and schedule. This happens when processing the worker lease request.
/// ///
/// \param task: The incoming task to be queued and scheduled. /// \param task: The incoming task to be queued and scheduled.
/// \param grant_or_reject: True if we should either grant or reject the request /// \param grant_or_reject: True if we should either grant or reject the request
/// but no spillback. /// but no spillback.
/// \param is_selected_based_on_locality : should schedule on local node if possible.
/// \param reply: The reply of the lease request. /// \param reply: The reply of the lease request.
/// \param send_reply_callback: The function used during dispatching. /// \param send_reply_callback: The function used during dispatching.
void QueueAndScheduleTask(const RayTask &task, bool grant_or_reject, void QueueAndScheduleTask(const RayTask &task, bool grant_or_reject,
@ -121,12 +110,12 @@ class ClusterTaskManager : public ClusterTaskManagerInterface {
/// Return if any tasks are pending resource acquisition. /// Return if any tasks are pending resource acquisition.
/// ///
/// \param[out] exemplar: An example task that is deadlocking. /// \param[out] example: An example task that is deadlocking.
/// \param[in,out] any_pending: True if there's any pending example.
/// \param[in,out] num_pending_actor_creation: Number of pending actor creation tasks. /// \param[in,out] num_pending_actor_creation: Number of pending actor creation tasks.
/// \param[in,out] num_pending_tasks: Number of pending tasks. /// \param[in,out] num_pending_tasks: Number of pending tasks.
/// \param[in,out] any_pending: True if there's any pending exemplar.
/// \return True if any tasks are pending resource acquisition. /// \return True if any tasks are pending resource acquisition.
bool AnyPendingTasksForResourceAcquisition(RayTask *exemplar, bool *any_pending, bool AnyPendingTasksForResourceAcquisition(RayTask *example, bool *any_pending,
int *num_pending_actor_creation, int *num_pending_actor_creation,
int *num_pending_tasks) const override; int *num_pending_tasks) const override;

View file

@ -49,19 +49,19 @@ class LocalResourceManager {
/// Add a local resource that is available. /// Add a local resource that is available.
/// ///
/// \param resource_name: Resource which we want to update. /// \param resource_id: Resource which we want to update.
/// \param resource_total: New capacity of the resource. /// \param resource_total: New capacity of the resource.
void AddLocalResourceInstances(scheduling::ResourceID resource_id, void AddLocalResourceInstances(scheduling::ResourceID resource_id,
const std::vector<FixedPoint> &instances); const std::vector<FixedPoint> &instances);
/// Delete a given resource from the local node. /// Delete a given resource from the local node.
/// ///
/// \param resource_name: Resource we want to delete /// \param resource_id: Resource we want to delete
void DeleteLocalResource(scheduling::ResourceID resource_id); void DeleteLocalResource(scheduling::ResourceID resource_id);
/// Check whether the available resources are empty. /// Check whether the available resources are empty.
/// ///
/// \param resource_name: Resource which we want to check. /// \param resource_id: Resource which we want to check.
bool IsAvailableResourceEmpty(scheduling::ResourceID resource_id) const; bool IsAvailableResourceEmpty(scheduling::ResourceID resource_id) const;
/// Return local resources. /// Return local resources.
@ -84,7 +84,7 @@ class LocalResourceManager {
/// ///
/// \return Underflow capacities of resource instances after subtracting the resources. /// \return Underflow capacities of resource instances after subtracting the resources.
std::vector<double> SubtractResourceInstances(scheduling::ResourceID resource_id, std::vector<double> SubtractResourceInstances(scheduling::ResourceID resource_id,
const std::vector<double> &cpu_instances, const std::vector<double> &instances,
bool allow_going_negative = false); bool allow_going_negative = false);
/// Subtract the resources required by a given resource request (resource_request) from /// Subtract the resources required by a given resource request (resource_request) from

View file

@ -63,8 +63,6 @@ class LocalTaskManager {
/// \param is_owner_alive: A callback which returns if the owner process is alive /// \param is_owner_alive: A callback which returns if the owner process is alive
/// (according to our ownership model). /// (according to our ownership model).
/// \param get_node_info: Function that returns the node info for a node. /// \param get_node_info: Function that returns the node info for a node.
/// \param announce_infeasible_task: Callback that informs the user if a task
/// is infeasible.
/// \param worker_pool: A reference to the worker pool. /// \param worker_pool: A reference to the worker pool.
/// \param leased_workers: A reference to the leased workers map. /// \param leased_workers: A reference to the leased workers map.
/// \param get_task_arguments: A callback for getting a task's arguments by /// \param get_task_arguments: A callback for getting a task's arguments by
@ -100,7 +98,7 @@ class LocalTaskManager {
/// Move tasks from waiting to ready for dispatch. Called when a task's /// Move tasks from waiting to ready for dispatch. Called when a task's
/// dependencies are resolved. /// dependencies are resolved.
/// ///
/// \param readyIds: The tasks which are now ready to be dispatched. /// \param ready_ids: The tasks which are now ready to be dispatched.
void TasksUnblocked(const std::vector<TaskID> &ready_ids); void TasksUnblocked(const std::vector<TaskID> &ready_ids);
/// Return the finished task and release the worker resources. /// Return the finished task and release the worker resources.
@ -125,12 +123,12 @@ class LocalTaskManager {
/// Return if any tasks are pending resource acquisition. /// Return if any tasks are pending resource acquisition.
/// ///
/// \param[out] exemplar: An example task that is deadlocking. /// \param[out] example: An example task that is deadlocking.
/// \param[in,out] any_pending: True if there's any pending example.
/// \param[in,out] num_pending_actor_creation: Number of pending actor creation tasks. /// \param[in,out] num_pending_actor_creation: Number of pending actor creation tasks.
/// \param[in,out] num_pending_tasks: Number of pending tasks. /// \param[in,out] num_pending_tasks: Number of pending tasks.
/// \param[in,out] any_pending: True if there's any pending exemplar.
/// \return True if any tasks are pending resource acquisition. /// \return True if any tasks are pending resource acquisition.
bool AnyPendingTasksForResourceAcquisition(RayTask *exemplar, bool *any_pending, bool AnyPendingTasksForResourceAcquisition(RayTask *example, bool *any_pending,
int *num_pending_actor_creation, int *num_pending_actor_creation,
int *num_pending_tasks) const; int *num_pending_tasks) const;