2017-02-27 12:24:07 -08:00
|
|
|
#ifndef LOCAL_SCHEDULER_H
|
|
|
|
#define LOCAL_SCHEDULER_H
|
2016-10-04 16:25:11 -07:00
|
|
|
|
|
|
|
#include "task.h"
|
2016-10-18 18:27:43 -07:00
|
|
|
#include "event_loop.h"
|
2016-10-04 16:25:11 -07:00
|
|
|
|
2016-10-04 17:06:52 -07:00
|
|
|
/**
 * Establish a connection to a new client.
 *
 * @param loop Event loop of the local scheduler.
 * @param listener_sock Socket the local scheduler is listening on for new
 *        client requests.
 * @param context State of the local scheduler.
 * @param events Flag for events that are available on the listener socket.
 * @return Void.
 */
|
2016-10-18 18:27:43 -07:00
|
|
|
void new_client_connection(event_loop *loop,
|
|
|
|
int listener_sock,
|
|
|
|
void *context,
|
2016-10-04 16:25:11 -07:00
|
|
|
int events);
|
2016-09-27 19:11:09 -07:00
|
|
|
|
2017-04-24 18:10:21 -07:00
|
|
|
/**
 * Check if a driver is still alive.
 *
 * @param driver_id The ID of the driver (passed as a WorkerID).
 * @return True if the driver is still alive and false otherwise.
 */
|
|
|
|
bool is_driver_alive(WorkerID driver_id);
|
|
|
|
|
2016-10-04 17:06:52 -07:00
|
|
|
/**
 * This function can be called by the scheduling algorithm to assign a task
 * to a worker.
 *
 * @param state The local scheduler state.
 * @param task The task that is submitted to the worker.
 * @param worker The worker to assign the task to.
 * @return Void.
 */
|
2017-02-26 00:32:43 -08:00
|
|
|
void assign_task_to_worker(LocalSchedulerState *state,
|
2017-12-14 20:47:54 -08:00
|
|
|
TaskExecutionSpec &task,
|
2017-02-26 00:32:43 -08:00
|
|
|
LocalSchedulerClient *worker);
|
2016-09-27 19:11:09 -07:00
|
|
|
|
2017-06-13 07:34:58 +00:00
|
|
|
/**
 * This function is called whenever a task has finished on one of the workers.
 * It updates the resource accounting and the global state store.
 *
 * @param state The local scheduler state.
 * @param worker The worker that finished the task.
 * @param actor_checkpoint_failed If the last task assigned was a checkpoint
 *        task that failed.
 * @return Void.
 */
|
2017-10-12 09:53:32 -07:00
|
|
|
void finish_task(LocalSchedulerState *state,
|
|
|
|
LocalSchedulerClient *worker,
|
|
|
|
bool actor_checkpoint_failed);
|
2017-06-13 07:34:58 +00:00
|
|
|
|
2016-10-04 17:06:52 -07:00
|
|
|
/**
 * This is the callback that is used to process a notification from the Plasma
 * store that an object has been sealed.
 *
 * @param loop The local scheduler's event loop.
 * @param client_sock The file descriptor to read the notification from.
 * @param context The local scheduler state.
 * @param events Event flags supplied with the callback (presumably the events
 *        available on client_sock; TODO confirm against the event loop API).
 * @return Void.
 */
|
2016-10-18 18:27:43 -07:00
|
|
|
void process_plasma_notification(event_loop *loop,
|
|
|
|
int client_sock,
|
|
|
|
void *context,
|
|
|
|
int events);
|
2016-09-27 19:11:09 -07:00
|
|
|
|
2016-12-12 23:17:22 -08:00
|
|
|
/**
 * Reconstruct an object. If the object does not exist on any nodes, according
 * to the state tables, and if the object is not already being reconstructed,
 * this triggers a single reexecution of the task that originally created the
 * object.
 *
 * @param state The local scheduler state.
 * @param object_id The ID of the object to reconstruct.
 * @return Void.
 */
|
2017-02-26 00:32:43 -08:00
|
|
|
void reconstruct_object(LocalSchedulerState *state, ObjectID object_id);
|
2016-12-12 23:17:22 -08:00
|
|
|
|
2017-03-05 02:05:02 -08:00
|
|
|
/**
 * Print resource information for a local scheduler state and a task spec.
 *
 * NOTE(review): this summary is inferred from the function and parameter
 * names only; the output format and destination are not visible from this
 * header. Confirm against the definition.
 *
 * @param s The local scheduler state (read-only).
 * @param spec The task specification (read-only).
 * @return Void.
 */
void print_resource_info(const LocalSchedulerState *s, const TaskSpec *spec);
|
2017-02-09 01:34:14 -08:00
|
|
|
|
2017-02-17 17:08:52 -08:00
|
|
|
/**
 * Kill a worker, if it is a child process, and clean up all of its associated
 * state. Note that this function is also called on drivers, but it should not
 * actually send a kill signal to drivers.
 *
 * @param state The local scheduler state.
 * @param worker The local scheduler client to kill.
 * @param wait A boolean representing whether to wait for the killed worker to
 *        exit.
 * @param suppress_warning A bool that is true if we should not warn the
 *        driver, and false otherwise. This should only be true when a driver
 *        is removed.
 * @return Void.
 */
|
2017-04-10 21:02:36 -07:00
|
|
|
void kill_worker(LocalSchedulerState *state,
|
|
|
|
LocalSchedulerClient *worker,
|
2017-04-26 23:52:13 -07:00
|
|
|
bool wait,
|
|
|
|
bool suppress_warning);
|
2017-02-17 17:08:52 -08:00
|
|
|
|
|
|
|
/**
 * Start a worker. This forks a new worker process that can be added to the
 * pool of available workers, pending registration of its PID with the local
 * scheduler.
 *
 * @param state The local scheduler state.
 * @param actor_id The ID of the actor for this worker. If this worker is not
 *        an actor, then NIL_ACTOR_ID should be used.
 * @param reconstruct True if the worker is an actor and is being started in
 *        reconstruct mode.
 * @return Void.
 */
|
2017-08-02 18:02:52 -07:00
|
|
|
void start_worker(LocalSchedulerState *state,
|
|
|
|
ActorID actor_id,
|
|
|
|
bool reconstruct);
|
2017-02-17 17:08:52 -08:00
|
|
|
|
|
|
|
/**
 * Check if a certain quantity of dynamic resources are available.
 *
 * NOTE(review): an earlier version of this comment said "If num_cpus is 0, we
 * ignore the dynamic number of available CPUs (which may be negative)". The
 * signature no longer has a num_cpus argument, so confirm against the
 * definition whether a zero CPU request in the resources map still gets that
 * special treatment.
 *
 * @param state The state of the local scheduler.
 * @param resources The resources to check, as a map from a string key to a
 *        double (presumably resource name to quantity; TODO confirm).
 * @return True if there are enough of the requested resources and false
 *         otherwise.
 */
|
2017-12-01 11:41:40 -08:00
|
|
|
bool check_dynamic_resources(
|
|
|
|
LocalSchedulerState *state,
|
|
|
|
const std::unordered_map<std::string, double> &resources);
|
2017-04-28 12:09:47 -07:00
|
|
|
|
|
|
|
/**
 * Acquire additional resources (CPUs and GPUs) for a worker.
 *
 * @param state The local scheduler state.
 * @param worker The worker who is acquiring resources.
 * @param resources The resources to acquire, as a map from a string key to a
 *        double (presumably resource name to quantity; TODO confirm).
 * @return Void.
 */
|
2017-12-01 11:41:40 -08:00
|
|
|
void acquire_resources(
|
|
|
|
LocalSchedulerState *state,
|
|
|
|
LocalSchedulerClient *worker,
|
|
|
|
const std::unordered_map<std::string, double> &resources);
|
2017-04-28 12:09:47 -07:00
|
|
|
|
|
|
|
/**
 * Return resources (CPUs and GPUs) being used by a worker to the local
 * scheduler.
 *
 * @param state The local scheduler state.
 * @param worker The worker who is returning resources.
 * @param resources The resources to release, as a map from a string key to a
 *        double (presumably resource name to quantity; TODO confirm).
 * @return Void.
 */
|
2017-12-01 11:41:40 -08:00
|
|
|
void release_resources(
|
|
|
|
LocalSchedulerState *state,
|
|
|
|
LocalSchedulerClient *worker,
|
|
|
|
const std::unordered_map<std::string, double> &resources);
|
2017-02-17 17:08:52 -08:00
|
|
|
|
2016-12-12 23:17:22 -08:00
|
|
|
/** The following functions are for testing purposes only; they are declared only when LOCAL_SCHEDULER_TEST is defined. */
|
2017-02-27 12:24:07 -08:00
|
|
|
#ifdef LOCAL_SCHEDULER_TEST
|
2017-02-26 00:32:43 -08:00
|
|
|
/**
 * Initialize a local scheduler state and return a pointer to it.
 *
 * NOTE(review): this header shows only the signature. The summary and every
 * parameter description below are inferred from the names and types alone;
 * confirm them against the definition before relying on them.
 *
 * @param node_ip_address Presumably the IP address of this node.
 * @param loop The event loop for the local scheduler to use.
 * @param redis_addr Presumably the address of the Redis server.
 * @param redis_port Presumably the port of the Redis server.
 * @param local_scheduler_socket_name Presumably the name of the socket the
 *        local scheduler listens on.
 * @param plasma_manager_socket_name Presumably the name of the Plasma manager
 *        socket.
 * @param plasma_store_socket_name Presumably the name of the Plasma store
 *        socket.
 * @param plasma_manager_address Presumably the address of the Plasma manager.
 * @param global_scheduler_exists Presumably whether a global scheduler is
 *        running.
 * @param static_resource_vector A map from a string key to a double
 *        (presumably resource name to the node's static capacity).
 * @param worker_path Presumably the path or command used to start workers.
 * @param num_workers Presumably the number of workers to start.
 * @return A pointer to a LocalSchedulerState. Ownership and cleanup
 *         responsibilities are not visible from this header.
 */
LocalSchedulerState *LocalSchedulerState_init(
|
2016-12-20 20:21:35 -08:00
|
|
|
const char *node_ip_address,
|
2016-12-12 23:17:22 -08:00
|
|
|
event_loop *loop,
|
|
|
|
const char *redis_addr,
|
|
|
|
int redis_port,
|
2016-12-21 18:53:12 -08:00
|
|
|
const char *local_scheduler_socket_name,
|
2016-12-12 23:17:22 -08:00
|
|
|
const char *plasma_manager_socket_name,
|
|
|
|
const char *plasma_store_socket_name,
|
2016-12-13 17:21:38 -08:00
|
|
|
const char *plasma_manager_address,
|
2017-01-27 01:28:48 -08:00
|
|
|
bool global_scheduler_exists,
|
2017-12-01 11:41:40 -08:00
|
|
|
const std::unordered_map<std::string, double> &static_resource_vector,
|
2017-02-09 01:34:14 -08:00
|
|
|
const char *worker_path,
|
2017-02-10 12:46:23 -08:00
|
|
|
int num_workers);
|
2016-12-12 23:17:22 -08:00
|
|
|
|
2017-02-26 00:32:43 -08:00
|
|
|
/**
 * Get the scheduling algorithm state associated with a local scheduler state.
 *
 * NOTE(review): inferred from the name and signature only; confirm against
 * the definition.
 *
 * @param state The local scheduler state.
 * @return A pointer to a SchedulingAlgorithmState.
 */
SchedulingAlgorithmState *get_algorithm_state(LocalSchedulerState *state);
|
2016-12-12 23:17:22 -08:00
|
|
|
|
|
|
|
/**
 * Process a message on a client socket.
 *
 * NOTE(review): this header shows only the signature. The summary and the
 * parameter descriptions are inferred from the names and from the other
 * callbacks in this header that share the same parameter list; confirm them
 * against the definition.
 *
 * @param loop The local scheduler's event loop.
 * @param client_sock Presumably the file descriptor to read the message from.
 * @param context Presumably the local scheduler state.
 * @param events Event flags supplied with the callback.
 * @return Void.
 */
void process_message(event_loop *loop,
|
|
|
|
int client_sock,
|
|
|
|
void *context,
|
|
|
|
int events);
|
2017-02-10 12:46:23 -08:00
|
|
|
|
2016-12-12 23:17:22 -08:00
|
|
|
#endif
|
|
|
|
|
2017-02-27 12:24:07 -08:00
|
|
|
#endif /* LOCAL_SCHEDULER_H */
|