mirror of
https://github.com/vale981/ray
synced 2025-03-08 19:41:38 -05:00
When a task is passed to the global scheduler, if it is not received,… (#1106)
* When a task is passed to the global scheduler, if it is not received, then try again. * Call give_task_to_global_scheduler directly (same with local).
This commit is contained in:
parent
6109cc0782
commit
b585001881
3 changed files with 44 additions and 6 deletions
|
@ -6,7 +6,7 @@ include(${CMAKE_CURRENT_LIST_DIR}/cmake/Common.cmake)
|
||||||
|
|
||||||
add_subdirectory(redis_module)
|
add_subdirectory(redis_module)
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -g")
|
||||||
|
|
||||||
include_directories(thirdparty/ae)
|
include_directories(thirdparty/ae)
|
||||||
|
|
||||||
|
|
|
@ -416,8 +416,10 @@ void start_server(const char *node_ip_address,
|
||||||
* before this call to subscribe. */
|
* before this call to subscribe. */
|
||||||
db_client_table_subscribe(g_state->db, process_new_db_client,
|
db_client_table_subscribe(g_state->db, process_new_db_client,
|
||||||
(void *) g_state, NULL, NULL, NULL);
|
(void *) g_state, NULL, NULL, NULL);
|
||||||
/* Subscribe to notifications about waiting tasks. TODO(rkn): this may need to
|
/* Subscribe to notifications about waiting tasks. If a local scheduler
|
||||||
* get tasks that were submitted to the database before the subscribe. */
|
* submits tasks to the global scheduler before the global scheduler
|
||||||
|
* successfully subscribes, then the local scheduler that submitted the tasks
|
||||||
|
* will retry. */
|
||||||
task_table_subscribe(g_state->db, NIL_ID, TASK_STATUS_WAITING,
|
task_table_subscribe(g_state->db, NIL_ID, TASK_STATUS_WAITING,
|
||||||
process_task_waiting, (void *) g_state, NULL, NULL,
|
process_task_waiting, (void *) g_state, NULL, NULL,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
|
@ -14,6 +14,17 @@
|
||||||
/* Declared for convenience. */
|
/* Declared for convenience. */
|
||||||
void remove_actor(SchedulingAlgorithmState *algorithm_state, ActorID actor_id);
|
void remove_actor(SchedulingAlgorithmState *algorithm_state, ActorID actor_id);
|
||||||
|
|
||||||
|
void give_task_to_global_scheduler(LocalSchedulerState *state,
|
||||||
|
SchedulingAlgorithmState *algorithm_state,
|
||||||
|
TaskSpec *spec,
|
||||||
|
int64_t task_spec_size);
|
||||||
|
|
||||||
|
void give_task_to_local_scheduler(LocalSchedulerState *state,
|
||||||
|
SchedulingAlgorithmState *algorithm_state,
|
||||||
|
TaskSpec *spec,
|
||||||
|
int64_t task_spec_size,
|
||||||
|
DBClientID local_scheduler_id);
|
||||||
|
|
||||||
struct TaskQueueEntry {
|
struct TaskQueueEntry {
|
||||||
/** The task that is queued. */
|
/** The task that is queued. */
|
||||||
TaskSpec *spec;
|
TaskSpec *spec;
|
||||||
|
@ -892,8 +903,14 @@ void give_task_to_local_scheduler_retry(UniqueID id,
|
||||||
CHECK(Task_state(task) == TASK_STATUS_SCHEDULED);
|
CHECK(Task_state(task) == TASK_STATUS_SCHEDULED);
|
||||||
|
|
||||||
TaskSpec *spec = Task_task_spec(task);
|
TaskSpec *spec = Task_task_spec(task);
|
||||||
handle_actor_task_submitted(state, state->algorithm_state, spec,
|
|
||||||
Task_task_spec_size(task));
|
ActorID actor_id = TaskSpec_actor_id(spec);
|
||||||
|
CHECK(!ActorID_equal(actor_id, NIL_ACTOR_ID));
|
||||||
|
CHECK(state->actor_mapping.count(actor_id) == 1);
|
||||||
|
|
||||||
|
give_task_to_local_scheduler(
|
||||||
|
state, state->algorithm_state, spec, Task_task_spec_size(task),
|
||||||
|
state->actor_mapping[actor_id].local_scheduler_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -927,6 +944,20 @@ void give_task_to_local_scheduler(LocalSchedulerState *state,
|
||||||
task_table_add_task(state->db, task, &retryInfo, NULL, state);
|
task_table_add_task(state->db, task, &retryInfo, NULL, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void give_task_to_global_scheduler_retry(UniqueID id,
|
||||||
|
void *user_context,
|
||||||
|
void *user_data) {
|
||||||
|
LocalSchedulerState *state = (LocalSchedulerState *) user_context;
|
||||||
|
Task *task = (Task *) user_data;
|
||||||
|
CHECK(Task_state(task) == TASK_STATUS_WAITING);
|
||||||
|
|
||||||
|
TaskSpec *spec = Task_task_spec(task);
|
||||||
|
CHECK(ActorID_equal(TaskSpec_actor_id(spec), NIL_ACTOR_ID));
|
||||||
|
|
||||||
|
give_task_to_global_scheduler(state, state->algorithm_state, spec,
|
||||||
|
Task_task_spec_size(task));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Give a task to the global scheduler to schedule.
|
* Give a task to the global scheduler to schedule.
|
||||||
*
|
*
|
||||||
|
@ -948,7 +979,12 @@ void give_task_to_global_scheduler(LocalSchedulerState *state,
|
||||||
DCHECK(state->config.global_scheduler_exists);
|
DCHECK(state->config.global_scheduler_exists);
|
||||||
Task *task = Task_alloc(spec, task_spec_size, TASK_STATUS_WAITING, NIL_ID);
|
Task *task = Task_alloc(spec, task_spec_size, TASK_STATUS_WAITING, NIL_ID);
|
||||||
DCHECK(state->db != NULL);
|
DCHECK(state->db != NULL);
|
||||||
task_table_add_task(state->db, task, NULL, NULL, NULL);
|
auto retryInfo = RetryInfo{
|
||||||
|
.num_retries = 0, // This value is unused.
|
||||||
|
.timeout = 0, // This value is unused.
|
||||||
|
.fail_callback = give_task_to_global_scheduler_retry,
|
||||||
|
};
|
||||||
|
task_table_add_task(state->db, task, &retryInfo, NULL, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool resource_constraints_satisfied(LocalSchedulerState *state,
|
bool resource_constraints_satisfied(LocalSchedulerState *state,
|
||||||
|
|
Loading…
Add table
Reference in a new issue