Mirror of https://github.com/vale981/ray (synced 2025-03-06 10:31:39 -05:00)
Nondeterministic reconstruction for actors (#1344)
* Add failing unit test for nondeterministic reconstruction
* Retry scheduling actor tasks if reassigned to local scheduler
* Update execution edges asynchronously upon dispatch for nondeterministic reconstruction
* Fix bug for updating checkpoint task execution dependencies
* Update comments for deterministic reconstruction
* Cleanup
* Add (and skip) failing test case for nondeterministic reconstruction
* Suppress test output
Parent: 83949a533b
Commit: 74718efa73
4 changed files with 144 additions and 19 deletions
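For orientation, the scheduling change works roughly like this: every actor task returns an extra dummy object, and when the local scheduler dispatches the next task for that actor it records the previously dispatched task's dummy object as an execution dependency, so reconstruction replays tasks in the order they actually ran rather than the order they were submitted. Below is a minimal, self-contained C++ sketch of that chaining, not the actual local scheduler code; PendingTask, ActorQueue, and dispatch_next are illustrative names that do not appear in this commit.

// Simplified model of dispatch-time execution-dependency chaining for actor
// tasks. All names here are illustrative stand-ins; the real logic is in the
// dispatch_actor_task hunk further down.
#include <deque>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct PendingTask {
  std::string description;
  bool is_checkpoint = false;
  std::string dummy_object;  // Dummy return value of this task.
  std::vector<std::string> execution_dependencies;  // Filled in at dispatch.
};

struct ActorQueue {
  // Dummy object of the most recently dispatched task; empty for the first
  // task, mirroring the nil ObjectID stored per actor in this commit.
  std::optional<std::string> execution_dependency;
  std::deque<PendingTask> queue;

  std::optional<PendingTask> dispatch_next() {
    if (queue.empty()) {
      return std::nullopt;
    }
    PendingTask task = queue.front();
    queue.pop_front();
    // Checkpoint resumptions may run at any time; every other task depends on
    // whichever task actually ran last, which fixes the replay order during
    // reconstruction.
    if (execution_dependency && !task.is_checkpoint) {
      task.execution_dependencies.push_back(*execution_dependency);
    }
    execution_dependency = task.dummy_object;
    return task;
  }
};

int main() {
  ActorQueue actor;
  actor.queue.push_back({"f.remote() from handle A", false, "dummy_A0", {}});
  actor.queue.push_back({"f.remote() from handle B", false, "dummy_B0", {}});
  while (auto task = actor.dispatch_next()) {
    std::cout << task->description << " depends on:";
    for (const auto &dep : task->execution_dependencies) {
      std::cout << " " << dep;
    }
    std::cout << "\n";
  }
  return 0;
}

The real implementation, including the special case for checkpoint tasks and the asynchronous task-table update, is in the dispatch_actor_task hunk below.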
@@ -245,17 +245,12 @@ bool TaskSpec_is_actor_checkpoint_method(TaskSpec *spec) {
   return message->is_actor_checkpoint_method();
 }
 
-bool TaskSpec_arg_is_actor_dummy_object(TaskSpec *spec, int64_t arg_index) {
-  if (TaskSpec_actor_counter(spec) == 0) {
-    /* The first task does not have any dependencies. */
-    return false;
-  } else if (TaskSpec_is_actor_checkpoint_method(spec)) {
-    /* Checkpoint tasks do not have any dependencies. */
-    return false;
-  } else {
-    /* For all other tasks, the last argument is the dummy object. */
-    return arg_index == (TaskSpec_num_args(spec) - 1);
-  }
+ObjectID TaskSpec_actor_dummy_object(TaskSpec *spec) {
+  CHECK(TaskSpec_is_actor_task(spec));
+  /* The last return value for actor tasks is the dummy object that
+   * represents that this task has completed execution. */
+  int64_t num_returns = TaskSpec_num_returns(spec);
+  return TaskSpec_return(spec, num_returns - 1);
 }
 
 UniqueID TaskSpec_driver_id(const TaskSpec *spec) {
@@ -392,6 +387,11 @@ std::vector<ObjectID> TaskExecutionSpec::ExecutionDependencies() {
   return execution_dependencies_;
 }
 
+void TaskExecutionSpec::SetExecutionDependencies(
+    const std::vector<ObjectID> &dependencies) {
+  execution_dependencies_ = dependencies;
+}
+
 int64_t TaskExecutionSpec::SpecSize() {
   return task_spec_size_;
 }
@@ -28,6 +28,12 @@ class TaskExecutionSpec {
   /// dependencies.
   std::vector<ObjectID> ExecutionDependencies();
 
+  /// Set the task's execution dependencies.
+  ///
+  /// @param dependencies The value to set the execution dependencies to.
+  /// @return Void.
+  void SetExecutionDependencies(const std::vector<ObjectID> &dependencies);
+
   /// Get the task spec size.
   ///
   /// @return The size of the immutable task spec.
@@ -239,14 +245,15 @@ int64_t TaskSpec_actor_counter(TaskSpec *spec);
 bool TaskSpec_is_actor_checkpoint_method(TaskSpec *spec);
 
 /**
- * Return whether the task's argument is a dummy object. Dummy objects are used
- * to encode an actor's state dependencies in the task graph.
+ * Return an actor task's dummy return value. Dummy objects are used to
+ * encode an actor's state dependencies in the task graph. The dummy object
+ * is local if and only if the task that returned it has completed
+ * execution.
  *
  * @param spec The task_spec in question.
- * @param arg_index The index of the argument in question.
- * @return Whether the argument at arg_index is a dummy object.
+ * @return The dummy object ID that the actor task will return.
  */
-bool TaskSpec_arg_is_actor_dummy_object(TaskSpec *spec, int64_t arg_index);
+ObjectID TaskSpec_actor_dummy_object(TaskSpec *spec);
 
 /**
  * Return the driver ID of the task.
@@ -49,6 +49,11 @@ typedef struct {
    * order that the tasks were submitted, per handle. Tasks from different
    * handles to the same actor may be interleaved. */
   std::unordered_map<ActorID, int64_t, UniqueIDHasher> task_counters;
+  /** The return value of the most recently executed task. The next task to
+   * execute should take this as an execution dependency at dispatch time. Set
+   * to nil if there are no execution dependencies (e.g., this is the first
+   * task to execute). */
+  ObjectID execution_dependency;
   /** The index of the task assigned to this actor. Set to -1 if no task is
    * currently assigned. If the actor process reports back success for the
    * assigned task execution, then the corresponding task_counter should be
@@ -219,6 +224,9 @@ void create_actor(SchedulingAlgorithmState *algorithm_state,
                   LocalSchedulerClient *worker) {
   LocalActorInfo entry;
   entry.task_counters[ActorID::nil()] = 0;
+  /* The actor has not yet executed any tasks, so there are no execution
+   * dependencies for the next task to be scheduled. */
+  entry.execution_dependency = ObjectID::nil();
   entry.assigned_task_counter = -1;
   entry.assigned_task_handle_id = ActorID::nil();
   entry.task_queue = new std::list<TaskExecutionSpec>();
@@ -320,9 +328,31 @@ bool dispatch_actor_task(LocalSchedulerState *state,
     return false;
   }
 
+  /* Update the task's execution dependencies to reflect the actual execution
+   * order to support deterministic reconstruction. */
+  /* NOTE(swang): The update of an actor task's execution dependencies is
+   * performed asynchronously. This means that if this local scheduler dies, we
+   * may lose updates that are in flight to the task table. We only guarantee
+   * deterministic reconstruction ordering for tasks whose updates are
+   * reflected in the task table. */
+  std::vector<ObjectID> ordered_execution_dependencies;
+  /* Only overwrite execution dependencies for tasks that have a
+   * submission-time dependency (meaning it is not the initial task). */
+  if (!entry.execution_dependency.is_nil()) {
+    /* A checkpoint resumption should be able to run at any time, so only add
+     * execution dependencies for non-checkpoint tasks. */
+    if (!TaskSpec_is_actor_checkpoint_method(spec)) {
+      /* All other tasks have a dependency on the task that executed most
+       * recently on the actor. */
+      ordered_execution_dependencies.push_back(entry.execution_dependency);
+    }
+  }
+  task->SetExecutionDependencies(ordered_execution_dependencies);
+
   /* Assign the first task in the task queue to the worker and mark the worker
    * as unavailable. */
   assign_task_to_worker(state, *task, entry.worker);
+  entry.execution_dependency = TaskSpec_actor_dummy_object(spec);
   entry.assigned_task_counter = next_task_counter;
   entry.assigned_task_handle_id = next_task_handle_id;
   entry.worker_available = false;
@@ -962,9 +992,17 @@ void give_task_to_local_scheduler_retry(UniqueID id,
   ActorID actor_id = TaskSpec_actor_id(spec);
   CHECK(state->actor_mapping.count(actor_id) == 1);
 
-  give_task_to_local_scheduler(
-      state, state->algorithm_state, *execution_spec,
-      state->actor_mapping[actor_id].local_scheduler_id);
+  if (state->actor_mapping[actor_id].local_scheduler_id ==
+      get_db_client_id(state->db)) {
+    /* The task is now scheduled to us. Call the callback directly. */
+    handle_task_scheduled(state, state->algorithm_state, *execution_spec);
+  } else {
+    /* The task is scheduled to a remote local scheduler. Try to hand it to
+     * them again. */
+    give_task_to_local_scheduler(
+        state, state->algorithm_state, *execution_spec,
+        state->actor_mapping[actor_id].local_scheduler_id);
+  }
 }
 
 /**
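The retry path above hinges on one check: if the actor has since been reassigned to this local scheduler, the task must be handled locally instead of being forwarded again. A minimal C++ sketch of that decision follows, using stand-in names (RetryContext, retry_actor_task, handle_locally, forward_to) rather than the real local scheduler API.

// Minimal model of the retry decision in give_task_to_local_scheduler_retry.
// Types and names here are illustrative stand-ins, not Ray's actual API.
#include <functional>
#include <iostream>
#include <string>

using SchedulerId = std::string;

struct RetryContext {
  SchedulerId self_id;                  // This local scheduler's ID.
  SchedulerId actor_scheduler_id;       // Current assignee of the actor.
  std::function<void()> handle_locally; // Stand-in for handle_task_scheduled.
  std::function<void(const SchedulerId &)> forward_to;  // Stand-in for give_task_to_local_scheduler.
};

void retry_actor_task(const RetryContext &ctx) {
  if (ctx.actor_scheduler_id == ctx.self_id) {
    // The actor now lives here, so run the scheduling callback directly
    // instead of handing the task off again.
    ctx.handle_locally();
  } else {
    // The actor lives elsewhere; try handing the task to its scheduler again.
    ctx.forward_to(ctx.actor_scheduler_id);
  }
}

int main() {
  RetryContext ctx{
      "scheduler-A", "scheduler-A",
      [] { std::cout << "handled locally\n"; },
      [](const SchedulerId &id) { std::cout << "forwarded to " << id << "\n"; }};
  retry_actor_task(ctx);
  return 0;
}

In the actual change, handle_locally corresponds to handle_task_scheduled and forward_to to give_task_to_local_scheduler, as shown in the hunk above.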
@@ -1625,6 +1625,86 @@ class DistributedActorHandles(unittest.TestCase):
         # self.assertRaises(Exception):
         #     ray.get(g.remote())
 
+    def _testNondeterministicReconstruction(self, num_forks,
+                                            num_items_per_fork,
+                                            num_forks_to_wait):
+        ray.worker._init(start_ray_local=True, num_local_schedulers=2,
+                         num_workers=0, redirect_output=True)
+
+        # Make a shared queue.
+        @ray.remote
+        class Queue(object):
+            def __init__(self):
+                self.queue = []
+
+            def local_plasma(self):
+                return ray.worker.global_worker.plasma_client.store_socket_name
+
+            def push(self, item):
+                self.queue.append(item)
+
+            def read(self):
+                return self.queue
+
+        # Schedule the shared queue onto the remote local scheduler.
+        local_plasma = ray.worker.global_worker.plasma_client.store_socket_name
+        actor = Queue.remote()
+        while ray.get(actor.local_plasma.remote()) == local_plasma:
+            actor = Queue.remote()
+
+        # A task that takes in the shared queue and a list of items to enqueue,
+        # one by one.
+        @ray.remote
+        def enqueue(queue, items):
+            done = None
+            for item in items:
+                done = queue.push.remote(item)
+            # TODO(swang): Return the object ID returned by the last method
+            # called on the shared queue, so that the caller of enqueue can
+            # wait for all of the queue methods to complete. This can be
+            # removed once join consistency is implemented.
+            return [done]
+
+        # Call the enqueue task num_forks times, each with num_items_per_fork
+        # unique objects to push onto the shared queue.
+        enqueue_tasks = []
+        for fork in range(num_forks):
+            enqueue_tasks.append(enqueue.remote(
+                actor, [(fork, i) for i in range(num_items_per_fork)]))
+        # Wait for the forks to complete their tasks.
+        enqueue_tasks = ray.get(enqueue_tasks)
+        enqueue_tasks = [fork_ids[0] for fork_ids in enqueue_tasks]
+        ray.wait(enqueue_tasks, num_returns=num_forks_to_wait)
+
+        # Read the queue to get the initial order of execution.
+        queue = ray.get(actor.read.remote())
+
+        # Kill the second plasma store to get rid of the cached objects and
+        # trigger the corresponding local scheduler to exit.
+        process = ray.services.all_processes[
+            ray.services.PROCESS_TYPE_PLASMA_STORE][1]
+        process.kill()
+        process.wait()
+
+        # Read the queue again and check for deterministic reconstruction.
+        ray.get(enqueue_tasks)
+        reconstructed_queue = ray.get(actor.read.remote())
+        # Make sure the final queue has all items from all forks.
+        self.assertEqual(len(reconstructed_queue),
+                         num_forks * num_items_per_fork)
+        # Make sure that the prefix of the final queue matches the queue from
+        # the initial execution.
+        self.assertEqual(queue, reconstructed_queue[:len(queue)])
+
+    def testNondeterministicReconstruction(self):
+        self._testNondeterministicReconstruction(10, 100, 10)
+
+    @unittest.skip("Nondeterministic reconstruction currently not supported "
+                   "when there are concurrent forks that didn't finish "
+                   "initial execution.")
+    def testNondeterministicReconstructionConcurrentForks(self):
+        self._testNondeterministicReconstruction(10, 100, 1)
+
+
 @unittest.skip("Actor placement currently does not use custom resources.")
 class ActorPlacement(unittest.TestCase):