From f816f613c76ef2e48a8672526ef1cb26fa724098 Mon Sep 17 00:00:00 2001
From: SangBin Cho <rkooo567@gmail.com>
Date: Wed, 23 Jun 2021 16:24:12 -0700
Subject: [PATCH] [Test] Handle flaky tests (#16602)

* Handle flaky tests.

* Fix lint errors

* Tag more tests as flaky

* Add test_scheduling to the flaky tests

* Remove global gc

Co-authored-by: Amog Kamsetty <amogkamsetty@yahoo.com>
---
 .buildkite/pipeline.macos.yml               | 10 +--
 python/ray/tests/BUILD                      | 98 +++++++++++++--------
 python/ray/tests/test_array.py              |  1 +
 python/ray/tests/test_reference_counting.py |  1 +
 4 files changed, 69 insertions(+), 41 deletions(-)

diff --git a/.buildkite/pipeline.macos.yml b/.buildkite/pipeline.macos.yml
index c95acf1e6..8cb460a8d 100644
--- a/.buildkite/pipeline.macos.yml
+++ b/.buildkite/pipeline.macos.yml
@@ -25,7 +25,7 @@ steps:
   commands:
     - *prelude_commands
     - TORCH_VERSION=1.6 ./ci/travis/install-dependencies.sh
-    - bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-flaky --
+    - bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-flaky,-flaky-mac --
       //:all python/ray/serve/... python/ray/new_dashboard/... -rllib/... -core_worker_test
 
 - label: ":mac: :apple: Small and Large"
@@ -35,7 +35,7 @@ steps:
     - bazel test $(./scripts/bazel_export_options) --config=ci
       --test_env=CONDA_EXE --test_env=CONDA_PYTHON_EXE --test_env=CONDA_SHLVL --test_env=CONDA_PREFIX
       --test_env=CONDA_DEFAULT_ENV --test_env=CONDA_PROMPT_MODIFIER
-      --test_tag_filters=-kubernetes,-jenkins_only,-medium_size_python_tests_a_to_j,-medium_size_python_tests_k_to_z,-flaky
+      --test_tag_filters=-kubernetes,-jenkins_only,-medium_size_python_tests_a_to_j,-medium_size_python_tests_k_to_z,-flaky,-flaky-mac
       python/ray/tests/...
 
 - label: ":mac: :apple: Medium A-J"
@@ -43,7 +43,7 @@ steps:
   commands:
     - *prelude_commands
     - bazel test --config=ci $(./scripts/bazel_export_options)
-      --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_a_to_j,-flaky
+      --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_a_to_j,-flaky,-flaky-mac
       python/ray/tests/...
 
 - label: ":mac: :apple: Medium K-Z"
@@ -51,7 +51,7 @@ steps:
   commands:
     - *prelude_commands
     - bazel test --config=ci $(./scripts/bazel_export_options)
-      --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_k_to_z,-flaky
+      --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_k_to_z,-flaky,-flaky-mac
       python/ray/tests/...
 
 - label: ":mac: :apple: :snowflake: Flaky"
@@ -60,7 +60,7 @@ steps:
     - *prelude_commands
     - RLLIB_TESTING=1 ./ci/travis/install-dependencies.sh
     - bazel test --config=ci $(./scripts/bazel_export_options)
-      --test_tag_filters=-kubernetes,-jenkins_only,flaky
+      --test_tag_filters=-kubernetes,-jenkins_only,flaky,flaky-mac
       --test_env=CONDA_EXE
       --test_env=CONDA_PYTHON_EXE
       --test_env=CONDA_SHLVL
diff --git a/python/ray/tests/BUILD b/python/ray/tests/BUILD
index d852a63ac..84cc123ad 100644
--- a/python/ray/tests/BUILD
+++ b/python/ray/tests/BUILD
@@ -52,17 +52,6 @@ py_test_module_list(
   deps = ["//:ray_lib"],
 )
 
-py_test_module_list(
-  files = [
-    "test_actor.py",
-    "test_memstat.py",
-  ],
-  size = "medium",
-  extra_srcs = SRCS,
-  tags = ["exclusive", "medium_size_python_tests_a_to_j", "flaky"],
-  deps = ["//:ray_lib"],
-)
-
 py_test_module_list(
   files = [
     "test_memory_scheduling.py",
@@ -80,7 +69,6 @@ py_test_module_list(
     "test_resource_demand_scheduler.py",
     "test_runtime_env_env_vars.py",
     "test_runtime_env_fork_process.py",
-    "test_scheduling.py",
     "test_serialization.py",
     "test_shuffle.py",
     "test_stress.py",
@@ -94,28 +82,6 @@ py_test_module_list(
   deps = ["//:ray_lib"],
 )
 
-py_test_module_list(
-  files = [
-    "test_object_spilling_2.py",
-    "test_object_manager.py",
-    "test_multi_tenancy.py",
-  ],
-  size = "medium",
-  extra_srcs = SRCS,
-  tags = ["exclusive", "medium_size_python_tests_k_to_z", "flaky"],
-  deps = ["//:ray_lib"],
-)
-
-py_test_module_list(
-  files = [
-    "test_runtime_env_complicated.py"
-  ],
-  size = "large",
-  extra_srcs = SRCS,
-  tags = ["exclusive", "post_wheel_build", "flaky"],
-  deps = ["//:ray_lib"],
-)
-
 py_test_module_list(
   files = [
     "test_args.py",
@@ -168,9 +134,7 @@ py_test_module_list(
 py_test_module_list(
   files = [
     "test_failure_2.py",
-    "test_failure_3.py",
     "test_failure_4.py",
-    "test_reference_counting_2.py",
     "test_object_spilling.py",
   ],
   size = "large",
@@ -179,16 +143,78 @@ py_test_module_list(
   deps = ["//:ray_lib"],
 )
 
+
+
+
+# Flaky tests. The "flaky" tag keeps them out of the regular CI steps.
+py_test_module_list(
+  files = [
+    "test_runtime_env_complicated.py"
+  ],
+  size = "large",
+  extra_srcs = SRCS,
+  tags = ["exclusive", "post_wheel_build", "flaky"],
+  deps = ["//:ray_lib"],
+)
+
+py_test_module_list(
+  files = [
+    "test_actor.py",
+    "test_memstat.py",
+  ],
+  size = "medium",
+  extra_srcs = SRCS,
+  tags = ["exclusive", "medium_size_python_tests_a_to_j", "flaky"],
+  deps = ["//:ray_lib"],
+)
+
+py_test_module_list(
+  files = [
+    "test_object_spilling_2.py",
+    "test_object_manager.py",
+    "test_multi_tenancy.py",
+    "test_scheduling.py",
+  ],
+  size = "medium",
+  extra_srcs = SRCS,
+  tags = ["exclusive", "medium_size_python_tests_k_to_z", "flaky"],
+  deps = ["//:ray_lib"],
+)
+
+py_test_module_list(
+  files = [
+    "test_failure_3.py",
+    "test_reference_counting_2.py",
+  ],
+  size = "large",
+  extra_srcs = SRCS,
+  tags = ["exclusive", "flaky"],
+  deps = ["//:ray_lib"],
+)
+
+# Tests that are flaky only on Mac; pipeline.macos.yml runs them in its Flaky step.
 py_test_module_list(
   files = [
     "test_placement_group.py",
+  ],
+  size = "large",
+  extra_srcs = SRCS,
+  tags = ["exclusive", "flaky-mac"],
+  deps = ["//:ray_lib"],
+)
+
+py_test_module_list(
+  files = [
     "test_runtime_env.py"
   ],
   size = "large",
   extra_srcs = SRCS,
+  tags = ["flaky-mac"],
   deps = ["//:ray_lib"],
 )
 
+
+
 # TODO(barakmich): aws/ might want its own buildfile, or
 #    py_test_module_list should support subdirectories.
 py_test(
diff --git a/python/ray/tests/test_array.py b/python/ray/tests/test_array.py
index a21ca4e3f..3b3b3a18f 100644
--- a/python/ray/tests/test_array.py
+++ b/python/ray/tests/test_array.py
@@ -55,6 +55,7 @@ def test_distributed_array_assemble(ray_start_2_cpus, reload_modules):
         ]))
 
 
+@pytest.mark.skipif(sys.platform == "win32", reason="Failing on Windows.")
 @pytest.mark.parametrize(
     "ray_start_cluster_2_nodes",
     [{
diff --git a/python/ray/tests/test_reference_counting.py b/python/ray/tests/test_reference_counting.py
index 66a6aa252..cafc54779 100644
--- a/python/ray/tests/test_reference_counting.py
+++ b/python/ray/tests/test_reference_counting.py
@@ -543,6 +543,7 @@ def _all_actors_dead():
                for actor in list(ray.state.actors().values()))
 
 
+@pytest.mark.skipif(sys.platform == "win32", reason="Failing on Windows.")
 def test_kill_actor_immediately_after_creation(ray_start_regular):
     @ray.remote
     class A: