[Modin] Add tests for modin (#16260)

Adds Modin tests that run both with and without Ray Client.
Chris K. W 2021-06-11 12:23:33 -07:00 committed by GitHub
parent 2cdaf132b5
commit 3fa9f2e5d6
8 changed files with 577 additions and 3 deletions
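For orientation, a minimal sketch of the pattern the new tests rely on: Modin exposes a drop-in pandas API backed by Ray, and results are converted back to plain pandas for comparison. This sketch is the editor's, not part of the diff; the data and variable names are illustrative only.

# Editor's sketch (not part of this commit): Modin on Ray vs. plain pandas.
import ray
import pandas
import modin.pandas as pd          # drop-in pandas API, backed by Ray
from modin.utils import to_pandas  # converts a Modin object to its pandas equivalent

ray.init()
data = {"col1": [1, 2, 3], "col2": [4.0, 5.0, 6.0]}
modin_df = pd.DataFrame(data)
pandas_df = pandas.DataFrame(data)
# The new tests make this kind of comparison via the df_equals helper added below.
assert to_pandas(modin_df).equals(pandas_df)
ray.shutdown()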


@@ -279,7 +279,7 @@
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-tf,pytorch,-py37,-flaky,-client python/ray/util/sgd/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=client_unit_tests --test_env=RAY_CLIENT_MODE=1 python/ray/util/sgd/...
- label: ":octopus: Tune/SGD tests and examples. Python 3.7"
- label: ":octopus: Tune/SGD/Modin tests and examples. Python 3.7"
conditions: ["RAY_CI_TUNE_AFFECTED", "RAY_CI_SGD_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
@@ -288,6 +288,7 @@
- rm -rf ./python/ray/thirdparty_files; ./ci/travis/ci.sh build
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=py37,-flaky,-client python/ray/tune/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-client python/ray/util/xgboost/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only python/ray/tests/modin/...
- label: ":tropical_fish: ML Libraries w/ Ray Client Examples (Python 3.7)."
conditions: ["RAY_CI_TUNE_AFFECTED", "RAY_CI_SGD_AFFECTED"]

LICENSE

@@ -287,3 +287,23 @@ Code in python/ray/_private/prometheus_exporter.py is adapted from https://githu
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
--------------------------------------------------------------------------------
Code in python/ray/tests/modin/test_modin and
python/ray/tests/modin/modin_test_utils adapted from:
- http://github.com/modin-project/modin/master/modin/pandas/test/test_general.py
- http://github.com/modin-project/modin/master/modin/pandas/test/utils.py
Copyright (c) 2018-2020 Modin Developers.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@@ -0,0 +1,7 @@
py_test(
name = "test_modin",
size = "small",
srcs = ["test_modin.py"],
deps = ["//:ray_lib"],
tags = ["exclusive"],
)


@@ -0,0 +1,134 @@
# Licensed to Modin Development Team under one or more contributor license
# agreements. See the NOTICE file distributed with this work for additional
# information regarding copyright ownership. The Modin Development Team
# licenses this file to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
#
# This file is copied and adapted from
# http://github.com/modin-project/modin/master/modin/pandas/test/utils.py
import pandas
import modin.pandas as pd
from modin.utils import to_pandas
from pandas.testing import (assert_series_equal, assert_frame_equal,
assert_extension_array_equal, assert_index_equal)
import numpy as np


def categories_equals(left, right):
assert (left.ordered and right.ordered) or (not left.ordered
and not right.ordered)
assert_extension_array_equal(left, right)


def df_categories_equals(df1, df2):
if not hasattr(df1, "select_dtypes"):
if isinstance(df1, pandas.CategoricalDtype):
return categories_equals(df1, df2)
        elif isinstance(getattr(df1, "dtype"),
                        pandas.CategoricalDtype) and isinstance(
                            getattr(df2, "dtype"), pandas.CategoricalDtype):
return categories_equals(df1.dtype, df2.dtype)
else:
return True
categories_columns = df1.select_dtypes(include="category").columns
for column in categories_columns:
assert_extension_array_equal(
df1[column].values,
df2[column].values,
check_dtype=False,
)


def df_equals(df1, df2):
"""Tests if df1 and df2 are equal.
Args:
df1: (pandas or modin DataFrame or series) dataframe to test if equal.
df2: (pandas or modin DataFrame or series) dataframe to test if equal.
Returns:
True if df1 is equal to df2.
"""
    # Gets AttributeError if modin's groupby object is not imported like this
from modin.pandas.groupby import DataFrameGroupBy
groupby_types = (pandas.core.groupby.DataFrameGroupBy, DataFrameGroupBy)
# The typing behavior of how pandas treats its index is not consistent when
# the length of the DataFrame or Series is 0, so we just verify that the
# contents are the same.
if (hasattr(df1, "index") and hasattr(df2, "index") and len(df1) == 0
and len(df2) == 0):
if type(df1).__name__ == type(df2).__name__:
if hasattr(df1, "name") and hasattr(
df2, "name") and df1.name == df2.name:
return
if (hasattr(df1, "columns") and hasattr(df2, "columns")
and df1.columns.equals(df2.columns)):
return
assert False
if isinstance(df1, (list, tuple)) and all(
isinstance(d, (pd.DataFrame, pd.Series, pandas.DataFrame,
pandas.Series)) for d in df1):
assert isinstance(df2, type(df1)), "Different type of collection"
assert len(df1) == len(df2), "Different length result"
return (df_equals(d1, d2) for d1, d2 in zip(df1, df2))
# Convert to pandas
if isinstance(df1, (pd.DataFrame, pd.Series)):
df1 = to_pandas(df1)
if isinstance(df2, (pd.DataFrame, pd.Series)):
df2 = to_pandas(df2)
if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):
if (df1.empty and not df2.empty) or (df2.empty and not df1.empty):
assert False, "One of the passed frames is empty, when other isn't"
elif df1.empty and df2.empty and type(df1) != type(df2):
assert (
False
), f"Empty frames have different types: {type(df1)} != {type(df2)}"
if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):
assert_frame_equal(
df1,
df2,
check_dtype=False,
check_datetimelike_compat=True,
check_index_type=False,
check_column_type=False,
check_categorical=False,
)
df_categories_equals(df1, df2)
elif isinstance(df1, pandas.Index) and isinstance(df2, pandas.Index):
assert_index_equal(df1, df2)
elif isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series):
assert_series_equal(
df1, df2, check_dtype=False, check_series_type=False)
elif isinstance(df1, groupby_types) and isinstance(df2, groupby_types):
for g1, g2 in zip(df1, df2):
assert g1[0] == g2[0]
df_equals(g1[1], g2[1])
elif (isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series)
and df1.empty and df2.empty):
assert all(df1.index == df2.index)
assert df1.dtypes == df2.dtypes
elif isinstance(df1, pandas.core.arrays.numpy_.PandasArray):
assert isinstance(df2, pandas.core.arrays.numpy_.PandasArray)
assert df1 == df2
elif isinstance(df1, np.recarray) and isinstance(df2, np.recarray):
np.testing.assert_array_equal(df1, df2)
else:
if df1 != df2:
np.testing.assert_almost_equal(df1, df2)
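A short usage sketch (editor's addition, assuming the import path used by the tests below) of df_equals: it accepts any mix of Modin and pandas objects, converts Modin objects to pandas, and asserts equality rather than returning a result.

# Editor's sketch, not part of the commit.
import pandas
import modin.pandas as pd
from ray.tests.modin.modin_test_utils import df_equals

data = {"a": [1, 2, None], "b": ["x", "y", "z"]}
# Passes silently; raises AssertionError if the frames differ.
df_equals(pd.DataFrame(data), pandas.DataFrame(data))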


@@ -0,0 +1,410 @@
# Licensed to Modin Development Team under one or more contributor license
# agreements. See the NOTICE file distributed with this work for additional
# information regarding copyright ownership. The Modin Development Team
# licenses this file to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
#
# This file is copied and adapted from:
# http://github.com/modin-project/modin/master/modin/pandas/test/test_general.py
import sys
import pytest
import pandas
import numpy as np
from numpy.testing import assert_array_equal
import ray
from ray.util.client.ray_client_helpers import ray_start_client_server
modin_compatible_version = sys.version_info >= (3, 7, 0)
modin_installed = True
if modin_compatible_version:
try:
import modin # noqa: F401
except ModuleNotFoundError:
modin_installed = False
skip = not modin_compatible_version or not modin_installed
# These tests are written for versions of Modin that require python 3.7+
pytestmark = pytest.mark.skipif(
skip, reason="Outdated or missing Modin dependency")
if not skip:
from ray.tests.modin.modin_test_utils import df_equals
import modin.pandas as pd


# Module scoped fixture. Will first run all tests without ray
# client, then rerun all tests with a single ray client session.
@pytest.fixture(params=[False, True], autouse=True, scope="module")
def run_ray_client(request):
if request.param:
with ray_start_client_server() as client:
yield client
else:
# Run without ray client (do nothing)
yield
# Cleanup state before rerunning tests with client
ray.shutdown()


random_state = np.random.RandomState(seed=42)
# Size of test dataframes
NCOLS, NROWS = (2**6, 2**8)
# Range for values for test data
RAND_LOW = 0
RAND_HIGH = 100
# Input data and functions for the tests
# The test data that we will test our code against
test_data = {
"int_data": {
"col{}".format(int((i - NCOLS / 2) % NCOLS + 1)): random_state.randint(
RAND_LOW, RAND_HIGH, size=(NROWS))
for i in range(NCOLS)
},
"float_nan_data": {
"col{}".format(int((i - NCOLS / 2) % NCOLS + 1)): [
x if (j % 4 == 0 and i > NCOLS // 2)
or (j != i and i <= NCOLS // 2) else np.NaN for j, x in enumerate(
random_state.uniform(RAND_LOW, RAND_HIGH, size=(NROWS)))
]
for i in range(NCOLS)
},
}
test_data["int_data"]["index"] = test_data["int_data"].pop("col{}".format(
int(NCOLS / 2)))
for col in test_data["float_nan_data"]:
for row in range(NROWS // 2):
if row % 16 == 0:
test_data["float_nan_data"][col][row] = np.NaN
test_data_values = list(test_data.values())
test_data_keys = list(test_data.keys())


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_isna(data):
pandas_df = pandas.DataFrame(data)
modin_df = pd.DataFrame(data)
pandas_result = pandas.isna(pandas_df)
modin_result = pd.isna(modin_df)
df_equals(modin_result, pandas_result)
modin_result = pd.isna(pd.Series([1, np.nan, 2]))
pandas_result = pandas.isna(pandas.Series([1, np.nan, 2]))
df_equals(modin_result, pandas_result)
assert pd.isna(np.nan) == pandas.isna(np.nan)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_isnull(data):
pandas_df = pandas.DataFrame(data)
modin_df = pd.DataFrame(data)
pandas_result = pandas.isnull(pandas_df)
modin_result = pd.isnull(modin_df)
df_equals(modin_result, pandas_result)
modin_result = pd.isnull(pd.Series([1, np.nan, 2]))
pandas_result = pandas.isnull(pandas.Series([1, np.nan, 2]))
df_equals(modin_result, pandas_result)
assert pd.isna(np.nan) == pandas.isna(np.nan)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_notna(data):
pandas_df = pandas.DataFrame(data)
modin_df = pd.DataFrame(data)
pandas_result = pandas.notna(pandas_df)
modin_result = pd.notna(modin_df)
df_equals(modin_result, pandas_result)
modin_result = pd.notna(pd.Series([1, np.nan, 2]))
pandas_result = pandas.notna(pandas.Series([1, np.nan, 2]))
df_equals(modin_result, pandas_result)
    assert pd.notna(np.nan) == pandas.notna(np.nan)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_notnull(data):
pandas_df = pandas.DataFrame(data)
modin_df = pd.DataFrame(data)
pandas_result = pandas.notnull(pandas_df)
modin_result = pd.notnull(modin_df)
df_equals(modin_result, pandas_result)
modin_result = pd.notnull(pd.Series([1, np.nan, 2]))
pandas_result = pandas.notnull(pandas.Series([1, np.nan, 2]))
df_equals(modin_result, pandas_result)
    assert pd.notnull(np.nan) == pandas.notnull(np.nan)


def test_merge():
frame_data = {
"col1": [0, 1, 2, 3],
"col2": [4, 5, 6, 7],
"col3": [8, 9, 0, 1],
"col4": [2, 4, 5, 6],
}
modin_df = pd.DataFrame(frame_data)
pandas_df = pandas.DataFrame(frame_data)
frame_data2 = {"col1": [0, 1, 2], "col2": [1, 5, 6]}
modin_df2 = pd.DataFrame(frame_data2)
pandas_df2 = pandas.DataFrame(frame_data2)
join_types = ["outer", "inner"]
for how in join_types:
# Defaults
modin_result = pd.merge(modin_df, modin_df2, how=how)
pandas_result = pandas.merge(pandas_df, pandas_df2, how=how)
df_equals(modin_result, pandas_result)
# left_on and right_index
modin_result = pd.merge(
modin_df, modin_df2, how=how, left_on="col1", right_index=True)
pandas_result = pandas.merge(
pandas_df, pandas_df2, how=how, left_on="col1", right_index=True)
df_equals(modin_result, pandas_result)
# left_index and right_on
modin_result = pd.merge(
modin_df, modin_df2, how=how, left_index=True, right_on="col1")
pandas_result = pandas.merge(
pandas_df, pandas_df2, how=how, left_index=True, right_on="col1")
df_equals(modin_result, pandas_result)
# left_on and right_on col1
modin_result = pd.merge(
modin_df, modin_df2, how=how, left_on="col1", right_on="col1")
pandas_result = pandas.merge(
pandas_df, pandas_df2, how=how, left_on="col1", right_on="col1")
df_equals(modin_result, pandas_result)
# left_on and right_on col2
modin_result = pd.merge(
modin_df, modin_df2, how=how, left_on="col2", right_on="col2")
pandas_result = pandas.merge(
pandas_df, pandas_df2, how=how, left_on="col2", right_on="col2")
df_equals(modin_result, pandas_result)
# left_index and right_index
modin_result = pd.merge(
modin_df, modin_df2, how=how, left_index=True, right_index=True)
pandas_result = pandas.merge(
pandas_df, pandas_df2, how=how, left_index=True, right_index=True)
df_equals(modin_result, pandas_result)
s = pd.Series(frame_data.get("col1"))
with pytest.raises(ValueError):
pd.merge(s, modin_df2)
with pytest.raises(TypeError):
pd.merge("Non-valid type", modin_df2)


def test_pivot():
test_df = pd.DataFrame({
"foo": ["one", "one", "one", "two", "two", "two"],
"bar": ["A", "B", "C", "A", "B", "C"],
"baz": [1, 2, 3, 4, 5, 6],
"zoo": ["x", "y", "z", "q", "w", "t"],
})
df = pd.pivot(test_df, index="foo", columns="bar", values="baz")
assert isinstance(df, pd.DataFrame)
with pytest.raises(ValueError):
pd.pivot(test_df["bar"], index="foo", columns="bar", values="baz")


def test_pivot_table():
test_df = pd.DataFrame({
"A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
"B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
"C": [
"small",
"large",
"large",
"small",
"small",
"large",
"small",
"small",
"large",
],
"D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
"E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
})
df = pd.pivot_table(
test_df, values="D", index=["A", "B"], columns=["C"], aggfunc=np.sum)
assert isinstance(df, pd.DataFrame)
with pytest.raises(ValueError):
pd.pivot_table(
test_df["C"],
values="D",
index=["A", "B"],
columns=["C"],
aggfunc=np.sum)


def test_unique():
modin_result = pd.unique([2, 1, 3, 3])
pandas_result = pandas.unique([2, 1, 3, 3])
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape
modin_result = pd.unique(pd.Series([2] + [1] * 5))
pandas_result = pandas.unique(pandas.Series([2] + [1] * 5))
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape
modin_result = pd.unique(
pd.Series([pd.Timestamp("20160101"),
pd.Timestamp("20160101")]))
pandas_result = pandas.unique(
pandas.Series(
[pandas.Timestamp("20160101"),
pandas.Timestamp("20160101")]))
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape
modin_result = pd.unique(
pd.Series([
pd.Timestamp("20160101", tz="US/Eastern"),
pd.Timestamp("20160101", tz="US/Eastern"),
]))
pandas_result = pandas.unique(
pandas.Series([
pandas.Timestamp("20160101", tz="US/Eastern"),
pandas.Timestamp("20160101", tz="US/Eastern"),
]))
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape
modin_result = pd.unique(
pd.Index([
pd.Timestamp("20160101", tz="US/Eastern"),
pd.Timestamp("20160101", tz="US/Eastern"),
]))
pandas_result = pandas.unique(
pandas.Index([
pandas.Timestamp("20160101", tz="US/Eastern"),
pandas.Timestamp("20160101", tz="US/Eastern"),
]))
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape
modin_result = pd.unique(pd.Series(pd.Categorical(list("baabc"))))
pandas_result = pandas.unique(
pandas.Series(pandas.Categorical(list("baabc"))))
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape


def test_to_datetime():
# DataFrame input for to_datetime
modin_df = pd.DataFrame({
"year": [2015, 2016],
"month": [2, 3],
"day": [4, 5]
})
pandas_df = pandas.DataFrame({
"year": [2015, 2016],
"month": [2, 3],
"day": [4, 5]
})
df_equals(pd.to_datetime(modin_df), pandas.to_datetime(pandas_df))
# Series input for to_datetime
modin_s = pd.Series(["3/11/2000", "3/12/2000", "3/13/2000"] * 1000)
pandas_s = pandas.Series(["3/11/2000", "3/12/2000", "3/13/2000"] * 1000)
df_equals(pd.to_datetime(modin_s), pandas.to_datetime(pandas_s))
# Other inputs for to_datetime
value = 1490195805
assert pd.to_datetime(
value, unit="s") == pandas.to_datetime(
value, unit="s")
value = 1490195805433502912
assert pd.to_datetime(
value, unit="ns") == pandas.to_datetime(
value, unit="ns")
value = [1, 2, 3]
assert pd.to_datetime(
value, unit="D", origin=pd.Timestamp("2000-01-01")).equals(
pandas.to_datetime(
value, unit="D", origin=pandas.Timestamp("2000-01-01")))


@pytest.mark.parametrize(
"data, errors, downcast",
[
(["1.0", "2", -3], "raise", None),
(["1.0", "2", -3], "raise", "float"),
(["1.0", "2", -3], "raise", "signed"),
(["apple", "1.0", "2", -3], "ignore", None),
(["apple", "1.0", "2", -3], "coerce", None),
],
)
def test_to_numeric(data, errors, downcast):
modin_series = pd.Series(data)
pandas_series = pandas.Series(data)
modin_result = pd.to_numeric(
modin_series, errors=errors, downcast=downcast)
pandas_result = pandas.to_numeric(
pandas_series, errors=errors, downcast=downcast)
df_equals(modin_result, pandas_result)


def test_to_pandas_indices():
data = test_data_values[0]
md_df = pd.DataFrame(data)
index = pandas.MultiIndex.from_tuples(
[(i, i * 2) for i in np.arange(len(md_df) + 1)], names=["A",
"B"]).drop(0)
columns = pandas.MultiIndex.from_tuples(
[(i, i * 2) for i in np.arange(len(md_df.columns) + 1)],
names=["A", "B"]).drop(0)
md_df.index = index
md_df.columns = columns
pd_df = md_df._to_pandas()
for axis in [0, 1]:
assert md_df.axes[axis].equals(
pd_df.axes[axis]), f"Indices at axis {axis} are different!"
assert md_df.axes[axis].equal_levels(pd_df.axes[
axis]), f"Levels of indices at axis {axis} are different!"


def test_empty_dataframe():
df = pd.DataFrame(columns=["a", "b"])
df[(df.a == 1) & (df.b == 2)]
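For a local run outside the Buildkite job above, a hypothetical invocation of this module (assumes pytest, ray, and modin>=0.10.0 on Python 3.7+ are installed); the module-scoped run_ray_client fixture then executes every test twice, first without and then with Ray Client.

# Editor's sketch: run the new test module directly from a Ray checkout.
import sys
import pytest

if __name__ == "__main__":
    sys.exit(pytest.main(["-v", "python/ray/tests/modin/test_modin.py"]))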


@@ -122,6 +122,8 @@ def test_union(ray_start_regular_shared):
assert ds.batch_size == 0
@pytest.mark.skipif(
True, reason="Broken on all platforms (incorrect use of gather_sync())")
def test_from_modin(ray_start_regular_shared):
try:
import modin.pandas as pd


@@ -55,7 +55,6 @@ gym
gym-minigrid
kubernetes
lxml
modin
moto
mypy
networkx


@@ -3,4 +3,5 @@
# So we separate its own requirements file.
tune-sklearn==0.3.0
xgboost_ray==0.0.5
xgboost_ray==0.0.5
modin>=0.10.0; python_version >= '3.7'