[Dataframes] Implement .__len__(), .__contains__(), .first_valid_index(), and .last_valid_index() (#1664)

* Added `__len__`, `__contains__`, `first_valid_index`, and `last_valid_index`
* Fixed the `__contains__` test cases
* Updated the test files for the PR
2025-03-06 10:31:39 -05:00 · 2018-03-06 23:56:11 -08:00 · 2018-03-06 23:56:11 -08:00 · 0abebb0975
commit 0abebb0975
parent 4af42d5bb6
2 changed files with 70 additions and 25 deletions
--- a/python/ray/dataframe/dataframe.py
+++ b/python/ray/dataframe/dataframe.py
@ -821,9 +821,15 @@ class DataFrame(object):
            "github.com/ray-project/ray.")

    def first_valid_index(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Return index for first non-NA/null value.
+
+        Returns:
+            scalar: type of index
+        """
+        idx = self._index
+        if (idx is not None):
+            return idx.first_valid_index()
+        return None

    def floordiv(self, other, axis='columns', level=None, fill_value=None):
        raise NotImplementedError(
@ -1173,9 +1179,15 @@ class DataFrame(object):
            "github.com/ray-project/ray.")

    def last_valid_index(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Return index for last non-NA/null value.
+
+        Returns:
+            scalar: type of index
+        """
+        idx = self._index
+        if (idx is not None):
+            return idx.last_valid_index()
+        return None

    def le(self, other, axis='columns', level=None):
        raise NotImplementedError(
@ -2105,9 +2117,12 @@ class DataFrame(object):
            "github.com/ray-project/ray.")

    def __len__(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Gets the length of the dataframe.
+
+        Returns:
+            Returns an integer length of the dataframe object.
+        """
+        return sum(self._lengths)

    def __unicode__(self):
        raise NotImplementedError(
@ -2133,7 +2148,15 @@ class DataFrame(object):
        return iter(self.columns)

    def __contains__(self, key):
-        return key in self.columns
+        """Searches columns for specific key
+
+        Args:
+            key : The column name
+
+        Returns:
+            Returns a boolean if the specified key exists as a column name
+        """
+        return self.columns.__contains__(key)

    def __nonzero__(self):
        raise NotImplementedError(
--- a/python/ray/dataframe/test/test_dataframe.py
+++ b/python/ray/dataframe/test/test_dataframe.py
@ -206,6 +206,10 @@ def test_int_dataframe():
    test_idxmin(ray_df, pandas_df)
    test_pop(ray_df, pandas_df)

+    test___len__(ray_df, pandas_df)
+    test_first_valid_index(ray_df, pandas_df)
+    test_last_valid_index(ray_df, pandas_df)
+
    for key in keys:
        test_get(ray_df, pandas_df, key)

@ -316,6 +320,10 @@ def test_float_dataframe():
    test_notna(ray_df, pandas_df)
    test_notnull(ray_df, pandas_df)

+    test___len__(ray_df, pandas_df)
+    test_first_valid_index(ray_df, pandas_df)
+    test_last_valid_index(ray_df, pandas_df)
+
    for key in keys:
        test_get(ray_df, pandas_df, key)

@ -343,6 +351,10 @@ def test_float_dataframe():
    test_reset_index(ray_df, pandas_df)
    test_reset_index(ray_df, pandas_df, inplace=True)

+    for key in keys:
+        test___contains__(ray_df, key, True)
+    test___contains__(ray_df, "Not Exists", False)
+
    for key in keys:
        test_insert(ray_df, pandas_df, 0, "New Column", ray_df[key])
        test_insert(ray_df, pandas_df, 0, "New Column", pandas_df[key])
@ -426,6 +438,10 @@ def test_mixed_dtype_dataframe():
    test_notna(ray_df, pandas_df)
    test_notnull(ray_df, pandas_df)

+    test___len__(ray_df, pandas_df)
+    test_first_valid_index(ray_df, pandas_df)
+    test_last_valid_index(ray_df, pandas_df)
+
    for key in keys:
        test_get(ray_df, pandas_df, key)

@ -452,6 +468,10 @@ def test_mixed_dtype_dataframe():
    test_reset_index(ray_df, pandas_df)
    test_reset_index(ray_df, pandas_df, inplace=True)

+    for key in keys:
+        test___contains__(ray_df, key, True)
+    test___contains__(ray_df, "Not Exists", False)
+
    for key in keys:
        test_insert(ray_df, pandas_df, 0, "New Column", ray_df[key])
        test_insert(ray_df, pandas_df, 0, "New Column", pandas_df[key])
@ -525,6 +545,10 @@ def test_nan_dataframe():
    test_notna(ray_df, pandas_df)
    test_notnull(ray_df, pandas_df)

+    test___len__(ray_df, pandas_df)
+    test_first_valid_index(ray_df, pandas_df)
+    test_last_valid_index(ray_df, pandas_df)
+
    for key in keys:
        test_get(ray_df, pandas_df, key)

@ -551,6 +575,10 @@ def test_nan_dataframe():
    test_reset_index(ray_df, pandas_df)
    test_reset_index(ray_df, pandas_df, inplace=True)

+    for key in keys:
+        test___contains__(ray_df, key, True)
+    test___contains__(ray_df, "Not Exists", False)
+
    for key in keys:
        test_insert(ray_df, pandas_df, 0, "New Column", ray_df[key])
        test_insert(ray_df, pandas_df, 0, "New Column", pandas_df[key])
@ -933,11 +961,9 @@ def test_first():
        ray_df.first(None)


-def test_first_valid_index():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.first_valid_index()
+@pytest.fixture
+def test_first_valid_index(ray_df, pandas_df):
+    assert(ray_df.first_valid_index() == (pandas_df.first_valid_index()))


 def test_floordiv():
@ -1130,11 +1156,9 @@ def test_last():
        ray_df.last(None)


-def test_last_valid_index():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.last_valid_index()
+@pytest.fixture
+def test_last_valid_index(ray_df, pandas_df):
+    assert(ray_df.last_valid_index() == (pandas_df.last_valid_index()))


 def test_le():
@ -1922,11 +1946,9 @@ def test___setitem__():
        ray_df.__setitem__(None, None)


-def test___len__():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.__len__()
+@pytest.fixture
+def test___len__(ray_df, pandas_df):
+    assert((len(ray_df) == len(pandas_df)))


 def test___unicode__():