[Dataframes] Implement .__len__(), .__contains__(), .first_valid_index(), and .last_valid_index() (#1664)

* Added .__len__(), .__contains__(), .first_valid_index(), and .last_valid_index()

* Fixed the .__contains__() test cases

* Updated the test files for the PR
This commit is contained in:
Rohan Singh 2018-03-06 23:56:11 -08:00 committed by Devin Petersohn
parent 4af42d5bb6
commit 0abebb0975
2 changed files with 70 additions and 25 deletions

View file

@ -821,9 +821,15 @@ class DataFrame(object):
"github.com/ray-project/ray.")
def first_valid_index(self):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
"""Return index for first non-NA/null value.
Returns:
scalar: type of index
"""
idx = self._index
if (idx is not None):
return idx.first_valid_index()
return None
def floordiv(self, other, axis='columns', level=None, fill_value=None):
raise NotImplementedError(
@ -1173,9 +1179,15 @@ class DataFrame(object):
"github.com/ray-project/ray.")
def last_valid_index(self):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
"""Return index for last non-NA/null value.
Returns:
scalar: type of index
"""
idx = self._index
if (idx is not None):
return idx.last_valid_index()
return None
def le(self, other, axis='columns', level=None):
raise NotImplementedError(
@ -2105,9 +2117,12 @@ class DataFrame(object):
"github.com/ray-project/ray.")
def __len__(self):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
"""Gets the length of the dataframe.
Returns:
Returns an integer length of the dataframe object.
"""
return sum(self._lengths)
def __unicode__(self):
raise NotImplementedError(
@ -2133,7 +2148,15 @@ class DataFrame(object):
return iter(self.columns)
def __contains__(self, key):
return key in self.columns
"""Searches columns for specific key
Args:
key : The column name
Returns:
Returns a boolean if the specified key exists as a column name
"""
return self.columns.__contains__(key)
def __nonzero__(self):
raise NotImplementedError(

View file

@ -206,6 +206,10 @@ def test_int_dataframe():
test_idxmin(ray_df, pandas_df)
test_pop(ray_df, pandas_df)
test___len__(ray_df, pandas_df)
test_first_valid_index(ray_df, pandas_df)
test_last_valid_index(ray_df, pandas_df)
for key in keys:
test_get(ray_df, pandas_df, key)
@ -316,6 +320,10 @@ def test_float_dataframe():
test_notna(ray_df, pandas_df)
test_notnull(ray_df, pandas_df)
test___len__(ray_df, pandas_df)
test_first_valid_index(ray_df, pandas_df)
test_last_valid_index(ray_df, pandas_df)
for key in keys:
test_get(ray_df, pandas_df, key)
@ -343,6 +351,10 @@ def test_float_dataframe():
test_reset_index(ray_df, pandas_df)
test_reset_index(ray_df, pandas_df, inplace=True)
for key in keys:
test___contains__(ray_df, key, True)
test___contains__(ray_df, "Not Exists", False)
for key in keys:
test_insert(ray_df, pandas_df, 0, "New Column", ray_df[key])
test_insert(ray_df, pandas_df, 0, "New Column", pandas_df[key])
@ -426,6 +438,10 @@ def test_mixed_dtype_dataframe():
test_notna(ray_df, pandas_df)
test_notnull(ray_df, pandas_df)
test___len__(ray_df, pandas_df)
test_first_valid_index(ray_df, pandas_df)
test_last_valid_index(ray_df, pandas_df)
for key in keys:
test_get(ray_df, pandas_df, key)
@ -452,6 +468,10 @@ def test_mixed_dtype_dataframe():
test_reset_index(ray_df, pandas_df)
test_reset_index(ray_df, pandas_df, inplace=True)
for key in keys:
test___contains__(ray_df, key, True)
test___contains__(ray_df, "Not Exists", False)
for key in keys:
test_insert(ray_df, pandas_df, 0, "New Column", ray_df[key])
test_insert(ray_df, pandas_df, 0, "New Column", pandas_df[key])
@ -525,6 +545,10 @@ def test_nan_dataframe():
test_notna(ray_df, pandas_df)
test_notnull(ray_df, pandas_df)
test___len__(ray_df, pandas_df)
test_first_valid_index(ray_df, pandas_df)
test_last_valid_index(ray_df, pandas_df)
for key in keys:
test_get(ray_df, pandas_df, key)
@ -551,6 +575,10 @@ def test_nan_dataframe():
test_reset_index(ray_df, pandas_df)
test_reset_index(ray_df, pandas_df, inplace=True)
for key in keys:
test___contains__(ray_df, key, True)
test___contains__(ray_df, "Not Exists", False)
for key in keys:
test_insert(ray_df, pandas_df, 0, "New Column", ray_df[key])
test_insert(ray_df, pandas_df, 0, "New Column", pandas_df[key])
@ -933,11 +961,9 @@ def test_first():
ray_df.first(None)
def test_first_valid_index():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.first_valid_index()
@pytest.fixture
def test_first_valid_index(ray_df, pandas_df):
assert(ray_df.first_valid_index() == (pandas_df.first_valid_index()))
def test_floordiv():
@ -1130,11 +1156,9 @@ def test_last():
ray_df.last(None)
def test_last_valid_index():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.last_valid_index()
@pytest.fixture
def test_last_valid_index(ray_df, pandas_df):
assert(ray_df.last_valid_index() == (pandas_df.last_valid_index()))
def test_le():
@ -1922,11 +1946,9 @@ def test___setitem__():
ray_df.__setitem__(None, None)
def test___len__():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.__len__()
@pytest.fixture
def test___len__(ray_df, pandas_df):
assert((len(ray_df) == len(pandas_df)))
def test___unicode__():