[DataFrame] Add implementation for get method (#1496)

* Add implementation for get method
Add tests for get method
Add implementation/tests for get_dtype_counts method
Add implementation/tests for get_ftype_counts method

* Add test fixtures

* Change method tests to fixtures

* Flake8
This commit is contained in:
Helen Che 2018-02-08 22:12:03 -08:00 committed by Devin Petersohn
parent 41007722f9
commit 62680011ee
2 changed files with 119 additions and 25 deletions

View file

@ -596,13 +596,43 @@ class DataFrame(object):
raise NotImplementedError("Not Yet implemented.")
def get(self, key, default=None):
    """Get item from object for given key (DataFrame column, Panel
    slice, etc.). Returns default value if not found.

    Args:
        key (DataFrame column, Panel slice) : the key for which value
            to get
        default : fallback forwarded to every partition's ``get`` call;
            None when omitted

    Returns:
        value (type of items contained in object) : A value that is
            stored at the key
    """
    # Each partition performs its own lookup for `key`; the per-partition
    # results are then gathered into a single pandas object.
    temp_df = self._map_partitions(
        lambda df: df.get(key, default=default))
    # NOTE(review): for a missing key every partition hands back `default`
    # instead of a column -- confirm to_pandas copes with that input.
    return to_pandas(temp_df)
def get_dtype_counts(self):
    """Get the counts of dtypes in this object.

    Returns:
        The counts of dtypes in this object.
    """
    # Only the first entry of self._df is queried remotely.
    # NOTE(review): presumably every partition carries the same column
    # dtypes, so one partition is representative -- TODO confirm.
    return ray.get(
        _deploy_func.remote(
            lambda df: df.get_dtype_counts(), self._df[0]
        )
    )
def get_ftype_counts(self):
    """Get the counts of ftypes in this object.

    Returns:
        The counts of ftypes in this object.
    """
    # Only the first entry of self._df is queried remotely.
    # NOTE(review): presumably every partition carries the same column
    # ftypes, so one partition is representative -- TODO confirm.
    return ray.get(
        _deploy_func.remote(
            lambda df: df.get_ftype_counts(), self._df[0]
        )
    )
def get_value(self, index, col, takeable=False):
    """Placeholder for single-value access; not implemented yet.

    Args:
        index: accepted but unused.
        col: accepted but unused.
        takeable: accepted but unused.

    Raises:
        NotImplementedError: on every call.
    """
    raise NotImplementedError("Not Yet implemented.")

View file

@ -3,10 +3,10 @@ from __future__ import division
from __future__ import print_function
import pytest
import ray.dataframe as rdf
import numpy as np
import pandas as pd
import ray
import ray.dataframe as rdf
@pytest.fixture
@ -109,6 +109,24 @@ def test_transpose(ray_df, pandas_df):
assert(ray_df_equals_pandas(ray_df.transpose(), pandas_df.transpose()))
@pytest.fixture
def test_get(ray_df, pandas_df, key):
    """Check that ``get`` on the ray frame agrees with pandas.

    The lookup is compared twice: once relying on the implicit default and
    once with an explicit ``default='default'`` fallback.

    Args:
        ray_df: frame under test.
        pandas_df: reference pandas frame.
        key: key passed to both ``get`` calls.
    """
    actual = ray_df.get(key)
    expected = pandas_df.get(key)
    assert actual.equals(expected)

    actual_with_fallback = ray_df.get(key, default='default')
    expected_with_fallback = pandas_df.get(key, default='default')
    assert actual_with_fallback.equals(expected_with_fallback)
@pytest.fixture
def test_get_dtype_counts(ray_df, pandas_df):
    """Check that ``get_dtype_counts`` on the ray frame agrees with pandas.

    Args:
        ray_df: frame under test.
        pandas_df: reference pandas frame.
    """
    actual = ray_df.get_dtype_counts()
    expected = pandas_df.get_dtype_counts()
    assert actual.equals(expected)
@pytest.fixture
def test_get_ftype_counts(ray_df, pandas_df):
    """Check that ``get_ftype_counts`` on the ray frame agrees with pandas.

    Args:
        ray_df: frame under test.
        pandas_df: reference pandas frame.
    """
    actual = ray_df.get_ftype_counts()
    expected = pandas_df.get_ftype_counts()
    assert actual.equals(expected)
@pytest.fixture
def create_test_dataframe():
df = pd.DataFrame({'col1': [0, 1, 2, 3],
@ -136,6 +154,11 @@ def test_int_dataframe():
lambda x: x,
lambda x: False]
keys = ['col1',
'col2',
'col3',
'col4']
test_roundtrip(ray_df, pandas_df)
test_index(ray_df, pandas_df)
test_size(ray_df, pandas_df)
@ -171,6 +194,12 @@ def test_int_dataframe():
test_idxmin(ray_df, pandas_df)
test_pop(ray_df, pandas_df)
for key in keys:
test_get(ray_df, pandas_df, key)
test_get_dtype_counts(ray_df, pandas_df)
test_get_ftype_counts(ray_df, pandas_df)
def test_float_dataframe():
@ -188,6 +217,11 @@ def test_float_dataframe():
lambda x: x,
lambda x: False]
keys = ['col1',
'col2',
'col3',
'col4']
test_roundtrip(ray_df, pandas_df)
test_index(ray_df, pandas_df)
test_size(ray_df, pandas_df)
@ -223,6 +257,57 @@ def test_float_dataframe():
test_idxmin(ray_df, pandas_df)
test_pop(ray_df, pandas_df)
for key in keys:
test_get(ray_df, pandas_df, key)
test_get_dtype_counts(ray_df, pandas_df)
test_get_ftype_counts(ray_df, pandas_df)
def test_mixed_dtype_dataframe():
    """Run the shared comparison helpers on a frame with mixed dtypes.

    The frame holds two integer columns, one float column and one string
    column, and is split into a ray frame via ``rdf.from_pandas(..., 2)``.
    Helpers are invoked in a fixed order.
    """
    frame = pd.DataFrame({
        'col1': [1, 2, 3, 4],
        'col2': [4, 5, 6, 7],
        'col3': [8.0, 9.4, 10.1, 11.3],
        'col4': ['a', 'b', 'c', 'd']})
    distributed = rdf.from_pandas(frame, 2)

    # Element-wise callables handed to the applymap comparison.
    cell_transforms = [
        lambda x: x + x,
        lambda x: str(x),
        lambda x: x,
        lambda x: False,
    ]
    column_names = ['col1', 'col2', 'col3', 'col4']

    structural_checks = (
        test_roundtrip,
        test_index,
        test_size,
        test_ndim,
        test_ftypes,
        test_values,
        test_axes,
        test_shape,
        test_add_prefix,
        test_add_suffix,
    )
    for check in structural_checks:
        check(distributed, frame)

    for transform in cell_transforms:
        test_applymap(distributed, frame, transform)

    # test_copy only takes the ray frame.
    test_copy(distributed)

    for check in (test_sum, test_keys, test_transpose):
        check(distributed, frame)

    for column in column_names:
        test_get(distributed, frame, column)

    for check in (test_get_dtype_counts, test_get_ftype_counts):
        check(distributed, frame)
def test_add():
ray_df = create_test_dataframe()
@ -631,27 +716,6 @@ def test_ge():
ray_df.ge(None)
# Stub-era check: builds the shared test frame and expects calling get(None)
# on it to raise NotImplementedError.
# NOTE(review): the hunk header just above shrinks this region from 27 to 6
# lines, so this definition looks like the removed side of the diff -- confirm
# before treating it as live code.
def test_get():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.get(None)
# Stub-era check: builds the shared test frame and expects get_dtype_counts()
# to raise NotImplementedError.
# NOTE(review): presumably part of the removed side of this diff hunk (the
# region shrinks from 27 to 6 lines) -- confirm before treating it as live code.
def test_get_dtype_counts():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.get_dtype_counts()
# Stub-era check: builds the shared test frame and expects get_ftype_counts()
# to raise NotImplementedError.
# NOTE(review): presumably part of the removed side of this diff hunk (the
# region shrinks from 27 to 6 lines) -- confirm before treating it as live code.
def test_get_ftype_counts():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.get_ftype_counts()
def test_get_value():
ray_df = create_test_dataframe()