From 857458c37c618863dbf198de86a5e54cc4a994d3 Mon Sep 17 00:00:00 2001 From: Hari Subbaraj Date: Sat, 5 May 2018 21:25:42 -0700 Subject: [PATCH] [DataFrame] Implemented prod, product, added test suite (#1994) * implemented prod/product, modified declaration for sum, added pandas test suite * fixed tests * removed test_analytics file * implemented nunique, skew * fixed requested changes * added nunique, skew * fixed tests in request * added newline back * fixed newlines hopefully * fixed flake8 issues * more flake8 issues * fixed test for prod --- python/ray/dataframe/dataframe.py | 49 ++++++++++++++++----- python/ray/dataframe/test/test_dataframe.py | 20 ++++----- 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py index 4b640875a..b886691d5 100644 --- a/python/ray/dataframe/dataframe.py +++ b/python/ray/dataframe/dataframe.py @@ -646,7 +646,8 @@ class DataFrame(object): return DataFrameGroupBy(self, by, axis, level, as_index, sort, group_keys, squeeze, **kwargs) - def sum(self, axis=None, skipna=True, level=None, numeric_only=None): + def sum(self, axis=None, skipna=True, level=None, numeric_only=None, + min_count=1, **kwargs): """Perform a sum across the DataFrame. Args: @@ -658,7 +659,8 @@ class DataFrame(object): """ def remote_func(df): return df.sum(axis=axis, skipna=skipna, level=level, - numeric_only=numeric_only) + numeric_only=numeric_only, min_count=min_count, + **kwargs) return self._arithmetic_helper(remote_func, axis, level) @@ -3040,16 +3042,43 @@ class DataFrame(object): fill_value) def prod(self, axis=None, skipna=None, level=None, numeric_only=None, - min_count=0, **kwargs): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + min_count=1, **kwargs): + """Return the product of the values for the requested axis + + Args: + axis : {index (0), columns (1)} + skipna : boolean, default True + level : int or level name, default None + numeric_only : boolean, default None + min_count : int, default 1 + + Returns: + prod : Series or DataFrame (if level specified) + """ + def remote_func(df): + return df.prod(axis=axis, skipna=skipna, level=level, + numeric_only=numeric_only, min_count=min_count, + **kwargs) + + return self._arithmetic_helper(remote_func, axis, level) def product(self, axis=None, skipna=None, level=None, numeric_only=None, - min_count=0, **kwargs): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + min_count=1, **kwargs): + """Return the product of the values for the requested axis + + Args: + axis : {index (0), columns (1)} + skipna : boolean, default True + level : int or level name, default None + numeric_only : boolean, default None + min_count : int, default 1 + + Returns: + product : Series or DataFrame (if level specified) + """ + return self.prod(axis=axis, skipna=skipna, level=level, + numeric_only=numeric_only, min_count=min_count, + **kwargs) def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation='linear'): diff --git a/python/ray/dataframe/test/test_dataframe.py b/python/ray/dataframe/test/test_dataframe.py index bf7e17daf..43d11b6b4 100644 --- a/python/ray/dataframe/test/test_dataframe.py +++ b/python/ray/dataframe/test/test_dataframe.py @@ -212,6 +212,8 @@ def test_int_dataframe(): test_copy(ray_df) test_sum(ray_df, pandas_df) + test_prod(ray_df, pandas_df) + test_product(ray_df, pandas_df) test_abs(ray_df, pandas_df) test_keys(ray_df, pandas_df) test_transpose(ray_df, pandas_df) @@ -376,6 +378,8 @@ def test_float_dataframe(): test_copy(ray_df) test_sum(ray_df, pandas_df) + test_prod(ray_df, pandas_df) + test_product(ray_df, pandas_df) test_abs(ray_df, pandas_df) test_keys(ray_df, pandas_df) test_transpose(ray_df, pandas_df) @@ -2347,18 +2351,14 @@ def test_pow(): test_inter_df_math("pow", simple=False) -def test_prod(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.prod(None) +@pytest.fixture +def test_prod(ray_df, pandas_df): + assert(ray_df.prod().equals(pandas_df.prod())) -def test_product(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.product() +@pytest.fixture +def test_product(ray_df, pandas_df): + assert(ray_df.product().equals(pandas_df.product())) @pytest.fixture