diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py index 658d6460c..229b84c89 100644 --- a/sdc/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -1497,7 +1497,7 @@ def hpat_pandas_series_var_impl(self, axis=None, skipna=None, level=None, ddof=1 if valuable_length <= ddof: return numpy.nan - return numpy.nanvar(self._data) * valuable_length / (valuable_length - ddof) + return numpy_like.nanvar(self._data) * valuable_length / (valuable_length - ddof) if len(self._data) <= ddof: return numpy.nan diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py index cbdb43904..4a23519ca 100644 --- a/sdc/functions/numpy_like.py +++ b/sdc/functions/numpy_like.py @@ -497,3 +497,32 @@ def nanmean_impl(a): return np.divide(c, count) return nanmean_impl + + +def nanvar(a): + pass + + +@sdc_overload(nanvar) +def np_nanvar(a): + if not isinstance(a, types.Array): + return + isnan = get_isnan(a.dtype) + + def nanvar_impl(a): + # Compute the mean + m = nanmean(a) + + # Compute the sum of square diffs + ssd = 0.0 + count = 0 + for i in prange(len(a)): + v = a[i] + if not isnan(v): + val = (v.item() - m) + ssd += np.real(val * np.conj(val)) + count += 1 + # np.divide() doesn't raise ZeroDivisionError + return np.divide(ssd, count) + + return nanvar_impl diff --git a/sdc/tests/test_sdc_numpy.py b/sdc/tests/test_sdc_numpy.py index f2acd405b..8c630e191 100644 --- a/sdc/tests/test_sdc_numpy.py +++ b/sdc/tests/test_sdc_numpy.py @@ -243,7 +243,10 @@ def sdc_impl(): class TestArrayReductions(TestCase): - def check_reduction_basic(self, pyfunc, alt_pyfunc, all_nans=True): + def check_reduction_basic(self, pyfunc, alt_pyfunc, all_nans=True, comparator=None): + if not comparator: + comparator = np.testing.assert_array_equal + alt_cfunc = self.jit(alt_pyfunc) def cases(): @@ -262,7 +265,7 @@ def cases(): for case in cases(): with self.subTest(data=case): - np.testing.assert_array_equal(alt_cfunc(case), pyfunc(case)) + comparator(alt_cfunc(case), pyfunc(case)) def test_nanmean(self): def ref_impl(a): @@ -309,6 +312,16 @@ def sdc_impl(a): self.check_reduction_basic(ref_impl, sdc_impl) + def test_nanvar(self): + def ref_impl(a): + return np.nanvar(a) + + def sdc_impl(a): + return numpy_like.nanvar(a) + + self.check_reduction_basic(ref_impl, sdc_impl, + comparator=np.testing.assert_array_almost_equal) + def test_sum(self): def ref_impl(a): return np.sum(a) diff --git a/sdc/tests/tests_perf/test_perf_numpy.py b/sdc/tests/tests_perf/test_perf_numpy.py index 1d928e35d..d5343cc4f 100644 --- a/sdc/tests/tests_perf/test_perf_numpy.py +++ b/sdc/tests/tests_perf/test_perf_numpy.py @@ -109,6 +109,11 @@ def _test_case(self, cases, name, total_data_length, data_num=1, input_data=test CE(type_='Numba', code='np.nanprod(data)', jitted=True), CE(type_='SDC', code='sdc.functions.numpy_like.nanprod(data)', jitted=True), ], usecase_params='data'), + TC(name='nanvar', size=[10 ** 7], call_expr=[ + CE(type_='Python', code='np.nanvar(data)', jitted=False), + CE(type_='Numba', code='np.nanvar(data)', jitted=True), + CE(type_='SDC', code='sdc.functions.numpy_like.nanvar(data)', jitted=True), + ], usecase_params='data'), TC(name='sum', size=[10 ** 7], call_expr=[ CE(type_='Python', code='np.sum(data)', jitted=False), CE(type_='Numba', code='np.sum(data)', jitted=True), diff --git a/sdc/tests/tests_perf/test_perf_series.py b/sdc/tests/tests_perf/test_perf_series.py index 272d211bd..20f1c3e19 100644 --- a/sdc/tests/tests_perf/test_perf_series.py +++ b/sdc/tests/tests_perf/test_perf_series.py @@ -127,14 +127,16 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes TC(name='shift', size=[10 ** 8]), TC(name='size', size=[10 ** 7], call_expr='data.size', usecase_params='data'), TC(name='sort_values', size=[10 ** 5]), - TC(name='std', size=[10 ** 7]), + TC(name='std', size=[10 ** 7], params='skipna=True'), + TC(name='std', size=[10 ** 7], params='skipna=False'), TC(name='sub', size=[10 ** 7], params='other', data_num=2), TC(name='sum', size=[10 ** 8]), TC(name='take', size=[10 ** 7], call_expr='data.take([0])', usecase_params='data'), TC(name='truediv', size=[10 ** 7], params='other', data_num=2), TC(name='values', size=[10 ** 7], call_expr='data.values', usecase_params='data'), TC(name='value_counts', size=[10 ** 6]), - TC(name='var', size=[10 ** 8]), + TC(name='var', size=[10 ** 8], params='skipna=True'), + TC(name='var', size=[10 ** 8], params='skipna=False'), TC(name='unique', size=[10 ** 5]), ]