From 25f3f0fd1cd0552d9c0f31b2771352229b4fb01a Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 15:06:25 -0400 Subject: [PATCH 1/8] Avoid computing mean for p=0,1 central moments --- src/summary_statistics/means.rs | 68 ++++++++++++++++----------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index a2b5bcd6..14051e10 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -56,21 +56,21 @@ where where A: Float + FromPrimitive, { - let mean = self.mean(); - match mean { - None => None, - Some(mean) => match order { - 0 => Some(A::one()), - 1 => Some(A::zero()), - n => { - let shifted_array = self.map(|x| x.clone() - mean); - let shifted_moments = moments(shifted_array, n); - let correction_term = -shifted_moments[1].clone(); + if self.is_empty() { + return None; + } + match order { + 0 => Some(A::one()), + 1 => Some(A::zero()), + n => { + let mean = self.mean().unwrap(); + let shifted_array = self.map(|x| x.clone() - mean); + let shifted_moments = moments(shifted_array, n); + let correction_term = -shifted_moments[1].clone(); - let coefficients = central_moment_coefficients(&shifted_moments); - Some(horner_method(coefficients, correction_term)) - } - }, + let coefficients = central_moment_coefficients(&shifted_moments); + Some(horner_method(coefficients, correction_term)) + } } } @@ -78,29 +78,27 @@ where where A: Float + FromPrimitive, { - let mean = self.mean(); - match mean { - None => None, - Some(mean) => { - match order { - 0 => Some(vec![A::one()]), - 1 => Some(vec![A::one(), A::zero()]), - n => { - // We only perform this operations once, and then reuse their - // result to compute all the required moments - let shifted_array = self.map(|x| x.clone() - mean); - let shifted_moments = moments(shifted_array, n); - let correction_term = -shifted_moments[1].clone(); + if self.is_empty() { + return None; + } + match order { + 0 => Some(vec![A::one()]), + 1 => Some(vec![A::one(), A::zero()]), + n => { + // We only perform these operations once, and then reuse their + // result to compute all the required moments + let mean = self.mean().unwrap(); + let shifted_array = self.map(|x| x.clone() - mean); + let shifted_moments = moments(shifted_array, n); + let correction_term = -shifted_moments[1].clone(); - let mut central_moments = vec![A::one(), A::zero()]; - for k in 2..=n { - let coefficients = central_moment_coefficients(&shifted_moments[..=k]); - let central_moment = horner_method(coefficients, correction_term); - central_moments.push(central_moment) - } - Some(central_moments) - } + let mut central_moments = vec![A::one(), A::zero()]; + for k in 2..=n { + let coefficients = central_moment_coefficients(&shifted_moments[..=k]); + let central_moment = horner_method(coefficients, correction_term); + central_moments.push(central_moment) } + Some(central_moments) } } } From d9ba4bbb098f92d5feda430e8025040ee36207f2 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 15:18:55 -0400 Subject: [PATCH 2/8] Replace map + clone with mapv --- src/summary_statistics/means.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 14051e10..19002745 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -64,7 +64,7 @@ where 1 => Some(A::zero()), n => { let mean = self.mean().unwrap(); - let shifted_array = self.map(|x| x.clone() - mean); + let shifted_array = self.mapv(|x| x - mean); let shifted_moments = moments(shifted_array, n); let correction_term = -shifted_moments[1].clone(); @@ -88,7 +88,7 @@ where // We only perform these operations once, and then reuse their // result to compute all the required moments let mean = self.mean().unwrap(); - let shifted_array = self.map(|x| x.clone() - mean); + let shifted_array = self.mapv(|x| x - mean); let shifted_moments = moments(shifted_array, n); let correction_term = -shifted_moments[1].clone(); From 4a052884a2f51ce0c586b02c225b54cc053eb235 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 15:41:53 -0400 Subject: [PATCH 3/8] Check for moment order overflowing `i32` In reality, this probably isn't necessary because I'd expect someone to terminate their program when trying to calculate a moment of order greater than `i32::MAX` (because it would take so long), but it's nice to have an explicit check anyway. --- src/summary_statistics/means.rs | 7 +++++-- src/summary_statistics/mod.rs | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 19002745..4fd66b46 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -1,6 +1,6 @@ use super::SummaryStatisticsExt; use ndarray::{ArrayBase, Data, Dimension}; -use num_traits::{Float, FromPrimitive, Zero}; +use num_traits::{Float, FromPrimitive, ToPrimitive, Zero}; use std::ops::{Add, Div}; impl SummaryStatisticsExt for ArrayBase @@ -124,6 +124,9 @@ where { let n_elements = A::from_usize(a.len()).expect("Converting number of elements to `A` must not fail"); + let order = order + .to_i32() + .expect("Moment order must not overflow `i32`."); // When k=0, we are raising each element to the 0th power // No need to waste CPU cycles going through the array @@ -135,7 +138,7 @@ where } for k in 2..=order { - moments.push(a.map(|x| x.powi(k as i32)).sum() / n_elements) + moments.push(a.map(|x| x.powi(k)).sum() / n_elements) } moments } diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index ff873d3b..a5664a0a 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -113,7 +113,8 @@ where /// The *p*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5 /// in [Pébay et al., 2016]). Complexity is *O(np)* when *n >> p*, *p > 1*. /// - /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. + /// **Panics** if `A::from_usize()` fails to convert the number of elements + /// in the array or if `order` overflows `i32`. /// /// [central moment]: https://en.wikipedia.org/wiki/Central_moment /// [Pébay et al., 2016]: https://www.osti.gov/pages/servlets/purl/1427275 @@ -130,7 +131,8 @@ where /// being thus more efficient than repeated calls to [central moment] if the computation /// of central moments of multiple orders is required. /// - /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. + /// **Panics** if `A::from_usize()` fails to convert the number of elements + /// in the array or if `order` overflows `i32`. /// /// [central moments]: https://en.wikipedia.org/wiki/Central_moment /// [central moment]: #tymethod.central_moment From 3aaab13934681707a3c4648a807ac08f1f310399 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 15:49:59 -0400 Subject: [PATCH 4/8] Take advantage of IterBinomial from num-integer --- Cargo.toml | 1 + src/lib.rs | 1 + src/summary_statistics/means.rs | 29 ++++------------------------- 3 files changed, 6 insertions(+), 25 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 57795ce8..645c41c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ categories = ["data-structures", "science"] [dependencies] ndarray = "0.12.1" noisy_float = "0.1.8" +num-integer = "0.1" num-traits = "0.2" rand = "0.6" itertools = { version = "0.7.0", default-features = false } diff --git a/src/lib.rs b/src/lib.rs index 9cf586f1..37499368 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,7 @@ extern crate itertools; extern crate ndarray; extern crate noisy_float; +extern crate num_integer; extern crate num_traits; extern crate rand; diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 4fd66b46..126c352f 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -1,5 +1,6 @@ use super::SummaryStatisticsExt; use ndarray::{ArrayBase, Data, Dimension}; +use num_integer::IterBinomial; use num_traits::{Float, FromPrimitive, ToPrimitive, Zero}; use std::ops::{Add, Div}; @@ -153,34 +154,12 @@ where A: Float + FromPrimitive, { let order = moments.len(); - moments - .iter() - .rev() - .enumerate() - .map(|(k, moment)| A::from_usize(binomial_coefficient(order, k)).unwrap() * *moment) + IterBinomial::new(order) + .zip(moments.iter().rev()) + .map(|(binom, &moment)| A::from_usize(binom).unwrap() * moment) .collect() } -/// Returns the binomial coefficient "n over k". -/// -/// **Panics** if k > n. -fn binomial_coefficient(n: usize, k: usize) -> usize { - if k > n { - panic!( - "Tried to compute the binomial coefficient of {0} over {1}, \ - but {1} is strictly greater than {0}!" - ) - } - // BC(n, k) = BC(n, n-k) - let k = if k > n - k { n - k } else { k }; - let mut result = 1; - for i in 0..k { - result = result * (n - i); - result = result / (i + 1); - } - result -} - /// Uses [Horner's method] to evaluate a polynomial with a single indeterminate. /// /// Coefficients are expected to be sorted by ascending order From 7efba82454e54537bd695b80a86bae54dc9b1557 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 16:02:11 -0400 Subject: [PATCH 5/8] Remove unnecessary explicit clones `A: Float` requires `A: Copy`. --- src/summary_statistics/means.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 126c352f..8ef1d48e 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -67,7 +67,7 @@ where let mean = self.mean().unwrap(); let shifted_array = self.mapv(|x| x - mean); let shifted_moments = moments(shifted_array, n); - let correction_term = -shifted_moments[1].clone(); + let correction_term = -shifted_moments[1]; let coefficients = central_moment_coefficients(&shifted_moments); Some(horner_method(coefficients, correction_term)) @@ -91,7 +91,7 @@ where let mean = self.mean().unwrap(); let shifted_array = self.mapv(|x| x - mean); let shifted_moments = moments(shifted_array, n); - let correction_term = -shifted_moments[1].clone(); + let correction_term = -shifted_moments[1]; let mut central_moments = vec![A::one(), A::zero()]; for k in 2..=n { From 0b0b0bab3acfbfc1379ba64fbd522058a7832d72 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 16:04:02 -0400 Subject: [PATCH 6/8] Replace .map() with ? I think this is a bit easier to understand at first glance. --- src/summary_statistics/means.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 8ef1d48e..d9ed10b1 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -41,16 +41,16 @@ where where A: Float + FromPrimitive, { - let central_moments = self.central_moments(4); - central_moments.map(|moments| moments[4] / moments[2].powi(2)) + let central_moments = self.central_moments(4)?; + Some(central_moments[4] / central_moments[2].powi(2)) } fn skewness(&self) -> Option where A: Float + FromPrimitive, { - let central_moments = self.central_moments(3); - central_moments.map(|moments| moments[3] / moments[2].sqrt().powi(3)) + let central_moments = self.central_moments(3)?; + Some(central_moments[3] / central_moments[2].sqrt().powi(3)) } fn central_moment(&self, order: usize) -> Option From f992453db3c55e04f188ce382e38df3d12c3b9e3 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 16:09:21 -0400 Subject: [PATCH 7/8] Test more cases in central moment tests --- src/summary_statistics/means.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index d9ed10b1..ec0fd80f 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -252,8 +252,10 @@ mod tests { #[test] fn test_central_order_moment_with_empty_array_of_floats() { let a: Array1 = array![]; - assert!(a.central_moment(1).is_none()); - assert!(a.central_moments(1).is_none()); + for order in 0..=3 { + assert!(a.central_moment(order).is_none()); + assert!(a.central_moments(order).is_none()); + } } #[test] From a8af8801f49b449279428ac224c755ca6247a076 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 16:10:45 -0400 Subject: [PATCH 8/8] Rename central moment tests --- src/summary_statistics/means.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index ec0fd80f..1609b531 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -250,7 +250,7 @@ mod tests { } #[test] - fn test_central_order_moment_with_empty_array_of_floats() { + fn test_central_moment_with_empty_array_of_floats() { let a: Array1 = array![]; for order in 0..=3 { assert!(a.central_moment(order).is_none()); @@ -259,7 +259,7 @@ mod tests { } #[test] - fn test_zeroth_central_order_moment_is_one() { + fn test_zeroth_central_moment_is_one() { let n = 50; let bound: f64 = 200.; let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs())); @@ -267,7 +267,7 @@ mod tests { } #[test] - fn test_first_central_order_moment_is_zero() { + fn test_first_central_moment_is_zero() { let n = 50; let bound: f64 = 200.; let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs())); @@ -275,7 +275,7 @@ mod tests { } #[test] - fn test_central_order_moments() { + fn test_central_moments() { let a: Array1 = array![ 0.07820559, 0.5026185, 0.80935324, 0.39384033, 0.9483038, 0.62516215, 0.90772261, 0.87329831, 0.60267392, 0.2960298, 0.02810356, 0.31911966, 0.86705506, 0.96884832, @@ -306,7 +306,7 @@ mod tests { } #[test] - fn test_bulk_central_order_moments() { + fn test_bulk_central_moments() { // Test that the bulk method is coherent with the non-bulk method let n = 50; let bound: f64 = 200.;