Skip to content

Commit 5289403

Browse files
authored
Merge pull request #12 from circstat/omnibus-test-overflow
fix: `omnibus_test` pval computation overflow for large n
2 parents 62d285f + f53fc8c commit 5289403

File tree

3 files changed

+54
-11
lines changed

3 files changed

+54
-11
lines changed

pycircstat2/hypothesis.py

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -361,14 +361,16 @@ def omnibus_test(
361361
verbose: bool = False,
362362
) -> tuple[float, float]:
363363
"""
364-
A simple alternative to the Rayleigh test, aka Hodges-Ajne test,
365-
which does not assume sampling from a specific distribution. This
366-
is called an "omnibus test" because it works well for unimodal,
367-
bimodal, and multimodal distributions (for ungrouped data).
364+
Hodges–Ajne omnibus test for circular uniformity.
368365
369366
- H0: The population is uniformly distributed around the circle
370367
- H1: The population is not uniformly distributed.
371368
369+
This test is distribution-free and handles uni-, bi-, and multimodal
370+
alternatives. The classical p-value involves factorials and
371+
overflows for large *n*. We therefore compute it in log-space
372+
(``math.lgamma``) and exponentiate at the very end.
373+
372374
Parameters
373375
----------
374376
alpha: np.array or None
@@ -403,12 +405,41 @@ def omnibus_test(
403405
lines_rotated > 0.0, lines_rotated < np.round(np.pi, 5)
404406
).sum(1)
405407
m = int(np.min(right))
406-
pval = (
407-
(n - 2 * m)
408-
* math.factorial(n)
409-
/ (math.factorial(m) * math.factorial(n - m))
410-
/ 2 ** (n - 1)
408+
409+
# ------------------------------------------------------------------
410+
# 2. p-value ——— analytical formula and its log form
411+
# ------------------------------------------------------------------
412+
# Classical (Zar 2010, eq. 27-4):
413+
#
414+
# p = (n − 2m) · n! / [ m! · (n − m)! · 2^(n−1) ] …(1)
415+
# # pval = (
416+
# # (n - 2 * m)
417+
# # * math.factorial(n)
418+
# # / (math.factorial(m) * math.factorial(n - m))
419+
# # / 2 ** (n - 1)
420+
# # ) # eq(27.7)
421+
422+
# Taking natural logs and using Γ(k+1) = k! with log Γ = lgamma:
423+
#
424+
# ln p = ln(n − 2m)
425+
# + lgamma(n + 1)
426+
# − lgamma(m + 1)
427+
# − lgamma(n − m + 1)
428+
# − (n − 1)·ln 2 …(2)
429+
#
430+
# Eq. (2) is numerically safe for very large n; we exponentiate at
431+
# the end, knowing the result may under-flow to 0.0 in double precision.
432+
# ------------------------------------------------------------------
433+
434+
logp = (
435+
math.log(n - 2*m)
436+
+ math.lgamma(n + 1)
437+
- math.lgamma(m + 1)
438+
- math.lgamma(n - m + 1)
439+
- (n - 1)*math.log(2.0)
411440
)
441+
pval = np.exp(logp)
442+
412443
A = np.pi * np.sqrt(n) / (2 * (n - 2 * m))
413444

414445
if verbose:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "pycircstat2"
7-
version = "0.1.12"
7+
version = "0.1.13"
88
description = "Circular statistcs with Python."
99
authors = [
1010
]

tests/test_hypothesis.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,18 @@ def test_omnibus_test():
104104

105105
np.testing.assert_approx_equal(pval, 0.0043, significant=2)
106106

107+
# test large sample size
108+
# (factorial division overflow while computing p-val)
109+
# fixed in PR 12
110+
from pycircstat2.distributions import circularuniform, vonmises
111+
d0 = vonmises.rvs(mu=0, kappa=1, size=10_000)
112+
d1 = circularuniform.rvs(size=10_000)
113+
114+
_, pval = omnibus_test(alpha=d0)
115+
assert pval < 0.05, "Expected significant p-value for von Mises distribution"
116+
_, pval = omnibus_test(alpha=d1)
117+
assert pval > 0.05, "Expected non-significant p-value for uniform distribution"
118+
107119

108120
def test_batschelet_test():
109121
data_zar_ex5_ch27 = load_data("D8", source="zar")
@@ -752,4 +764,4 @@ def test_equal_median_small_sample():
752764

753765
result = common_median_test([alpha1, alpha2])
754766
assert result["reject"] is np.False_
755-
assert not np.isnan(result["common_median"])
767+
assert not np.isnan(result["common_median"])

0 commit comments

Comments
 (0)