Skip to content

Commit 19b7c38

Browse files
committed
Change data representation required by histogramming algorithm
While it was attractive on the surface to permit a simple `reinterpret` of a data matrix as a vector of coordinate tuples, this was a very restrictive requirement for non-bitstype data types. Instead, switch to taking in a tuple of vectors. (The matrix case can still be made essentially copy-free by using `view`s of rows or columns in a tuple.)
1 parent e9d77bb commit 19b7c38

File tree

3 files changed

+45
-27
lines changed

3 files changed

+45
-27
lines changed

src/histogram.jl

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ struct HistEdge{T,I}
2121
return new{T,I}(lo, hi, nbin, Δx, Δs)
2222
end
2323
end
24-
HistEdge(edge::AbstractRange) = HistEdge(first(edge), last(edge), length(edge) - 1)
24+
HistEdge(edge::AbstractRange) = HistEdge(first(edge), last(edge), Int(length(edge) - 1))
2525

2626
Base.eltype(::HistEdge{T}) where {T} = T
2727

@@ -97,15 +97,21 @@ end
9797
function _histogram!(binning::B,
9898
dest::AbstractArray{R,N},
9999
edges::Tuple{Vararg{HistEdge,N}},
100-
data::AbstractVector{<:Tuple{Vararg{Any,N}}},
100+
data::Tuple{Vararg{AbstractVector,N}},
101101
weights::Union{Nothing,<:AbstractVector},
102102
) where {B<:AbstractBinningKDE, R, N}
103103
Z = ntuple(identity, Val(N))
104104

105105
# run through data vector and bin entries if they are within bounds
106-
wsum = isnothing(weights) ? zero(_unitless(R)) : zero(eltype(weights))
107-
for ii in eachindex(data)
108-
coord = @inbounds data[ii]
106+
I = eachindex(data...)
107+
if isnothing(weights)
108+
wsum = zero(_unitless(R))
109+
else
110+
I = eachindex(I, weights)
111+
wsum = zero(eltype(weights))
112+
end
113+
for ii in I
114+
coord = map(i -> (@inbounds data[i][ii]), Z)
109115
if !mapreduce(i -> edges[i].lo ≤ coord[i] ≤ edges[i].hi, &, Z)
110116
continue
111117
end
@@ -136,7 +142,7 @@ _hist_size(edges::Tuple{Vararg{AbstractRange}}) = map(e -> length(e) - 1, edges)
136142
_hist_size(edges::Tuple{Vararg{HistEdge}}) = map(e -> e.nbin, edges)
137143

138144
function _histogram(binning::AbstractBinningKDE,
139-
data::AbstractVector{<:Tuple{Vararg{Any,N}}},
145+
data::Tuple{Vararg{AbstractVector,N}},
140146
edges::Tuple{Vararg{HistEdge,N}};
141147
weights::Union{Nothing,<:AbstractVector} = nothing
142148
) where {N}
@@ -145,15 +151,15 @@ function _histogram(binning::AbstractBinningKDE,
145151
return hist
146152
end
147153
function _histogram(binning::AbstractBinningKDE,
148-
data::AbstractVector{<:Tuple{Vararg{Any,N}}},
154+
data::Tuple{Vararg{AbstractVector,N}},
149155
edges::Tuple{Vararg{AbstractRange,N}};
150156
weights::Union{Nothing,<:AbstractVector} = nothing
151157
) where {N}
152158
edges′ = map(HistEdge, edges)
153159
return _histogram(binning, data, edges′; weights)
154160
end
155161
function _histogram(binning::AbstractBinningKDE,
156-
data::AbstractVector{<:Tuple{Vararg{Any,N}}},
162+
data::Tuple{Vararg{AbstractVector,N}},
157163
edges::Union{<:HistEdge,<:AbstractRange}...;
158164
weights::Union{Nothing,<:AbstractVector} = nothing
159165
) where {N}

src/kde.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,7 @@ function estimate(method::AbstractBinningKDE,
407407
info::UnivariateKDEInfo) where {T}
408408
lo, hi, nbins = info.lo, info.hi, info.nbins
409409

410-
data′ = reinterpret(reshape, Tuple{T}, data)
411-
f = Histogramming._histogram(method, data′,
410+
f = Histogramming._histogram(method, (data,),
412411
(Histogramming.HistEdge(lo, hi, nbins),); weights)
413412
if lo == hi
414413
centers = range(lo, hi, length = 1)

test/histogram.jl

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,10 @@ end
9696
hist = similar(first(centers), (axes(c, 1) for c in centers)...)
9797

9898
# values for dims 2 and up
99-
coord_rest = ((0.0 for _ in 2:N)...,)
100-
index_rest = ((1 for _ in 2:N)...,)
99+
coord_rest = (([0.0] for _ in 2:N)...,)
101100

102101
# nothing weights are interpreted as unity weight
103-
x1 = [(0.33, coord_rest...)]
102+
x1 = ([0.33], coord_rest...)
104103
fill!(hist, 0)
105104
_histogram!(style, hist, edges′, x1, nothing)
106105
@test sum(hist) * step(edges[1])^N == 1.0
@@ -109,7 +108,7 @@ end
109108
@test hist == _histogram(style, x1, edges′; weights = [1])
110109

111110
# out-of-bounds elements are not binned
112-
x0 = [(-1.0, (0.0 for _ in 1:N-1)...,)]
111+
x0 = ([-1.0], ([0.0] for _ in 1:N-1)...)
113112
fill!(hist, 0)
114113
_histogram!(style, hist, edges′, x0, nothing)
115114
@test sum(hist) == 0.0
@@ -138,7 +137,7 @@ end
138137

139138
# make data unitful, with mixed unit axes
140139
units = (u"m", u"s^-2", u"kg")[1:N]
141-
vals = [x .* units for x in x1]
140+
vals = map((x, u) -> x .* u, x1, units)
142141
uedges = map((e, u) -> e .* u, edges, units)
143142
uhist = zeros(_hist_eltype(uedges), axes(hist)...)
144143
# verify that the function accepts unitful quantities
@@ -153,14 +152,30 @@ end
153152
@test eltype(uhist3) == eltype(uhist)
154153
@test uhist3 == uhist
155154
end
155+
156+
@testset "Non-bitstype numbers" begin
157+
edges = (big"0.5":1:big"5.5", 0.5:1:5.5, 0.5f0:1:5.5f0)
158+
159+
coords = ([big"1.0"], [2.0], [3f0])
160+
H1 = _histogram(HB, coords[1:1], edges[1:1]...)
161+
@test eltype(H1) == BigFloat
162+
@test H1[1] == 1 && sum(H1) == 1.0
163+
164+
H2 = _histogram(HB, coords[1:2], edges[1:2]...)
165+
@test eltype(H2) == BigFloat
166+
@test H2[1,2] == 1 && sum(H2) == 1.0
167+
168+
H3 = _histogram(HB, coords[1:3], edges[1:3]...)
169+
@test eltype(H3) == BigFloat
170+
@test H3[1,2,3] == 1 && sum(H3) == 1.0
171+
end
156172
end
157173

158174
@testset "Weighting" begin
159175
using .Histogramming: HistEdge, _histogram!
160176

161177
N = 100
162178
rv = randn(N)
163-
data = reinterpret(reshape, Tuple{Float64}, rv)
164179

165180
Nlen = Float64(N)
166181
Npos = Float64(count(>=(0), rv))
@@ -179,9 +194,9 @@ end
179194
fill!(H0, 0); fill!(H1, 0); fill!(H2, 0)
180195

181196
# binning uses the sum of weights (not effective sample size as KDE does)
182-
wsum0 = _histogram!(style, H0, edges, data, nothing)
183-
wsum1 = _histogram!(style, H1, edges, data, weight1)
184-
wsum2 = _histogram!(style, H2, edges, data, weight2)
197+
wsum0 = _histogram!(style, H0, edges, (rv,), nothing)
198+
wsum1 = _histogram!(style, H1, edges, (rv,), weight1)
199+
wsum2 = _histogram!(style, H2, edges, (rv,), weight2)
185200
@test wsum0 == N
186201
@test wsum1 == N
187202
@test wsum2 == 2N
@@ -194,9 +209,9 @@ end
194209
# binning weights respect limits and ignore out-of-bounds entries
195210
fill!(H0, 0); fill!(H1, 0); fill!(H2, 0)
196211

197-
wsum0 = _histogram!(style, @view(H1[1:end÷2]), edges_pos, data, weight1)
198-
wsum1 = _histogram!(style, @view(H1[1:end÷2]), edges_pos, data, weight1)
199-
wsum2 = _histogram!(style, @view(H2[1:end÷2]), edges_pos, data, weight2)
212+
wsum0 = _histogram!(style, @view(H1[1:end÷2]), edges_pos, (rv,), weight1)
213+
wsum1 = _histogram!(style, @view(H1[1:end÷2]), edges_pos, (rv,), weight1)
214+
wsum2 = _histogram!(style, @view(H2[1:end÷2]), edges_pos, (rv,), weight2)
200215
@test wsum0 == Npos
201216
@test wsum1 == Npos
202217
@test wsum2 == 2Npos
@@ -212,13 +227,11 @@ end
212227
l32 = LinRange(r32[1], r32[end], length(r32))
213228

214229
@testset "$r" for r in (r64, l64, r32, l32)
215-
v = reinterpret(reshape, Tuple{eltype(r)}, Vector(r))
216-
217230
edges = (HistEdge(r),)
218231
# For regular histogram binning, using the bin edges as values must result in a uniform
219232
# distribution except the last bin which is doubled (due to being closed on the right).
220233
H = zeros(eltype(r), length(r) - 1)
221-
ν = _histogram!(HB, H, edges, v, nothing)
234+
ν = _histogram!(HB, H, edges, (r,), nothing)
222235
@test ν == length(r)
223236
@test_broken all(@view(H[1:end-1]) .== H[1])
224237
@test H[end] == 2H[1]
@@ -228,7 +241,7 @@ end
228241
# neighbors. (The remaining interior bins give up half of their weight but
229242
# simultaneously gain from a neighbor, so they are unchanged.)
230243
fill!(H, 0.0)
231-
ν = _histogram!(LB, H, edges, v, nothing)
244+
ν = _histogram!(LB, H, edges, (r,), nothing)
232245
@test ν == length(r)
233246
@test all(@view(H[2:end-1]) .≈ H[2])
234247
@test H[end] ≈ H[1]
@@ -249,7 +262,7 @@ end
249262

250263
H = zeros(nbins)
251264
edges = (HistEdge(lo, hi, nbins),)
252-
_histogram!(LB, H, edges, [(x,)], nothing)
265+
_histogram!(LB, H, edges, ([x],), nothing)
253266
@test all(iszero, @view H[1:end-1])
254267
@test H[end] > 0.0
255268
end

0 commit comments

Comments
 (0)