Skip to content

Commit 0a4a469

Browse files
committed
Spin off the HVG-choosing code into a separate "topicker" library.
This allows us to re-use the code elsewhere, e.g., to choose top marker genes.
1 parent 69b05e1 commit 0a4a469

File tree

4 files changed

+22
-149
lines changed

4 files changed

+22
-149
lines changed

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,10 @@ else()
2727
find_package(ltla_WeightedLowess 2.0.0 CONFIG REQUIRED)
2828
find_package(libscran_scran_blocks 0.1.0 CONFIG REQUIRED)
2929
find_package(ltla_sanisizer 0.1.0 CONFIG REQUIRED)
30+
find_package(libscran_topicks 0.1.0 CONFIG REQUIRED)
3031
endif()
3132

32-
target_link_libraries(scran_variances INTERFACE tatami::tatami tatami::tatami_stats ltla::WeightedLowess libscran::scran_blocks ltla::sanisizer)
33+
target_link_libraries(scran_variances INTERFACE tatami::tatami tatami::tatami_stats ltla::WeightedLowess libscran::scran_blocks ltla::sanisizer libscran::topicks)
3334

3435
# Tests
3536
if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)

cmake/Config.cmake.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ find_dependency(tatami_tatami_stats 2.0.0 CONFIG REQUIRED)
66
find_dependency(ltla_WeightedLowess 2.0.0 CONFIG REQUIRED)
77
find_dependency(libscran_scran_blocks 0.1.0 CONFIG REQUIRED)
88
find_dependency(ltla_sanisizer 0.1.0 CONFIG REQUIRED)
9+
find_dependency(libscran_topicks 0.1.0 CONFIG REQUIRED)
910

1011
include("${CMAKE_CURRENT_LIST_DIR}/libscran_scran_variancesTargets.cmake")

extern/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,15 @@ FetchContent_Declare(
3030
GIT_TAG master # ^0.1.0
3131
)
3232

33+
FetchContent_Declare(
34+
topicks
35+
GIT_REPOSITORY https://github.com/libscran/topicks
36+
GIT_TAG master # ^0.1.0
37+
)
38+
3339
FetchContent_MakeAvailable(tatami)
3440
FetchContent_MakeAvailable(tatami_stats)
3541
FetchContent_MakeAvailable(WeightedLowess)
3642
FetchContent_MakeAvailable(scran_blocks)
3743
FetchContent_MakeAvailable(sanisizer)
44+
FetchContent_MakeAvailable(topicks)

include/scran_variances/choose_highly_variable_genes.hpp

Lines changed: 12 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <cstddef>
88

99
#include "sanisizer/sanisizer.hpp"
10+
#include "topicks/topicks.hpp"
1011

1112
/**
1213
* @file choose_highly_variable_genes.hpp
@@ -61,113 +62,14 @@ struct ChooseHighlyVariableGenesOptions {
6162
*/
6263
namespace internal {
6364

64-
template<bool keep_index_, typename Index_, typename Stat_, class Output_, class Cmp_, class CmpEqual_>
65-
void choose_highly_variable_genes(Index_ n, const Stat_* statistic, Output_& output, Cmp_ cmp, CmpEqual_ cmpeq, const ChooseHighlyVariableGenesOptions& options) {
66-
if (options.top == 0) {
67-
if constexpr(keep_index_) {
68-
; // no-op, we assume it's already empty.
69-
} else {
70-
std::fill_n(output, n, false);
71-
}
72-
return;
73-
}
74-
75-
Stat_ bound = options.bound;
76-
if (sanisizer::is_greater_than(options.top, n)) {
77-
if (options.use_bound) {
78-
for (Index_ i = 0; i < n; ++i) {
79-
bool ok = cmp(statistic[i], bound);
80-
if constexpr(keep_index_) {
81-
if (ok) {
82-
output.push_back(i);
83-
}
84-
} else {
85-
output[i] = ok;
86-
}
87-
}
88-
} else {
89-
if constexpr(keep_index_) {
90-
output.resize(sanisizer::cast<decltype(output.size())>(n));
91-
std::iota(output.begin(), output.end(), static_cast<Index_>(0));
92-
} else {
93-
std::fill_n(output, n, true);
94-
}
95-
}
96-
return;
97-
}
98-
99-
auto semi_sorted = sanisizer::create<std::vector<Index_> >(n);
100-
std::iota(semi_sorted.begin(), semi_sorted.end(), static_cast<Index_>(0));
101-
auto cBegin = semi_sorted.begin(), cMid = cBegin + options.top - 1, cEnd = semi_sorted.end();
102-
std::nth_element(cBegin, cMid, cEnd, [&](Index_ l, Index_ r) -> bool {
103-
auto L = statistic[l], R = statistic[r];
104-
if (L == R) {
105-
return l < r; // always favor the earlier index for a stable sort, even if options.larger = false.
106-
} else {
107-
return cmp(L, R);
108-
}
109-
});
110-
Stat_ threshold = statistic[*cMid];
111-
112-
if (options.keep_ties) {
113-
if (options.use_bound && !cmp(threshold, bound)) {
114-
for (Index_ i = 0; i < n; ++i) {
115-
bool ok = cmp(statistic[i], bound);
116-
if constexpr(keep_index_) {
117-
if (ok) {
118-
output.push_back(i);
119-
}
120-
} else {
121-
output[i] = ok;
122-
}
123-
}
124-
} else {
125-
for (Index_ i = 0; i < n; ++i) {
126-
bool ok = cmpeq(statistic[i], threshold);
127-
if constexpr(keep_index_) {
128-
if (ok) {
129-
output.push_back(i);
130-
}
131-
} else {
132-
output[i] = ok;
133-
}
134-
}
135-
}
136-
return;
137-
}
138-
139-
if constexpr(keep_index_) {
140-
output.reserve(options.top);
141-
} else {
142-
std::fill_n(output, n, false);
143-
}
144-
145-
if (options.use_bound) {
146-
Index_ counter = options.top;
147-
while (counter > 0) {
148-
--counter;
149-
auto pos = semi_sorted[counter];
150-
if (cmp(statistic[pos], bound)) {
151-
if constexpr(keep_index_) {
152-
output.push_back(pos);
153-
} else {
154-
output[pos] = true;
155-
}
156-
}
157-
}
158-
} else {
159-
if constexpr(keep_index_) {
160-
output.insert(output.end(), semi_sorted.begin(), semi_sorted.begin() + options.top);
161-
} else {
162-
for (decltype(options.top) i = 0; i < options.top; ++i) {
163-
output[semi_sorted[i]] = true;
164-
}
165-
}
166-
}
167-
168-
if constexpr(keep_index_) {
169-
std::sort(output.begin(), output.end());
65+
template<typename Stat_>
66+
topicks::PickTopGenesOptions<Stat_> translate_options(const ChooseHighlyVariableGenesOptions& chvg_options) {
67+
topicks::PickTopGenesOptions<Stat_> opt;
68+
opt.keep_ties = chvg_options.keep_ties;
69+
if (chvg_options.use_bound) {
70+
opt.bound = chvg_options.bound;
17071
}
72+
return opt;
17173
}
17274

17375
}
@@ -186,26 +88,8 @@ void choose_highly_variable_genes(Index_ n, const Stat_* statistic, Output_& out
18688
* @param options Further options.
18789
*/
18890
template<typename Stat_, typename Bool_>
189-
void choose_highly_variable_genes(size_t n, const Stat_* statistic, Bool_* output, const ChooseHighlyVariableGenesOptions& options) {
190-
if (options.larger) {
191-
internal::choose_highly_variable_genes<false>(
192-
n,
193-
statistic,
194-
output,
195-
[](Stat_ l, Stat_ r) -> bool { return l > r; },
196-
[](Stat_ l, Stat_ r) -> bool { return l >= r; },
197-
options
198-
);
199-
} else {
200-
internal::choose_highly_variable_genes<false>(
201-
n,
202-
statistic,
203-
output,
204-
[](Stat_ l, Stat_ r) -> bool { return l < r; },
205-
[](Stat_ l, Stat_ r) -> bool { return l <= r; },
206-
options
207-
);
208-
}
91+
void choose_highly_variable_genes(std::size_t n, const Stat_* statistic, Bool_* output, const ChooseHighlyVariableGenesOptions& options) {
92+
topicks::pick_top_genes(n, statistic, options.top, options.larger, output, internal::translate_options<Stat_>(options));
20993
}
21094

21195
/**
@@ -219,7 +103,7 @@ void choose_highly_variable_genes(size_t n, const Stat_* statistic, Bool_* outpu
219103
* @return A vector of booleans of length `n`, indicating whether each gene is to be retained.
220104
*/
221105
template<typename Bool_ = char, typename Stat_>
222-
std::vector<Bool_> choose_highly_variable_genes(size_t n, const Stat_* statistic, const ChooseHighlyVariableGenesOptions& options) {
106+
std::vector<Bool_> choose_highly_variable_genes(std::size_t n, const Stat_* statistic, const ChooseHighlyVariableGenesOptions& options) {
223107
auto output = sanisizer::create<std::vector<Bool_> >(n
224108
#ifdef SCRAN_VARIANCES_TEST_INIT
225109
, SCRAN_VARIANCES_TEST_INIT
@@ -242,27 +126,7 @@ std::vector<Bool_> choose_highly_variable_genes(size_t n, const Stat_* statistic
242126
*/
243127
template<typename Index_, typename Stat_>
244128
std::vector<Index_> choose_highly_variable_genes_index(Index_ n, const Stat_* statistic, const ChooseHighlyVariableGenesOptions& options) {
245-
std::vector<Index_> output;
246-
if (options.larger) {
247-
internal::choose_highly_variable_genes<true>(
248-
n,
249-
statistic,
250-
output,
251-
[](Stat_ l, Stat_ r) -> bool { return l > r; },
252-
[](Stat_ l, Stat_ r) -> bool { return l >= r; },
253-
options
254-
);
255-
} else {
256-
internal::choose_highly_variable_genes<true>(
257-
n,
258-
statistic,
259-
output,
260-
[](Stat_ l, Stat_ r) -> bool { return l < r; },
261-
[](Stat_ l, Stat_ r) -> bool { return l <= r; },
262-
options
263-
);
264-
}
265-
return output;
129+
return topicks::pick_top_genes_index<Index_>(n, statistic, options.top, options.larger, internal::translate_options<Stat_>(options));
266130
}
267131

268132
}

0 commit comments

Comments
 (0)