7
7
#include < cstddef>
8
8
9
9
#include " sanisizer/sanisizer.hpp"
10
+ #include " topicks/topicks.hpp"
10
11
11
12
/* *
12
13
* @file choose_highly_variable_genes.hpp
@@ -61,113 +62,14 @@ struct ChooseHighlyVariableGenesOptions {
61
62
*/
62
63
namespace internal {
63
64
64
- template <bool keep_index_, typename Index_, typename Stat_, class Output_ , class Cmp_ , class CmpEqual_ >
65
- void choose_highly_variable_genes (Index_ n, const Stat_* statistic, Output_& output, Cmp_ cmp, CmpEqual_ cmpeq, const ChooseHighlyVariableGenesOptions& options) {
66
- if (options.top == 0 ) {
67
- if constexpr (keep_index_) {
68
- ; // no-op, we assume it's already empty.
69
- } else {
70
- std::fill_n (output, n, false );
71
- }
72
- return ;
73
- }
74
-
75
- Stat_ bound = options.bound ;
76
- if (sanisizer::is_greater_than (options.top , n)) {
77
- if (options.use_bound ) {
78
- for (Index_ i = 0 ; i < n; ++i) {
79
- bool ok = cmp (statistic[i], bound);
80
- if constexpr (keep_index_) {
81
- if (ok) {
82
- output.push_back (i);
83
- }
84
- } else {
85
- output[i] = ok;
86
- }
87
- }
88
- } else {
89
- if constexpr (keep_index_) {
90
- output.resize (sanisizer::cast<decltype (output.size ())>(n));
91
- std::iota (output.begin (), output.end (), static_cast <Index_>(0 ));
92
- } else {
93
- std::fill_n (output, n, true );
94
- }
95
- }
96
- return ;
97
- }
98
-
99
- auto semi_sorted = sanisizer::create<std::vector<Index_> >(n);
100
- std::iota (semi_sorted.begin (), semi_sorted.end (), static_cast <Index_>(0 ));
101
- auto cBegin = semi_sorted.begin (), cMid = cBegin + options.top - 1 , cEnd = semi_sorted.end ();
102
- std::nth_element (cBegin, cMid, cEnd, [&](Index_ l, Index_ r) -> bool {
103
- auto L = statistic[l], R = statistic[r];
104
- if (L == R) {
105
- return l < r; // always favor the earlier index for a stable sort, even if options.larger = false.
106
- } else {
107
- return cmp (L, R);
108
- }
109
- });
110
- Stat_ threshold = statistic[*cMid];
111
-
112
- if (options.keep_ties ) {
113
- if (options.use_bound && !cmp (threshold, bound)) {
114
- for (Index_ i = 0 ; i < n; ++i) {
115
- bool ok = cmp (statistic[i], bound);
116
- if constexpr (keep_index_) {
117
- if (ok) {
118
- output.push_back (i);
119
- }
120
- } else {
121
- output[i] = ok;
122
- }
123
- }
124
- } else {
125
- for (Index_ i = 0 ; i < n; ++i) {
126
- bool ok = cmpeq (statistic[i], threshold);
127
- if constexpr (keep_index_) {
128
- if (ok) {
129
- output.push_back (i);
130
- }
131
- } else {
132
- output[i] = ok;
133
- }
134
- }
135
- }
136
- return ;
137
- }
138
-
139
- if constexpr (keep_index_) {
140
- output.reserve (options.top );
141
- } else {
142
- std::fill_n (output, n, false );
143
- }
144
-
145
- if (options.use_bound ) {
146
- Index_ counter = options.top ;
147
- while (counter > 0 ) {
148
- --counter;
149
- auto pos = semi_sorted[counter];
150
- if (cmp (statistic[pos], bound)) {
151
- if constexpr (keep_index_) {
152
- output.push_back (pos);
153
- } else {
154
- output[pos] = true ;
155
- }
156
- }
157
- }
158
- } else {
159
- if constexpr (keep_index_) {
160
- output.insert (output.end (), semi_sorted.begin (), semi_sorted.begin () + options.top );
161
- } else {
162
- for (decltype (options.top ) i = 0 ; i < options.top ; ++i) {
163
- output[semi_sorted[i]] = true ;
164
- }
165
- }
166
- }
167
-
168
- if constexpr (keep_index_) {
169
- std::sort (output.begin (), output.end ());
65
+ template <typename Stat_>
66
+ topicks::PickTopGenesOptions<Stat_> translate_options (const ChooseHighlyVariableGenesOptions& chvg_options) {
67
+ topicks::PickTopGenesOptions<Stat_> opt;
68
+ opt.keep_ties = chvg_options.keep_ties ;
69
+ if (chvg_options.use_bound ) {
70
+ opt.bound = chvg_options.bound ;
170
71
}
72
+ return opt;
171
73
}
172
74
173
75
}
@@ -186,26 +88,8 @@ void choose_highly_variable_genes(Index_ n, const Stat_* statistic, Output_& out
186
88
* @param options Further options.
187
89
*/
188
90
template <typename Stat_, typename Bool_>
189
- void choose_highly_variable_genes (size_t n, const Stat_* statistic, Bool_* output, const ChooseHighlyVariableGenesOptions& options) {
190
- if (options.larger ) {
191
- internal::choose_highly_variable_genes<false >(
192
- n,
193
- statistic,
194
- output,
195
- [](Stat_ l, Stat_ r) -> bool { return l > r; },
196
- [](Stat_ l, Stat_ r) -> bool { return l >= r; },
197
- options
198
- );
199
- } else {
200
- internal::choose_highly_variable_genes<false >(
201
- n,
202
- statistic,
203
- output,
204
- [](Stat_ l, Stat_ r) -> bool { return l < r; },
205
- [](Stat_ l, Stat_ r) -> bool { return l <= r; },
206
- options
207
- );
208
- }
91
+ void choose_highly_variable_genes (std::size_t n, const Stat_* statistic, Bool_* output, const ChooseHighlyVariableGenesOptions& options) {
92
+ topicks::pick_top_genes (n, statistic, options.top , options.larger , output, internal::translate_options<Stat_>(options));
209
93
}
210
94
211
95
/* *
@@ -219,7 +103,7 @@ void choose_highly_variable_genes(size_t n, const Stat_* statistic, Bool_* outpu
219
103
* @return A vector of booleans of length `n`, indicating whether each gene is to be retained.
220
104
*/
221
105
template <typename Bool_ = char , typename Stat_>
222
- std::vector<Bool_> choose_highly_variable_genes (size_t n, const Stat_* statistic, const ChooseHighlyVariableGenesOptions& options) {
106
+ std::vector<Bool_> choose_highly_variable_genes (std:: size_t n, const Stat_* statistic, const ChooseHighlyVariableGenesOptions& options) {
223
107
auto output = sanisizer::create<std::vector<Bool_> >(n
224
108
#ifdef SCRAN_VARIANCES_TEST_INIT
225
109
, SCRAN_VARIANCES_TEST_INIT
@@ -242,27 +126,7 @@ std::vector<Bool_> choose_highly_variable_genes(size_t n, const Stat_* statistic
242
126
*/
243
127
template <typename Index_, typename Stat_>
244
128
std::vector<Index_> choose_highly_variable_genes_index (Index_ n, const Stat_* statistic, const ChooseHighlyVariableGenesOptions& options) {
245
- std::vector<Index_> output;
246
- if (options.larger ) {
247
- internal::choose_highly_variable_genes<true >(
248
- n,
249
- statistic,
250
- output,
251
- [](Stat_ l, Stat_ r) -> bool { return l > r; },
252
- [](Stat_ l, Stat_ r) -> bool { return l >= r; },
253
- options
254
- );
255
- } else {
256
- internal::choose_highly_variable_genes<true >(
257
- n,
258
- statistic,
259
- output,
260
- [](Stat_ l, Stat_ r) -> bool { return l < r; },
261
- [](Stat_ l, Stat_ r) -> bool { return l <= r; },
262
- options
263
- );
264
- }
265
- return output;
129
+ return topicks::pick_top_genes_index<Index_>(n, statistic, options.top , options.larger , internal::translate_options<Stat_>(options));
266
130
}
267
131
268
132
}
0 commit comments