Skip to content

Commit 1558239

Browse files
committed
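Make the PS/SPS computation a bit more stack efficient
In stack-efficient builds (HAVE_STACKEFFICIENT or PQM4), fuse the PS and SPS computations into mayo_generic_m_calculate_PS_SPS so that the PS buffer holds (N_MAX + K_MAX) vectors instead of N_MAX * K_MAX, and enable -DHAVE_STACKEFFICIENT for the MAYO-1, MAYO-3 and MAYO-5 opt builds.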
1 parent b5bc5d8 commit 1558239

File tree

4 files changed

+45
-13
lines changed

4 files changed

+45
-13
lines changed

META/MAYO-1_META.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ implementations:
1515
- name: opt
1616
version: round2
1717
folder_name: .
18-
compile_opts: -DMAYO_VARIANT=MAYO_1 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL
18+
compile_opts: -DMAYO_VARIANT=MAYO_1 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT
1919
signature_keypair: pqmayo_MAYO_1_opt_crypto_sign_keypair
2020
signature_signature: pqmayo_MAYO_1_opt_crypto_sign_signature
2121
signature_verify: pqmayo_MAYO_1_opt_crypto_sign_verify

META/MAYO-3_META.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ implementations:
1515
- name: opt
1616
version: round2
1717
folder_name: .
18-
compile_opts: -DMAYO_VARIANT=MAYO_3 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL
18+
compile_opts: -DMAYO_VARIANT=MAYO_3 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT
1919
signature_keypair: pqmayo_MAYO_3_opt_crypto_sign_keypair
2020
signature_signature: pqmayo_MAYO_3_opt_crypto_sign_signature
2121
signature_verify: pqmayo_MAYO_3_opt_crypto_sign_verify

META/MAYO-5_META.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ implementations:
1515
- name: opt
1616
version: round2
1717
folder_name: .
18-
compile_opts: -DMAYO_VARIANT=MAYO_5 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL
18+
compile_opts: -DMAYO_VARIANT=MAYO_5 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT
1919
signature_keypair: pqmayo_MAYO_5_opt_crypto_sign_keypair
2020
signature_signature: pqmayo_MAYO_5_opt_crypto_sign_signature
2121
signature_verify: pqmayo_MAYO_5_opt_crypto_sign_verify

src/generic/generic_arithmetic.h

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -86,18 +86,22 @@ void P1_times_Vt(const mayo_params_t* p, const uint64_t* P1, const unsigned char
8686
mul_add_m_upper_triangular_mat_x_mat_trans(PARAM_m_vec_limbs(p), P1, V, acc, PARAM_v(p), PARAM_v(p), PARAM_k(p), 1);
8787
}
8888

89+
#if defined(HAVE_STACKEFFICIENT) || defined(PQM4)
8990
// compute P * S^t = [ P1  P2 ] * [S1] = [P1*S1 + P2*S2]
9091
//                   [  0  P3 ]   [S2]   [        P3*S2]
91-
static inline void mayo_generic_m_calculate_PS(const uint64_t *P1, const uint64_t *P2, const uint64_t *P3, const unsigned char *S,
92-
const int m, const int v, const int o, const int k, uint64_t *PS) {
92+
// compute S * PS = [ S1 S2 ] * [ PS1 = P1*S1 + P2*S2 ] = [ S1*PS1 + S2*PS2 ]
93+
//                              [ PS2 =         P3*S2 ]
94+
static inline void mayo_generic_m_calculate_PS_SPS(const uint64_t *P1, const uint64_t *P2, const uint64_t *P3, const unsigned char *S,
95+
const int m, const int v, const int o, const int k, uint64_t *SPS) {
9396

9497
const int n = o + v;
9598
const int m_vec_limbs = (m + 15)/16;
9699

97-
#if defined(HAVE_STACKEFFICIENT) || defined(PQM4)
100+
uint64_t PS[(N_MAX + K_MAX) * M_VEC_LIMBS_MAX] = { 0 };
98101
uint64_t accumulator[16 * ((M_MAX+15)/16) * N_MAX] = {0};
99102
int P1_used;
100103
int P3_used;
104+
101105
for (int col = 0; col < k; col++) {
102106
for(unsigned int i = 0; i < sizeof(accumulator)/8; i++) {
103107
accumulator[i] = 0;
@@ -123,11 +127,33 @@ static inline void mayo_generic_m_calculate_PS(const uint64_t *P1, const uint64_
123127
}
124128
}
125129

126-
for (int row = 0; row < n; row++) {
127-
m_vec_multiply_bins(m_vec_limbs, accumulator + row * 16 * m_vec_limbs, PS + (row * k + col) * m_vec_limbs);
128-
}
130+
for (int row = 0; row < n; row++) {
131+
m_vec_multiply_bins(m_vec_limbs, accumulator + row * 16 * m_vec_limbs, PS + (row + col) * m_vec_limbs);
132+
}
133+
134+
for (int row = 0; row < k; row++) {
135+
for (unsigned int i = 0; i < 16*((M_MAX+15)/16); ++i)
136+
accumulator[i] = 0;
137+
for (int j = 0; j < n; j++) {
138+
m_vec_add(m_vec_limbs, PS + (j + col) * m_vec_limbs, accumulator + S[row * n + j]*m_vec_limbs);
139+
}
140+
m_vec_multiply_bins(m_vec_limbs, accumulator, SPS + (row * k + col) * m_vec_limbs);
141+
}
142+
129143
}
130-
#else
144+
145+
}
146+
147+
#else
148+
149+
// compute P * S^t = [ P1  P2 ] * [S1] = [P1*S1 + P2*S2]
150+
//                   [  0  P3 ]   [S2]   [        P3*S2]
151+
static inline void mayo_generic_m_calculate_PS(const uint64_t *P1, const uint64_t *P2, const uint64_t *P3, const unsigned char *S,
152+
const int m, const int v, const int o, const int k, uint64_t *SPS) {
153+
154+
const int n = o + v;
155+
const int m_vec_limbs = (m + 15)/16;
156+
131157
uint64_t accumulator[16 * ((M_MAX+15)/16) * K_MAX * N_MAX] = {0};
132158
int P1_used = 0;
133159
for (int row = 0; row < v; row++) {
@@ -158,14 +184,14 @@ static inline void mayo_generic_m_calculate_PS(const uint64_t *P1, const uint64_
158184
// multiply stuff according to the bins of the accumulator and add to PS (the output buffer, passed in through the parameter named SPS).
159185
int i = 0;
160186
while (i < n * k) {
161-
m_vec_multiply_bins(m_vec_limbs, accumulator + i * 16 * m_vec_limbs, PS + i * m_vec_limbs);
187+
m_vec_multiply_bins(m_vec_limbs, accumulator + i * 16 * m_vec_limbs, SPS + i * m_vec_limbs);
162188
i++;
163189
}
164190

165-
#endif
166191
}
167192

168-
193+
// compute S * PS = [ S1 S2 ] * [ PS1 = P1*S1 + P2*S2 ] = [ S1*PS1 + S2*PS2 ]
194+
//                              [ PS2 =         P3*S2 ]
169195
static inline void mayo_generic_m_calculate_SPS(const uint64_t *PS, const unsigned char *S, int m, int k, int n, uint64_t *SPS){
170196
uint64_t accumulator[16*((M_MAX+15)/16)*K_MAX*K_MAX] = {0};
171197
const int m_vec_limbs = (m + 15)/ 16;
@@ -185,6 +211,8 @@ static inline void mayo_generic_m_calculate_SPS(const uint64_t *PS, const unsign
185211
}
186212
}
187213

214+
#endif
215+
188216

189217
static inline
190218
void P1P1t_times_O(const mayo_params_t* p, const uint64_t* P1, const unsigned char* O, uint64_t* acc){
@@ -252,11 +280,15 @@ static inline void m_calculate_PS_SPS(const mayo_params_t *p, const uint64_t *P1
252280
#ifndef ENABLE_PARAMS_DYNAMIC
253281
(void) p;
254282
#endif
283+
#if defined(HAVE_STACKEFFICIENT) || defined(PQM4)
284+
mayo_generic_m_calculate_PS_SPS(P1, P2, P3, s, PARAM_m(p), PARAM_v(p), PARAM_o(p), PARAM_k(p), SPS);
285+
#else
255286
uint64_t PS[N_MAX * K_MAX * M_VEC_LIMBS_MAX] = { 0 };
256287
mayo_generic_m_calculate_PS(P1, P2, P3, s, PARAM_m(p), PARAM_v(p), PARAM_o(p), PARAM_k(p), PS);
257288

258289
// compute S * P * S^t = S * (P * S^t)
259290
mayo_generic_m_calculate_SPS(PS, s, PARAM_m(p), PARAM_k(p), PARAM_n(p), SPS);
291+
#endif
260292
}
261293

262294
#endif

0 commit comments

Comments
 (0)