Skip to content

Commit f87b420

Browse files
committed
save about 1% by normalising in the xdbl biscalar ladder
1 parent 3f0db90 commit f87b420

File tree

4 files changed

+112
-9
lines changed

4 files changed

+112
-9
lines changed

Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,8 @@ harness = false
3333
# name = "bench_product_isogeny"
3434
# path = "benches/bench_product_isogeny.rs"
3535
# harness = false
36+
37+
# [[bench]]
38+
# name = "bench_biscalar_ladder"
39+
# path = "benches/bench_biscalar_ladder.rs"
40+
# harness = false

benches/bench_biscalar_ladder.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#![allow(non_snake_case)]
2+
3+
mod benchmark_biscalar {
4+
use std::time::Duration;
5+
6+
use criterion::{Criterion, black_box, criterion_group};
7+
use fp2::fq::Fq as FqTrait;
8+
use rand_core::RngCore;
9+
10+
use isogeny::elliptic::basis::BasisX;
11+
use isogeny::elliptic::curve::Curve;
12+
use isogeny::fields::sqisign::SqiField248 as Fp2;
13+
use isogeny::utilities::test_utils::drng::DRNG;
14+
15+
fn benchmark_ladder_biscalar(c: &mut Criterion) {
16+
let mut rng = DRNG::from_seed("test_biscalar_ladder".as_bytes());
17+
18+
let A = Fp2::from_i32(6);
19+
let E = Curve::new(&A);
20+
21+
let mut a: [u8; 32] = [0; 32];
22+
let mut b: [u8; 32] = [0; 32];
23+
rng.fill_bytes(&mut a);
24+
rng.fill_bytes(&mut b);
25+
26+
// Compute [a]P + [b]Q with projective points
27+
let P = E.rand_point(&mut rng);
28+
let Q = E.rand_point(&mut rng);
29+
let PQ = E.sub(&P, &Q);
30+
31+
let aP = E.mul(&P, &a, 32 << 3);
32+
let bQ = E.mul(&Q, &b, 32 << 3);
33+
let aPbQ = E.add(&aP, &bQ);
34+
35+
// Compute [a]P + [b]Q with x-only points
36+
let xP = P.to_pointx();
37+
let xQ = Q.to_pointx();
38+
let xPQ = PQ.to_pointx();
39+
let basis = BasisX::from_points(&xP, &xQ, &xPQ);
40+
let xaPbQ = E.ladder_biscalar(&basis, &a, &b, 32 << 3, 32 << 3);
41+
42+
// Ensure they're the same.
43+
assert!(xaPbQ.equals(&aPbQ.to_pointx()) == u32::MAX);
44+
45+
let bench_id = format!("Benchmarking biscalar ladder with 128 bit scalar",);
46+
c.bench_function(&bench_id, |bb| {
47+
bb.iter(|| {
48+
black_box(E).ladder_biscalar(
49+
&black_box(basis),
50+
&black_box(a),
51+
&black_box(b),
52+
black_box(32 << 3),
53+
black_box(32 << 3),
54+
)
55+
})
56+
});
57+
}
58+
59+
// Criterion group `benchmark_biscalar`: runs `benchmark_ladder_biscalar` using the
// default configuration with the measurement time set to 10 seconds.
criterion_group!(
    name = benchmark_biscalar;
    config = Criterion::default().measurement_time(Duration::from_secs(10));
    targets = benchmark_ladder_biscalar
);
64+
}
65+
66+
fn main() {
67+
benchmark_biscalar::benchmark_biscalar();
68+
}

src/elliptic/x_only_arithmetic.rs

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,23 @@ impl<Fq: FqTrait> Curve<Fq> {
7373
*ZQ = *XPQ * (V1 - V2).square();
7474
}
7575

76+
/// x-only differential addition with PointX type, sets `R` to x(P + Q) given x(P)
77+
/// x(Q) and x(P - Q) as `PointX<Fq>`.
78+
#[inline]
79+
fn xadd_aff_add_into(R: &mut PointX<Fq>, xP: &PointX<Fq>, xQ: &PointX<Fq>, xPmQ: &Fq) {
80+
R.X = xQ.X;
81+
R.Z = xQ.Z;
82+
Self::xadd_aff(&xPmQ, &xP.X, &xP.Z, &mut R.X, &mut R.Z);
83+
}
84+
85+
/// Return x(P + Q) given x(P), x(Q) and x(P - Q) as `PointX<Fq>`.
86+
#[inline]
87+
fn xdiff_add_add(xP: &PointX<Fq>, xQ: &PointX<Fq>, xPmQ: &Fq) -> PointX<Fq> {
88+
let mut R = PointX::INFINITY;
89+
Self::xadd_aff_add_into(&mut R, xP, xQ, xPmQ);
90+
R
91+
}
92+
7693
/// P3 <- n*P, x-only variant.
7794
/// Integer n is encoded as unsigned little-endian, with length
7895
/// nbitlen bits. Bits beyond that length are ignored.
@@ -326,11 +343,23 @@ impl<Fq: FqTrait> Curve<Fq> {
326343
R[2] = T[s1];
327344

328345
// Compute the difference points for T, R
329-
let mut D1 = R[1];
330-
let mut D2 = R[2];
346+
let D1 = R[1];
347+
let D2 = R[2];
331348
R[2] = Self::xdiff_add(&R[1], &R[2], &B.PQ);
332-
let mut F1 = R[2];
333-
let mut F2 = B.PQ;
349+
let F1 = R[2];
350+
let F2 = B.PQ;
351+
352+
// The cost for the main loop is k doubles and 2*k differential adds.
353+
// If we normalise D1, D2, F1, F2 then we can save one mul per differential
354+
// add, saving 2*k multiplications in total. As this function is usually
355+
// called with scalars of size log(p)/2 > 30, then it's worth normalising
356+
// the points.
357+
let mut inverses: [Fq; 4] = [D1.Z, D2.Z, F1.Z, F2.Z];
358+
Fq::batch_invert(&mut inverses);
359+
let mut xD1 = D1.X * inverses[0];
360+
let mut xD2 = D2.X * inverses[1];
361+
let mut xF1 = F1.X * inverses[2];
362+
let mut xF2 = F2.X * inverses[3];
334363

335364
// Main ladder loop, compute [a]P + [b]Q
336365
for i in (0..k).rev() {
@@ -345,10 +374,10 @@ impl<Fq: FqTrait> Curve<Fq> {
345374
T[0] = self.xdouble(&T[h >> 1]);
346375
T[1] = R[r2];
347376
T[2] = R[r2 + 1];
348-
PointX::condswap(&mut D1, &mut D2, (r2 as u32).wrapping_neg());
349-
T[1] = Self::xdiff_add(&T[1], &T[2], &D1);
350-
T[2] = Self::xdiff_add(&R[0], &R[2], &F1);
351-
PointX::condswap(&mut F1, &mut F2, ((h & 1) as u32).wrapping_neg());
377+
Fq::condswap(&mut xD1, &mut xD2, (r2 as u32).wrapping_neg());
378+
T[1] = Self::xdiff_add_add(&T[1], &T[2], &xD1);
379+
T[2] = Self::xdiff_add_add(&R[0], &R[2], &xF1);
380+
Fq::condswap(&mut xF1, &mut xF2, ((h & 1) as u32).wrapping_neg());
352381

353382
// Update R values from T values.
354383
R = T;

src/theta/theta_gluing.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ impl<Fq: FqTrait> EllipticProduct<Fq> {
7777
// We can do a single inversion during action by translation for a cost of
7878
// 21M + 4*(2S + 11M) + 1I to compute the coefficients, then the coefficients below
7979
// cost 44M to compute. If we model 1I to be log(p) multiplications then this
80-
// is ~100M for level 1 bringing the total to about 209 M.
80+
// is 30M for level 1, bringing the total to about 140M. For other implementations,
81+
// inversion could well be more expensive (without the use of binary gcd for inversion in Fp).
8182
//
8283
// If we don't do any inversions then four action_by_translation costs 4*(9M + 3S)
8384
// but then the computation of the coefficients grows. For example, the coefficient

0 commit comments

Comments
 (0)