Skip to content

Commit e6a0979

Browse files
committed
perf: don't alloc zeroed in encode
1 parent 3ecac1e commit e6a0979

File tree

2 files changed

+91
-82
lines changed

2 files changed

+91
-82
lines changed

README.md

Lines changed: 74 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -33,94 +33,94 @@ You can run these benchmarks with `cargo bench --features std` on a nightly
3333
compiler.
3434

3535
```log
36-
test decode::const_hex::bench1_32b ... bench: 14 ns/iter (+/- 0)
37-
test decode::const_hex::bench2_256b ... bench: 37 ns/iter (+/- 4)
38-
test decode::const_hex::bench3_2k ... bench: 226 ns/iter (+/- 7)
39-
test decode::const_hex::bench4_16k ... bench: 1,636 ns/iter (+/- 13)
40-
test decode::const_hex::bench5_128k ... bench: 12,644 ns/iter (+/- 84)
41-
test decode::const_hex::bench6_1m ... bench: 102,836 ns/iter (+/- 3,236)
36+
test decode::const_hex::bench1_32b ... bench: 16 ns/iter (+/- 5)
37+
test decode::const_hex::bench2_256b ... bench: 37 ns/iter (+/- 0)
38+
test decode::const_hex::bench3_2k ... bench: 232 ns/iter (+/- 2)
39+
test decode::const_hex::bench4_16k ... bench: 1,672 ns/iter (+/- 12)
40+
test decode::const_hex::bench5_128k ... bench: 12,979 ns/iter (+/- 91)
41+
test decode::const_hex::bench6_1m ... bench: 104,751 ns/iter (+/- 2,068)
4242
test decode::faster_hex::bench1_32b ... bench: 15 ns/iter (+/- 0)
43-
test decode::faster_hex::bench2_256b ... bench: 50 ns/iter (+/- 1)
44-
test decode::faster_hex::bench3_2k ... bench: 244 ns/iter (+/- 4)
45-
test decode::faster_hex::bench4_16k ... bench: 1,782 ns/iter (+/- 31)
46-
test decode::faster_hex::bench5_128k ... bench: 13,745 ns/iter (+/- 66)
47-
test decode::faster_hex::bench6_1m ... bench: 115,126 ns/iter (+/- 1,544)
48-
test decode::hex::bench1_32b ... bench: 101 ns/iter (+/- 6)
49-
test decode::hex::bench2_256b ... bench: 701 ns/iter (+/- 12)
50-
test decode::hex::bench3_2k ... bench: 5,287 ns/iter (+/- 97)
51-
test decode::hex::bench4_16k ... bench: 41,743 ns/iter (+/- 1,420)
52-
test decode::hex::bench5_128k ... bench: 782,327 ns/iter (+/- 18,876)
53-
test decode::hex::bench6_1m ... bench: 6,283,181 ns/iter (+/- 88,813)
43+
test decode::faster_hex::bench2_256b ... bench: 54 ns/iter (+/- 1)
44+
test decode::faster_hex::bench3_2k ... bench: 253 ns/iter (+/- 3)
45+
test decode::faster_hex::bench4_16k ... bench: 1,831 ns/iter (+/- 20)
46+
test decode::faster_hex::bench5_128k ... bench: 14,120 ns/iter (+/- 57)
47+
test decode::faster_hex::bench6_1m ... bench: 115,291 ns/iter (+/- 1,325)
48+
test decode::hex::bench1_32b ... bench: 104 ns/iter (+/- 1)
49+
test decode::hex::bench2_256b ... bench: 697 ns/iter (+/- 7)
50+
test decode::hex::bench3_2k ... bench: 5,189 ns/iter (+/- 86)
51+
test decode::hex::bench4_16k ... bench: 42,355 ns/iter (+/- 21,853)
52+
test decode::hex::bench5_128k ... bench: 765,278 ns/iter (+/- 4,091)
53+
test decode::hex::bench6_1m ... bench: 6,161,416 ns/iter (+/- 64,954)
5454
5555
test decode_to_slice::const_hex::bench1_32b ... bench: 5 ns/iter (+/- 0)
56-
test decode_to_slice::const_hex::bench2_256b ... bench: 25 ns/iter (+/- 0)
57-
test decode_to_slice::const_hex::bench3_2k ... bench: 201 ns/iter (+/- 3)
58-
test decode_to_slice::const_hex::bench4_16k ... bench: 1,600 ns/iter (+/- 17)
59-
test decode_to_slice::const_hex::bench5_128k ... bench: 12,732 ns/iter (+/- 119)
60-
test decode_to_slice::const_hex::bench6_1m ... bench: 103,414 ns/iter (+/- 2,402)
56+
test decode_to_slice::const_hex::bench2_256b ... bench: 26 ns/iter (+/- 0)
57+
test decode_to_slice::const_hex::bench3_2k ... bench: 210 ns/iter (+/- 10)
58+
test decode_to_slice::const_hex::bench4_16k ... bench: 1,667 ns/iter (+/- 13)
59+
test decode_to_slice::const_hex::bench5_128k ... bench: 13,043 ns/iter (+/- 19)
60+
test decode_to_slice::const_hex::bench6_1m ... bench: 105,883 ns/iter (+/- 1,427)
6161
test decode_to_slice::faster_hex::bench1_32b ... bench: 6 ns/iter (+/- 0)
6262
test decode_to_slice::faster_hex::bench2_256b ... bench: 28 ns/iter (+/- 0)
63-
test decode_to_slice::faster_hex::bench3_2k ... bench: 206 ns/iter (+/- 3)
64-
test decode_to_slice::faster_hex::bench4_16k ... bench: 1,640 ns/iter (+/- 13)
65-
test decode_to_slice::faster_hex::bench5_128k ... bench: 13,065 ns/iter (+/- 92)
66-
test decode_to_slice::faster_hex::bench6_1m ... bench: 105,963 ns/iter (+/- 2,831)
67-
test decode_to_slice::hex::bench1_32b ... bench: 37 ns/iter (+/- 0)
68-
test decode_to_slice::hex::bench2_256b ... bench: 298 ns/iter (+/- 6)
69-
test decode_to_slice::hex::bench3_2k ... bench: 2,552 ns/iter (+/- 27)
70-
test decode_to_slice::hex::bench4_16k ... bench: 20,335 ns/iter (+/- 581)
71-
test decode_to_slice::hex::bench5_128k ... bench: 611,494 ns/iter (+/- 11,531)
72-
test decode_to_slice::hex::bench6_1m ... bench: 4,941,477 ns/iter (+/- 180,172)
73-
74-
test encode::const_hex::bench1_32b ... bench: 10 ns/iter (+/- 0)
75-
test encode::const_hex::bench2_256b ... bench: 27 ns/iter (+/- 0)
76-
test encode::const_hex::bench3_2k ... bench: 97 ns/iter (+/- 0)
77-
test encode::const_hex::bench4_16k ... bench: 644 ns/iter (+/- 8)
78-
test encode::const_hex::bench5_128k ... bench: 4,967 ns/iter (+/- 52)
79-
test encode::const_hex::bench6_1m ... bench: 45,424 ns/iter (+/- 1,922)
63+
test decode_to_slice::faster_hex::bench3_2k ... bench: 214 ns/iter (+/- 2)
64+
test decode_to_slice::faster_hex::bench4_16k ... bench: 1,710 ns/iter (+/- 6)
65+
test decode_to_slice::faster_hex::bench5_128k ... bench: 13,304 ns/iter (+/- 37)
66+
test decode_to_slice::faster_hex::bench6_1m ... bench: 110,276 ns/iter (+/- 3,475)
67+
test decode_to_slice::hex::bench1_32b ... bench: 38 ns/iter (+/- 2)
68+
test decode_to_slice::hex::bench2_256b ... bench: 300 ns/iter (+/- 185)
69+
test decode_to_slice::hex::bench3_2k ... bench: 2,717 ns/iter (+/- 64)
70+
test decode_to_slice::hex::bench4_16k ... bench: 19,257 ns/iter (+/- 530)
71+
test decode_to_slice::hex::bench5_128k ... bench: 624,172 ns/iter (+/- 15,725)
72+
test decode_to_slice::hex::bench6_1m ... bench: 5,333,915 ns/iter (+/- 298,093)
73+
74+
test encode::const_hex::bench1_32b ... bench: 6 ns/iter (+/- 0)
75+
test encode::const_hex::bench2_256b ... bench: 10 ns/iter (+/- 0)
76+
test encode::const_hex::bench3_2k ... bench: 72 ns/iter (+/- 1)
77+
test encode::const_hex::bench4_16k ... bench: 462 ns/iter (+/- 4)
78+
test encode::const_hex::bench5_128k ... bench: 3,600 ns/iter (+/- 28)
79+
test encode::const_hex::bench6_1m ... bench: 29,447 ns/iter (+/- 858)
8080
test encode::faster_hex::bench1_32b ... bench: 17 ns/iter (+/- 0)
81-
test encode::faster_hex::bench2_256b ... bench: 36 ns/iter (+/- 0)
82-
test encode::faster_hex::bench3_2k ... bench: 95 ns/iter (+/- 1)
83-
test encode::faster_hex::bench4_16k ... bench: 597 ns/iter (+/- 10)
84-
test encode::faster_hex::bench5_128k ... bench: 4,538 ns/iter (+/- 180)
85-
test encode::faster_hex::bench6_1m ... bench: 41,513 ns/iter (+/- 779)
86-
test encode::hex::bench1_32b ... bench: 97 ns/iter (+/- 0)
87-
test encode::hex::bench2_256b ... bench: 694 ns/iter (+/- 4)
88-
test encode::hex::bench3_2k ... bench: 5,476 ns/iter (+/- 28)
89-
test encode::hex::bench4_16k ... bench: 43,617 ns/iter (+/- 215)
90-
test encode::hex::bench5_128k ... bench: 348,646 ns/iter (+/- 1,155)
91-
test encode::hex::bench6_1m ... bench: 2,895,775 ns/iter (+/- 95,699)
81+
test encode::faster_hex::bench2_256b ... bench: 37 ns/iter (+/- 3)
82+
test encode::faster_hex::bench3_2k ... bench: 102 ns/iter (+/- 1)
83+
test encode::faster_hex::bench4_16k ... bench: 614 ns/iter (+/- 6)
84+
test encode::faster_hex::bench5_128k ... bench: 4,764 ns/iter (+/- 12)
85+
test encode::faster_hex::bench6_1m ... bench: 40,894 ns/iter (+/- 1,223)
86+
test encode::hex::bench1_32b ... bench: 112 ns/iter (+/- 0)
87+
test encode::hex::bench2_256b ... bench: 812 ns/iter (+/- 5)
88+
test encode::hex::bench3_2k ... bench: 6,404 ns/iter (+/- 26)
89+
test encode::hex::bench4_16k ... bench: 51,039 ns/iter (+/- 595)
90+
test encode::hex::bench5_128k ... bench: 408,378 ns/iter (+/- 23,022)
91+
test encode::hex::bench6_1m ... bench: 3,571,916 ns/iter (+/- 142,828)
9292
9393
test encode_to_slice::const_hex::bench1_32b ... bench: 1 ns/iter (+/- 0)
9494
test encode_to_slice::const_hex::bench2_256b ... bench: 6 ns/iter (+/- 0)
95-
test encode_to_slice::const_hex::bench3_2k ... bench: 59 ns/iter (+/- 0)
96-
test encode_to_slice::const_hex::bench4_16k ... bench: 438 ns/iter (+/- 2)
97-
test encode_to_slice::const_hex::bench5_128k ... bench: 3,414 ns/iter (+/- 10)
98-
test encode_to_slice::const_hex::bench6_1m ... bench: 28,947 ns/iter (+/- 546)
95+
test encode_to_slice::const_hex::bench3_2k ... bench: 53 ns/iter (+/- 0)
96+
test encode_to_slice::const_hex::bench4_16k ... bench: 452 ns/iter (+/- 3)
97+
test encode_to_slice::const_hex::bench5_128k ... bench: 3,550 ns/iter (+/- 10)
98+
test encode_to_slice::const_hex::bench6_1m ... bench: 29,605 ns/iter (+/- 916)
9999
test encode_to_slice::faster_hex::bench1_32b ... bench: 4 ns/iter (+/- 0)
100100
test encode_to_slice::faster_hex::bench2_256b ... bench: 7 ns/iter (+/- 0)
101-
test encode_to_slice::faster_hex::bench3_2k ... bench: 63 ns/iter (+/- 0)
102-
test encode_to_slice::faster_hex::bench4_16k ... bench: 390 ns/iter (+/- 5)
103-
test encode_to_slice::faster_hex::bench5_128k ... bench: 3,012 ns/iter (+/- 22)
104-
test encode_to_slice::faster_hex::bench6_1m ... bench: 26,138 ns/iter (+/- 596)
101+
test encode_to_slice::faster_hex::bench3_2k ... bench: 47 ns/iter (+/- 0)
102+
test encode_to_slice::faster_hex::bench4_16k ... bench: 402 ns/iter (+/- 5)
103+
test encode_to_slice::faster_hex::bench5_128k ... bench: 3,121 ns/iter (+/- 25)
104+
test encode_to_slice::faster_hex::bench6_1m ... bench: 26,171 ns/iter (+/- 573)
105105
test encode_to_slice::hex::bench1_32b ... bench: 11 ns/iter (+/- 0)
106-
test encode_to_slice::hex::bench2_256b ... bench: 116 ns/iter (+/- 0)
107-
test encode_to_slice::hex::bench3_2k ... bench: 971 ns/iter (+/- 6)
108-
test encode_to_slice::hex::bench4_16k ... bench: 7,821 ns/iter (+/- 48)
109-
test encode_to_slice::hex::bench5_128k ... bench: 61,907 ns/iter (+/- 377)
110-
test encode_to_slice::hex::bench6_1m ... bench: 499,203 ns/iter (+/- 3,771)
106+
test encode_to_slice::hex::bench2_256b ... bench: 118 ns/iter (+/- 0)
107+
test encode_to_slice::hex::bench3_2k ... bench: 994 ns/iter (+/- 4)
108+
test encode_to_slice::hex::bench4_16k ... bench: 8,065 ns/iter (+/- 31)
109+
test encode_to_slice::hex::bench5_128k ... bench: 63,982 ns/iter (+/- 2,026)
110+
test encode_to_slice::hex::bench6_1m ... bench: 515,171 ns/iter (+/- 2,789)
111111
112-
test format::const_hex::bench1_32b ... bench: 10 ns/iter (+/- 1)
112+
test format::const_hex::bench1_32b ... bench: 9 ns/iter (+/- 0)
113113
test format::const_hex::bench2_256b ... bench: 18 ns/iter (+/- 0)
114-
test format::const_hex::bench3_2k ... bench: 134 ns/iter (+/- 2)
115-
test format::const_hex::bench4_16k ... bench: 1,151 ns/iter (+/- 5)
116-
test format::const_hex::bench5_128k ... bench: 9,298 ns/iter (+/- 83)
117-
test format::const_hex::bench6_1m ... bench: 83,611 ns/iter (+/- 1,530)
118-
test format::std::bench1_32b ... bench: 359 ns/iter (+/- 6)
119-
test format::std::bench2_256b ... bench: 2,773 ns/iter (+/- 44)
120-
test format::std::bench3_2k ... bench: 22,620 ns/iter (+/- 213)
121-
test format::std::bench4_16k ... bench: 183,197 ns/iter (+/- 1,512)
122-
test format::std::bench5_128k ... bench: 1,481,851 ns/iter (+/- 9,791)
123-
test format::std::bench6_1m ... bench: 11,947,054 ns/iter (+/- 132,579)
114+
test format::const_hex::bench3_2k ... bench: 119 ns/iter (+/- 1)
115+
test format::const_hex::bench4_16k ... bench: 1,157 ns/iter (+/- 3)
116+
test format::const_hex::bench5_128k ... bench: 9,560 ns/iter (+/- 443)
117+
test format::const_hex::bench6_1m ... bench: 85,479 ns/iter (+/- 1,498)
118+
test format::std::bench1_32b ... bench: 374 ns/iter (+/- 6)
119+
test format::std::bench2_256b ... bench: 2,952 ns/iter (+/- 10)
120+
test format::std::bench3_2k ... bench: 23,767 ns/iter (+/- 61)
121+
test format::std::bench4_16k ... bench: 183,579 ns/iter (+/- 2,078)
122+
test format::std::bench5_128k ... bench: 1,498,391 ns/iter (+/- 8,445)
123+
test format::std::bench6_1m ... bench: 11,965,082 ns/iter (+/- 43,784)
124124
```
125125

126126
## Acknowledgements

src/lib.rs

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
)]
4949

5050
#[cfg(feature = "alloc")]
51+
#[allow(unused_imports)]
5152
#[macro_use]
5253
extern crate alloc;
5354

@@ -576,16 +577,24 @@ pub fn decode_to_array<T: AsRef<[u8]>, const N: usize>(input: T) -> Result<[u8;
576577

577578
#[cfg(feature = "alloc")]
578579
fn encode_inner<const UPPER: bool, const PREFIX: bool>(data: &[u8]) -> String {
579-
let mut buf = vec![0; (PREFIX as usize + data.len()) * 2];
580-
let output = if PREFIX {
581-
buf[0] = b'0';
582-
buf[1] = b'x';
583-
&mut buf[2..]
584-
} else {
585-
&mut buf[..]
580+
let capacity = PREFIX as usize * 2 + data.len() * 2;
581+
let mut buf = Vec::<u8>::with_capacity(capacity);
582+
// SAFETY: The vec is never read from until it has been overwritten by the prefix and the encoded output written below.
583+
#[allow(clippy::uninit_vec)]
584+
unsafe {
585+
buf.set_len(capacity)
586586
};
587+
let mut output = buf.as_mut_ptr();
588+
if PREFIX {
589+
// SAFETY: `output` is long enough.
590+
unsafe {
591+
output.add(0).write(b'0');
592+
output.add(1).write(b'x');
593+
output = output.add(2);
594+
}
595+
}
587596
// SAFETY: `output` is long enough (data.len() * 2).
588-
unsafe { imp::encode::<UPPER>(data, output.as_mut_ptr()) };
597+
unsafe { imp::encode::<UPPER>(data, output) };
589598
// SAFETY: We only write ASCII bytes.
590599
unsafe { String::from_utf8_unchecked(buf) }
591600
}

0 commit comments

Comments
 (0)