Skip to content

Commit d62aa26

Browse files
committed
Deal with padding in one place, in blockwise.
We introduce two functions, to assist with processing sequences of fixed bytes. One processes a single byte a bunch of times, the other does something more complicated. We use this for all hashes, CMAC and CBCMAC. This gives a good performance improvement.
1 parent 8f2769c commit d62aa26

File tree

11 files changed

+167
-50
lines changed

11 files changed

+167
-50
lines changed

src/blockwise.c

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,70 @@ void cf_blockwise_xor(uint8_t *partial, size_t *npartial, size_t nblock,
126126
inb += taken;
127127
}
128128
}
129+
130+
void cf_blockwise_acc_byte(uint8_t *partial, size_t *npartial,
131+
size_t nblock,
132+
uint8_t byte, size_t nbytes,
133+
cf_blockwise_in_fn process,
134+
void *ctx)
135+
{
136+
/* only memset the whole of the block once */
137+
int filled = 0;
138+
139+
while (nbytes)
140+
{
141+
size_t start = *npartial;
142+
size_t count = MIN(nbytes, nblock - start);
143+
144+
if (!filled)
145+
memset(partial + start, byte, count);
146+
147+
if (start == 0 && count == nblock)
148+
filled = 1;
149+
150+
if (start + count == nblock)
151+
{
152+
process(ctx, partial);
153+
*npartial = 0;
154+
} else {
155+
*npartial += count;
156+
}
157+
158+
nbytes -= count;
159+
}
160+
}
161+
162+
void cf_blockwise_acc_pad(uint8_t *partial, size_t *npartial,
163+
size_t nblock,
164+
uint8_t fbyte, uint8_t mbyte, uint8_t lbyte,
165+
size_t nbytes,
166+
cf_blockwise_in_fn process,
167+
void *ctx)
168+
{
169+
170+
switch (nbytes)
171+
{
172+
case 0: break;
173+
case 1: fbyte ^= lbyte;
174+
cf_blockwise_accumulate(partial, npartial, nblock, &fbyte, 1, process, ctx);
175+
break;
176+
case 2:
177+
cf_blockwise_accumulate(partial, npartial, nblock, &fbyte, 1, process, ctx);
178+
cf_blockwise_accumulate(partial, npartial, nblock, &lbyte, 1, process, ctx);
179+
break;
180+
default:
181+
cf_blockwise_accumulate(partial, npartial, nblock, &fbyte, 1, process, ctx);
182+
183+
/* If the middle and last bytes differ, then process the last byte separately.
184+
* Otherwise, just extend the middle block size. */
185+
if (lbyte != mbyte)
186+
{
187+
cf_blockwise_acc_byte(partial, npartial, nblock, mbyte, nbytes - 2, process, ctx);
188+
cf_blockwise_accumulate(partial, npartial, nblock, &lbyte, 1, process, ctx);
189+
} else {
190+
cf_blockwise_acc_byte(partial, npartial, nblock, mbyte, nbytes - 1, process, ctx);
191+
}
192+
193+
break;
194+
}
195+
}

src/blockwise.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,56 @@ void cf_blockwise_xor(uint8_t *partial, size_t *npartial,
9292
cf_blockwise_out_fn newblock,
9393
void *ctx);
9494

95+
/* This function processes a single byte a number of times. It's useful
96+
* for padding, and more efficient than calling cf_blockwise_accumulate
97+
* a bunch of times.
98+
*
99+
* partial is the buffer (maintained by the caller)
100+
* on entry, npartial is the currently valid count of used bytes on
101+
* the front of partial.
102+
* on exit, npartial is updated to reflect the status of partial.
103+
* nblock is the blocksize to accumulate -- partial must be at least
104+
* this long!
105+
* process is the processing function, passed ctx and a pointer
106+
* to the data to process (always exactly nblock bytes long!)
107+
* which may not necessarily be the same as partial.
108+
* byte is the byte to process, nbytes times.
109+
*/
110+
void cf_blockwise_acc_byte(uint8_t *partial, size_t *npartial,
111+
size_t nblock,
112+
uint8_t byte, size_t nbytes,
113+
cf_blockwise_in_fn process,
114+
void *ctx);
115+
116+
/* This function attempts to process patterns of bytes common in
117+
* block cipher padding.
118+
*
119+
* This takes three bytes:
120+
* - a first byte, fbyte,
121+
* - a middle byte, mbyte,
122+
* - a last byte, lbyte.
123+
*
124+
* If nbytes is zero, nothing happens.
125+
* If nbytes is one, the byte fbyte ^ lbyte is processed.
126+
* If nbytes is two, the fbyte then lbyte are processed.
127+
* If nbytes is three or more, fbyte, then one or more mbytes, then lbyte
128+
* is processed.
129+
*
130+
* partial is the buffer (maintained by the caller)
131+
* on entry, npartial is the currently valid count of used bytes on
132+
* the front of partial.
133+
* on exit, npartial is updated to reflect the status of partial.
134+
* nblock is the blocksize to accumulate -- partial must be at least
135+
* this long!
136+
* process is the processing function, passed ctx and a pointer
137+
* to the data to process (always exactly nblock bytes long!)
138+
* which may not necessarily be the same as partial.
139+
*/
140+
void cf_blockwise_acc_pad(uint8_t *partial, size_t *npartial,
141+
size_t nblock,
142+
uint8_t fbyte, uint8_t mbyte, uint8_t lbyte,
143+
size_t nbytes,
144+
cf_blockwise_in_fn process,
145+
void *ctx);
146+
95147
#endif

src/cbcmac.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,16 @@ void cf_cbcmac_stream_update(cf_cbcmac_stream *ctx, const uint8_t *data, size_t
5353
ctx);
5454
}
5555

56+
/* Complete the block currently buffered in `ctx` by filling its unused tail
 * with zero bytes and passing it to cbcmac_process.  When no bytes are
 * buffered (ctx->used == 0) the context is left untouched. */
void cf_cbcmac_stream_finish_block_zero(cf_cbcmac_stream *ctx)
{
  if (ctx->used != 0)
  {
    /* Zero everything from the first unused byte to the end of the block. */
    memset(ctx->buffer + ctx->used, 0, ctx->prp->blocksz - ctx->used);

    cbcmac_process(ctx, ctx->buffer);

    /* The buffered block has been consumed. */
    ctx->used = 0;
  }
}
65+
5666
void cf_cbcmac_stream_nopad_final(cf_cbcmac_stream *ctx, uint8_t out[CF_MAXBLOCK])
5767
{
5868
assert(ctx->used == 0);
@@ -62,7 +72,8 @@ void cf_cbcmac_stream_nopad_final(cf_cbcmac_stream *ctx, uint8_t out[CF_MAXBLOCK
6272
void cf_cbcmac_stream_pad_final(cf_cbcmac_stream *ctx, uint8_t out[CF_MAXBLOCK])
6373
{
6474
uint8_t npad = ctx->prp->blocksz - ctx->used;
65-
for (size_t i = 0; i < npad; i++)
66-
cf_cbcmac_stream_update(ctx, &npad, 1);
75+
cf_blockwise_acc_byte(ctx->buffer, &ctx->used, ctx->prp->blocksz,
76+
npad, npad,
77+
cbcmac_process, ctx);
6778
cf_cbcmac_stream_nopad_final(ctx, out);
6879
}

src/ccm.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,7 @@ static void write_be(uint8_t *out, size_t value, size_t bytes)
3535

3636
static void zero_pad(cf_cbcmac_stream *cm)
3737
{
38-
const uint8_t zero_byte = 0;
39-
while (cm->used != 0)
40-
cf_cbcmac_stream_update(cm, &zero_byte, 1);
38+
cf_cbcmac_stream_finish_block_zero(cm);
4139
}
4240

4341
/* nb. block is general workspace. */

src/cmac.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,9 @@ void cf_cmac_stream_update(cf_cmac_stream *ctx, const uint8_t *data, size_t len,
132132
/* Input padding */
133133
if (needpad)
134134
{
135-
uint8_t pad_block[CF_MAXBLOCK] = { 0x80 };
136-
cf_blockwise_accumulate(ctx->buffer, &ctx->used, blocksz,
137-
pad_block, blocksz - ctx->used,
138-
cmac_process_final_pad, ctx);
135+
cf_blockwise_acc_pad(ctx->buffer, &ctx->used, blocksz,
136+
0x80, 0x00, 0x00, blocksz - ctx->used,
137+
cmac_process_final_pad, ctx);
139138
}
140139
}
141140

src/gcm.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,20 @@ static void ghash_block(void *vctx, const uint8_t *data)
5656
static void ghash_add(ghash_ctx *ctx, const uint8_t *buf, size_t n)
5757
{
5858
cf_blockwise_accumulate(ctx->buffer, &ctx->buffer_used,
59-
16,
59+
sizeof ctx->buffer,
6060
buf, n,
6161
ghash_block,
6262
ctx);
6363
}
6464

6565
static void ghash_add_pad(ghash_ctx *ctx)
6666
{
67-
uint8_t byte = 0x00;
68-
while (ctx->buffer_used != 0)
69-
ghash_add(ctx, &byte, 1);
67+
if (ctx->buffer_used == 0)
68+
return;
69+
70+
memset(ctx->buffer + ctx->buffer_used, 0, sizeof(ctx->buffer) - ctx->buffer_used);
71+
ghash_block(ctx, ctx->buffer);
72+
ctx->buffer_used = 0;
7073
}
7174

7275
static void ghash_add_aad(ghash_ctx *ctx, const uint8_t *buf, size_t n)

src/modes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,11 @@ void cf_cbcmac_stream_reset(cf_cbcmac_stream *ctx);
194194
* Process ndata bytes at data. */
195195
void cf_cbcmac_stream_update(cf_cbcmac_stream *ctx, const uint8_t *data, size_t ndata);
196196

197+
/* .. c:function:: $DECL
198+
* Finish the current block of data by adding zeroes. Does nothing if there
199+
* are no bytes awaiting processing. */
200+
void cf_cbcmac_stream_finish_block_zero(cf_cbcmac_stream *ctx);
201+
197202
/* .. c:function:: $DECL
198203
* Output the MAC to ctx->prp->blocksz bytes at out.
199204
* ctx->used must be zero: the input message must be an exact number of

src/sha1.c

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -116,18 +116,15 @@ void cf_sha1_digest_final(cf_sha1_context *ctx, uint8_t hash[CF_SHA1_HASHSZ])
116116
digested_bytes = digested_bytes * CF_SHA1_BLOCKSZ + ctx->npartial;
117117
uint64_t digested_bits = digested_bytes * 8;
118118

119-
size_t zeroes = CF_SHA1_BLOCKSZ - ((digested_bytes + 1 + 8) % CF_SHA1_BLOCKSZ);
119+
size_t padbytes = CF_SHA1_BLOCKSZ - ((digested_bytes + 8) % CF_SHA1_BLOCKSZ);
120120

121121
/* Hash 0x80 00 ... block first. */
122-
uint8_t buf[8];
123-
buf[0] = 0x80;
124-
buf[1] = 0x00;
125-
cf_sha1_update(ctx, &buf[0], 1);
126-
127-
while (zeroes--)
128-
cf_sha1_update(ctx, &buf[1], 1);
122+
cf_blockwise_acc_pad(ctx->partial, &ctx->npartial, sizeof ctx->partial,
123+
0x80, 0x00, 0x00, padbytes,
124+
sha1_update_block, ctx);
129125

130126
/* Now hash length. */
127+
uint8_t buf[8];
131128
write64_be(digested_bits, buf);
132129
cf_sha1_update(ctx, buf, 8);
133130

src/sha256.c

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -172,18 +172,15 @@ void cf_sha256_digest_final(cf_sha256_context *ctx, uint8_t hash[CF_SHA256_HASHS
172172
digested_bytes = digested_bytes * CF_SHA256_BLOCKSZ + ctx->npartial;
173173
uint64_t digested_bits = digested_bytes * 8;
174174

175-
size_t zeroes = CF_SHA256_BLOCKSZ - ((digested_bytes + 1 + 8) % CF_SHA256_BLOCKSZ);
175+
size_t padbytes = CF_SHA256_BLOCKSZ - ((digested_bytes + 8) % CF_SHA256_BLOCKSZ);
176176

177177
/* Hash 0x80 00 ... block first. */
178-
uint8_t buf[8];
179-
buf[0] = 0x80;
180-
buf[1] = 0x00;
181-
cf_sha256_update(ctx, &buf[0], 1);
182-
183-
while (zeroes--)
184-
cf_sha256_update(ctx, &buf[1], 1);
178+
cf_blockwise_acc_pad(ctx->partial, &ctx->npartial, sizeof ctx->partial,
179+
0x80, 0x00, 0x00, padbytes,
180+
sha256_update_block, ctx);
185181

186182
/* Now hash length. */
183+
uint8_t buf[8];
187184
write64_be(digested_bits, buf);
188185
cf_sha256_update(ctx, buf, 8);
189186

src/sha3.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -311,21 +311,12 @@ static void sha3_update(cf_sha3_context *ctx, const void *data, size_t nbytes)
311311

312312
static void pad(cf_sha3_context *ctx, uint8_t domain, size_t npad)
313313
{
314-
uint8_t padding[CF_SHA3_224_BLOCKSZ];
315-
316314
assert(npad >= 1);
317315

318-
if (npad == 1)
319-
{
320-
padding[0] = domain | 0x80;
321-
sha3_update(ctx, padding, 1);
322-
return;
323-
}
324-
325-
memset(padding, 0, npad);
326-
padding[0] = domain;
327-
padding[npad - 1] = 0x80;
328-
sha3_update(ctx, padding, npad);
316+
cf_blockwise_acc_pad(ctx->partial, &ctx->npartial, ctx->rate,
317+
domain, 0x00, 0x80,
318+
npad,
319+
sha3_block, ctx);
329320
}
330321

331322
static void pad_and_squeeze(cf_sha3_context *ctx, uint8_t *out, size_t nout)

0 commit comments

Comments
 (0)