@@ -532,27 +532,27 @@ static unsigned int rej_eta(int32_t *a,
532
532
* or AES256CTR(seed,nonce).
533
533
*
534
534
* Arguments: - poly *a: pointer to output polynomial
535
- * - const uint8_t seed[]: byte array with seed of length SEEDBYTES
535
+ * - const uint8_t seed[]: byte array with seed of length CRHBYTES
536
536
* - uint16_t nonce: 2-byte nonce
537
537
**************************************************/
538
538
void poly_uniform_eta_preinit (poly * a , stream128_state * state )
539
539
{
540
540
unsigned int ctr ;
541
- ALIGNED_UINT8 (REJ_UNIFORM_BUFLEN * STREAM128_BLOCKBYTES ) buf ;
541
+ ALIGNED_UINT8 (REJ_UNIFORM_ETA_BUFLEN ) buf ;
542
542
543
- stream128_squeezeblocks (buf .coeffs , REJ_UNIFORM_ETA_NBLOCKS , state );
543
+ stream256_squeezeblocks (buf .coeffs , REJ_UNIFORM_ETA_NBLOCKS , state );
544
544
ctr = rej_eta_avx (a -> coeffs , buf .coeffs );
545
545
546
546
while (ctr < N ) {
547
- stream128_squeezeblocks (buf .coeffs , 1 , state );
548
- ctr += rej_eta (a -> coeffs + ctr , N - ctr , buf .coeffs , STREAM128_BLOCKBYTES );
547
+ stream256_squeezeblocks (buf .coeffs , 1 , state );
548
+ ctr += rej_eta (a -> coeffs + ctr , N - ctr , buf .coeffs , STREAM256_BLOCKBYTES );
549
549
}
550
550
}
551
551
552
- void poly_uniform_eta (poly * a , const uint8_t seed [SEEDBYTES ], uint16_t nonce )
552
+ void poly_uniform_eta (poly * a , const uint8_t seed [CRHBYTES ], uint16_t nonce )
553
553
{
554
- stream128_state state ;
555
- stream128_init (& state , seed , nonce );
554
+ stream256_state state ;
555
+ stream256_init (& state , seed , nonce );
556
556
poly_uniform_eta_preinit (a , & state );
557
557
}
558
558
@@ -561,7 +561,7 @@ void poly_uniform_eta_4x(poly *a0,
561
561
poly * a1 ,
562
562
poly * a2 ,
563
563
poly * a3 ,
564
- const uint8_t seed [32 ],
564
+ const uint8_t seed [64 ],
565
565
uint16_t nonce0 ,
566
566
uint16_t nonce1 ,
567
567
uint16_t nonce2 ,
@@ -573,36 +573,41 @@ void poly_uniform_eta_4x(poly *a0,
573
573
__m256i f ;
574
574
keccakx4_state state ;
575
575
576
- f = _mm256_loadu_si256 ((__m256i * )seed );
577
- _mm256_store_si256 (buf [0 ].vec ,f );
578
- _mm256_store_si256 (buf [1 ].vec ,f );
579
- _mm256_store_si256 (buf [2 ].vec ,f );
580
- _mm256_store_si256 (buf [3 ].vec ,f );
581
-
582
- buf [0 ].coeffs [SEEDBYTES + 0 ] = nonce0 ;
583
- buf [0 ].coeffs [SEEDBYTES + 1 ] = nonce0 >> 8 ;
584
- buf [1 ].coeffs [SEEDBYTES + 0 ] = nonce1 ;
585
- buf [1 ].coeffs [SEEDBYTES + 1 ] = nonce1 >> 8 ;
586
- buf [2 ].coeffs [SEEDBYTES + 0 ] = nonce2 ;
587
- buf [2 ].coeffs [SEEDBYTES + 1 ] = nonce2 >> 8 ;
588
- buf [3 ].coeffs [SEEDBYTES + 0 ] = nonce3 ;
589
- buf [3 ].coeffs [SEEDBYTES + 1 ] = nonce3 >> 8 ;
590
-
591
- shake128x4_absorb_once (& state , buf [0 ].coeffs , buf [1 ].coeffs , buf [2 ].coeffs , buf [3 ].coeffs , SEEDBYTES + 2 );
592
- shake128x4_squeezeblocks (buf [0 ].coeffs , buf [1 ].coeffs , buf [2 ].coeffs , buf [3 ].coeffs , REJ_UNIFORM_ETA_NBLOCKS , & state );
576
+ f = _mm256_loadu_si256 ((__m256i * )& seed [0 ]);
577
+ _mm256_store_si256 (& buf [0 ].vec [0 ],f );
578
+ _mm256_store_si256 (& buf [1 ].vec [0 ],f );
579
+ _mm256_store_si256 (& buf [2 ].vec [0 ],f );
580
+ _mm256_store_si256 (& buf [3 ].vec [0 ],f );
581
+ f = _mm256_loadu_si256 ((__m256i * )& seed [32 ]);
582
+ _mm256_store_si256 (& buf [0 ].vec [1 ],f );
583
+ _mm256_store_si256 (& buf [1 ].vec [1 ],f );
584
+ _mm256_store_si256 (& buf [2 ].vec [1 ],f );
585
+ _mm256_store_si256 (& buf [3 ].vec [1 ],f );
586
+
587
+ buf [0 ].coeffs [64 ] = nonce0 ;
588
+ buf [0 ].coeffs [65 ] = nonce0 >> 8 ;
589
+ buf [1 ].coeffs [64 ] = nonce1 ;
590
+ buf [1 ].coeffs [65 ] = nonce1 >> 8 ;
591
+ buf [2 ].coeffs [64 ] = nonce2 ;
592
+ buf [2 ].coeffs [65 ] = nonce2 >> 8 ;
593
+ buf [3 ].coeffs [64 ] = nonce3 ;
594
+ buf [3 ].coeffs [65 ] = nonce3 >> 8 ;
595
+
596
+ shake256x4_absorb_once (& state , buf [0 ].coeffs , buf [1 ].coeffs , buf [2 ].coeffs , buf [3 ].coeffs , 66 );
597
+ shake256x4_squeezeblocks (buf [0 ].coeffs , buf [1 ].coeffs , buf [2 ].coeffs , buf [3 ].coeffs , REJ_UNIFORM_ETA_NBLOCKS , & state );
593
598
594
599
ctr0 = rej_eta_avx (a0 -> coeffs , buf [0 ].coeffs );
595
600
ctr1 = rej_eta_avx (a1 -> coeffs , buf [1 ].coeffs );
596
601
ctr2 = rej_eta_avx (a2 -> coeffs , buf [2 ].coeffs );
597
602
ctr3 = rej_eta_avx (a3 -> coeffs , buf [3 ].coeffs );
598
603
599
604
while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N ) {
600
- shake128x4_squeezeblocks (buf [0 ].coeffs , buf [1 ].coeffs , buf [2 ].coeffs , buf [3 ].coeffs , 1 , & state );
605
+ shake256x4_squeezeblocks (buf [0 ].coeffs , buf [1 ].coeffs , buf [2 ].coeffs , buf [3 ].coeffs , 1 , & state );
601
606
602
- ctr0 += rej_eta (a0 -> coeffs + ctr0 , N - ctr0 , buf [0 ].coeffs , SHAKE128_RATE );
603
- ctr1 += rej_eta (a1 -> coeffs + ctr1 , N - ctr1 , buf [1 ].coeffs , SHAKE128_RATE );
604
- ctr2 += rej_eta (a2 -> coeffs + ctr2 , N - ctr2 , buf [2 ].coeffs , SHAKE128_RATE );
605
- ctr3 += rej_eta (a3 -> coeffs + ctr3 , N - ctr3 , buf [3 ].coeffs , SHAKE128_RATE );
607
+ ctr0 += rej_eta (a0 -> coeffs + ctr0 , N - ctr0 , buf [0 ].coeffs , SHAKE256_RATE );
608
+ ctr1 += rej_eta (a1 -> coeffs + ctr1 , N - ctr1 , buf [1 ].coeffs , SHAKE256_RATE );
609
+ ctr2 += rej_eta (a2 -> coeffs + ctr2 , N - ctr2 , buf [2 ].coeffs , SHAKE256_RATE );
610
+ ctr3 += rej_eta (a3 -> coeffs + ctr3 , N - ctr3 , buf [3 ].coeffs , SHAKE256_RATE );
606
611
}
607
612
}
608
613
#endif
@@ -639,7 +644,7 @@ void poly_uniform_gamma1_4x(poly *a0,
639
644
poly * a1 ,
640
645
poly * a2 ,
641
646
poly * a3 ,
642
- const uint8_t seed [48 ],
647
+ const uint8_t seed [64 ],
643
648
uint16_t nonce0 ,
644
649
uint16_t nonce1 ,
645
650
uint16_t nonce2 ,
@@ -648,29 +653,28 @@ void poly_uniform_gamma1_4x(poly *a0,
648
653
ALIGNED_UINT8 (POLY_UNIFORM_GAMMA1_NBLOCKS * STREAM256_BLOCKBYTES + 14 ) buf [4 ];
649
654
keccakx4_state state ;
650
655
__m256i f ;
651
- __m128i g ;
652
656
653
- f = _mm256_loadu_si256 ((__m256i * )seed );
654
- _mm256_store_si256 (buf [0 ].vec ,f );
655
- _mm256_store_si256 (buf [1 ].vec ,f );
656
- _mm256_store_si256 (buf [2 ].vec ,f );
657
- _mm256_store_si256 (buf [3 ].vec ,f );
658
- g = _mm_loadu_si128 (( __m128i * )& seed [32 ]);
659
- _mm_store_si128 (( __m128i * ) & buf [0 ].vec [1 ],g );
660
- _mm_store_si128 (( __m128i * ) & buf [1 ].vec [1 ],g );
661
- _mm_store_si128 (( __m128i * ) & buf [2 ].vec [1 ],g );
662
- _mm_store_si128 (( __m128i * ) & buf [3 ].vec [1 ],g );
663
-
664
- buf [0 ].coeffs [CRHBYTES + 0 ] = nonce0 ;
665
- buf [0 ].coeffs [CRHBYTES + 1 ] = nonce0 >> 8 ;
666
- buf [1 ].coeffs [CRHBYTES + 0 ] = nonce1 ;
667
- buf [1 ].coeffs [CRHBYTES + 1 ] = nonce1 >> 8 ;
668
- buf [2 ].coeffs [CRHBYTES + 0 ] = nonce2 ;
669
- buf [2 ].coeffs [CRHBYTES + 1 ] = nonce2 >> 8 ;
670
- buf [3 ].coeffs [CRHBYTES + 0 ] = nonce3 ;
671
- buf [3 ].coeffs [CRHBYTES + 1 ] = nonce3 >> 8 ;
672
-
673
- shake256x4_absorb_once (& state , buf [0 ].coeffs , buf [1 ].coeffs , buf [2 ].coeffs , buf [3 ].coeffs , CRHBYTES + 2 );
657
+ f = _mm256_loadu_si256 ((__m256i * )& seed [ 0 ] );
658
+ _mm256_store_si256 (& buf [0 ].vec [ 0 ] ,f );
659
+ _mm256_store_si256 (& buf [1 ].vec [ 0 ] ,f );
660
+ _mm256_store_si256 (& buf [2 ].vec [ 0 ] ,f );
661
+ _mm256_store_si256 (& buf [3 ].vec [ 0 ] ,f );
662
+ f = _mm256_loadu_si256 (( __m256i * )& seed [32 ]);
663
+ _mm256_store_si256 ( & buf [0 ].vec [1 ],f );
664
+ _mm256_store_si256 ( & buf [1 ].vec [1 ],f );
665
+ _mm256_store_si256 ( & buf [2 ].vec [1 ],f );
666
+ _mm256_store_si256 ( & buf [3 ].vec [1 ],f );
667
+
668
+ buf [0 ].coeffs [64 ] = nonce0 ;
669
+ buf [0 ].coeffs [65 ] = nonce0 >> 8 ;
670
+ buf [1 ].coeffs [64 ] = nonce1 ;
671
+ buf [1 ].coeffs [65 ] = nonce1 >> 8 ;
672
+ buf [2 ].coeffs [64 ] = nonce2 ;
673
+ buf [2 ].coeffs [65 ] = nonce2 >> 8 ;
674
+ buf [3 ].coeffs [64 ] = nonce3 ;
675
+ buf [3 ].coeffs [65 ] = nonce3 >> 8 ;
676
+
677
+ shake256x4_absorb_once (& state , buf [0 ].coeffs , buf [1 ].coeffs , buf [2 ].coeffs , buf [3 ].coeffs , 66 );
674
678
shake256x4_squeezeblocks (buf [0 ].coeffs , buf [1 ].coeffs , buf [2 ].coeffs , buf [3 ].coeffs , POLY_UNIFORM_GAMMA1_NBLOCKS , & state );
675
679
676
680
polyz_unpack (a0 , buf [0 ].coeffs );
0 commit comments