@@ -40,7 +40,7 @@ constexpr uint Nk = 4; // Number of 32-bit words in the key
40
40
constexpr uint Nb = 4 ; // Number of columns (32-bit words) comprising the state
41
41
constexpr uint Nr = 10 ; // Number of rounds, which is a function of Nk and Nb
42
42
43
- void KeyExpansion (std::array<uint8_t , 4 *Nb*(Nr+1 )> w, const std::array<uint8_t,16> key) {
43
+ void KeyExpansion (std::array<uint8_t , 4 *Nb*(Nr+1 )>& w, const std::array<uint8_t,16>& key) {
44
44
std::array<uint8_t ,4 > temp;
45
45
46
46
unsigned int i = 0 ;
@@ -126,14 +126,26 @@ void AESSboxROM(std::array<TLWE<typename brP::targetP>,8> &res,
126
126
}
127
127
128
128
template <class iksP , class brP >
129
- void SubBytes (std::array<TLWE<typename brP::targetP>, 128 > &res,
130
- const std::array<TLWE<typename iksP::domainP>, 128 > &tlwe,
129
+ void SubBytes (std::array<TLWE<typename brP::targetP>, 128 >& state,
131
130
const EvalKey &ek)
132
131
{
133
132
for (int i = 0 ; i < 16 ; i++)
134
- AESSboxROM<iksP, brP>(std::span (res ).subspan (i*8 ).template first <8 >(), std::span (tlwe ).subspan (i*8 ).template first <8 >(), ek);
133
+ AESSboxROM<iksP, brP>(std::span (state ).subspan (i*8 ).template first <8 >(), std::span (state ).subspan (i*8 ).template first <8 >(), ek);
135
134
}
136
135
136
+ template <class iksP , class brP , class cbiksP , class cbbrP >
137
+ void SubBytes (std::array<TLWE<typename brP::targetP>, 128 >& state,
138
+ const EvalKey &ek)
139
+ {
140
+ for (int i = 0 ; i < 16 ; i++){
141
+ std::array<TLWE<typename cbbrP::targetP>, 8 > temp;
142
+ AESSboxROM<iksP, cbbrP>(std::span (temp), std::span (state).subspan (i*8 ).template first <8 >(), ek);
143
+ for (int j = 0 ; j < 8 ; j++)
144
+ GateBootstrapping<cbiksP, brP, 1ULL << (std::numeric_limits<typename brP::targetP::T>::digits - 2 )>(state[i*8 +j], temp[j], ek);
145
+ }
146
+ }
147
+
148
+
137
149
template <class P >
138
150
inline Polynomial<P> AESInvSboxPoly (const uint8_t upperindex)
139
151
{
@@ -357,6 +369,121 @@ void MixColumn(std::array<TLWE<P>, 32>& y_out, const std::array<TLWE<P>, 32>& x)
357
369
TLWEAdd<P>(y_out[28 ], x[20 ], t[59 ]);
358
370
}
359
371
372
+ // https://eprint.iacr.org/2024/1076
373
+ template <class P >
374
+ void MixColumnDepth4 (std::array<TLWE<P>, 32 >& y, const std::array<TLWE<P>, 32 >& x){
375
+ // r0 … r64
376
+ std::array<TLWE<P>, 65 > r;
377
+
378
+ // --- first stage --------------------------------------------------------
379
+ TLWEAdd<P>(r[ 0 ], x[23 ], x[31 ]);
380
+ TLWEAdd<P>(r[ 1 ], x[21 ], x[29 ]);
381
+ TLWEAdd<P>(r[ 2 ], x[17 ], x[25 ]);
382
+ TLWEAdd<P>(r[ 3 ], x[16 ], x[24 ]);
383
+ TLWEAdd<P>(r[ 4 ], x[15 ], x[23 ]);
384
+ TLWEAdd<P>(r[ 5 ], x[14 ], x[22 ]);
385
+ TLWEAdd<P>(r[ 6 ], x[12 ], x[20 ]);
386
+ TLWEAdd<P>(r[ 7 ], x[12 ], x[13 ]);
387
+ TLWEAdd<P>(r[ 8 ], x[11 ], x[20 ]);
388
+ TLWEAdd<P>(r[ 9 ], x[10 ], x[25 ]);
389
+ TLWEAdd<P>(r[10 ], x[10 ], x[18 ]);
390
+ TLWEAdd<P>(r[11 ], x[ 9 ], x[18 ]);
391
+ TLWEAdd<P>(r[12 ], x[ 7 ], x[31 ]);
392
+ TLWEAdd<P>(r[13 ], x[ 7 ], x[15 ]);
393
+ TLWEAdd<P>(r[14 ], x[ 6 ], x[31 ]);
394
+ TLWEAdd<P>(r[15 ], x[ 6 ], x[30 ]);
395
+ TLWEAdd<P>(r[16 ], x[ 5 ], x[13 ]);
396
+ TLWEAdd<P>(r[17 ], x[ 5 ], x[ 6 ]);
397
+ TLWEAdd<P>(r[18 ], x[ 4 ], x[28 ]);
398
+ TLWEAdd<P>(r[19 ], x[ 3 ], x[27 ]);
399
+ TLWEAdd<P>(r[20 ], x[ 3 ], x[11 ]);
400
+ TLWEAdd<P>(r[21 ], x[ 2 ], x[26 ]);
401
+ TLWEAdd<P>(r[22 ], x[ 1 ], x[ 9 ]);
402
+ TLWEAdd<P>(r[23 ], x[ 0 ], x[ 8 ]);
403
+
404
+ // --- second stage -------------------------------------------------------
405
+ TLWEAdd<P>(r[24 ], r[ 0 ], x[27 ]);
406
+ TLWEAdd<P>(r[25 ], r[ 0 ], x[ 7 ]);
407
+ TLWEAdd<P>(r[26 ], r[ 1 ], x[ 5 ]);
408
+ TLWEAdd<P>(r[27 ], r[ 1 ], x[ 4 ]);
409
+ TLWEAdd<P>(r[28 ], r[ 2 ], x[ 1 ]);
410
+ TLWEAdd<P>(r[29 ], r[ 3 ], x[ 8 ]);
411
+ TLWEAdd<P>(r[30 ], r[ 3 ], r[ 0 ]);
412
+ TLWEAdd<P>(r[31 ], r[ 4 ], x[14 ]);
413
+ TLWEAdd<P>(r[32 ], r[ 4 ], x[ 7 ]);
414
+ TLWEAdd<P>(r[33 ], r[ 4 ], x[ 0 ]);
415
+ TLWEAdd<P>(r[34 ], r[ 5 ], x[29 ]);
416
+ TLWEAdd<P>(r[35 ], r[ 6 ], x[28 ]);
417
+ TLWEAdd<P>(r[36 ], r[ 6 ], x[ 4 ]);
418
+ TLWEAdd<P>(r[37 ], r[10 ], x[26 ]);
419
+ TLWEAdd<P>(r[38 ], r[10 ], r[ 4 ]);
420
+ TLWEAdd<P>(r[39 ], r[13 ], r[ 2 ]);
421
+ TLWEAdd<P>(r[40 ], r[15 ], x[22 ]);
422
+ TLWEAdd<P>(r[41 ], r[16 ], x[30 ]);
423
+ TLWEAdd<P>(r[42 ], r[16 ], x[28 ]);
424
+ TLWEAdd<P>(r[43 ], r[18 ], x[19 ]);
425
+ TLWEAdd<P>(r[44 ], r[19 ], x[19 ]);
426
+ TLWEAdd<P>(r[45 ], r[19 ], r[12 ]);
427
+ TLWEAdd<P>(r[46 ], r[20 ], x[26 ]);
428
+ TLWEAdd<P>(r[47 ], r[20 ], r[13 ]);
429
+ TLWEAdd<P>(r[48 ], r[21 ], x[17 ]);
430
+ TLWEAdd<P>(r[49 ], r[22 ], x[25 ]);
431
+ TLWEAdd<P>(r[50 ], r[22 ], x[17 ]);
432
+ TLWEAdd<P>(r[51 ], r[23 ], x[16 ]);
433
+ TLWEAdd<P>(r[52 ], r[23 ], x[ 9 ]);
434
+ TLWEAdd<P>(r[53 ], r[24 ], x[18 ]);
435
+ TLWEAdd<P>(r[54 ], r[24 ], x[12 ]);
436
+
437
+ // --- outputs that depend only on r0 … r54 -------------------------------
438
+ TLWEAdd<P>(y[15 ], r[25 ], r[ 5 ]);
439
+ TLWEAdd<P>(y[13 ], r[26 ], r[ 6 ]);
440
+ TLWEAdd<P>(y[ 5 ], r[27 ], r[ 7 ]);
441
+ TLWEAdd<P>(y[ 0 ], r[29 ], r[13 ]);
442
+ TLWEAdd<P>(y[ 7 ], r[31 ], r[14 ]);
443
+ TLWEAdd<P>(y[31 ], r[32 ], r[15 ]);
444
+ TLWEAdd<P>(y[ 8 ], r[33 ], r[ 3 ]);
445
+ TLWEAdd<P>(y[30 ], r[34 ], r[17 ]);
446
+ TLWEAdd<P>(y[ 2 ], r[37 ], r[22 ]);
447
+
448
+ // --- remaining intermediates -------------------------------------------
449
+ TLWEAdd<P>(r[55 ], r[37 ], r[28 ]);
450
+ TLWEAdd<P>(r[56 ], r[40 ], x[21 ]);
451
+ TLWEAdd<P>(r[57 ], r[40 ], r[13 ]);
452
+ TLWEAdd<P>(y[ 6 ], r[41 ], r[ 5 ]);
453
+ TLWEAdd<P>(r[58 ], r[42 ], x[29 ]);
454
+ TLWEAdd<P>(r[59 ], r[43 ], r[ 4 ]);
455
+ TLWEAdd<P>(r[60 ], r[44 ], x[ 2 ]);
456
+ TLWEAdd<P>(y[11 ], r[44 ], r[38 ]);
457
+ TLWEAdd<P>(y[28 ], r[45 ], r[36 ]);
458
+ TLWEAdd<P>(r[61 ], r[46 ], r[45 ]);
459
+ TLWEAdd<P>(r[62 ], r[47 ], x[10 ]);
460
+ TLWEAdd<P>(y[ 4 ], r[47 ], r[35 ]);
461
+ TLWEAdd<P>(y[18 ], r[48 ], r[ 9 ]);
462
+ TLWEAdd<P>(y[10 ], r[48 ], r[11 ]);
463
+ TLWEAdd<P>(y[17 ], r[49 ], r[30 ]);
464
+ TLWEAdd<P>(r[63 ], r[50 ], r[29 ]);
465
+ TLWEAdd<P>(y[24 ], r[51 ], r[12 ]);
466
+ TLWEAdd<P>(y[16 ], r[51 ], r[30 ]);
467
+ TLWEAdd<P>(r[64 ], r[51 ], r[33 ]);
468
+ TLWEAdd<P>(y[ 1 ], r[52 ], r[39 ]);
469
+ TLWEAdd<P>(y[19 ], r[53 ], r[46 ]);
470
+ TLWEAdd<P>(y[20 ], r[54 ], r[43 ]);
471
+ TLWEAdd<P>(y[26 ], r[55 ], r[48 ]);
472
+ TLWEAdd<P>(y[14 ], r[56 ], x[13 ]);
473
+ TLWEAdd<P>(y[22 ], r[56 ], r[34 ]);
474
+ TLWEAdd<P>(y[23 ], r[57 ], r[14 ]);
475
+ TLWEAdd<P>(y[21 ], r[58 ], x[20 ]);
476
+ TLWEAdd<P>(y[29 ], r[58 ], r[27 ]);
477
+ TLWEAdd<P>(y[12 ], r[59 ], r[ 8 ]);
478
+ TLWEAdd<P>(y[27 ], r[61 ], r[60 ]);
479
+ TLWEAdd<P>(y[ 3 ], r[62 ], r[60 ]);
480
+
481
+ // y25 depends on y24 already produced
482
+ TLWEAdd<P>(y[25 ], y[24 ], r[63 ]);
483
+
484
+ TLWEAdd<P>(y[ 9 ], r[64 ], r[28 ]);
485
+ }
486
+
360
487
// https://eprint.iacr.org/2019/833
361
488
template <class P >
362
489
void MixColumns (std::array<TLWE<P>, 128 > &state) {
@@ -374,6 +501,7 @@ void MixColumns(std::array<TLWE<P>, 128> &state) {
374
501
// Apply the MixColumn transformation
375
502
std::array<TLWE<P>, 32 > y_out; // For final output bits y0...y31
376
503
MixColumn<P>(y_out, x);
504
+ // MixColumnDepth4<P>(y_out, x);
377
505
378
506
// Place the resulting 32-bit column (y_out) back into the state array
379
507
for (int i = 0 ; i < 4 ; ++i)
@@ -466,17 +594,49 @@ void AESEnc(std::array<TLWE<typename brP::targetP>, 128> &cipher,
466
594
TLWEAdd<typename iksP::domainP>(state[i*Nb*8 +j*8 +k], plain[j*4 *8 +i*8 +k], expandedkey[0 ][j*4 *8 +i*8 +k]);
467
595
state[i*Nb*8 +j*8 +k][iksP::domainP::k * iksP::domainP::n] += 1ULL << (std::numeric_limits<typename iksP::domainP::T>::digits - 2 );
468
596
}
469
- // state[i*Nb+j] = plain[j*4+i];]
470
- // AddRoundKey<typename iksP::domainP>(state, expandedkey[0]);
471
597
472
598
// Rounds
473
599
for (int round = 1 ; round < Nr; round++) {
474
- SubBytes<iksP, brP>(state, state, ek);
600
+ SubBytes<iksP, brP>(state, ek);
601
+ ShiftRows<typename brP::targetP>(state);
602
+ MixColumns<typename brP::targetP>(state);
603
+ AddRoundKey<typename brP::targetP>(state, expandedkey[round]);
604
+ }
605
+ SubBytes<iksP, brP>(state, ek);
606
+ ShiftRows<typename brP::targetP>(state);
607
+ AddRoundKey<typename brP::targetP>(state, expandedkey[Nr]);
608
+
609
+ // Copy state to ciphertext with transposition
610
+ for (int i = 0 ; i < 4 ; i++)
611
+ for (int j = 0 ; j < Nb; j++)
612
+ for (int k = 0 ; k < 8 ; k++)
613
+ cipher[j*4 *8 +i*8 +k] = state[i*Nb*8 +j*8 +k];
614
+ }
615
+
616
+ template <class iksP , class brP , class cbiksP , class cbbrP >
617
+ void AESEnc (std::array<TLWE<typename brP::targetP>, 128 > &cipher,
618
+ const std::array<TLWE<typename iksP::domainP>, 128 > &plain,
619
+ const std::array<std::array<TLWE<typename brP::targetP>, 128 >, Nr+1 > &expandedkey,
620
+ EvalKey &ek)
621
+ {
622
+ std::array<TLWE<typename iksP::domainP>, 128 > state;
623
+ // Copy plaintext to state with transposition
624
+ // Initial AddRoundKey
625
+ for (int i = 0 ; i < 4 ; i++)
626
+ for (int j = 0 ; j < Nb; j++)
627
+ for (int k = 0 ; k < 8 ; k++){
628
+ TLWEAdd<typename iksP::domainP>(state[i*Nb*8 +j*8 +k], plain[j*4 *8 +i*8 +k], expandedkey[0 ][j*4 *8 +i*8 +k]);
629
+ state[i*Nb*8 +j*8 +k][iksP::domainP::k * iksP::domainP::n] += 1ULL << (std::numeric_limits<typename iksP::domainP::T>::digits - 2 );
630
+ }
631
+
632
+ // Rounds
633
+ for (int round = 1 ; round < Nr; round++) {
634
+ SubBytes<iksP, brP, cbiksP, cbbrP>(state, ek);
475
635
ShiftRows<typename brP::targetP>(state);
476
636
MixColumns<typename brP::targetP>(state);
477
637
AddRoundKey<typename brP::targetP>(state, expandedkey[round]);
478
638
}
479
- SubBytes<iksP, brP>(state, state, ek);
639
+ SubBytes<iksP, brP, cbiksP, cbbrP>( state, ek);
480
640
ShiftRows<typename brP::targetP>(state);
481
641
AddRoundKey<typename brP::targetP>(state, expandedkey[Nr]);
482
642
0 commit comments