virtualsecureplatform
diff --git a/‎.gitmodules
Lines changed: 3 additions & 0 deletions b/‎.gitmodules
Lines changed: 3 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 1 addition & 1 deletion b/‎README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎include/aes.hpp
Lines changed: 7 additions & 0 deletions b/‎include/aes.hpp
Lines changed: 7 additions & 0 deletions
diff --git a/‎include/evalkeygens.hpp
Lines changed: 1 addition & 1 deletion b/‎include/evalkeygens.hpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎include/key.hpp
Lines changed: 0 additions & 4 deletions b/‎include/key.hpp
Lines changed: 0 additions & 4 deletions
diff --git a/‎include/keyswitch.hpp
Lines changed: 162 additions & 101 deletions b/‎include/keyswitch.hpp
Lines changed: 162 additions & 101 deletions
@@ -16,3 +16,6 @@
 [submodule "thirdparties/BLAKE3"]
 	path = thirdparties/BLAKE3
 	url = https://github.com/BLAKE3-team/BLAKE3.git
+[submodule "thirdparties/AES"]
+	path = thirdparties/AES
+	url = https://github.com/SergeyBel/AES.git
@@ -1,6 +1,6 @@
 [![Test](https://github.com/virtualsecureplatform/TFHEpp/actions/workflows/test.yml/badge.svg)](https://github.com/virtualsecureplatform/TFHEpp/actions/workflows/test.yml)
 # TFHEpp
-TFHEpp is full Scracthed pure C++ Ver. of TFHE. TFHEpp is slightly(about 10%) faster than original [TFHE implementation](https://github.com/tfhe/tfhe). In addition to that, THFEpp supports [Circuit Bootstrapping](https://eprint.iacr.org/2018/421), [Programable Boootstrapping many LUT](https://eprint.iacr.org/2021/729), and [Modifed Cheng's Packing](https://eprint.iacr.org/2024/1318) (We call it as annihilate key switching in our code). 
+TFHEpp is a pure C++ implementation of TFHE written fully from scratch. TFHEpp is slightly (about 10%) faster than the original [TFHE implementation](https://github.com/tfhe/tfhe). In addition, TFHEpp supports [Circuit Bootstrapping](https://eprint.iacr.org/2018/421), [Programmable Bootstrapping many LUT](https://eprint.iacr.org/2021/729), and [Modified Chen's Packing](https://eprint.iacr.org/2024/1318) (we call it annihilate key switching in our code).
 We also includes partial support for B/FV, written in include/bfv++.hpp.
 TFHEpp depends on AVX2 because we use SPQLIOS FMA. If you want run TFHEpp without AVX2, see spqlios++ branch. It include pure C++ implementation of SPQLIOS as header only library, but slow.
 
 
@@ -0,0 +1,7 @@
+#pragma once
+// Transciphering by AES
+// Based on Hippogryph
+namespace TFHEpp
+{
+
+}
@@ -177,7 +177,7 @@ template <class P>
 void annihilatekeygen(AnnihilateKey<P>& ahk, const Key<P>& key)
 {
     for (int i = 0; i < P::nbit; i++)
-        evalautokeygen<P>(ahk[i], (1 << (P::nbit - i)) + 1, key);
+        evalautokeygen<P>(ahk[i], (1 << (i+1)) + 1, key);
 }
 
 template <class P>
 
@@ -1,9 +1,5 @@
 #pragma once
 
-#ifdef USE_RANDEN
-#include <randen.h>
-#endif
-
 #include <algorithm>
 #include <array>
 #include <cereal/archives/portable_binary.hpp>
 
@@ -1,6 +1,8 @@
 #pragma once
 
 #include <array>
+#include <span>
+#include <bit>
 
 #include "params.hpp"
 #include "trgsw.hpp"
@@ -183,6 +185,72 @@ void SubsetIdentityKeySwitch(TLWE<typename P::targetP> &res,
     }
 }
 
+template <class P>
+void PrivKeySwitch(TRLWE<typename P::targetP> &res,
+                   const TLWE<typename P::domainP> &tlwe,
+                   const PrivateKeySwitchingKey<P> &privksk)
+{
+    constexpr typename P::domainP::T roundoffset =
+        1ULL << (std::numeric_limits<typename P::domainP::T>::digits -
+                 (1 + P::basebit * P::t));
+
+    // Koga's Optimization
+    constexpr typename P::domainP::T offset = iksoffsetgen<P>();
+    constexpr typename P::domainP::T mask = (1ULL << P::basebit) - 1;
+    constexpr typename P::domainP::T halfbase = 1ULL << (P::basebit - 1);
+    res = {};
+    for (int i = 0; i <= P::domainP::k * P::domainP::n; i++) {
+        const typename P::domainP::T aibar = tlwe[i] + offset + roundoffset;
+
+        for (int j = 0; j < P::t; j++) {
+            const int32_t aij =
+                ((aibar >>
+                  (std::numeric_limits<typename P::domainP::T>::digits -
+                   (j + 1) * P::basebit)) &
+                 mask) -
+                halfbase;
+
+            if (aij > 0)
+                for (int k = 0; k < P::targetP::k + 1; k++)
+                    for (int p = 0; p < P::targetP::n; p++)
+                        res[k][p] -= privksk[i][j][aij - 1][k][p];
+            else if (aij < 0)
+                for (int k = 0; k < P::targetP::k + 1; k++)
+                    for (int p = 0; p < P::targetP::n; p++)
+                        res[k][p] += privksk[i][j][abs(aij) - 1][k][p];
+        }
+    }
+}
+
+template <class P>
+void SubsetPrivKeySwitch(TRLWE<typename P::targetP> &res,
+                         const TLWE<typename P::targetP> &tlwe,
+                         const SubsetPrivateKeySwitchingKey<P> &privksk)
+{
+    constexpr uint32_t mask = (1 << P::basebit) - 1;
+    constexpr uint64_t prec_offset =
+        1ULL << (std::numeric_limits<typename P::targetP::T>::digits -
+                 (1 + P::basebit * P::t));
+
+    res = {};
+    for (int i = 0; i <= P::targetP::k * P::targetP::n; i++) {
+        const typename P::targetP::T aibar = tlwe[i] + prec_offset;
+
+        for (int j = 0; j < P::t; j++) {
+            const typename P::domainP::T aij =
+                (aibar >> (std::numeric_limits<typename P::targetP::T>::digits -
+                           (j + 1) * P::basebit)) &
+                mask;
+
+            if (aij != 0) {
+                for (int p = 0; p < P::targetP::n; p++)
+                    for (int k = 0; k < P::targetP::k + 1; k++)
+                        res[k][p] -= privksk[i][j][aij - 1][k][p];
+            }
+        }
+    }
+}
+
 template <class P>
 void TLWE2TRLWEIKS(TRLWE<typename P::targetP> &res,
                    const TLWE<typename P::domainP> &tlwe,
@@ -239,113 +307,85 @@ void EvalAuto(TRLWE<P> &res, const TRLWE<P> &trlwe, const int d,
 }
 
 // https://eprint.iacr.org/2024/1318
+// The automorphism order is reversed (presumably relative to the paper above); correctness is easy to check by packing a trivial all-zero TRLWE.
 // TODO: They says we should divide by N first, not by 2 for each step. Why?
 template <class P>
 void AnnihilateKeySwitching(TRLWE<P> &res, const TRLWE<P> &trlwe,
                             const AnnihilateKey<P> &ahk)
 {
     res = trlwe;
-    for (int j = 0; j < (P::k + 1) * P::n; j++) res[0][j] /= P::n;
+    // for (int j = 0; j < (P::k + 1) * P::n; j++) res[0][j] /= P::n;
     for (int i = 0; i < P::nbit; i++) {
+        for (int j = 0; j < (P::k + 1) * P::n; j++) res[0][j] /= 2;
         TRLWE<P> evaledauto;
-        EvalAuto<P>(evaledauto, res, (1 << (P::nbit - i)) + 1, ahk[i]);
+        EvalAuto<P>(evaledauto, res, (1 << (i+1)) + 1, ahk[i]);
         for (int j = 0; j < (P::k + 1) * P::n; j++)
             res[0][j] += evaledauto[0][j];
     }
 }
 
-template <class P, uint num_func>
-void AnnihilatePrivateKeySwitching(
-    std::array<TRLWE<P>, num_func> &res, const TRLWE<P> &trlwe,
-    const AnnihilateKey<P> &ahk,
-    const std::array<TRGSWFFT<P>, num_func> &privks)
-{
-    static_assert(num_func > 0, "num_func must be bigger than 0");
-    res[num_func - 1] = trlwe;
-    for (int i = 0; i < P::nbit - 1; i++) {
-        TRLWE<P> evaledauto;
-        EvalAuto<P>(evaledauto, res[num_func - 1], (1 << (P::nbit - i)) + 1,
-                    ahk[i]);
-        for (int j = 0; j < (P::k + 1) * P::n; j++)
-            res[num_func - 1][0][j] += evaledauto[0][j];
-    }
-    for (int i = 0; i < num_func; i++) {
-        TRLWE<P> evaledauto;
-        EvalAuto<P>(evaledauto, res[num_func - 1], (1 << (P::nbit - i)) + 1,
-                    privks[i]);
-        for (int j = 0; j < (P::k + 1) * P::n; j++)
-            res[i][0][j] += res[num_func - 1][0][j] + evaledauto[0][j];
-    }
-}
+// template <class P, uint num_func>
+// void AnnihilatePrivateKeySwitching(
+//     std::array<TRLWE<P>, num_func> &res, const TRLWE<P> &trlwe,
+//     const AnnihilateKey<P> &ahk,
+//     const std::array<TRGSWFFT<P>, num_func> &privks)
+// {
+//     static_assert(num_func > 0, "num_func must be bigger than 0");
+//     res[num_func - 1] = trlwe;
+//     for (int i = 0; i < P::nbit - 1; i++) {
+//         TRLWE<P> evaledauto;
+//         EvalAuto<P>(evaledauto, res[num_func - 1], (1 << (P::nbit - i)) + 1,
+//                     ahk[i]);
+//         for (int j = 0; j < (P::k + 1) * P::n; j++)
+//             res[num_func - 1][0][j] += evaledauto[0][j];
+//     }
+//     for (int i = 0; i < num_func; i++) {
+//         TRLWE<P> evaledauto;
+//         EvalAuto<P>(evaledauto, res[num_func - 1], (1 << (P::nbit - i)) + 1,
+//                     privks[i]);
+//         for (int j = 0; j < (P::k + 1) * P::n; j++)
+//             res[i][0][j] += res[num_func - 1][0][j] + evaledauto[0][j];
+//     }
+// }
 
-template <class P>
-void PrivKeySwitch(TRLWE<typename P::targetP> &res,
-                   const TLWE<typename P::domainP> &tlwe,
-                   const PrivateKeySwitchingKey<P> &privksk)
-{
-    constexpr typename P::domainP::T roundoffset =
-        1ULL << (std::numeric_limits<typename P::domainP::T>::digits -
-                 (1 + P::basebit * P::t));
-
-    // Koga's Optimization
-    constexpr typename P::domainP::T offset = iksoffsetgen<P>();
-    constexpr typename P::domainP::T mask = (1ULL << P::basebit) - 1;
-    constexpr typename P::domainP::T halfbase = 1ULL << (P::basebit - 1);
-    res = {};
-    for (int i = 0; i <= P::domainP::k * P::domainP::n; i++) {
-        const typename P::domainP::T aibar = tlwe[i] + offset + roundoffset;
-
-        for (int j = 0; j < P::t; j++) {
-            const int32_t aij =
-                ((aibar >>
-                  (std::numeric_limits<typename P::domainP::T>::digits -
-                   (j + 1) * P::basebit)) &
-                 mask) -
-                halfbase;
-
-            if (aij > 0)
-                for (int k = 0; k < P::targetP::k + 1; k++)
-                    for (int p = 0; p < P::targetP::n; p++)
-                        res[k][p] -= privksk[i][j][aij - 1][k][p];
-            else if (aij < 0)
-                for (int k = 0; k < P::targetP::k + 1; k++)
-                    for (int p = 0; p < P::targetP::n; p++)
-                        res[k][p] += privksk[i][j][abs(aij) - 1][k][p];
-        }
-    }
-}
-
-template <class P>
-void SubsetPrivKeySwitch(TRLWE<typename P::targetP> &res,
-                         const TLWE<typename P::targetP> &tlwe,
-                         const SubsetPrivateKeySwitchingKey<P> &privksk)
-{
-    constexpr uint32_t mask = (1 << P::basebit) - 1;
-    constexpr uint64_t prec_offset =
-        1ULL << (std::numeric_limits<typename P::targetP::T>::digits -
-                 (1 + P::basebit * P::t));
-
-    res = {};
-    for (int i = 0; i <= P::targetP::k * P::targetP::n; i++) {
-        const typename P::targetP::T aibar = tlwe[i] + prec_offset;
-
-        for (int j = 0; j < P::t; j++) {
-            const typename P::domainP::T aij =
-                (aibar >> (std::numeric_limits<typename P::targetP::T>::digits -
-                           (j + 1) * P::basebit)) &
-                mask;
+// template <class P, uint num_tlwe>
+// void AnnihilatePacking(TRLWE<P> &res, const std::array<TLWE<P>, num_tlwe> &tlwes,
+//                             const AnnihilateKey<P> &ahk)
+// {
+//     static_assert(std::has_single_bit(num_tlwe), "Currently, num_tlwe must be power of 2");
+//     std::array<TRLWE<P>, num_tlwe> trlwes;
+//     constexpr uint l = std::count_zero(num_tlwe);
+//     for (int i = 0; i < num_tlwe; i++) {
+//         InvSampleExtractIndex<P>(trlwes[i], tlwes[i], 0);
+//         for (int j = 0; j <= P::k * P::n; j++)//rest are known to be 0
+//             trlwes[i][0][j] /= P::n;
+//     }
+//     // Using res as a temporary variable
+//     for (int i = 0; i < l; i++){
+//         constexpr uint stride = 1 << (l - i - 1);
+//         for(int j = 0; j < stride; j++){
+//             PolynomialMulByXai<P>(res, trlwes[stride+j], P::n >> i);
+//             for(int k = 0; i < (P::k+1) * P::n; k++)
+//                 trlwes[stride+j][k] = trlwes[j][k] - res[k];
+//             for(int k = 0; i < (P::k+1) * P::n; k++)
+//                 trlwes[j][k] += res[k];
+//             EvalAuto<P>(res, trlwes[stride+j], (1 << (P::nbit - i)) + 1, ahk[i]);
+//             for(int k = 0; i < (P::k+1) * P::n; k++)
+//                 trlwes[j][k] += res[k];
+//         }
+//     }
+//     res = trlwes[0];
+//     // using trlews[0] and trlwes[1] as temporary variables
+//     for (int i = l; i < P::nbit; i++) {
+//         PolynomialMulByXai<P>(res, trlwes[(1<<i)+j], P::n >> i);
+//         EvalAuto<P>(evaledauto, res, (1 << (P::nbit - i)) + 1, ahk[i]);
+//         for (int j = 0; j < (P::k + 1) * P::n; j++)
+//             res[0][j] += evaledauto[0][j];
+//     }
+// }
 
-            if (aij != 0) {
-                for (int p = 0; p < P::targetP::n; p++)
-                    for (int k = 0; k < P::targetP::k + 1; k++)
-                        res[k][p] -= privksk[i][j][aij - 1][k][p];
-            }
-        }
-    }
-}
-
-template <class P>
-void PackLWEs(TRLWE<P> &res, const std::vector<TLWE<P>> &tlwe,
+template <class P, class Container>
+void PackLWEs(TRLWE<P> &res, const Container &tlwe,
               const AnnihilateKey<P> &ahk, const uint l, const uint offset,
               const uint interval)
 {
@@ -363,19 +403,17 @@ void PackLWEs(TRLWE<P> &res, const std::vector<TLWE<P>> &tlwe,
                 tempeven[i][j] /= 2;
                 tempoddmul[i][j] /= 2;
                 tempodd[i][j] = tempeven[i][j] - tempoddmul[i][j];
-                // tempodd[i][j] = (tempeven[i][j] - tempoddmul[i][j])/2;
             }
         }
-        EvalAuto<P>(res, tempodd, (1 << l) + 1, ahk[P::nbit - l]);
+        EvalAuto<P>(res, tempodd, (1 << l) + 1, ahk[l-1]);
         for (int i = 0; i < P::k + 1; i++)
             for (int j = 0; j < P::n; j++)
                 res[i][j] += tempeven[i][j] + tempoddmul[i][j];
-        // res[i][j] += (tempeven[i][j] + tempoddmul[i][j])/2;
     }
 }
 
 template <class P>
-void TLWE2TRLWEChengsPacking(TRLWE<P> &res, std::vector<TLWE<P>> &tlwe,
+void TLWE2TRLWEChensPacking(TRLWE<P> &res, std::vector<TLWE<P>> &tlwe,
                              const AnnihilateKey<P> &ahk)
 {
     uint l = std::bit_width(tlwe.size()) - 1;
@@ -384,15 +422,40 @@ void TLWE2TRLWEChengsPacking(TRLWE<P> &res, std::vector<TLWE<P>> &tlwe,
         tlwe.resize(1 << l);
     }
     PackLWEs<P>(res, tlwe, ahk, l, 0, 1);
-    for (int i = 0; i < P::nbit - l; i++) {
+    for (int i = l; i < P::nbit; i++) {
         TRLWE<P> evaledauto;
         for (int j = 0; j < (P::k + 1) * P::n; j++) res[0][j] /= 2;
-        EvalAuto<P>(evaledauto, res, (1 << (P::nbit - i)) + 1, ahk[i]);
+        EvalAuto<P>(evaledauto, res, (1 << (i+1)) + 1, ahk[i]);
         for (int j = 0; j < (P::k + 1) * P::n; j++)
             res[0][j] += evaledauto[0][j];
     }
 }
 
+template <class P, uint num_tlwe>
+void TLWE2TablePacking(TRLWE<P> &res, std::array<TLWE<P>,num_tlwe> &tlwe,
+                             const AnnihilateKey<P> &ahk)
+{
+    static_assert(std::has_single_bit(num_tlwe), "Currently, num_tlwe must be power of 2");
+    constexpr uint l = std::countr_zero(num_tlwe);
+    PackLWEs<P>(res, tlwe, ahk, l, 0, 1);
+    for (int i = l; i < P::nbit; i++) {
+        TRLWE<P> tempmul;
+        for (int j = 0; j < P::k + 1; j++) 
+            PolynomialMulByXai<P>(tempmul[j], res[j], P::n >> (i+1));
+        TRLWE<P> tempsub;
+        for (int j = 0; j < (P::k + 1) * P::n; j++){
+            res[0][j] /= 2;
+            tempmul[0][j] /= 2;
+            tempsub[0][j] = res[0][j] - tempmul[0][j];
+            res[0][j] += tempmul[0][j];
+        }
+        // Reuse tempmul as the output buffer of EvalAuto; its previous contents are no longer needed.
+        EvalAuto<P>(tempmul, tempsub, (1 << (i+1)) + 1, ahk[i]);
+        for (int j = 0; j < (P::k + 1) * P::n; j++)
+            res[0][j] += tempmul[0][j];
+    }
+}
+
 template <class P>
 void PackLWEsLSB(TRLWE<P> &res, const std::vector<TLWE<P>> &tlwe,
                  const AnnihilateKey<P> &ahk, const uint l, const uint offset,
@@ -415,14 +478,12 @@ void PackLWEsLSB(TRLWE<P> &res, const std::vector<TLWE<P>> &tlwe,
                 tempeven[i][j] /= 2;
                 tempoddmul[i][j] /= 2;
                 tempodd[i][j] = tempeven[i][j] - tempoddmul[i][j];
-                // tempodd[i][j] = (tempeven[i][j] - tempoddmul[i][j])/2;
             }
         }
-        EvalAuto<P>(res, tempodd, (1 << l) + 1, ahk[P::nbit - l]);
+        EvalAuto<P>(res, tempodd, (1 << l) + 1, ahk[l-1]);
         for (int i = 0; i < P::k + 1; i++)
             for (int j = 0; j < P::n; j++)
                 res[i][j] += tempeven[i][j] + tempoddmul[i][j];
-        // res[i][j] += (tempeven[i][j] + tempoddmul[i][j])/2;
     }
 }
Original file line number	Diff line number	Diff line change
`@@ -177,7 +177,7 @@ template <class P>`
`177`	`177`	`void annihilatekeygen(AnnihilateKey<P>& ahk, const Key<P>& key)`
`178`	`178`	`{`
`179`	`179`	`for (int i = 0; i < P::nbit; i++)`
`180`		`- evalautokeygen<P>(ahk[i], (1 << (P::nbit - i)) + 1, key);`
	`180`	`+ evalautokeygen<P>(ahk[i], (1 << (i+1)) + 1, key);`
`181`	`181`	`}`
`182`	`182`
`183`	`183`	`template <class P>`