Skip to content

Commit f2b1a7e

Browse files
authored
Merge pull request #826 from pq-code-package/keccak_proof_v84a
Add proof for AArch64+SHA3 Keccak-f1600-x1
2 parents dfa048b + e3883dd commit f2b1a7e

File tree

7 files changed

+487
-17
lines changed

7 files changed

+487
-17
lines changed

.github/workflows/hol_light.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
hol_light_proofs:
2424
strategy:
2525
matrix:
26-
proof: [mlkem_ntt,mlkem_intt,mlkem_keccak_f1600]
26+
proof: [mlkem_ntt,mlkem_intt,mlkem_keccak_f1600,mlkem_keccak_f1600_alt]
2727
name: HOL Light proof for ${{ matrix.proof }}.S
2828
runs-on: pqcp-arm64
2929
if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork

dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm_clean.S

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949

5050
input_addr .req x0
5151
input_rc .req x1
52-
const_addr .req x1
5352
count .req x2
5453
cur_const .req x3
5554

@@ -210,7 +209,7 @@
210209
str Asud, [input_addr, #0xC0]
211210
.endm
212211

213-
#define STACK_SIZE (16*4 + 16*6) /* VREGS (16*4) + GPRS (TODO: Remove) */
212+
#define STACK_SIZE (16*4) /* VREGS (16*4) */
214213

215214
#define STACK_BASE_GPRS (16*4)
216215
.macro alloc_stack
@@ -219,7 +218,7 @@
219218

220219
.macro free_stack
221220
add sp, sp, #(STACK_SIZE)
222-
.endm
221+
.endm
223222

224223
.macro save_vregs
225224
stp d8, d9, [sp, #(16*0)]
@@ -300,7 +299,7 @@
300299
xar_m0 Ame_, Aga, E0, 28
301300
xar_m0 Abe_, Age, E1, 20
302301

303-
ld1r {v31.2d}, [const_addr], #8
302+
ld1r {v31.2d}, [input_rc], #8
304303

305304
bcax_m0 Aga, Aga_, Agi_, Age_
306305
bcax_m0 Age, Age_, Ago_, Agi_
@@ -341,7 +340,6 @@
341340
MLK_ASM_FN_SYMBOL(keccak_f1600_x1_v84a_asm_clean)
342341
alloc_stack
343342
save_vregs
344-
mov const_addr, input_rc
345343
load_input
346344

347345
mov count, #(KECCAK_F1600_ROUNDS)
@@ -358,7 +356,6 @@ keccak_f1600_x1_v84a_loop:
358356
/****************** REGISTER DEALLOCATIONS *******************/
359357
.unreq input_addr
360358
.unreq input_rc
361-
.unreq const_addr
362359
.unreq count
363360
.unreq cur_const
364361
.unreq Aba

mlkem/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm_clean.S

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,11 @@
5555
.global MLK_ASM_NAMESPACE(keccak_f1600_x1_v84a_asm_clean)
5656
MLK_ASM_FN_SYMBOL(keccak_f1600_x1_v84a_asm_clean)
5757

58-
sub sp, sp, #0xa0
58+
sub sp, sp, #0x40
5959
stp d8, d9, [sp]
6060
stp d10, d11, [sp, #0x10]
6161
stp d12, d13, [sp, #0x20]
6262
stp d14, d15, [sp, #0x30]
63-
mov x1, x1
6463
ldp d0, d1, [x0]
6564
ldp d2, d3, [x0, #0x10]
6665
ldp d4, d5, [x0, #0x20]
@@ -163,7 +162,7 @@ keccak_f1600_x1_v84a_loop:
163162
ldp d10, d11, [sp, #0x10]
164163
ldp d12, d13, [sp, #0x20]
165164
ldp d14, d15, [sp, #0x30]
166-
add sp, sp, #0xa0
165+
add sp, sp, #0x40
167166
ret
168167

169168
#endif /* __ARM_FEATURE_SHA3 */

proofs/hol_light/arm/Makefile

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,11 @@ SRC_ARM ?= $(SRC)/arm
2525
# Some clang-based preprocessors seem to behave differently, and get confused
2626
# by single-quote characters in comments, so we eliminate // comments first.
2727

28+
ARCHFLAGS=-march=armv8.4-a+sha3
2829
ifeq ($(OSTYPE_RESULT),Darwin)
2930
PREPROCESS=sed -e 's/\/\/.*//' | $(CC) -E -xassembler-with-cpp -
3031
else
31-
PREPROCESS=$(CC) -E -xassembler-with-cpp -
32+
PREPROCESS=$(CC) $(ARCHFLAGS) -E -xassembler-with-cpp -
3233
endif
3334

3435
# Generally GNU-type assemblers are happy with multiple instructions on
@@ -47,24 +48,27 @@ SPLIT=tr ';' '\n'
4748
# sudo apt-get install binutils-aarch64-linux-gnu
4849

4950
ifeq ($(ARCHTYPE_RESULT),aarch64)
50-
ASSEMBLE=as
51+
ASSEMBLE=as $(ARCHFLAGS)
5152
OBJDUMP=objdump -d
5253
else
5354
ifeq ($(ARCHTYPE_RESULT),arm64)
54-
ASSEMBLE=as
55+
ASSEMBLE=as $(ARCHFLAGS)
5556
OBJDUMP=objdump -d
5657
else
5758
ifeq ($(OSTYPE_RESULT),Darwin)
5859
ASSEMBLE=as -arch arm64
5960
OBJDUMP=otool -tvV
6061
else
61-
ASSEMBLE=aarch64-linux-gnu-as
62+
ASSEMBLE=aarch64-linux-gnu-as $(ARCHFLAGS)
6263
OBJDUMP=aarch64-linux-gnu-objdump -d
6364
endif
6465
endif
6566
endif
6667

67-
OBJ = mlkem/mlkem_ntt.o mlkem/mlkem_intt.o mlkem/mlkem_keccak_f1600.o
68+
OBJ = mlkem/mlkem_ntt.o \
69+
mlkem/mlkem_intt.o \
70+
mlkem/mlkem_keccak_f1600.o \
71+
mlkem/mlkem_keccak_f1600_alt.o
6872

6973
# According to
7074
# https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms,
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
/*
2+
* Copyright (c) 2024-2025 The mlkem-native project authors
3+
* Copyright (c) 2021-2022 Arm Limited
4+
* Copyright (c) 2022 Matthias Kannwischer
5+
* SPDX-License-Identifier: MIT
6+
*
7+
* Permission is hereby granted, free of charge, to any person obtaining a copy
8+
* of this software and associated documentation files (the "Software"), to deal
9+
* in the Software without restriction, including without limitation the rights
10+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
* copies of the Software, and to permit persons to whom the Software is
12+
* furnished to do so, subject to the following conditions:
13+
*
14+
* The above copyright notice and this permission notice shall be included in all
15+
* copies or substantial portions of the Software.
16+
*
17+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
* SOFTWARE.
24+
*
25+
*/
26+
27+
//
28+
// Author: Hanno Becker <hanno.becker@arm.com>
29+
// Author: Matthias Kannwischer <matthias@kannwischer.eu>
30+
//
31+
// This implementation is essentially from the paper
32+
//
33+
// Hybrid scalar/vector implementations of Keccak and SPHINCS+ on AArch64
34+
// https://eprint.iacr.org/2022/1243
35+
//
36+
// The only difference is interleaving/deinterleaving of Keccak state
37+
// during load and store, so that the caller need not do this.
38+
//
39+
40+
41+
42+
/*
43+
* WARNING: This file is auto-derived from the mlkem-native source file
44+
* dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm_clean.S using scripts/simpasm. Do not modify it directly.
45+
*/
46+
47+
48+
.text
49+
.balign 4
50+
#ifdef __APPLE__
51+
.global _PQCP_MLKEM_NATIVE_MLKEM768_keccak_f1600_x1_v84a_asm_clean
52+
_PQCP_MLKEM_NATIVE_MLKEM768_keccak_f1600_x1_v84a_asm_clean:
53+
#else
54+
.global PQCP_MLKEM_NATIVE_MLKEM768_keccak_f1600_x1_v84a_asm_clean
55+
PQCP_MLKEM_NATIVE_MLKEM768_keccak_f1600_x1_v84a_asm_clean:
56+
#endif
57+
58+
sub sp, sp, #0x40
59+
stp d8, d9, [sp]
60+
stp d10, d11, [sp, #0x10]
61+
stp d12, d13, [sp, #0x20]
62+
stp d14, d15, [sp, #0x30]
63+
ldp d0, d1, [x0]
64+
ldp d2, d3, [x0, #0x10]
65+
ldp d4, d5, [x0, #0x20]
66+
ldp d6, d7, [x0, #0x30]
67+
ldp d8, d9, [x0, #0x40]
68+
ldp d10, d11, [x0, #0x50]
69+
ldp d12, d13, [x0, #0x60]
70+
ldp d14, d15, [x0, #0x70]
71+
ldp d16, d17, [x0, #0x80]
72+
ldp d18, d19, [x0, #0x90]
73+
ldp d20, d21, [x0, #0xa0]
74+
ldp d22, d23, [x0, #0xb0]
75+
ldr d24, [x0, #0xc0]
76+
mov x2, #0x18 // =24
77+
78+
keccak_f1600_x1_v84a_loop:
79+
eor3 v30.16b, v0.16b, v5.16b, v10.16b
80+
eor3 v29.16b, v1.16b, v6.16b, v11.16b
81+
eor3 v28.16b, v2.16b, v7.16b, v12.16b
82+
eor3 v27.16b, v3.16b, v8.16b, v13.16b
83+
eor3 v26.16b, v4.16b, v9.16b, v14.16b
84+
eor3 v30.16b, v30.16b, v15.16b, v20.16b
85+
eor3 v29.16b, v29.16b, v16.16b, v21.16b
86+
eor3 v28.16b, v28.16b, v17.16b, v22.16b
87+
eor3 v27.16b, v27.16b, v18.16b, v23.16b
88+
eor3 v26.16b, v26.16b, v19.16b, v24.16b
89+
rax1 v25.2d, v30.2d, v28.2d
90+
rax1 v28.2d, v28.2d, v26.2d
91+
rax1 v26.2d, v26.2d, v29.2d
92+
rax1 v29.2d, v29.2d, v27.2d
93+
rax1 v27.2d, v27.2d, v30.2d
94+
eor v30.16b, v0.16b, v26.16b
95+
xar v0.2d, v2.2d, v29.2d, #0x2
96+
xar v2.2d, v12.2d, v29.2d, #0x15
97+
xar v12.2d, v13.2d, v28.2d, #0x27
98+
xar v13.2d, v19.2d, v27.2d, #0x38
99+
xar v19.2d, v23.2d, v28.2d, #0x8
100+
xar v23.2d, v15.2d, v26.2d, #0x17
101+
xar v15.2d, v1.2d, v25.2d, #0x3f
102+
xar v1.2d, v8.2d, v28.2d, #0x9
103+
xar v8.2d, v16.2d, v25.2d, #0x13
104+
xar v16.2d, v7.2d, v29.2d, #0x3a
105+
xar v7.2d, v10.2d, v26.2d, #0x3d
106+
xar v10.2d, v3.2d, v28.2d, #0x24
107+
xar v3.2d, v18.2d, v28.2d, #0x2b
108+
xar v18.2d, v17.2d, v29.2d, #0x31
109+
xar v17.2d, v11.2d, v25.2d, #0x36
110+
xar v11.2d, v9.2d, v27.2d, #0x2c
111+
xar v9.2d, v22.2d, v29.2d, #0x3
112+
xar v22.2d, v14.2d, v27.2d, #0x19
113+
xar v14.2d, v20.2d, v26.2d, #0x2e
114+
xar v20.2d, v4.2d, v27.2d, #0x25
115+
xar v4.2d, v24.2d, v27.2d, #0x32
116+
xar v24.2d, v21.2d, v25.2d, #0x3e
117+
xar v21.2d, v5.2d, v26.2d, #0x1c
118+
xar v27.2d, v6.2d, v25.2d, #0x14
119+
ld1r { v31.2d }, [x1], #8
120+
bcax v5.16b, v10.16b, v7.16b, v11.16b
121+
bcax v6.16b, v11.16b, v8.16b, v7.16b
122+
bcax v7.16b, v7.16b, v9.16b, v8.16b
123+
bcax v8.16b, v8.16b, v10.16b, v9.16b
124+
bcax v9.16b, v9.16b, v11.16b, v10.16b
125+
bcax v10.16b, v15.16b, v12.16b, v16.16b
126+
bcax v11.16b, v16.16b, v13.16b, v12.16b
127+
bcax v12.16b, v12.16b, v14.16b, v13.16b
128+
bcax v13.16b, v13.16b, v15.16b, v14.16b
129+
bcax v14.16b, v14.16b, v16.16b, v15.16b
130+
bcax v15.16b, v20.16b, v17.16b, v21.16b
131+
bcax v16.16b, v21.16b, v18.16b, v17.16b
132+
bcax v17.16b, v17.16b, v19.16b, v18.16b
133+
bcax v18.16b, v18.16b, v20.16b, v19.16b
134+
bcax v19.16b, v19.16b, v21.16b, v20.16b
135+
bcax v20.16b, v0.16b, v22.16b, v1.16b
136+
bcax v21.16b, v1.16b, v23.16b, v22.16b
137+
bcax v22.16b, v22.16b, v24.16b, v23.16b
138+
bcax v23.16b, v23.16b, v0.16b, v24.16b
139+
bcax v24.16b, v24.16b, v1.16b, v0.16b
140+
bcax v0.16b, v30.16b, v2.16b, v27.16b
141+
bcax v1.16b, v27.16b, v3.16b, v2.16b
142+
bcax v2.16b, v2.16b, v4.16b, v3.16b
143+
bcax v3.16b, v3.16b, v30.16b, v4.16b
144+
bcax v4.16b, v4.16b, v27.16b, v30.16b
145+
eor v0.16b, v0.16b, v31.16b
146+
sub x2, x2, #0x1
147+
cbnz x2, keccak_f1600_x1_v84a_loop
148+
stp d0, d1, [x0]
149+
stp d2, d3, [x0, #0x10]
150+
stp d4, d5, [x0, #0x20]
151+
stp d6, d7, [x0, #0x30]
152+
stp d8, d9, [x0, #0x40]
153+
stp d10, d11, [x0, #0x50]
154+
stp d12, d13, [x0, #0x60]
155+
stp d14, d15, [x0, #0x70]
156+
stp d16, d17, [x0, #0x80]
157+
stp d18, d19, [x0, #0x90]
158+
stp d20, d21, [x0, #0xa0]
159+
stp d22, d23, [x0, #0xb0]
160+
str d24, [x0, #0xc0]
161+
ldp d8, d9, [sp]
162+
ldp d10, d11, [sp, #0x10]
163+
ldp d12, d13, [sp, #0x20]
164+
ldp d14, d15, [sp, #0x30]
165+
add sp, sp, #0x40
166+
ret

0 commit comments

Comments
 (0)