Skip to content

Commit b222316

Browse files
authored
Merge pull request #327 from FloydZ/add_eon_instruction
AArch64: Add eon instruction
2 parents 436ac95 + d4a32d1 commit b222316

File tree

4 files changed

+34
-14
lines changed

4 files changed

+34
-14
lines changed

slothy/targets/aarch64/aarch64_neon.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2170,12 +2170,6 @@ class asr_wform(AArch64Instruction):
21702170
outputs = ["Wd"]
21712171

21722172

2173-
class eor_wform(AArch64Instruction):
2174-
pattern = "eor <Wd>, <Wa>, <Wb>"
2175-
inputs = ["Wa", "Wb"]
2176-
outputs = ["Wd"]
2177-
2178-
21792173
class AArch64BasicArithmetic(AArch64Instruction):
21802174
pass
21812175

@@ -2502,12 +2496,30 @@ class rev_w(AArch64Logical):
25022496
outputs = ["Wd"]
25032497

25042498

2499+
class eor_wform(AArch64Logical):
2500+
pattern = "eor <Wd>, <Wa>, <Wb>"
2501+
inputs = ["Wa", "Wb"]
2502+
outputs = ["Wd"]
2503+
2504+
2505+
class eon_wform(AArch64Logical):
2506+
pattern = "eon <Wd>, <Wa>, <Wb>"
2507+
inputs = ["Wa", "Wb"]
2508+
outputs = ["Wd"]
2509+
2510+
25052511
class eor(AArch64Logical):
25062512
pattern = "eor <Xd>, <Xa>, <Xb>"
25072513
inputs = ["Xa", "Xb"]
25082514
outputs = ["Xd"]
25092515

25102516

2517+
class eon(AArch64Logical):
2518+
pattern = "eon <Xd>, <Xa>, <Xb>"
2519+
inputs = ["Xa", "Xb"]
2520+
outputs = ["Xd"]
2521+
2522+
25112523
class orr(AArch64Logical):
25122524
pattern = "orr <Xd>, <Xa>, <Xb>"
25132525
inputs = ["Xa", "Xb"]

slothy/targets/aarch64/cortex_a55.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119
bic,
120120
bic_reg,
121121
eor,
122+
eon,
122123
ror,
123124
eor_shifted,
124125
bic_shifted,
@@ -149,6 +150,7 @@
149150
asr_wform,
150151
and_imm_wform,
151152
eor_wform,
153+
eon_wform,
152154
lsr_wform,
153155
ASimdCompare,
154156
and_twoarg,
@@ -383,6 +385,7 @@ def get_min_max_objective(slothy):
383385
movw_imm,
384386
cmp_imm,
385387
eor,
388+
eon,
386389
eor_shifted,
387390
bic_shifted,
388391
ror,
@@ -410,6 +413,7 @@ def get_min_max_objective(slothy):
410413
and_imm_wform,
411414
lsr_wform,
412415
eor_wform,
416+
eon_wform,
413417
): ExecutionUnit.SCALAR(),
414418
AArch64ConditionalCompare: ExecutionUnit.SCALAR(),
415419
# NOTE: AESE/AESMC and AESD/AESIMC pairs can be dual-issued on A55 but this
@@ -489,8 +493,8 @@ def get_min_max_objective(slothy):
489493
(b_ldr_stack_with_inc, d_ldr_stack_with_inc): 1,
490494
(mov_d01, mov_b00): 1,
491495
(vzip1, vzip2): 1,
492-
(eor_wform): 1,
493-
(eor, bic, bic_reg, eor_shifted, bic_shifted): 1,
496+
(eor_wform, eon_wform): 1,
497+
(eon, eor, bic, bic_reg, eor_shifted, bic_shifted): 1,
494498
AArch64ConditionalCompare: 1,
495499
AESInstruction: 1,
496500
fmov_s_form: 1, # from double/single to gen reg
@@ -565,12 +569,12 @@ def get_min_max_objective(slothy):
565569
(b_ldr_stack_with_inc, d_ldr_stack_with_inc): 3,
566570
(mov_d01, mov_b00): 2,
567571
(vzip1, vzip2): 2,
568-
(eor_wform): 1,
572+
(eor_wform, eon_wform): 1,
569573
# According to SWOG, this is 2 cycles, but if the output is used as a
570574
# _non-shifted_ input to the next instruction, the effective latency
571575
# seems to be 1 cycle. See https://eprint.iacr.org/2022/1243.pdf
572576
(eor_shifted, bic_shifted): 1,
573-
(ror, eor, bic, bic_reg): 1,
577+
(eon, ror, eor, bic, bic_reg): 1,
574578
AArch64ConditionalCompare: 1,
575579
# NOTE: AESE/AESMC and AESD/AESIMC pairs can be dual-issued on A55 but this
576580
# is not modeled

slothy/targets/aarch64/cortex_a72_frontend.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
find_class,
6060
all_subclass_leaves,
6161
AArch64ConditionalCompare,
62+
AArch64Logical,
6263
Ldr_X,
6364
Str_X,
6465
Ldr_Q,
@@ -93,7 +94,6 @@
9394
St2,
9495
Ld3,
9596
Ld4,
96-
ubfx,
9797
AESInstruction,
9898
vext,
9999
AArch64NeonCount,
@@ -203,14 +203,15 @@ def get_min_max_objective(slothy):
203203
],
204204
AArch64NeonShiftInsert: [ExecutionUnit.ASIMD1],
205205
AArch64ConditionalCompare: ExecutionUnit.INT(),
206+
AArch64Logical: [ExecutionUnit.INT()],
206207
# 8B/8H occupies both F0, F1
207208
vuaddlv_sform: [[ExecutionUnit.ASIMD0, ExecutionUnit.ASIMD1]],
208209
Vins: [ExecutionUnit.ASIMD0, ExecutionUnit.ASIMD1],
209210
umov_d: ExecutionUnit.LOAD(), # ???
210211
(Ldr_Q, Ldr_X): ExecutionUnit.LOAD(),
211212
(Str_Q, Str_X): ExecutionUnit.STORE(),
212213
AArch64Move: ExecutionUnit.SCALAR(),
213-
(add, add_imm, add_shifted, ubfx): ExecutionUnit.SCALAR(),
214+
(add, add_imm, add_shifted): ExecutionUnit.SCALAR(),
214215
(VShiftImmediateRounding, VShiftImmediateBasic): [ExecutionUnit.ASIMD1],
215216
(St4, St3, St2): [ExecutionUnit.ASIMD0, ExecutionUnit.ASIMD1],
216217
(Ld3, Ld4): [
@@ -244,6 +245,7 @@ def get_min_max_objective(slothy):
244245
AArch64NeonLogical: 1,
245246
AArch64NeonShiftInsert: 1,
246247
AArch64ConditionalCompare: 1,
248+
AArch64Logical: 1,
247249
Vins: 1,
248250
umov_d: 1,
249251
(add, add_imm, add_shifted): 1,
@@ -255,7 +257,6 @@ def get_min_max_objective(slothy):
255257
St4: 8,
256258
Ld3: 3,
257259
Ld4: 4,
258-
ubfx: 1,
259260
vtbl: 1, # SWOG contains a blank throughput (approximating from AArch32)
260261
AESInstruction: 1,
261262
sub_imm: 1,
@@ -292,6 +293,7 @@ def get_min_max_objective(slothy):
292293
AArch64NeonLogical: 3,
293294
AArch64NeonShiftInsert: 3,
294295
AArch64ConditionalCompare: 1,
296+
AArch64Logical: 1,
295297
(Ldr_Q, Ldr_X, Str_Q, Str_X): 4, # approx
296298
Vins: 6, # approx
297299
umov_d: 4, # approx
@@ -305,7 +307,6 @@ def get_min_max_objective(slothy):
305307
St4: 8,
306308
Ld3: 3,
307309
Ld4: 4,
308-
ubfx: 1,
309310
vtbl: 6, # q-form: 3*N+3 cycles (N = number of registers in the table)
310311
AESInstruction: 3,
311312
sub_imm: 3,

tests/naive/aarch64/instructions.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ start:
55
add x2, x1, #64
66
add x2, x1, 64
77

8+
eon x2, x2, x1
9+
eon w2, w2, w1
10+
811
shl v0.16b, v1.16b, #4
912
shl d2, d3, #8
1013
sshr v4.16b, v5.16b, #2

0 commit comments

Comments
 (0)