Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions slothy/targets/arm_v81m/arch_v81m.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ def is_load_store_instruction(self):
vst42_with_writeback,
vst43_with_writeback,
ldrd,
ldrd_no_imm,
ldrd_with_writeback,
ldrd_with_post,
strd,
Expand Down Expand Up @@ -402,6 +403,7 @@ def is_scalar_load(self):
return self._is_instance_of(
[
ldrd,
ldrd_no_imm,
ldrd_with_writeback,
ldrd_with_post,
ldr,
Expand Down Expand Up @@ -1075,6 +1077,27 @@ def write(self):
return super().write()


class ldrd_no_imm(MVEInstruction):
    """``ldrd`` form written without an immediate offset.

    Matches ``ldrd <Rt0>, <Rt1>, [<Rn>]``; ``Rn`` is the only input and
    ``Rt0`` / ``Rt1`` are the outputs.
    """

    pattern = "ldrd <Rt0>, <Rt1>, [<Rn>]"
    inputs = ["Rn"]
    outputs = ["Rt0", "Rt1"]

    @classmethod
    def make(cls, src):
        """Build the instruction from ``src`` and set up its address fields.

        The offset-free syntax is recorded as a pre-index of 0 with no
        increment; the address is taken from the first input argument.
        """
        inst = MVEInstruction.build(cls, src)
        inst.addr = inst.args_in[0]
        inst.pre_index = 0
        inst.increment = None
        return inst

    def write(self):
        """Render the instruction after refreshing ``immediate``.

        ``immediate`` is recomputed from ``pre_index``. If it is non-zero,
        this class's own pattern has no slot for it, so the pattern is
        replaced by ``ldrd.pattern`` (defined elsewhere in the file;
        presumably the immediate-offset form -- TODO confirm) before
        delegating to the parent ``write``.
        """
        self.immediate = simplify(self.pre_index)

        offset_is_nonzero = int(self.immediate) != 0
        if offset_is_nonzero:
            self.pattern = ldrd.pattern
        return super().write()


class ldrd_with_writeback(MVEInstruction):
pattern = "ldrd <Rt0>, <Rt1>, [<Rn>, <imm>]!"
inputs = ["Rn"]
Expand Down Expand Up @@ -1425,6 +1448,12 @@ class vbic(MVEInstruction):
outputs = ["Qd"]


class vbic_nodt(MVEInstruction):
    """``vbic`` written without a ``.<dt>`` datatype suffix."""

    inputs = ["Qn", "Qm"]
    outputs = ["Qd"]
    pattern = "vbic <Qd>, <Qn>, <Qm>"


class vorr(MVEInstruction):
pattern = "vorr.<dt> <Qd>, <Qn>, <Qm>"
inputs = ["Qn", "Qm"]
Expand All @@ -1437,6 +1466,12 @@ class veor(MVEInstruction):
outputs = ["Qd"]


class veor_nodt(MVEInstruction):
    """``veor`` written without a ``.<dt>`` datatype suffix."""

    inputs = ["Qn", "Qm"]
    outputs = ["Qd"]
    pattern = "veor <Qd>, <Qn>, <Qm>"


class nop(MVEInstruction):
pattern = "nop"

Expand Down
14 changes: 14 additions & 0 deletions slothy/targets/arm_v81m/cortex_m55r1.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
vmulh,
vmul_T1,
vmul_T2,
Expand All @@ -100,6 +102,7 @@
vmulf_T1,
vmulf_T2,
ldrd,
ldrd_no_imm,
ldrd_with_writeback,
ldrd_with_post,
strd,
Expand Down Expand Up @@ -316,8 +319,10 @@ def get_min_max_objective(slothy):
vhcadd: ExecutionUnit.VEC_INT,
vand: ExecutionUnit.VEC_INT,
vbic: ExecutionUnit.VEC_INT,
vbic_nodt: ExecutionUnit.VEC_INT,
vorr: ExecutionUnit.VEC_INT,
veor: ExecutionUnit.VEC_INT,
veor_nodt: ExecutionUnit.VEC_INT,
vmulh: ExecutionUnit.VEC_MUL,
vmul_T1: ExecutionUnit.VEC_MUL,
vmul_T2: ExecutionUnit.VEC_MUL,
Expand All @@ -337,6 +342,7 @@ def get_min_max_objective(slothy):
vmulf_T1: ExecutionUnit.VEC_FPU,
vmulf_T2: ExecutionUnit.VEC_FPU,
ldrd: ExecutionUnit.LOAD,
ldrd_no_imm: ExecutionUnit.LOAD,
ldrd_with_writeback: ExecutionUnit.LOAD,
ldrd_with_post: ExecutionUnit.LOAD,
strd: ExecutionUnit.STORE,
Expand Down Expand Up @@ -422,6 +428,7 @@ def get_min_max_objective(slothy):
ldr_with_writeback,
ldr_with_post,
ldrd,
ldrd_no_imm,
ldrd_with_writeback,
ldrd_with_post,
strd,
Expand Down Expand Up @@ -459,8 +466,10 @@ def get_min_max_objective(slothy):
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
vmulh,
vmul_T1,
vmul_T2,
Expand Down Expand Up @@ -544,6 +553,7 @@ def get_min_max_objective(slothy):
default_latencies = {
(
ldrd,
ldrd_no_imm,
ldrd_with_writeback,
ldrd_with_post,
): 2,
Expand Down Expand Up @@ -580,8 +590,10 @@ def get_min_max_objective(slothy):
vaddva,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
qsave,
save,
qrestore,
Expand Down Expand Up @@ -744,8 +756,10 @@ def get_latency(src, out_idx, dst):
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
vrshr,
vshrnb,
vshrnt,
Expand Down
14 changes: 14 additions & 0 deletions slothy/targets/arm_v81m/cortex_m85r1.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
vmulh,
vmul_T1,
vmul_T2,
Expand All @@ -100,6 +102,7 @@
vmulf_T1,
vmulf_T2,
ldrd,
ldrd_no_imm,
ldrd_with_writeback,
ldrd_with_post,
strd,
Expand Down Expand Up @@ -308,8 +311,10 @@ def get_min_max_objective(slothy):
vhcadd: ExecutionUnit.VEC_INT,
vand: [ExecutionUnit.VEC_BITWA, ExecutionUnit.VEC_BITWB],
vbic: [ExecutionUnit.VEC_BITWA, ExecutionUnit.VEC_BITWB],
vbic_nodt: [ExecutionUnit.VEC_BITWA, ExecutionUnit.VEC_BITWB],
vorr: [ExecutionUnit.VEC_BITWA, ExecutionUnit.VEC_BITWB],
veor: [ExecutionUnit.VEC_BITWA, ExecutionUnit.VEC_BITWB],
veor_nodt: [ExecutionUnit.VEC_BITWA, ExecutionUnit.VEC_BITWB],
vmulh: ExecutionUnit.VEC_MUL,
vmul_T1: ExecutionUnit.VEC_MUL,
vmul_T2: ExecutionUnit.VEC_MUL,
Expand All @@ -330,6 +335,7 @@ def get_min_max_objective(slothy):
vmulf_T1: ExecutionUnit.VEC_FPMUL,
vmulf_T2: ExecutionUnit.VEC_FPMUL,
ldrd: ExecutionUnit.LOAD,
ldrd_no_imm: ExecutionUnit.LOAD,
ldrd_with_writeback: ExecutionUnit.LOAD,
ldrd_with_post: ExecutionUnit.LOAD,
strd: ExecutionUnit.STORE,
Expand Down Expand Up @@ -416,6 +422,7 @@ def get_min_max_objective(slothy):
ldr_with_writeback,
ldr_with_post,
ldrd,
ldrd_no_imm,
ldrd_with_writeback,
ldrd_with_post,
strd,
Expand Down Expand Up @@ -453,8 +460,10 @@ def get_min_max_objective(slothy):
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
vmulh,
vmul_T1,
vmul_T2,
Expand Down Expand Up @@ -535,6 +544,7 @@ def get_min_max_objective(slothy):
default_latencies = {
(
ldrd,
ldrd_no_imm,
ldrd_with_post,
ldrd_with_writeback,
): 2,
Expand Down Expand Up @@ -570,8 +580,10 @@ def get_min_max_objective(slothy):
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
qsave,
save,
qrestore,
Expand Down Expand Up @@ -755,8 +767,10 @@ def get_latency(src, out_idx, dst):
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
vrshr,
vshrnb,
vshrnt,
Expand Down
14 changes: 14 additions & 0 deletions slothy/targets/arm_v81m/helium_experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,10 @@
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
vmulh,
vmul_T1,
vmul_T2,
Expand All @@ -93,6 +95,7 @@
vmulf_T1,
vmulf_T2,
ldrd,
ldrd_no_imm,
ldrd_with_writeback,
ldrd_with_post,
strd,
Expand Down Expand Up @@ -234,8 +237,10 @@ def get_min_max_objective(slothy):
vhcadd: ExecutionUnit.VEC_INT,
vand: ExecutionUnit.VEC_INT,
vbic: ExecutionUnit.VEC_INT,
vbic_nodt: ExecutionUnit.VEC_INT,
vorr: ExecutionUnit.VEC_INT,
veor: ExecutionUnit.VEC_INT,
veor_nodt: ExecutionUnit.VEC_INT,
vmulh: ExecutionUnit.VEC_MUL,
vmul_T1: ExecutionUnit.VEC_MUL,
vmul_T2: ExecutionUnit.VEC_MUL,
Expand All @@ -254,6 +259,7 @@ def get_min_max_objective(slothy):
vmulf_T1: ExecutionUnit.VEC_FPU,
vmulf_T2: ExecutionUnit.VEC_FPU,
ldrd: ExecutionUnit.LOAD,
ldrd_no_imm: ExecutionUnit.LOAD,
ldrd_with_writeback: ExecutionUnit.LOAD,
ldrd_with_post: ExecutionUnit.LOAD,
strd: ExecutionUnit.STORE,
Expand Down Expand Up @@ -339,6 +345,7 @@ def get_min_max_objective(slothy):
ldr_with_writeback,
ldr_with_post,
ldrd,
ldrd_no_imm,
ldrd_with_writeback,
ldrd_with_post,
strd,
Expand Down Expand Up @@ -376,8 +383,10 @@ def get_min_max_objective(slothy):
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
vmulh,
vmul_T1,
vmul_T2,
Expand Down Expand Up @@ -458,6 +467,7 @@ def get_min_max_objective(slothy):
default_latencies = {
(
ldrd,
ldrd_no_imm,
ldrd_with_writeback,
ldrd_with_post,
): 2,
Expand Down Expand Up @@ -493,8 +503,10 @@ def get_min_max_objective(slothy):
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
qsave,
save,
qrestore,
Expand Down Expand Up @@ -676,8 +688,10 @@ def get_latency(src, out_idx, dst):
vhcadd,
vand,
vbic,
vbic_nodt,
vorr,
veor,
veor_nodt,
vrshr,
vshrnb,
vshrnt,
Expand Down
10 changes: 7 additions & 3 deletions tests/naive/armv8m/instructions.s
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ vqdmulh.s8 q2, q0, q1
vqdmulh.s16 q2, q0, q1
vqdmulh.s32 q2, q0, q1

ldrd r0, r1, [r2]
ldrd r0, r1, [r2, #16]
ldrd r0, r1, [r2, #-16]
ldrd r0, r1, [r2], #16
Expand Down Expand Up @@ -236,15 +237,18 @@ vand.u64 q2, q0, q1
vbic.u8 q2, q0, q1
vbic.u16 q2, q0, q1
vbic.u32 q2, q0, q1
vbic q2, q0, q1

vorr.u8 q2, q0, q1
vorr.u16 q2, q0, q1
vorr.u32 q2, q0, q1
vorr.u64 q2, q0, q1

vorr.u8 q2, q0, q1
vorr.u16 q2, q0, q1
vorr.u32 q2, q0, q1
veor.u8 q2, q0, q1
veor.u16 q2, q0, q1
veor.u32 q2, q0, q1
veor.u64 q2, q0, q1
veor q2, q0, q1

nop

Expand Down