From a793d601f046e88179c1caff0164d92da800af09 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Tue, 18 Mar 2025 23:45:50 +0530 Subject: [PATCH 01/19] feat(insn): emit number(0) on xor of same registers Signed-off-by: vibhatsu --- capa/features/extractors/viv/insn.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 0b3e79f990..61a99f6380 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -594,6 +594,13 @@ def extract_op_number_features( insn: envi.Opcode = ih.inner f: viv_utils.Function = fh.inner + if insn.mnem == "xor" and insn.opers[0].isReg() and insn.opers[1].isReg() and insn.opers[0].reg == insn.opers[1].reg: + # for pattern like: + # + # xor eax, eax + # + yield Number(0), ih.address + # this is for both x32 and x64 if not isinstance(oper, (envi.archs.i386.disasm.i386ImmOper, envi.archs.i386.disasm.i386ImmMemOper)): return From 828509c184836b54e2e9e22f4db4795839e470b0 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Fri, 21 Mar 2025 15:34:35 +0530 Subject: [PATCH 02/19] add test case for emit number(0) for insn like " xor eax, eax" Signed-off-by: vibhatsu --- tests/fixtures.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index b9199061d5..b7a09016e0 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1021,6 +1021,7 @@ def parametrize(params, values, **kwargs): ("7351f.elf", "function=0x408753,bb=0x408781", capa.features.insn.API("open"), True), ("79abd...", "function=0x10002385,bb=0x10002385", capa.features.common.Characteristic("call $+5"), True), ("946a9...", "function=0x10001510,bb=0x100015c0", capa.features.common.Characteristic("call $+5"), True), + ("9324d...", "function=0x40806C,bb=0x40806C,insn=0x40806C", capa.features.insn.Number(0), True), ], # order tests by (file, item) # so that our LRU cache is most effective. 
From d940b7bb6df890a88da7340576ab4fe3ded489a7 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Fri, 21 Mar 2025 15:36:02 +0530 Subject: [PATCH 03/19] format conditional check for xor insn Signed-off-by: vibhatsu --- capa/features/extractors/viv/insn.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 61a99f6380..a56fd03c3a 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -594,7 +594,12 @@ def extract_op_number_features( insn: envi.Opcode = ih.inner f: viv_utils.Function = fh.inner - if insn.mnem == "xor" and insn.opers[0].isReg() and insn.opers[1].isReg() and insn.opers[0].reg == insn.opers[1].reg: + if ( + insn.mnem == "xor" + and insn.opers[0].isReg() + and insn.opers[1].isReg() + and insn.opers[0].reg == insn.opers[1].reg + ): # for pattern like: # # xor eax, eax From d753fdd84ad6c63db27d326da40e6ff8d10880b6 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Fri, 21 Mar 2025 15:36:39 +0530 Subject: [PATCH 04/19] update CHANGELOG Signed-off-by: vibhatsu --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dcee148421..05a73666cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,8 @@ ## master (unreleased) ### New Features - +- emit number(0) for instructions like "xor eax,eax" #2622 @v1bh475u +- ### Breaking Changes ### New Rules (4) @@ -11,7 +12,6 @@ - communication/socket/connect-socket moritz.raabe@mandiant.com joakim@intezer.com mrhafizfarhad@gmail.com - communication/socket/udp/connect-udp-socket mrhafizfarhad@gmail.com - nursery/enter-debug-mode-in-dotnet @v1bh475u -- ### Bug Fixes - cape: make some fields optional @williballenthin #2631 #2632 From 172b36545f875583704694805472108f25a3b6e2 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Fri, 21 Mar 2025 18:55:28 +0530 Subject: [PATCH 05/19] feat(insn): emit number(0) on xor of same registers
Signed-off-by: vibhatsu --- capa/features/extractors/ida/insn.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 0e92b21f5e..8edd414594 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -160,6 +160,13 @@ def extract_insn_number_features( # .text:00401145 add esp, 0Ch return + if insn.itype == idaapi.NN_xor: + # for pattern like: + # + # xor eax, eax + if insn.ops[0].type == idaapi.o_reg and insn.ops[1].type == idaapi.o_reg and insn.ops[0].reg == insn.ops[1].reg: + yield Number(0), ih.address + for i, op in enumerate(insn.ops): if op.type == idaapi.o_void: break From b3b51a333df3c63fdc720944252038dd7bdea380 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Fri, 21 Mar 2025 20:47:50 +0530 Subject: [PATCH 06/19] ghidra: emit number(0) on xor of same registers Signed-off-by: vibhatsu --- capa/features/extractors/ghidra/insn.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/capa/features/extractors/ghidra/insn.py b/capa/features/extractors/ghidra/insn.py index 4cfc8b7856..9432a477d1 100644 --- a/capa/features/extractors/ghidra/insn.py +++ b/capa/features/extractors/ghidra/insn.py @@ -156,6 +156,14 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl # .text:00401145 add esp, 0Ch return + if insn.getMnemonicString().startswith("XOR"): + # for patern like: + # + # xor eax, eax + if insn.getNumOperands() == 2: + if insn.getOpObjects(0)[-1] == insn.getOpObjects(1)[-1]: + yield Number(0), ih.address + for i in range(insn.getNumOperands()): # Exceptions for LEA insn: # invalid operand encoding, considered numbers instead of offsets From 998e8506ddf43f34860850f28608b362494d4ac8 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Sat, 22 Mar 2025 01:16:05 +0530 Subject: [PATCH 07/19] binexport2: emit number(0) on xor of same registers Signed-off-by: vibhatsu --- 
capa/features/extractors/binexport2/arch/arm/insn.py | 10 ++++++++++ .../features/extractors/binexport2/arch/intel/insn.py | 11 +++++++++++ 2 files changed, 21 insertions(+) diff --git a/capa/features/extractors/binexport2/arch/arm/insn.py b/capa/features/extractors/binexport2/arch/arm/insn.py index 8b481040dd..6a9a801e5a 100644 --- a/capa/features/extractors/binexport2/arch/arm/insn.py +++ b/capa/features/extractors/binexport2/arch/arm/insn.py @@ -53,6 +53,16 @@ def extract_insn_number_features( mnemonic: str = get_instruction_mnemonic(be2, instruction) + if mnemonic == "xor": + instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] + operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] + if operands[1] == operands[2]: + # for pattern like: + # + # eor x0, x0, x0 + # + yield Number(0), ih.address + if mnemonic in ("add", "sub"): assert len(instruction.operand_index) == 3 diff --git a/capa/features/extractors/binexport2/arch/intel/insn.py b/capa/features/extractors/binexport2/arch/intel/insn.py index 02e51a6dc9..25c04856d9 100644 --- a/capa/features/extractors/binexport2/arch/intel/insn.py +++ b/capa/features/extractors/binexport2/arch/intel/insn.py @@ -81,6 +81,17 @@ def extract_insn_number_features( match = NUMBER_PATTERNS.match_with_be2(be2, ii.instruction_index) if not match: + if BinExport2InstructionPatternMatcher.from_str("xor reg, reg").match_with_be2(be2, ii.instruction_index): + # for pattern like: + # + # xor eax, eax + # + instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] + operands: list[BinExport2.Operand] = [ + be2.operand[operand_index] for operand_index in instruction.operand_index + ] + if operands[0] == operands[1]: + yield Number(0), ih.address return value: int = mask_immediate(fhi.arch, match.expression.immediate) From 976a1ec844fde62f7e0d4e0f995dc54cbd479dba Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Sat, 22 Mar 2025 01:19:16 +0530 
Subject: [PATCH 08/19] add fixture for mimikatz with number(0) Signed-off-by: vibhatsu --- tests/fixtures.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index b7a09016e0..46f19c8567 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1022,6 +1022,7 @@ def parametrize(params, values, **kwargs): ("79abd...", "function=0x10002385,bb=0x10002385", capa.features.common.Characteristic("call $+5"), True), ("946a9...", "function=0x10001510,bb=0x100015c0", capa.features.common.Characteristic("call $+5"), True), ("9324d...", "function=0x40806C,bb=0x40806C,insn=0x40806C", capa.features.insn.Number(0), True), + ("mimikatz", "function=0x40105d", capa.features.insn.Number(0), True), ], # order tests by (file, item) # so that our LRU cache is most effective. From f278a35165edb9057706194f0d3da225e6e05498 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Sat, 22 Mar 2025 01:47:56 +0530 Subject: [PATCH 09/19] refactor: rename instruction variable Signed-off-by: vibhatsu --- capa/features/extractors/binexport2/arch/arm/insn.py | 1 - capa/features/extractors/binexport2/arch/intel/insn.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/binexport2/arch/arm/insn.py b/capa/features/extractors/binexport2/arch/arm/insn.py index 6a9a801e5a..84abd8eccc 100644 --- a/capa/features/extractors/binexport2/arch/arm/insn.py +++ b/capa/features/extractors/binexport2/arch/arm/insn.py @@ -54,7 +54,6 @@ def extract_insn_number_features( mnemonic: str = get_instruction_mnemonic(be2, instruction) if mnemonic == "xor": - instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] if operands[1] == operands[2]: # for pattern like: diff --git a/capa/features/extractors/binexport2/arch/intel/insn.py b/capa/features/extractors/binexport2/arch/intel/insn.py index 25c04856d9..d48de66c94 100644 
--- a/capa/features/extractors/binexport2/arch/intel/insn.py +++ b/capa/features/extractors/binexport2/arch/intel/insn.py @@ -102,9 +102,9 @@ def extract_insn_number_features( yield OperandNumber(match.operand_index, value), ih.address instruction_index: int = ii.instruction_index - instruction: BinExport2.Instruction = be2.instruction[instruction_index] + current_instruction: BinExport2.Instruction = be2.instruction[instruction_index] - mnemonic: str = get_instruction_mnemonic(be2, instruction) + mnemonic: str = get_instruction_mnemonic(be2, current_instruction) if mnemonic.startswith("add"): if 0 < value < MAX_STRUCTURE_SIZE: yield Offset(value), ih.address From 310eb0c636c2ed47ce19b7c2184213d11a58453d Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Tue, 25 Mar 2025 15:45:46 +0530 Subject: [PATCH 10/19] add helper functions to identify XOR insns & zeroed XORs Signed-off-by: vibhatsu --- capa/features/extractors/viv/helpers.py | 12 ++++++++++++ capa/features/extractors/viv/insn.py | 13 +++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/capa/features/extractors/viv/helpers.py b/capa/features/extractors/viv/helpers.py index 9442e0ebaf..077bd94d8c 100644 --- a/capa/features/extractors/viv/helpers.py +++ b/capa/features/extractors/viv/helpers.py @@ -14,6 +14,7 @@ from typing import Optional +import envi from vivisect import VivWorkspace from vivisect.const import XR_TO, REF_CODE @@ -28,3 +29,14 @@ def get_coderef_from(vw: VivWorkspace, va: int) -> Optional[int]: return xrefs[0][XR_TO] else: return None + + +def is_xor(insn: envi.Opcode): + return insn.mnem in ("xor", "xorpd", "xorps", "pxor") + + +def is_zxor(insn: envi.Opcode): + if is_xor(insn): + return insn.opers[0] == insn.opers[1] + + return True diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index a56fd03c3a..3fbea69bc8 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -387,10 +387,8 @@ def
extract_insn_nzxor_characteristic_features( bb: viv_utils.BasicBlock = bbhandle.inner f: viv_utils.Function = fh.inner - if insn.mnem not in ("xor", "xorpd", "xorps", "pxor"): - return - - if insn.opers[0] == insn.opers[1]: + # also checks if the insn is xor + if capa.features.extractors.viv.helpers.is_zxor(insn): return if is_security_cookie(f, bb, insn): @@ -594,12 +592,7 @@ def extract_op_number_features( insn: envi.Opcode = ih.inner f: viv_utils.Function = fh.inner - if ( - insn.mnem == "xor" - and insn.opers[0].isReg() - and insn.opers[1].isReg() - and insn.opers[0].reg == insn.opers[1].reg - ): + if capa.features.extractors.viv.helpers.is_zxor(insn): # for pattern like: # # xor eax, eax From e2397845fae06a7b2fa536ff388480d57f4a54d1 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Thu, 27 Mar 2025 20:56:55 +0530 Subject: [PATCH 11/19] fix is_zxor & add is_operands_equal helper function Signed-off-by: vibhatsu --- capa/features/extractors/viv/helpers.py | 8 ++++++-- capa/features/extractors/viv/insn.py | 6 ++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/capa/features/extractors/viv/helpers.py b/capa/features/extractors/viv/helpers.py index 077bd94d8c..cc21a3699a 100644 --- a/capa/features/extractors/viv/helpers.py +++ b/capa/features/extractors/viv/helpers.py @@ -35,8 +35,12 @@ def is_xor(insn: envi.Opcode): return insn.mnem in ("xor", "xorpd", "xorps", "pxor") +def is_operands_equal(insn: envi.Opcode): + return insn.opers[0] == insn.opers[1] + + def is_zxor(insn: envi.Opcode): if is_xor(insn): - return insn.opers[0] == insn.opers[1] + return is_operands_equal(insn) - return True + return False diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 3fbea69bc8..7d5b28e767 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -387,8 +387,10 @@ def extract_insn_nzxor_characteristic_features( bb: viv_utils.BasicBlock = bbhandle.inner f: viv_utils.Function = fh.inner 
- # also checks if the insn is xor - if capa.features.extractors.viv.helpers.is_zxor(insn): + if not capa.features.extractors.viv.helpers.is_xor(insn): + return + + if capa.features.extractors.viv.helpers.is_operands_equal(insn): return if is_security_cookie(f, bb, insn): From f27c9bab276b4515dc77cbc47bebfc992d2dec61 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Fri, 28 Mar 2025 13:25:51 +0530 Subject: [PATCH 12/19] inline is_operand_equal logic into is_zxor and insn extraction Signed-off-by: vibhatsu --- capa/features/extractors/viv/helpers.py | 6 +----- capa/features/extractors/viv/insn.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/capa/features/extractors/viv/helpers.py b/capa/features/extractors/viv/helpers.py index cc21a3699a..c07f7e53e6 100644 --- a/capa/features/extractors/viv/helpers.py +++ b/capa/features/extractors/viv/helpers.py @@ -35,12 +35,8 @@ def is_xor(insn: envi.Opcode): return insn.mnem in ("xor", "xorpd", "xorps", "pxor") -def is_operands_equal(insn: envi.Opcode): - return insn.opers[0] == insn.opers[1] - - def is_zxor(insn: envi.Opcode): if is_xor(insn): - return is_operands_equal(insn) + return insn.opers[0] == insn.opers[1] return False diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 7d5b28e767..bf81dd479a 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -390,7 +390,7 @@ def extract_insn_nzxor_characteristic_features( if not capa.features.extractors.viv.helpers.is_xor(insn): return - if capa.features.extractors.viv.helpers.is_operands_equal(insn): + if insn.opers[0] == insn.opers[1]: return if is_security_cookie(f, bb, insn): From cf0831d275ddd4eaafdeacf83d5b088f7dd107e1 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Mon, 31 Mar 2025 22:02:28 +0530 Subject: [PATCH 13/19] add is_xor and is_zxor helper functions Signed-off-by: vibhatsu --- capa/features/extractors/ida/helpers.py | 10 ++++++++++ 
capa/features/extractors/ida/insn.py | 7 +++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 365a20675c..6b85e15186 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -188,6 +188,10 @@ def get_instructions_in_range(start: int, end: int) -> Iterator[idaapi.insn_t]: yield insn +def is_xor(insn: idaapi.insn_t) -> bool: + return insn.itype in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor) + + def is_operand_equal(op1: idaapi.op_t, op2: idaapi.op_t) -> bool: """compare two IDA op_t""" if op1.flags != op2.flags: @@ -214,6 +218,12 @@ def is_operand_equal(op1: idaapi.op_t, op2: idaapi.op_t) -> bool: return True +def is_zxor(insn: idaapi.insn_t) -> bool: + if is_xor(insn): + return is_operand_equal(insn.Op1, insn.Op2) + return False + + def is_basic_block_equal(bb1: idaapi.BasicBlock, bb2: idaapi.BasicBlock) -> bool: """compare two IDA BasicBlock""" if bb1.start_ea != bb2.start_ea: diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 8edd414594..c45273e5a9 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -160,12 +160,11 @@ def extract_insn_number_features( # .text:00401145 add esp, 0Ch return - if insn.itype == idaapi.NN_xor: + if capa.features.extractors.ida.helpers.is_zxor(insn): # for pattern like: # # xor eax, eax - if insn.ops[0].type == idaapi.o_reg and insn.ops[1].type == idaapi.o_reg and insn.ops[0].reg == insn.ops[1].reg: - yield Number(0), ih.address + yield Number(0), ih.address for i, op in enumerate(insn.ops): if op.type == idaapi.o_void: @@ -390,7 +389,7 @@ def extract_insn_nzxor_characteristic_features( """ insn: idaapi.insn_t = ih.inner - if insn.itype not in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor): + if not capa.features.extractors.ida.helpers.is_xor(insn): return if 
capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2): return From 237c9ef45b7037346c6f1f2ceef2223792e2865a Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Mon, 31 Mar 2025 23:34:24 +0530 Subject: [PATCH 14/19] refactor extract_insn_number_feature to use is_zxor helper Signed-off-by: vibhatsu --- capa/features/extractors/ghidra/insn.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/capa/features/extractors/ghidra/insn.py b/capa/features/extractors/ghidra/insn.py index 9432a477d1..161a9e6c19 100644 --- a/capa/features/extractors/ghidra/insn.py +++ b/capa/features/extractors/ghidra/insn.py @@ -157,12 +157,11 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl return if insn.getMnemonicString().startswith("XOR"): - # for patern like: - # - # xor eax, eax - if insn.getNumOperands() == 2: - if insn.getOpObjects(0)[-1] == insn.getOpObjects(1)[-1]: - yield Number(0), ih.address + if capa.features.extractors.ghidra.helpers.is_zxor(insn): + # for patern like: + # + # xor eax, eax + yield Number(0), ih.address for i in range(insn.getNumOperands()): # Exceptions for LEA insn: From fa550589e1b88187b0d65845077a3d1ff6ab1bfc Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Mon, 31 Mar 2025 23:36:17 +0530 Subject: [PATCH 15/19] add is_operands_equal helper function for instruction operand comparison Signed-off-by: vibhatsu --- .../extractors/binexport2/arch/arm/helpers.py | 5 +++++ .../extractors/binexport2/arch/arm/insn.py | 9 +++------ .../extractors/binexport2/arch/intel/helpers.py | 5 +++++ .../extractors/binexport2/arch/intel/insn.py | 16 +++++----------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/capa/features/extractors/binexport2/arch/arm/helpers.py b/capa/features/extractors/binexport2/arch/arm/helpers.py index 01f1ae79cb..a51380ce32 100644 --- a/capa/features/extractors/binexport2/arch/arm/helpers.py +++ b/capa/features/extractors/binexport2/arch/arm/helpers.py @@ -20,3 
+20,8 @@ def is_stack_register_expression(be2: BinExport2, expression: BinExport2.Express return bool( expression and expression.type == BinExport2.Expression.REGISTER and expression.symbol.lower().endswith("sp") ) + + +def is_operands_equal(be2: BinExport2, instruction: BinExport2.Instruction) -> bool: + operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] + return operands[1] == operands[2] diff --git a/capa/features/extractors/binexport2/arch/arm/insn.py b/capa/features/extractors/binexport2/arch/arm/insn.py index 84abd8eccc..e1638782d8 100644 --- a/capa/features/extractors/binexport2/arch/arm/insn.py +++ b/capa/features/extractors/binexport2/arch/arm/insn.py @@ -30,7 +30,7 @@ get_operand_immediate_expression, ) from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 -from capa.features.extractors.binexport2.arch.arm.helpers import is_stack_register_expression +from capa.features.extractors.binexport2.arch.arm.helpers import is_operands_equal, is_stack_register_expression logger = logging.getLogger(__name__) @@ -54,8 +54,7 @@ def extract_insn_number_features( mnemonic: str = get_instruction_mnemonic(be2, instruction) if mnemonic == "xor": - operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] - if operands[1] == operands[2]: + if is_operands_equal(be2, instruction): # for pattern like: # # eor x0, x0, x0 @@ -147,9 +146,7 @@ def extract_insn_nzxor_characteristic_features( instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] # guaranteed to be simple int/reg operands # so we don't have to realize the tree/list. 
- operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] - - if operands[1] != operands[2]: + if not is_operands_equal(be2, instruction): yield Characteristic("nzxor"), ih.address diff --git a/capa/features/extractors/binexport2/arch/intel/helpers.py b/capa/features/extractors/binexport2/arch/intel/helpers.py index ce50607545..40c2191ba8 100644 --- a/capa/features/extractors/binexport2/arch/intel/helpers.py +++ b/capa/features/extractors/binexport2/arch/intel/helpers.py @@ -140,3 +140,8 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt raise NotImplementedError(len(expressions)) return None + + +def is_operands_equal(be2: BinExport2, instruction: BinExport2.Instruction) -> bool: + operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] + return operands[0] == operands[1] diff --git a/capa/features/extractors/binexport2/arch/intel/insn.py b/capa/features/extractors/binexport2/arch/intel/insn.py index d48de66c94..f1031b66df 100644 --- a/capa/features/extractors/binexport2/arch/intel/insn.py +++ b/capa/features/extractors/binexport2/arch/intel/insn.py @@ -29,7 +29,7 @@ get_instruction_mnemonic, ) from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 -from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOKIE_BYTES_DELTA +from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOKIE_BYTES_DELTA, is_operands_equal logger = logging.getLogger(__name__) @@ -86,11 +86,7 @@ def extract_insn_number_features( # # xor eax, eax # - instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] - operands: list[BinExport2.Operand] = [ - be2.operand[operand_index] for operand_index in instruction.operand_index - ] - if operands[0] == operands[1]: + if is_operands_equal(be2, be2.instruction[ii.instruction_index]): yield Number(0), ih.address return @@ -102,9 
+98,9 @@ def extract_insn_number_features( yield OperandNumber(match.operand_index, value), ih.address instruction_index: int = ii.instruction_index - current_instruction: BinExport2.Instruction = be2.instruction[instruction_index] + instruction: BinExport2.Instruction = be2.instruction[instruction_index] - mnemonic: str = get_instruction_mnemonic(be2, current_instruction) + mnemonic: str = get_instruction_mnemonic(be2, instruction) if mnemonic.startswith("add"): if 0 < value < MAX_STRUCTURE_SIZE: yield Offset(value), ih.address @@ -227,9 +223,7 @@ def extract_insn_nzxor_characteristic_features( instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] # guaranteed to be simple int/reg operands # so we don't have to realize the tree/list. - operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] - - if operands[0] == operands[1]: + if is_operands_equal(be2, instruction): return instruction_address: int = idx.insn_address_by_index[ii.instruction_index] From 51c16ab9dcab59e60d2fc42501a7d423dd0d78b9 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Tue, 1 Apr 2025 11:01:47 +0530 Subject: [PATCH 16/19] simplify is_zxor logic Signed-off-by: vibhatsu --- capa/features/extractors/ida/helpers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 6b85e15186..79d183e257 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -219,9 +219,7 @@ def is_operand_equal(op1: idaapi.op_t, op2: idaapi.op_t) -> bool: def is_zxor(insn: idaapi.insn_t) -> bool: - if is_xor(insn): - return is_operand_equal(insn.Op1, insn.Op2) - return False + return is_xor(insn) and is_operand_equal(insn) def is_basic_block_equal(bb1: idaapi.BasicBlock, bb2: idaapi.BasicBlock) -> bool: From e91ba10049af69be8ff310b728869b024b3b47d6 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Tue, 1 Apr 
2025 11:27:48 +0530 Subject: [PATCH 17/19] fix is_zxor to compare both operands directly Signed-off-by: vibhatsu --- capa/features/extractors/ida/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 79d183e257..9a5a41c5ee 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -219,7 +219,7 @@ def is_operand_equal(op1: idaapi.op_t, op2: idaapi.op_t) -> bool: def is_zxor(insn: idaapi.insn_t) -> bool: - return is_xor(insn) and is_operand_equal(insn) + return is_xor(insn) and is_operand_equal(insn.Op1, insn.Op2) def is_basic_block_equal(bb1: idaapi.BasicBlock, bb2: idaapi.BasicBlock) -> bool: From 46b3d4e75856f1a6ed9b65d58ba660d4edcc7b1c Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Wed, 2 Apr 2025 00:11:53 +0530 Subject: [PATCH 18/19] viv-backend: refactor is_zxor Signed-off-by: vibhatsu --- capa/features/extractors/viv/helpers.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/capa/features/extractors/viv/helpers.py b/capa/features/extractors/viv/helpers.py index c07f7e53e6..cb66eb25c6 100644 --- a/capa/features/extractors/viv/helpers.py +++ b/capa/features/extractors/viv/helpers.py @@ -36,7 +36,4 @@ def is_xor(insn: envi.Opcode): def is_zxor(insn: envi.Opcode): - if is_xor(insn): - return insn.opers[0] == insn.opers[1] - - return False + return is_xor(insn) and insn.opers[0] == insn.opers[1] From 6e3944c39cee3287e0fe8ff1d78f66cd979884c6 Mon Sep 17 00:00:00 2001 From: vibhatsu Date: Thu, 3 Apr 2025 00:12:44 +0530 Subject: [PATCH 19/19] rename is_operands_equal to are_operands_equal for consistency Signed-off-by: vibhatsu --- capa/features/extractors/binexport2/arch/arm/helpers.py | 2 +- capa/features/extractors/binexport2/arch/arm/insn.py | 6 +++--- capa/features/extractors/binexport2/arch/intel/helpers.py | 2 +- capa/features/extractors/binexport2/arch/intel/insn.py | 6 +++--- 4 files 
changed, 8 insertions(+), 8 deletions(-) diff --git a/capa/features/extractors/binexport2/arch/arm/helpers.py b/capa/features/extractors/binexport2/arch/arm/helpers.py index a51380ce32..43c9ae0241 100644 --- a/capa/features/extractors/binexport2/arch/arm/helpers.py +++ b/capa/features/extractors/binexport2/arch/arm/helpers.py @@ -22,6 +22,6 @@ def is_stack_register_expression(be2: BinExport2, expression: BinExport2.Express ) -def is_operands_equal(be2: BinExport2, instruction: BinExport2.Instruction) -> bool: +def are_operands_equal(be2: BinExport2, instruction: BinExport2.Instruction) -> bool: operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] return operands[1] == operands[2] diff --git a/capa/features/extractors/binexport2/arch/arm/insn.py b/capa/features/extractors/binexport2/arch/arm/insn.py index e1638782d8..61da4ac39e 100644 --- a/capa/features/extractors/binexport2/arch/arm/insn.py +++ b/capa/features/extractors/binexport2/arch/arm/insn.py @@ -30,7 +30,7 @@ get_operand_immediate_expression, ) from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 -from capa.features.extractors.binexport2.arch.arm.helpers import is_operands_equal, is_stack_register_expression +from capa.features.extractors.binexport2.arch.arm.helpers import are_operands_equal, is_stack_register_expression logger = logging.getLogger(__name__) @@ -54,7 +54,7 @@ def extract_insn_number_features( mnemonic: str = get_instruction_mnemonic(be2, instruction) if mnemonic == "xor": - if is_operands_equal(be2, instruction): + if are_operands_equal(be2, instruction): # for pattern like: # # eor x0, x0, x0 @@ -146,7 +146,7 @@ def extract_insn_nzxor_characteristic_features( instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] # guaranteed to be simple int/reg operands # so we don't have to realize the tree/list. 
- if not is_operands_equal(be2, instruction): + if not are_operands_equal(be2, instruction): yield Characteristic("nzxor"), ih.address diff --git a/capa/features/extractors/binexport2/arch/intel/helpers.py b/capa/features/extractors/binexport2/arch/intel/helpers.py index 40c2191ba8..cc08ec32b9 100644 --- a/capa/features/extractors/binexport2/arch/intel/helpers.py +++ b/capa/features/extractors/binexport2/arch/intel/helpers.py @@ -142,6 +142,6 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt return None -def is_operands_equal(be2: BinExport2, instruction: BinExport2.Instruction) -> bool: +def are_operands_equal(be2: BinExport2, instruction: BinExport2.Instruction) -> bool: operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] return operands[0] == operands[1] diff --git a/capa/features/extractors/binexport2/arch/intel/insn.py b/capa/features/extractors/binexport2/arch/intel/insn.py index f1031b66df..e7f0954831 100644 --- a/capa/features/extractors/binexport2/arch/intel/insn.py +++ b/capa/features/extractors/binexport2/arch/intel/insn.py @@ -29,7 +29,7 @@ get_instruction_mnemonic, ) from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 -from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOKIE_BYTES_DELTA, is_operands_equal +from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOKIE_BYTES_DELTA, are_operands_equal logger = logging.getLogger(__name__) @@ -86,7 +86,7 @@ def extract_insn_number_features( # # xor eax, eax # - if is_operands_equal(be2, be2.instruction[ii.instruction_index]): + if are_operands_equal(be2, be2.instruction[ii.instruction_index]): yield Number(0), ih.address return @@ -223,7 +223,7 @@ def extract_insn_nzxor_characteristic_features( instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] # guaranteed to be simple int/reg operands # so we don't have to realize the 
tree/list. - if is_operands_equal(be2, instruction): + if are_operands_equal(be2, instruction): return instruction_address: int = idx.insn_address_by_index[ii.instruction_index]