From 132f01dcbe3ccd657df53730e7202fd74c6740a2 Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Fri, 25 Apr 2025 00:10:01 +0700 Subject: [PATCH 1/5] Use capstone to implement ELF.libc_start_main_return --- CHANGELOG.md | 2 ++ pwnlib/asm.py | 60 ++++++++++++++++++++++++++++++++++++++----- pwnlib/elf/elf.py | 65 ++++++++++++++++++++++++++++++----------------- 3 files changed, 96 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0dd23f2a5..bb72fbbf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,6 +75,7 @@ The table below shows which release corresponds to each branch, and what date th ## 5.0.0 (`dev`) +- [#2580][2580] Use capstone to implement `ELF.libc_start_main_return` - [#2419][2419] riscv: avoid compressed instructions (if you need compressed, use .option rvc) - [#2551][2551] Detect when kitty is being used as terminal - [#2519][2519] Drop Python 2.7 support / Require Python 3.10 @@ -94,6 +95,7 @@ The table below shows which release corresponds to each branch, and what date th - [#2575][2575] Detect when Terminator is being used as terminal - [#2578][2578] Add gnome-terminal, Alacritty, Ttilix for run_in_new_terminal +[2580]: https://github.com/Gallopsled/pwntools/pull/2580 [2419]: https://github.com/Gallopsled/pwntools/pull/2419 [2551]: https://github.com/Gallopsled/pwntools/pull/2551 [2519]: https://github.com/Gallopsled/pwntools/pull/2519 diff --git a/pwnlib/asm.py b/pwnlib/asm.py index 8716e8976..2c1155cad 100644 --- a/pwnlib/asm.py +++ b/pwnlib/asm.py @@ -388,10 +388,10 @@ def _bfdname(): 'sparc64' : 'elf64-sparc', } - if arch in bfdnames: - return bfdnames[arch] - else: + name = bfdnames.get(arch) + if not name: raise Exception("Cannot find bfd name for architecture %r" % arch) + return name def _bfdarch(): @@ -409,10 +409,7 @@ def _bfdarch(): 'loongarch64': 'loongarch64' } - if arch in convert: - return convert[arch] - - return arch + return convert.get(arch, arch) def _run(cmd, stdin = None): log.debug('%s', 
subprocess.list2cmdline(cmd)) @@ -1015,3 +1012,52 @@ def disasm(data, vma = 0, byte = True, offset = True, instructions = True): lines.append(line) return re.sub(',([^ ])', r', \1', '\n'.join(lines)) + +@LocalContext +def get_cs_disassembler(eabi=None): + import capstone as cs + E = { + 'big': cs.CS_MODE_BIG_ENDIAN, + 'little': cs.CS_MODE_LITTLE_ENDIAN, + }[context.endianness] + + B = {16: cs.CS_MODE_16, 32: cs.CS_MODE_32, 64: cs.CS_MODE_64}[context.bits] + + params = { + 'i386' : (cs.CS_ARCH_X86, B), + 'amd64' : (cs.CS_ARCH_X86, B), + 'thumb' : (cs.CS_ARCH_ARM, cs.CS_MODE_THUMB + E), + 'arm' : (cs.CS_ARCH_ARM, cs.CS_MODE_ARM + E), + 'aarch64': (cs.CS_ARCH_AARCH64, cs.CS_MODE_ARM + E), + 'armhf' : (cs.CS_ARCH_ARM, cs.CS_MODE_ARM + cs.CS_MODE_THUMB + E), + 'mips' : (cs.CS_ARCH_MIPS, cs.CS_MODE_32 + E), + 'mips64' : (cs.CS_ARCH_MIPS, cs.CS_MODE_64 + E), + 'sparc': (cs.CS_ARCH_SPARC, cs.CS_MODE_32 + E), + 'sparc64': (cs.CS_ARCH_SPARC, cs.CS_MODE_64 + E), + 'ppc': (cs.CS_ARCH_PPC, B + E), + 'powerpc': (cs.CS_ARCH_PPC, E + cs.CS_MODE_32), + 'powerpc64': (cs.CS_ARCH_PPC, E + cs.CS_MODE_64), + #'ia64': None, + #'sysz': cs.CS_ARCH_SYSZ, + #'m68k': cs.CS_ARCH_M68K, + #'xcore': cs.CS_ARCH_XCORE, + #'tms320c64x': cs.CS_ARCH_TMS320C64X, + #'m680x': cs.CS_ARCH_M680X, + #'evm': cs.CS_ARCH_EVM, + #'mos65xx': cs.CS_ARCH_MOS65XX, + #'bpf': cs.CS_ARCH_BPF, + #'riscv': cs.CS_ARCH_RISCV, + #'tricore': cs.CS_ARCH_TRICORE, + #'wasm': cs.CS_ARCH_WASM, + #'sh': cs.CS_ARCH_SH, + } + + arch = context.arch + if arch == 'arm' and eabi == 'hf': arch = 'armhf' + param = params.get(arch) + if not param: + raise Exception(f"unsupported {context.arch} for capstone") + arch, mode = param + md = cs.Cs(arch, mode) + md.detail = True + return md diff --git a/pwnlib/elf/elf.py b/pwnlib/elf/elf.py index ba07a2cb2..f13a320af 100644 --- a/pwnlib/elf/elf.py +++ b/pwnlib/elf/elf.py @@ -54,6 +54,7 @@ import re import subprocess import tempfile +import capstone as cs from io import BytesIO @@ -1143,6 +1144,12 
@@ def _populate_kernel_version(self): self.config['version'] = self.version + def cs_disasm(self, md: cs.Cs, address, n_bytes): + if self.arch == 'arm' and address & 1: + address -= 1 + + return md.disasm(self.read(address, n_bytes), address) + @property def libc_start_main_return(self): """:class:`int`: Address of the return address into __libc_start_main from main. @@ -1163,56 +1170,66 @@ def libc_start_main_return(self): if 'exit' not in self.symbols: return 0 + eabi = None # If there's no delay slot, execution continues on the next instruction after a call. call_return_offset = 1 if self.arch in ['arm', 'thumb']: - call_instructions = set(['blx', 'bl']) + if b'armhf' in self.linker: + eabi = 'hf' + call_instructions = set([cs.CS_GRP_CALL]) elif self.arch == 'aarch64': - call_instructions = set(['blr', 'bl']) + call_instructions = set([cs.CS_GRP_CALL]) elif self.arch in ['mips', 'mips64']: - call_instructions = set(['bal', 'jalr']) + # FIXME: `bal` was not included in CS_GRP_CALL. This is fixed on capstone v6.alpha + #call_instructions = set([cs.CS_GRP_CALL]) + call_instructions = set([cs.CS_GRP_CALL, cs.CS_GRP_BRANCH_RELATIVE]) # Account for the delay slot. 
call_return_offset = 2 elif self.arch in ['i386', 'amd64', 'ia64']: - call_instructions = set(['call']) + call_instructions = set([cs.CS_GRP_CALL]) else: log.error('Unsupported architecture %s in ELF.libc_start_main_return', self.arch) return 0 - lines = self.functions['__libc_start_main'].disasm().split('\n') - exit_addr = hex(self.symbols['exit']) - calls = [(index, line) for index, line in enumerate(lines) if set(line.split()) & call_instructions] + from pwnlib.asm import get_cs_disassembler + md = get_cs_disassembler(arch=self.arch, endian=self.endian, bits=self.bits, eabi=eabi) + func = self.functions['__libc_start_main'] + dis = list(self.cs_disasm(md, func.address, func.size)) - def find_ret_main_addr(lines, calls): - exit_calls = [index for index, line in enumerate(calls) if exit_addr in line[1]] - if len(exit_calls) != 1: + exit_addr = self.symbols['exit'] + if self.arch == 'arm' and exit_addr & 1: exit_addr -= 1 + + calls = [(i, x) for i, x in enumerate(dis) if call_instructions & set(x.groups)] + + def find_ret_main_addr(caller_dis, calls): + call_to_main = -1 + for i, insn in calls: + if insn.operands[0].imm == exit_addr: break + call_to_main = i + else: return 0 - call_to_main = calls[exit_calls[0] - 1] - return_from_main = lines[call_to_main[0] + call_return_offset].lstrip() - return_from_main = int(return_from_main[ : return_from_main.index(':') ], 16) - return return_from_main + return_from_main = caller_dis[call_to_main + call_return_offset] + return return_from_main.address # Starting with glibc-2.34 calling `main` is split out into `__libc_start_call_main` - ret_addr = find_ret_main_addr(lines, calls) + ret_addr = find_ret_main_addr(dis, calls) # Pre glibc-2.34 case - `main` is called directly if ret_addr: return ret_addr # `__libc_start_main` -> `__libc_start_call_main` -> `main` # Find a direct call which calls `exit` once. That's probably `__libc_start_call_main`. 
- direct_call_pattern = re.compile(r'['+r'|'.join(call_instructions)+r']\s+(0x[0-9a-zA-Z]+)') - for line in calls: - match = direct_call_pattern.search(line[1]) - if not match: - continue + for _, insn in calls: + op = insn.operands[0] + if op.type != cs.CS_OP_IMM: continue - target_addr = int(match.group(1), 0) + target_addr = op.imm # `__libc_start_call_main` is usually smaller than `__libc_start_main`, so # we might disassemble a bit too much, but it's a good dynamic estimate. - callee_lines = self.disasm(target_addr, self.functions['__libc_start_main'].size).split('\n') - callee_calls = [(index, line) for index, line in enumerate(callee_lines) if set(line.split()) & call_instructions] - ret_addr = find_ret_main_addr(callee_lines, callee_calls) + callee_dis = list(self.cs_disasm(md, target_addr, func.size)) + callee_calls = [(i, x) for i, x in enumerate(callee_dis) if call_instructions & set(x.groups)] + ret_addr = find_ret_main_addr(callee_dis, callee_calls) if ret_addr: return ret_addr return 0 From f1eaafb05e3026d02a036c5a1780bd52359e0735 Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Fri, 25 Apr 2025 08:38:19 +0700 Subject: [PATCH 2/5] add support for ppc64el and s390 --- pwnlib/asm.py | 2 +- pwnlib/context/__init__.py | 1 + pwnlib/elf/elf.py | 50 ++++++++++++++++++++++++++------------ 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/pwnlib/asm.py b/pwnlib/asm.py index 2c1155cad..401425c6b 100644 --- a/pwnlib/asm.py +++ b/pwnlib/asm.py @@ -1037,8 +1037,8 @@ def get_cs_disassembler(eabi=None): 'ppc': (cs.CS_ARCH_PPC, B + E), 'powerpc': (cs.CS_ARCH_PPC, E + cs.CS_MODE_32), 'powerpc64': (cs.CS_ARCH_PPC, E + cs.CS_MODE_64), + 'em_s390': (cs.CS_ARCH_SYSTEMZ, cs.CS_MODE_BIG_ENDIAN + cs.CS_MODE_64), #'ia64': None, - #'sysz': cs.CS_ARCH_SYSZ, #'m68k': cs.CS_ARCH_M68K, #'xcore': cs.CS_ARCH_XCORE, #'tms320c64x': cs.CS_ARCH_TMS320C64X, diff --git a/pwnlib/context/__init__.py b/pwnlib/context/__init__.py index 4ffed2cf0..a1b02f0e4 100644 --- 
a/pwnlib/context/__init__.py +++ b/pwnlib/context/__init__.py @@ -427,6 +427,7 @@ class ContextType(object): 's390': big_32, 'sparc': big_32, 'sparc64': big_64, + 'em_s390': big_64, 'thumb': little_32, 'vax': little_32, 'none': {}, diff --git a/pwnlib/elf/elf.py b/pwnlib/elf/elf.py index f13a320af..7c399bc79 100644 --- a/pwnlib/elf/elf.py +++ b/pwnlib/elf/elf.py @@ -1170,41 +1170,57 @@ def libc_start_main_return(self): if 'exit' not in self.symbols: return 0 + func = self.functions['__libc_start_main'] + exit_addr = self.symbols['exit'] eabi = None + # `__libc_start_call_main` is usually smaller than `__libc_start_main`, + # (except for powerpc which uses a bigger `generic_start_main`), so + # we might disassemble a bit too much, but it's a good dynamic estimate. + callee_size = func.size + # most arch's call instruction has the first operands as an intermidiate, except s390 + imm_index = 0 + # If there's no delay slot, execution continues on the next instruction after a call. call_return_offset = 1 + call_instructions = set([cs.CS_GRP_CALL]) if self.arch in ['arm', 'thumb']: if b'armhf' in self.linker: eabi = 'hf' - call_instructions = set([cs.CS_GRP_CALL]) + if exit_addr & 1: exit_addr -= 1 elif self.arch == 'aarch64': - call_instructions = set([cs.CS_GRP_CALL]) + pass elif self.arch in ['mips', 'mips64']: # FIXME: `bal` was not included in CS_GRP_CALL. This is fixed on capstone v6.alpha - #call_instructions = set([cs.CS_GRP_CALL]) - call_instructions = set([cs.CS_GRP_CALL, cs.CS_GRP_BRANCH_RELATIVE]) + call_instructions = call_instructions.add(cs.CS_GRP_BRANCH_RELATIVE) # Account for the delay slot. 
call_return_offset = 2 elif self.arch in ['i386', 'amd64', 'ia64']: - call_instructions = set([cs.CS_GRP_CALL]) + pass + elif self.arch in ['ppc', 'powerpc', 'powerpc64']: + callee_size *= 2 + if exit_addr & 1 == 0: + # powepc often jumps to the local entry point after TOC setup + exit_addr += 8 + pass + elif self.arch in ['em_s390', 's390']: + imm_index = 1 + pass else: log.error('Unsupported architecture %s in ELF.libc_start_main_return', self.arch) return 0 from pwnlib.asm import get_cs_disassembler md = get_cs_disassembler(arch=self.arch, endian=self.endian, bits=self.bits, eabi=eabi) - func = self.functions['__libc_start_main'] dis = list(self.cs_disasm(md, func.address, func.size)) - exit_addr = self.symbols['exit'] - if self.arch == 'arm' and exit_addr & 1: exit_addr -= 1 - - calls = [(i, x) for i, x in enumerate(dis) if call_instructions & set(x.groups)] + filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if call_instructions & set(x.groups)) + calls = list(filter_calls(dis)) def find_ret_main_addr(caller_dis, calls): call_to_main = -1 for i, insn in calls: - if insn.operands[0].imm == exit_addr: break + if cs.CS_GRP_CALL in insn.groups and insn.operands[imm_index].imm == exit_addr: + break call_to_main = i else: return 0 @@ -1218,17 +1234,19 @@ def find_ret_main_addr(caller_dis, calls): if ret_addr: return ret_addr + if self.arch in ['ppc', 'powerpc', 'powerpc64']: + filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if set([x.mnemonic]) & set(['bctrl', 'bl'])) + # `__libc_start_main` -> `__libc_start_call_main` -> `main` # Find a direct call which calls `exit` once. That's probably `__libc_start_call_main`. for _, insn in calls: - op = insn.operands[0] + op = insn.operands[imm_index] if op.type != cs.CS_OP_IMM: continue target_addr = op.imm - # `__libc_start_call_main` is usually smaller than `__libc_start_main`, so - # we might disassemble a bit too much, but it's a good dynamic estimate. 
- callee_dis = list(self.cs_disasm(md, target_addr, func.size)) - callee_calls = [(i, x) for i, x in enumerate(callee_dis) if call_instructions & set(x.groups)] + callee_dis = list(self.cs_disasm(md, target_addr, callee_size)) + callee_calls = filter_calls(callee_dis) + ret_addr = find_ret_main_addr(callee_dis, callee_calls) if ret_addr: return ret_addr From 82cbe056fb16c7ca82cec14d47780edeb1c5b508 Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Fri, 25 Apr 2025 08:55:47 +0700 Subject: [PATCH 3/5] why armhf --- pwnlib/asm.py | 6 +++--- pwnlib/context/__init__.py | 1 + pwnlib/elf/elf.py | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pwnlib/asm.py b/pwnlib/asm.py index 401425c6b..93cd6fcbc 100644 --- a/pwnlib/asm.py +++ b/pwnlib/asm.py @@ -1029,12 +1029,12 @@ def get_cs_disassembler(eabi=None): 'thumb' : (cs.CS_ARCH_ARM, cs.CS_MODE_THUMB + E), 'arm' : (cs.CS_ARCH_ARM, cs.CS_MODE_ARM + E), 'aarch64': (cs.CS_ARCH_AARCH64, cs.CS_MODE_ARM + E), - 'armhf' : (cs.CS_ARCH_ARM, cs.CS_MODE_ARM + cs.CS_MODE_THUMB + E), + 'armhf' : (cs.CS_ARCH_ARM, cs.CS_MODE_THUMB + E), 'mips' : (cs.CS_ARCH_MIPS, cs.CS_MODE_32 + E), 'mips64' : (cs.CS_ARCH_MIPS, cs.CS_MODE_64 + E), - 'sparc': (cs.CS_ARCH_SPARC, cs.CS_MODE_32 + E), + 'sparc' : (cs.CS_ARCH_SPARC, cs.CS_MODE_32 + E), 'sparc64': (cs.CS_ARCH_SPARC, cs.CS_MODE_64 + E), - 'ppc': (cs.CS_ARCH_PPC, B + E), + 'ppc' : (cs.CS_ARCH_PPC, B + E), 'powerpc': (cs.CS_ARCH_PPC, E + cs.CS_MODE_32), 'powerpc64': (cs.CS_ARCH_PPC, E + cs.CS_MODE_64), 'em_s390': (cs.CS_ARCH_SYSTEMZ, cs.CS_MODE_BIG_ENDIAN + cs.CS_MODE_64), diff --git a/pwnlib/context/__init__.py b/pwnlib/context/__init__.py index a1b02f0e4..cf389fb10 100644 --- a/pwnlib/context/__init__.py +++ b/pwnlib/context/__init__.py @@ -412,6 +412,7 @@ class ContextType(object): 'avr': little_8, 'amd64': little_64, 'arm': little_32, + 'armhf': little_32, 'cris': little_32, 'i386': little_32, 'ia64': big_64, diff --git a/pwnlib/elf/elf.py b/pwnlib/elf/elf.py index 
7c399bc79..368bf9b70 100644 --- a/pwnlib/elf/elf.py +++ b/pwnlib/elf/elf.py @@ -1172,19 +1172,20 @@ def libc_start_main_return(self): func = self.functions['__libc_start_main'] exit_addr = self.symbols['exit'] - eabi = None # `__libc_start_call_main` is usually smaller than `__libc_start_main`, # (except for powerpc which uses a bigger `generic_start_main`), so # we might disassemble a bit too much, but it's a good dynamic estimate. callee_size = func.size # most arch's call instruction has the first operands as an intermidiate, except s390 imm_index = 0 + eabi = None # If there's no delay slot, execution continues on the next instruction after a call. call_return_offset = 1 call_instructions = set([cs.CS_GRP_CALL]) if self.arch in ['arm', 'thumb']: if b'armhf' in self.linker: + # FIXME: I have no idea why setting self.arch = 'armhf' does not work eabi = 'hf' if exit_addr & 1: exit_addr -= 1 elif self.arch == 'aarch64': From a225ecf73686690435431c636c85a0d4ea6f40b0 Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Fri, 25 Apr 2025 09:05:26 +0700 Subject: [PATCH 4/5] old capstone --- pwnlib/asm.py | 14 ++++++++++++-- pwnlib/elf/elf.py | 12 +++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/pwnlib/asm.py b/pwnlib/asm.py index 93cd6fcbc..5ad59caa5 100644 --- a/pwnlib/asm.py +++ b/pwnlib/asm.py @@ -1023,12 +1023,22 @@ def get_cs_disassembler(eabi=None): B = {16: cs.CS_MODE_16, 32: cs.CS_MODE_32, 64: cs.CS_MODE_64}[context.bits] + try: + CS_ARCH_AARCH64 = cs.CS_ARCH_AARCH64 + except Exception: + CS_ARCH_AARCH64 = cs.CS_ARCH_ARM64 + + try: + CS_ARCH_SYSTEMZ = cs.CS_ARCH_SYSTEMZ + except Exception: + CS_ARCH_SYSTEMZ = cs.CS_ARCH_SYSZ + params = { 'i386' : (cs.CS_ARCH_X86, B), 'amd64' : (cs.CS_ARCH_X86, B), 'thumb' : (cs.CS_ARCH_ARM, cs.CS_MODE_THUMB + E), 'arm' : (cs.CS_ARCH_ARM, cs.CS_MODE_ARM + E), - 'aarch64': (cs.CS_ARCH_AARCH64, cs.CS_MODE_ARM + E), + 'aarch64': (CS_ARCH_AARCH64, cs.CS_MODE_ARM + E), 'armhf' : (cs.CS_ARCH_ARM, cs.CS_MODE_THUMB + 
E), 'mips' : (cs.CS_ARCH_MIPS, cs.CS_MODE_32 + E), 'mips64' : (cs.CS_ARCH_MIPS, cs.CS_MODE_64 + E), @@ -1037,7 +1047,7 @@ def get_cs_disassembler(eabi=None): 'ppc' : (cs.CS_ARCH_PPC, B + E), 'powerpc': (cs.CS_ARCH_PPC, E + cs.CS_MODE_32), 'powerpc64': (cs.CS_ARCH_PPC, E + cs.CS_MODE_64), - 'em_s390': (cs.CS_ARCH_SYSTEMZ, cs.CS_MODE_BIG_ENDIAN + cs.CS_MODE_64), + 'em_s390': (CS_ARCH_SYSTEMZ, cs.CS_MODE_BIG_ENDIAN + cs.CS_MODE_64), #'ia64': None, #'m68k': cs.CS_ARCH_M68K, #'xcore': cs.CS_ARCH_XCORE, diff --git a/pwnlib/elf/elf.py b/pwnlib/elf/elf.py index 368bf9b70..187e51017 100644 --- a/pwnlib/elf/elf.py +++ b/pwnlib/elf/elf.py @@ -1191,8 +1191,6 @@ def libc_start_main_return(self): elif self.arch == 'aarch64': pass elif self.arch in ['mips', 'mips64']: - # FIXME: `bal` was not included in CS_GRP_CALL. This is fixed on capstone v6.alpha - call_instructions = call_instructions.add(cs.CS_GRP_BRANCH_RELATIVE) # Account for the delay slot. call_return_offset = 2 elif self.arch in ['i386', 'amd64', 'ia64']: @@ -1215,6 +1213,13 @@ def libc_start_main_return(self): dis = list(self.cs_disasm(md, func.address, func.size)) filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if call_instructions & set(x.groups)) + + if self.arch in ['ppc', 'powerpc', 'powerpc64']: + filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if set([x.mnemonic]) & set(['bctrl', 'bl'])) + # FIXME: `bal` was not included in CS_GRP_CALL. 
This is fixed on capstone v6.alpha + elif self.arch in ['mips', 'mips64']: + filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if set([x.mnemonic]) & set(['bal', 'jalr'])) + calls = list(filter_calls(dis)) def find_ret_main_addr(caller_dis, calls): @@ -1235,9 +1240,6 @@ def find_ret_main_addr(caller_dis, calls): if ret_addr: return ret_addr - if self.arch in ['ppc', 'powerpc', 'powerpc64']: - filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if set([x.mnemonic]) & set(['bctrl', 'bl'])) - # `__libc_start_main` -> `__libc_start_call_main` -> `main` # Find a direct call which calls `exit` once. That's probably `__libc_start_call_main`. for _, insn in calls: From f1f1b05265261d7c7a27b834046f459859c61009 Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Fri, 25 Apr 2025 09:18:16 +0700 Subject: [PATCH 5/5] simplify code --- pwnlib/elf/elf.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/pwnlib/elf/elf.py b/pwnlib/elf/elf.py index 187e51017..b3a0c844c 100644 --- a/pwnlib/elf/elf.py +++ b/pwnlib/elf/elf.py @@ -1164,14 +1164,11 @@ def libc_start_main_return(self): to list all calls inside __libc_start_main, find the call to exit after the call to main and select the previous call. """ - if '__libc_start_main' not in self.functions: + func = self.functions.get('__libc_start_main') + exit_addr = self.symbols.get('exit') + if not (func and exit_addr): return 0 - if 'exit' not in self.symbols: - return 0 - - func = self.functions['__libc_start_main'] - exit_addr = self.symbols['exit'] # `__libc_start_call_main` is usually smaller than `__libc_start_main`, # (except for powerpc which uses a bigger `generic_start_main`), so # we might disassemble a bit too much, but it's a good dynamic estimate. 
@@ -1184,9 +1181,8 @@ def libc_start_main_return(self): call_return_offset = 1 call_instructions = set([cs.CS_GRP_CALL]) if self.arch in ['arm', 'thumb']: - if b'armhf' in self.linker: - # FIXME: I have no idea why setting self.arch = 'armhf' does not work - eabi = 'hf' + # FIXME: I have no idea why setting self.arch = 'armhf' does not work + if b'armhf' in self.linker: eabi = 'hf' if exit_addr & 1: exit_addr -= 1 elif self.arch == 'aarch64': pass @@ -1197,9 +1193,8 @@ def libc_start_main_return(self): pass elif self.arch in ['ppc', 'powerpc', 'powerpc64']: callee_size *= 2 - if exit_addr & 1 == 0: - # powepc often jumps to the local entry point after TOC setup - exit_addr += 8 + # powerpc often jumps to the local entry point after TOC setup + if exit_addr & 1 == 0: exit_addr += 8 pass elif self.arch in ['em_s390', 's390']: imm_index = 1 @@ -1215,10 +1210,10 @@ def libc_start_main_return(self): filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if call_instructions & set(x.groups)) if self.arch in ['ppc', 'powerpc', 'powerpc64']: - filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if set([x.mnemonic]) & set(['bctrl', 'bl'])) + filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if x.mnemonic in ['bctrl', 'bl']) # FIXME: `bal` was not included in CS_GRP_CALL. This is fixed on capstone v6.alpha elif self.arch in ['mips', 'mips64']: - filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if set([x.mnemonic]) & set(['bal', 'jalr'])) + filter_calls = lambda dis: ((i, x) for i, x in enumerate(dis) if x.mnemonic in ['bal', 'jalr']) calls = list(filter_calls(dis))