Skip to content

Commit 132f01d

Browse files
committed
Use capstone to implement ELF.libc_start_main_return
1 parent e841b50 commit 132f01d

File tree

3 files changed

+96
-31
lines changed

3 files changed

+96
-31
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ The table below shows which release corresponds to each branch, and what date th
7575

7676
## 5.0.0 (`dev`)
7777

78+
- [#2580][2580] Use capstone to implement `ELF.libc_start_main_return`
7879
- [#2419][2419] riscv: avoid compressed instructions (if you need compressed, use .option rvc)
7980
- [#2551][2551] Detect when kitty is being used as terminal
8081
- [#2519][2519] Drop Python 2.7 support / Require Python 3.10
@@ -94,6 +95,7 @@ The table below shows which release corresponds to each branch, and what date th
9495
- [#2575][2575] Detect when Terminator is being used as terminal
9596
- [#2578][2578] Add gnome-terminal, Alacritty, Tilix for run_in_new_terminal
9697

98+
[2580]: https://github.com/Gallopsled/pwntools/pull/2580
9799
[2419]: https://github.com/Gallopsled/pwntools/pull/2419
98100
[2551]: https://github.com/Gallopsled/pwntools/pull/2551
99101
[2519]: https://github.com/Gallopsled/pwntools/pull/2519

pwnlib/asm.py

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -388,10 +388,10 @@ def _bfdname():
388388
'sparc64' : 'elf64-sparc',
389389
}
390390

391-
if arch in bfdnames:
392-
return bfdnames[arch]
393-
else:
391+
name = bfdnames.get(arch)
392+
if not name:
394393
raise Exception("Cannot find bfd name for architecture %r" % arch)
394+
return name
395395

396396

397397
def _bfdarch():
@@ -409,10 +409,7 @@ def _bfdarch():
409409
'loongarch64': 'loongarch64'
410410
}
411411

412-
if arch in convert:
413-
return convert[arch]
414-
415-
return arch
412+
return convert.get(arch, arch)
416413

417414
def _run(cmd, stdin = None):
418415
log.debug('%s', subprocess.list2cmdline(cmd))
@@ -1015,3 +1012,52 @@ def disasm(data, vma = 0, byte = True, offset = True, instructions = True):
10151012
lines.append(line)
10161013

10171014
return re.sub(',([^ ])', r', \1', '\n'.join(lines))
1015+
1016+
@LocalContext
def get_cs_disassembler(eabi=None):
    """Build a capstone disassembler configured from the current ``context``.

    The architecture, endianness and word size are read from ``context.arch``,
    ``context.endianness`` and ``context.bits``.  The function is decorated
    with ``LocalContext``; callers are expected to override those settings
    per call with keyword arguments such as ``arch=``, ``endian=`` and
    ``bits=``.

    Arguments:
        eabi(str): Optional ABI hint.  ``'hf'`` together with an ``'arm'``
            context selects the ``'armhf'`` configuration; any other value
            is ignored.

    Returns:
        A ``capstone.Cs`` instance with ``detail`` enabled.

    Raises:
        Exception: If the architecture (or its word size) has no capstone
            configuration in the table below.
    """
    import capstone as cs

    endian = {
        'big': cs.CS_MODE_BIG_ENDIAN,
        'little': cs.CS_MODE_LITTLE_ENDIAN,
    }[context.endianness]

    # Use .get() rather than indexing: a word size capstone has no mode for
    # must end in the "unsupported" error below, not in a bare KeyError
    # raised before the architecture has even been looked at.
    width = {
        16: cs.CS_MODE_16,
        32: cs.CS_MODE_32,
        64: cs.CS_MODE_64,
    }.get(context.bits)

    # NOTE(review): some capstone releases appear to expose this constant
    # only under the older CS_ARCH_ARM64 name -- confirm against the
    # supported capstone versions.  The fallback is a no-op whenever
    # CS_ARCH_AARCH64 exists.
    aarch64 = getattr(cs, 'CS_ARCH_AARCH64', None)
    if aarch64 is None:
        aarch64 = cs.CS_ARCH_ARM64

    # Not wired up yet: ia64, sysz, m68k, xcore, tms320c64x, m680x, evm,
    # mos65xx, bpf, riscv, tricore, wasm, sh.
    params = {
        'i386': (cs.CS_ARCH_X86, width),
        'amd64': (cs.CS_ARCH_X86, width),
        'thumb': (cs.CS_ARCH_ARM, cs.CS_MODE_THUMB + endian),
        'arm': (cs.CS_ARCH_ARM, cs.CS_MODE_ARM + endian),
        'aarch64': (aarch64, cs.CS_MODE_ARM + endian),
        'armhf': (cs.CS_ARCH_ARM, cs.CS_MODE_ARM + cs.CS_MODE_THUMB + endian),
        'mips': (cs.CS_ARCH_MIPS, cs.CS_MODE_32 + endian),
        'mips64': (cs.CS_ARCH_MIPS, cs.CS_MODE_64 + endian),
        'sparc': (cs.CS_ARCH_SPARC, cs.CS_MODE_32 + endian),
        'sparc64': (cs.CS_ARCH_SPARC, cs.CS_MODE_64 + endian),
        'ppc': (cs.CS_ARCH_PPC, None if width is None else width + endian),
        'powerpc': (cs.CS_ARCH_PPC, endian + cs.CS_MODE_32),
        'powerpc64': (cs.CS_ARCH_PPC, endian + cs.CS_MODE_64),
    }

    name = context.arch
    if name == 'arm' and eabi == 'hf':
        name = 'armhf'

    # Compare against None explicitly: valid capstone arch/mode constants
    # can be 0 and would be rejected by a plain truthiness test.
    cs_arch, cs_mode = params.get(name, (None, None))
    if cs_arch is None or cs_mode is None:
        raise Exception(f"unsupported {context.arch} for capstone")

    md = cs.Cs(cs_arch, cs_mode)
    md.detail = True
    return md

pwnlib/elf/elf.py

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import re
5555
import subprocess
5656
import tempfile
57+
import capstone as cs
5758

5859
from io import BytesIO
5960

@@ -1143,6 +1144,12 @@ def _populate_kernel_version(self):
11431144

11441145
self.config['version'] = self.version
11451146

1147+
def cs_disasm(self, md: cs.Cs, address, n_bytes):
    """Disassemble ``n_bytes`` bytes of this ELF starting at ``address``.

    Arguments:
        md: Capstone disassembler used to decode the bytes.
        address(int): Address to start reading at.  On ``'arm'`` an odd
            address is rounded down by one first (presumably to strip the
            Thumb bit -- confirm).
        n_bytes(int): Number of bytes to read and hand to capstone.

    Returns:
        Whatever ``md.disasm`` returns for the bytes read, using the
        (possibly adjusted) address as the base address.
    """
    needs_alignment = self.arch == 'arm' and address & 1
    start = address - 1 if needs_alignment else address
    code = self.read(start, n_bytes)
    return md.disasm(code, start)
1152+
11461153
@property
11471154
def libc_start_main_return(self):
11481155
""":class:`int`: Address of the return address into __libc_start_main from main.
@@ -1163,56 +1170,66 @@ def libc_start_main_return(self):
11631170
if 'exit' not in self.symbols:
11641171
return 0
11651172

1173+
eabi = None
11661174
# If there's no delay slot, execution continues on the next instruction after a call.
11671175
call_return_offset = 1
11681176
if self.arch in ['arm', 'thumb']:
1169-
call_instructions = set(['blx', 'bl'])
1177+
if b'armhf' in self.linker:
1178+
eabi = 'hf'
1179+
call_instructions = set([cs.CS_GRP_CALL])
11701180
elif self.arch == 'aarch64':
1171-
call_instructions = set(['blr', 'bl'])
1181+
call_instructions = set([cs.CS_GRP_CALL])
11721182
elif self.arch in ['mips', 'mips64']:
1173-
call_instructions = set(['bal', 'jalr'])
1183+
# FIXME: `bal` was not included in CS_GRP_CALL. This is fixed on capstone v6.alpha
1184+
#call_instructions = set([cs.CS_GRP_CALL])
1185+
call_instructions = set([cs.CS_GRP_CALL, cs.CS_GRP_BRANCH_RELATIVE])
11741186
# Account for the delay slot.
11751187
call_return_offset = 2
11761188
elif self.arch in ['i386', 'amd64', 'ia64']:
1177-
call_instructions = set(['call'])
1189+
call_instructions = set([cs.CS_GRP_CALL])
11781190
else:
11791191
log.error('Unsupported architecture %s in ELF.libc_start_main_return', self.arch)
11801192
return 0
11811193

1182-
lines = self.functions['__libc_start_main'].disasm().split('\n')
1183-
exit_addr = hex(self.symbols['exit'])
1184-
calls = [(index, line) for index, line in enumerate(lines) if set(line.split()) & call_instructions]
1194+
from pwnlib.asm import get_cs_disassembler
1195+
md = get_cs_disassembler(arch=self.arch, endian=self.endian, bits=self.bits, eabi=eabi)
1196+
func = self.functions['__libc_start_main']
1197+
dis = list(self.cs_disasm(md, func.address, func.size))
11851198

1186-
def find_ret_main_addr(lines, calls):
1187-
exit_calls = [index for index, line in enumerate(calls) if exit_addr in line[1]]
1188-
if len(exit_calls) != 1:
1199+
exit_addr = self.symbols['exit']
1200+
if self.arch == 'arm' and exit_addr & 1: exit_addr -= 1
1201+
1202+
calls = [(i, x) for i, x in enumerate(dis) if call_instructions & set(x.groups)]
1203+
1204+
def find_ret_main_addr(caller_dis, calls):
1205+
call_to_main = -1
1206+
for i, insn in calls:
1207+
if insn.operands[0].imm == exit_addr: break
1208+
call_to_main = i
1209+
else:
11891210
return 0
11901211

1191-
call_to_main = calls[exit_calls[0] - 1]
1192-
return_from_main = lines[call_to_main[0] + call_return_offset].lstrip()
1193-
return_from_main = int(return_from_main[ : return_from_main.index(':') ], 16)
1194-
return return_from_main
1212+
return_from_main = caller_dis[call_to_main + call_return_offset]
1213+
return return_from_main.address
11951214

11961215
# Starting with glibc-2.34 calling `main` is split out into `__libc_start_call_main`
1197-
ret_addr = find_ret_main_addr(lines, calls)
1216+
ret_addr = find_ret_main_addr(dis, calls)
11981217
# Pre glibc-2.34 case - `main` is called directly
11991218
if ret_addr:
12001219
return ret_addr
12011220

12021221
# `__libc_start_main` -> `__libc_start_call_main` -> `main`
12031222
# Find a direct call which calls `exit` once. That's probably `__libc_start_call_main`.
1204-
direct_call_pattern = re.compile(r'['+r'|'.join(call_instructions)+r']\s+(0x[0-9a-zA-Z]+)')
1205-
for line in calls:
1206-
match = direct_call_pattern.search(line[1])
1207-
if not match:
1208-
continue
1223+
for _, insn in calls:
1224+
op = insn.operands[0]
1225+
if op.type != cs.CS_OP_IMM: continue
12091226

1210-
target_addr = int(match.group(1), 0)
1227+
target_addr = op.imm
12111228
# `__libc_start_call_main` is usually smaller than `__libc_start_main`, so
12121229
# we might disassemble a bit too much, but it's a good dynamic estimate.
1213-
callee_lines = self.disasm(target_addr, self.functions['__libc_start_main'].size).split('\n')
1214-
callee_calls = [(index, line) for index, line in enumerate(callee_lines) if set(line.split()) & call_instructions]
1215-
ret_addr = find_ret_main_addr(callee_lines, callee_calls)
1230+
callee_dis = list(self.cs_disasm(md, target_addr, func.size))
1231+
callee_calls = [(i, x) for i, x in enumerate(callee_dis) if call_instructions & set(x.groups)]
1232+
ret_addr = find_ret_main_addr(callee_dis, callee_calls)
12161233
if ret_addr:
12171234
return ret_addr
12181235
return 0

0 commit comments

Comments
 (0)