5454import re
5555import subprocess
5656import tempfile
57+ import capstone as cs
5758
5859from io import BytesIO
5960
@@ -1143,6 +1144,12 @@ def _populate_kernel_version(self):
11431144
11441145 self .config ['version' ] = self .version
11451146
def cs_disasm(self, md: cs.Cs, address, n_bytes):
    """Disassemble ``n_bytes`` bytes of this file starting at ``address``.

    The bytes are fetched with :meth:`read` and handed to the supplied
    Capstone disassembler ``md``.

    Arguments:
        md: Disassembler object whose ``disasm(code, address)`` method
            performs the decoding.
        address: Address of the first byte to disassemble.
        n_bytes: Number of bytes to read and disassemble.

    Returns:
        Whatever ``md.disasm`` returns for the bytes that were read.
    """
    start = address
    # On 'arm', an odd address is rounded down by one before reading
    # (presumably the low bit is the Thumb marker -- TODO confirm).
    if self.arch == 'arm' and start & 1:
        start -= 1

    disassemble = md.disasm
    code = self.read(start, n_bytes)
    return disassemble(code, start)
1152+
11461153 @property
11471154 def libc_start_main_return (self ):
11481155 """:class:`int`: Address of the return address into __libc_start_main from main.
@@ -1163,56 +1170,66 @@ def libc_start_main_return(self):
11631170 if 'exit' not in self .symbols :
11641171 return 0
11651172
1173+ eabi = None
11661174 # If there's no delay slot, execution continues on the next instruction after a call.
11671175 call_return_offset = 1
11681176 if self .arch in ['arm' , 'thumb' ]:
1169- call_instructions = set (['blx' , 'bl' ])
1177+ if b'armhf' in self .linker :
1178+ eabi = 'hf'
1179+ call_instructions = set ([cs .CS_GRP_CALL ])
11701180 elif self .arch == 'aarch64' :
1171- call_instructions = set (['blr' , 'bl' ])
1181+ call_instructions = set ([cs . CS_GRP_CALL ])
11721182 elif self .arch in ['mips' , 'mips64' ]:
1173- call_instructions = set (['bal' , 'jalr' ])
1183+ # FIXME: `bal` was not included in CS_GRP_CALL. This is fixed on capstone v6.alpha
1184+ #call_instructions = set([cs.CS_GRP_CALL])
1185+ call_instructions = set ([cs .CS_GRP_CALL , cs .CS_GRP_BRANCH_RELATIVE ])
11741186 # Account for the delay slot.
11751187 call_return_offset = 2
11761188 elif self .arch in ['i386' , 'amd64' , 'ia64' ]:
1177- call_instructions = set (['call' ])
1189+ call_instructions = set ([cs . CS_GRP_CALL ])
11781190 else :
11791191 log .error ('Unsupported architecture %s in ELF.libc_start_main_return' , self .arch )
11801192 return 0
11811193
1182- lines = self .functions ['__libc_start_main' ].disasm ().split ('\n ' )
1183- exit_addr = hex (self .symbols ['exit' ])
1184- calls = [(index , line ) for index , line in enumerate (lines ) if set (line .split ()) & call_instructions ]
1194+ from pwnlib .asm import get_cs_disassembler
1195+ md = get_cs_disassembler (arch = self .arch , endian = self .endian , bits = self .bits , eabi = eabi )
1196+ func = self .functions ['__libc_start_main' ]
1197+ dis = list (self .cs_disasm (md , func .address , func .size ))
11851198
1186- def find_ret_main_addr (lines , calls ):
1187- exit_calls = [index for index , line in enumerate (calls ) if exit_addr in line [1 ]]
1188- if len (exit_calls ) != 1 :
1199+ exit_addr = self .symbols ['exit' ]
1200+ if self .arch == 'arm' and exit_addr & 1 : exit_addr -= 1
1201+
1202+ calls = [(i , x ) for i , x in enumerate (dis ) if call_instructions & set (x .groups )]
1203+
1204+ def find_ret_main_addr (caller_dis , calls ):
1205+ call_to_main = - 1
1206+ for i , insn in calls :
1207+ if insn .operands [0 ].imm == exit_addr : break
1208+ call_to_main = i
1209+ else :
11891210 return 0
11901211
1191- call_to_main = calls [exit_calls [0 ] - 1 ]
1192- return_from_main = lines [call_to_main [0 ] + call_return_offset ].lstrip ()
1193- return_from_main = int (return_from_main [ : return_from_main .index (':' ) ], 16 )
1194- return return_from_main
1212+ return_from_main = caller_dis [call_to_main + call_return_offset ]
1213+ return return_from_main .address
11951214
11961215 # Starting with glibc-2.34 calling `main` is split out into `__libc_start_call_main`
1197- ret_addr = find_ret_main_addr (lines , calls )
1216+ ret_addr = find_ret_main_addr (dis , calls )
11981217 # Pre glibc-2.34 case - `main` is called directly
11991218 if ret_addr :
12001219 return ret_addr
12011220
12021221 # `__libc_start_main` -> `__libc_start_call_main` -> `main`
12031222 # Find a direct call which calls `exit` once. That's probably `__libc_start_call_main`.
1204- direct_call_pattern = re .compile (r'[' + r'|' .join (call_instructions )+ r']\s+(0x[0-9a-zA-Z]+)' )
1205- for line in calls :
1206- match = direct_call_pattern .search (line [1 ])
1207- if not match :
1208- continue
1223+ for _ , insn in calls :
1224+ op = insn .operands [0 ]
1225+ if op .type != cs .CS_OP_IMM : continue
12091226
1210- target_addr = int ( match . group ( 1 ), 0 )
1227+ target_addr = op . imm
12111228 # `__libc_start_call_main` is usually smaller than `__libc_start_main`, so
12121229 # we might disassemble a bit too much, but it's a good dynamic estimate.
1213- callee_lines = self .disasm ( target_addr , self . functions [ '__libc_start_main' ]. size ). split ( ' \n ' )
1214- callee_calls = [(index , line ) for index , line in enumerate (callee_lines ) if set (line . split ()) & call_instructions ]
1215- ret_addr = find_ret_main_addr (callee_lines , callee_calls )
1230+ callee_dis = list ( self .cs_disasm ( md , target_addr , func . size ))
1231+ callee_calls = [(i , x ) for i , x in enumerate (callee_dis ) if call_instructions & set (x . groups ) ]
1232+ ret_addr = find_ret_main_addr (callee_dis , callee_calls )
12161233 if ret_addr :
12171234 return ret_addr
12181235 return 0
0 commit comments