54
54
import re
55
55
import subprocess
56
56
import tempfile
57
+ import capstone as cs
57
58
58
59
from io import BytesIO
59
60
@@ -1143,6 +1144,12 @@ def _populate_kernel_version(self):
1143
1144
1144
1145
self .config ['version' ] = self .version
1145
1146
1147
def cs_disasm(self, md: cs.Cs, address, n_bytes):
    """Disassemble ``n_bytes`` bytes of this ELF starting at ``address``.

    Arguments:
        md: Capstone disassembler handle whose ``disasm`` method is used.
        address: Virtual address to start reading from.  When ``self.arch``
            is ``'arm'`` and the address is odd, it is decremented by one
            before use (presumably the low bit marks a Thumb-mode target --
            confirm against callers).
        n_bytes: Number of bytes passed to ``self.read``.

    Returns:
        Whatever ``md.disasm`` returns for the bytes read, with the
        (possibly adjusted) address as the base address.
    """
    has_odd_arm_address = self.arch == 'arm' and address & 1
    start = address - 1 if has_odd_arm_address else address

    code = self.read(start, n_bytes)
    return md.disasm(code, start)
1152
+
1146
1153
@property
1147
1154
def libc_start_main_return (self ):
1148
1155
""":class:`int`: Address of the return address into __libc_start_main from main.
@@ -1163,56 +1170,66 @@ def libc_start_main_return(self):
1163
1170
if 'exit' not in self .symbols :
1164
1171
return 0
1165
1172
1173
+ eabi = None
1166
1174
# If there's no delay slot, execution continues on the next instruction after a call.
1167
1175
call_return_offset = 1
1168
1176
if self .arch in ['arm' , 'thumb' ]:
1169
- call_instructions = set (['blx' , 'bl' ])
1177
+ if b'armhf' in self .linker :
1178
+ eabi = 'hf'
1179
+ call_instructions = set ([cs .CS_GRP_CALL ])
1170
1180
elif self .arch == 'aarch64' :
1171
- call_instructions = set (['blr' , 'bl' ])
1181
+ call_instructions = set ([cs . CS_GRP_CALL ])
1172
1182
elif self .arch in ['mips' , 'mips64' ]:
1173
- call_instructions = set (['bal' , 'jalr' ])
1183
+ # FIXME: `bal` was not included in CS_GRP_CALL. This is fixed on capstone v6.alpha
1184
+ #call_instructions = set([cs.CS_GRP_CALL])
1185
+ call_instructions = set ([cs .CS_GRP_CALL , cs .CS_GRP_BRANCH_RELATIVE ])
1174
1186
# Account for the delay slot.
1175
1187
call_return_offset = 2
1176
1188
elif self .arch in ['i386' , 'amd64' , 'ia64' ]:
1177
- call_instructions = set (['call' ])
1189
+ call_instructions = set ([cs . CS_GRP_CALL ])
1178
1190
else :
1179
1191
log .error ('Unsupported architecture %s in ELF.libc_start_main_return' , self .arch )
1180
1192
return 0
1181
1193
1182
- lines = self .functions ['__libc_start_main' ].disasm ().split ('\n ' )
1183
- exit_addr = hex (self .symbols ['exit' ])
1184
- calls = [(index , line ) for index , line in enumerate (lines ) if set (line .split ()) & call_instructions ]
1194
+ from pwnlib .asm import get_cs_disassembler
1195
+ md = get_cs_disassembler (arch = self .arch , endian = self .endian , bits = self .bits , eabi = eabi )
1196
+ func = self .functions ['__libc_start_main' ]
1197
+ dis = list (self .cs_disasm (md , func .address , func .size ))
1185
1198
1186
- def find_ret_main_addr (lines , calls ):
1187
- exit_calls = [index for index , line in enumerate (calls ) if exit_addr in line [1 ]]
1188
- if len (exit_calls ) != 1 :
1199
+ exit_addr = self .symbols ['exit' ]
1200
+ if self .arch == 'arm' and exit_addr & 1 : exit_addr -= 1
1201
+
1202
+ calls = [(i , x ) for i , x in enumerate (dis ) if call_instructions & set (x .groups )]
1203
+
1204
+ def find_ret_main_addr (caller_dis , calls ):
1205
+ call_to_main = - 1
1206
+ for i , insn in calls :
1207
+ if insn .operands [0 ].imm == exit_addr : break
1208
+ call_to_main = i
1209
+ else :
1189
1210
return 0
1190
1211
1191
- call_to_main = calls [exit_calls [0 ] - 1 ]
1192
- return_from_main = lines [call_to_main [0 ] + call_return_offset ].lstrip ()
1193
- return_from_main = int (return_from_main [ : return_from_main .index (':' ) ], 16 )
1194
- return return_from_main
1212
+ return_from_main = caller_dis [call_to_main + call_return_offset ]
1213
+ return return_from_main .address
1195
1214
1196
1215
# Starting with glibc-2.34 calling `main` is split out into `__libc_start_call_main`
1197
- ret_addr = find_ret_main_addr (lines , calls )
1216
+ ret_addr = find_ret_main_addr (dis , calls )
1198
1217
# Pre glibc-2.34 case - `main` is called directly
1199
1218
if ret_addr :
1200
1219
return ret_addr
1201
1220
1202
1221
# `__libc_start_main` -> `__libc_start_call_main` -> `main`
1203
1222
# Find a direct call which calls `exit` once. That's probably `__libc_start_call_main`.
1204
- direct_call_pattern = re .compile (r'[' + r'|' .join (call_instructions )+ r']\s+(0x[0-9a-zA-Z]+)' )
1205
- for line in calls :
1206
- match = direct_call_pattern .search (line [1 ])
1207
- if not match :
1208
- continue
1223
+ for _ , insn in calls :
1224
+ op = insn .operands [0 ]
1225
+ if op .type != cs .CS_OP_IMM : continue
1209
1226
1210
- target_addr = int ( match . group ( 1 ), 0 )
1227
+ target_addr = op . imm
1211
1228
# `__libc_start_call_main` is usually smaller than `__libc_start_main`, so
1212
1229
# we might disassemble a bit too much, but it's a good dynamic estimate.
1213
- callee_lines = self .disasm ( target_addr , self . functions [ '__libc_start_main' ]. size ). split ( ' \n ' )
1214
- callee_calls = [(index , line ) for index , line in enumerate (callee_lines ) if set (line . split ()) & call_instructions ]
1215
- ret_addr = find_ret_main_addr (callee_lines , callee_calls )
1230
+ callee_dis = list ( self .cs_disasm ( md , target_addr , func . size ))
1231
+ callee_calls = [(i , x ) for i , x in enumerate (callee_dis ) if call_instructions & set (x . groups ) ]
1232
+ ret_addr = find_ret_main_addr (callee_dis , callee_calls )
1216
1233
if ret_addr :
1217
1234
return ret_addr
1218
1235
return 0
0 commit comments