Merge pull request #891 from elicn/misc_improv

xwings · web-flow · commit 17d58984e3d3 · 2021-08-21T15:21:42.000+08:00
Re-implemented Linux crackme example
diff --git a/examples/crackme_x86_linux.py b/examples/crackme_x86_linux.py
@@ -6,91 +6,129 @@
 import sys
 sys.path.append("..")
 
-import os
+import string
+from typing import TextIO
+
 from qiling import Qiling
+from qiling.os.posix import stat
+
+ROOTFS = r"rootfs/x86_linux"
 
-class MyPipe():
+class MyPipe(TextIO):
     def __init__(self):
-        self.buf = b''
+        self.buf = bytearray()
 
-    def write(self, s):
-        self.buf += s
+    def write(self, s: bytes):
+        self.buf.extend(s)
 
-    def read(self, size):
-        if size <= len(self.buf):
-            ret = self.buf[: size]
-            self.buf = self.buf[size:]
-        else:
-            ret = self.buf
-            self.buf = ''
-        return ret
+    def read(self, size: int) -> bytes:
+        ret = self.buf[:size]
+        self.buf = self.buf[size:]
+
+        return bytes(ret)
 
-    def fileno(self):
-        return 0
+    def fileno(self) -> int:
+        return sys.stdin.fileno()
 
-    def show(self):
-        pass
+    def fstat(self):
+        return stat.Fstat(self.fileno())
 
-    def clear(self):
-        pass
+class Solver:
+    def __init__(self, invalid: bytes):
+        # create a silent qiling instance
+        self.ql = Qiling([rf"{ROOTFS}/bin/crackme_linux"], ROOTFS,
+            console=False,      # thwart qiling logger output
+            stdin=MyPipe(),     # take over the input to the program using a fake stdin
+            stdout=sys.stdout)  # thwart program output
 
-    def flush(self):
-        pass
+        # execute program until it reaches the 'main' function
+        self.ql.run(end=0x0804851b)
 
-    def close(self):
-        self.outpipe.close()
+        # record replay starting and ending points.
+        #
+        # since the emulation halted upon entering 'main', its return address is there on
+        # the stack. we use it to limit the emulation till function returns
+        self.replay_starts = self.ql.reg.arch_pc
+        self.replay_ends = self.ql.stack_read(0)
 
-    def fstat(self):
-        return os.fstat(sys.stdin.fileno())
+        # instead of restarting the whole program every time a new flag character is guessed,
+        # we will restore its state to the latest point possible, fast-forwarding a good
+        # amount of start-up code that is not affected by the input.
+        #
+        # here we save the state just when 'main' is about to be called so we could use it
+        # to jumpstart the initialization part and get to 'main' immediately
+        self.jumpstart = self.ql.save() or {}
+
+        # calibrate the replay instruction count by running the code with an invalid input
+        # first. the instruction count returned from the calibration process will be then
+        # used as a baseline for subsequent replays
+        self.best_icount = self.__run(invalid)
+
+    def __run(self, input: bytes) -> int:
+        icount = [0]
 
-def instruction_count(ql: Qiling, address: int, size: int, user_data):
-    user_data[0] += 1
+        def __count_instructions(ql: Qiling, address: int, size: int):
+            icount[0] += 1
 
-def my__llseek(ql, *args, **kw):
-    pass
+        # set a hook to fire every time an instruction is about to execute
+        hobj = self.ql.hook_code(__count_instructions)
 
-def run_one_round(payload: bytes):
-    stdin = MyPipe()
+        # feed stdin with input
+        self.ql.stdin.write(input + b'\n')
 
-    ql = Qiling(["rootfs/x86_linux/bin/crackme_linux"], "rootfs/x86_linux",
-        console=False,      # thwart qiling logger output
-        stdin=stdin,        # take over the input to the program
-        stdout=sys.stdout)  # thwart program output
+        # resume emulation till function returns
+        self.ql.run(begin=self.replay_starts, end=self.replay_ends)
 
-    ins_count = [0]
-    ql.hook_code(instruction_count, ins_count)
-    ql.set_syscall("_llseek", my__llseek)
+        hobj.remove()
 
-    stdin.write(payload + b'\n')
-    ql.run()
+        return icount[0]
 
-    del stdin
-    del ql
+    def replay(self, input: bytes) -> bool:
+        """Restore state and replay with a new input.
 
-    return ins_count[0]
+        Returns an indication of execution progress: `True` if progress
+        was made, `False` otherwise
+        """
 
-def solve():
+        # restore program's state back to the starting point
+        self.ql.restore(self.jumpstart)
+
+        # resume emulation and count emulated instructions
+        curr_icount = self.__run(input)
+
+        # the larger the correct part of the input, the more instructions are expected to be executed. this is true
+        # for traditional loop-based validations like strcmp or memcmp, which bail as soon as a mismatch is found:
+        # more correct characters mean more loop iterations - thus more executed instructions.
+        #
+        # if we got a higher instruction count, it means we made progress in the right direction
+        if curr_icount > self.best_icount:
+            self.best_icount = curr_icount
+
+            return True
+
+        return False
+
+def main():
     idx_list = (1, 4, 2, 0, 3)
     flag = [0] * len(idx_list)
 
-    prev_ic = run_one_round(bytes(flag))
+    solver = Solver(bytes(flag))
+
     for idx in idx_list:
 
         # bruteforce all possible flag characters
-        for ch in '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ ':
+        for ch in string.printable:
             flag[idx] = ord(ch)
 
-            print(f'\rguessing char at {idx}: {ch}... ', end='', flush=True)
-            ic = run_one_round(bytes(flag))
+            print(f'\rGuessing... [{"".join(chr(ch) if ch else "_" for ch in flag)}]', end='', file=sys.stderr, flush=True)
 
-            if ic > prev_ic:
-                print(f'ok')
-                prev_ic = ic
+            if solver.replay(bytes(flag)):
                 break
+
         else:
-            print(f'no match found')
+            print(f'No match found')
 
-    print(f'flag: "{"".join(chr(ch) for ch in flag)}"')
+    print(f'\nFlag found!')
 
 if __name__ == "__main__":
-    solve()
+    main()
diff --git a/qiling/core.py b/qiling/core.py
@@ -9,6 +9,8 @@
 # See https://stackoverflow.com/questions/39740632/python-type-hinting-without-cyclic-imports
 from typing import Dict, List, Union
 from typing import TYPE_CHECKING
+
+from unicorn.unicorn import Uc
 if TYPE_CHECKING:
     from .arch.register import QlRegisterManager
     from .arch.arch import QlArch
@@ -654,7 +656,7 @@ def filter(self, ft):
             self._log_filter.update_filter(ft)
 
     @property
-    def uc(self):
+    def uc(self) -> Uc:
         """ Raw uc instance.
 
             Type: Uc
diff --git a/qiling/os/posix/posix.py b/qiling/os/posix/posix.py
@@ -3,7 +3,7 @@
 # Cross Platform and Multi Architecture Advanced Binary Emulation Framework
 #
 
-from inspect import signature
+from inspect import signature, Parameter
 from typing import Union, Callable
 
 from unicorn.arm64_const import UC_ARM64_REG_X8, UC_ARM64_REG_X16
@@ -13,7 +13,7 @@
 
 from qiling import Qiling
 from qiling.cc import QlCC, intel, arm, mips
-from qiling.const import QL_ARCH, QL_OS, QL_INTERCEPT, QL_CALL_BLOCK, QL_VERBOSE
+from qiling.const import QL_ARCH, QL_OS, QL_INTERCEPT
 from qiling.exception import QlErrorSyscallNotFound
 from qiling.os.os import QlOs
 from qiling.os.posix.const import errors, NR_OPEN
@@ -209,15 +209,15 @@ def load_syscall(self):
             args = []
 
             # ignore first arg, which is 'ql'
-            arg_names = tuple(signature(syscall_hook).parameters.values())[1:]
+            args_info = tuple(signature(syscall_hook).parameters.values())[1:]
 
-            for name, value in zip(arg_names, params):
-                name = str(name)
-
-                # ignore python special args
-                if name in ('*args', '**kw', '**kwargs'):
+            for info, value in zip(args_info, params):
+                # skip python special args, like: *args and **kwargs
+                if info.kind != Parameter.POSITIONAL_OR_KEYWORD:
                     continue
 
+                name = info.name
+
                 # cut the first part of the arg if it is of form fstatat64_fd
                 if name.startswith(f'{syscall_basename}_'):
                     name = name.partition('_')[-1]
diff --git a/qiling/os/windows/api.py b/qiling/os/windows/api.py
@@ -5,6 +5,8 @@
 
 from qiling.os.const import *
 
+# See: https://docs.microsoft.com/en-us/windows/win32/winprog/windows-data-types
+
 LONG   = PARAM_INTN
 ULONG  = PARAM_INTN
 CHAR   = PARAM_INT8