Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Bytecode Simplifier

*(Modified by @abel1502 for EXTENDED_ARG opcode support and minor fixes)*

Bytecode Simplifier is a tool to deobfuscate PjOrion-protected Python scripts.
This is a complete rewrite of my older tool, [PjOrion Deobfuscator](https://github.com/extremecoders-re/PjOrion-Deobfuscator).

Expand Down
12 changes: 5 additions & 7 deletions assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def assemble(self):

# Modify relative jump to absolute jump
if ins.mnemonic == 'JUMP_FORWARD':
ins.mnemonic = 'JUMP_ABSOLUTE'
ins.opcode = dis.opmap['JUMP_ABSOLUTE']

# If instruction is a relative control transfer instruction
Expand Down Expand Up @@ -118,7 +119,7 @@ def dfs(self, bb):
bb.b_seen = True

# Recursively dfs on all outgoing explicit edges
for o_edge in self.bb_graph.out_edges_iter(bb, data=True):
for o_edge in self.bb_graph.out_edges(bb, data=True):
# o_edge is a tuple (edge src, edge dest, edge attrib dict)
if o_edge[2]['edge_type'] == 'explicit':
self.dfs(o_edge[1])
Expand All @@ -130,7 +131,7 @@ def dfs(self, bb):
self.dfs(ins.argval)

# Recursively dfs on all outgoing implicit edges
for o_edge in self.bb_graph.out_edges_iter(bb, data=True):
for o_edge in self.bb_graph.out_edges(bb, data=True):
# o_edge is a tuple (edge src, edge dest, edge attrib dict)
if o_edge[2]['edge_type'] == 'implicit':
self.dfs(o_edge[1])
Expand Down Expand Up @@ -163,15 +164,12 @@ def calculate_ins_operands(self):
if ins.opcode in dis.hasjabs:
# ins.argval is a BasicBlock
ins.arg = ins.argval.address
# TODO
# We do not generate EXTENDED_ARG opcode at the moment,
# hence size of opcode argument can only be 2 bytes
assert ins.arg <= 0xFFFF
assert ins.arg <= 0xFFFFFFFF
elif ins.opcode in dis.hasjrel:
ins.arg = ins.argval.address - addr
# relative jump can USUALLY go forward
assert ins.arg >= 0
assert ins.arg <= 0xFFFF
assert ins.arg <= 0xFFFFFFFF

def emit(self):
logger.debug('Generating code...')
Expand Down
12 changes: 8 additions & 4 deletions decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,20 @@ def decode_at(self, offset):

opcode = self.insBytes[offset]

if opcode == dis.opmap['EXTENDED_ARG']:
raise Exception('EXTENDED_ARG not yet implemented')


# Invalid instruction
if opcode not in dis.opmap.values():
return Instruction(-1, None, 1)

if opcode < dis.HAVE_ARGUMENT:
return Instruction(opcode, None, 1)

if opcode >= dis.HAVE_ARGUMENT:
elif opcode == dis.opmap['EXTENDED_ARG']:
arg = (self.insBytes[offset + 2] << 8) | self.insBytes[offset + 1]
arg = (arg << 16) + ((self.insBytes[offset + 5] << 8) | self.insBytes[offset + 4])
nextop = self.insBytes[offset + 3]
assert nextop >= dis.HAVE_ARGUMENT
return Instruction(nextop, arg, 6)
else:
arg = (self.insBytes[offset + 2] << 8) | self.insBytes[offset + 1]
return Instruction(opcode, arg, 3)
2 changes: 1 addition & 1 deletion deobfuscator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def find_oep(insBytes):

# The second instruction is intentionally invalid; executing it
# transfers control to the exception handler
assert dec.decode_at(3).is_opcode_valid() == False
assert dec.decode_at(ins.size).is_opcode_valid() == False

assert dec.decode_at(exc_handler).mnemonic == 'POP_TOP'
assert dec.decode_at(exc_handler + 1).mnemonic == 'POP_TOP'
Expand Down
6 changes: 3 additions & 3 deletions disassembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def construct_basic_blocks(self):
logger.debug('{} basic blocks created'.format(self.bb_graph.number_of_nodes()))

def find_bb_by_address(self, address):
for bb in self.bb_graph.nodes_iter():
for bb in self.bb_graph.nodes.iterkeys():
if bb.address == address:
return bb

Expand All @@ -252,7 +252,7 @@ def build_bb_edges(self):
"""
logger.debug('Constructing edges between basic blocks...')

for bb in self.bb_graph.nodes_iter():
for bb in self.bb_graph.nodes.iterkeys():
offset = 0

for idx in xrange(len(bb.instructions)):
Expand Down Expand Up @@ -313,7 +313,7 @@ def build_bb_edges(self):

# RETURN_VALUE
elif ins.is_ret():
nx.set_node_attributes(self.bb_graph, 'isTerminal', {bb: True})
nx.set_node_attributes(self.bb_graph, {bb: True}, 'isTerminal')
# Does not have any successors
assert len(nextInsAddr) == 0

Expand Down
8 changes: 6 additions & 2 deletions instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ def __init__(self, opcode, arg, size):

# Numeric argument to operation(if any), otherwise None
self.arg = arg


if size == 3 and arg >= 65536:
size = 6
# The size of the instruction including the argument
self.size = size

Expand Down Expand Up @@ -74,8 +76,10 @@ def has_xref(self):
def assemble(self):
if self.size == 1:
return chr(self.opcode)
else:
elif self.size == 3 and self.arg < 65536:
return chr(self.opcode) + chr(self.arg & 0xFF) + chr((self.arg >> 8) & 0xFF)
else:
return chr(dis.opmap["EXTENDED_ARG"]) + chr((self.arg >> 16) & 0xFF) + chr((self.arg >> 24) & 0xFF) + chr(self.opcode) + chr(self.arg & 0xFF) + chr((self.arg >> 8) & 0xFF)

def __str__(self):
# Human-readable form: opcode, mnemonic and argument separated by single spaces.
return '{} {} {}'.format(self.opcode, self.mnemonic, self.arg)
19 changes: 9 additions & 10 deletions simplifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def eliminate_forwarders(self):
# Loop until no basic block can be eliminated any more
while bb_eliminated:
bb_eliminated = False
for bb in self.bb_graph.nodes_iter():
for bb in self.bb_graph.nodes.iterkeys():
# Must have a single instruction
if len(bb.instructions) == 1:
ins = bb.instructions[0]
Expand All @@ -36,7 +36,7 @@ def eliminate_forwarders(self):
assert self.bb_graph.out_degree(bb) == 1

forwarderBB = bb
forwardedBB = self.bb_graph.successors(bb)[0]
forwardedBB = next(self.bb_graph.successors(bb))

# Check if forwardedBB has at least one implicit in-edge
forwardedBB_in_edge_exists = len(filter(lambda edge: edge[2]['edge_type'] == 'implicit',
Expand All @@ -54,7 +54,7 @@ def eliminate_forwarders(self):
self.bb_graph.remove_edge(forwarderBB, forwardedBB)

# Iterate over the predecessors of the forwarder
for predecessorBB in self.bb_graph.predecessors(forwarderBB):
for predecessorBB in list(self.bb_graph.predecessors(forwarderBB)):
# Get existing edge type
e_type = self.bb_graph.get_edge_data(predecessorBB, forwarderBB)['edge_type']

Expand Down Expand Up @@ -97,7 +97,7 @@ def eliminate_forwarders(self):

def merge_basic_blocks(self):
"""
Merges a basic block into its predecessor iff the basic block has exactly one predecessor
Merges a basic block into its predecessor if the basic block has exactly one predecessor
and the predecessor has this basic block as its lone successor

:param bb_graph: A graph of basic blocks
Expand All @@ -113,14 +113,13 @@ def merge_basic_blocks(self):
# Loop until no basic block can be eliminated any more
while bb_merged:
bb_merged = False
for bb in self.bb_graph.nodes_iter():
for bb in self.bb_graph.nodes.iterkeys():
# The basic block should not have any xrefs and must have exactly one predecessor
if not bb.has_xrefs_to and self.bb_graph.in_degree(bb) == 1:
predecessorBB = self.bb_graph.predecessors(bb)[0]
predecessorBB = self.bb_graph.predecessors(bb).next()

# Predecessor basic block must have exactly one successor
if self.bb_graph.out_degree(predecessorBB) == 1 and self.bb_graph.successors(predecessorBB)[
0] == bb:
if self.bb_graph.out_degree(predecessorBB) == 1 and self.bb_graph.successors(predecessorBB).next() == bb:
# The predecessor block will be the merged block
mergedBB = predecessorBB

Expand All @@ -138,12 +137,12 @@ def merge_basic_blocks(self):

# If bb is a terminal node, mark the mergedBB as terminal too
if bb in nx.get_node_attributes(self.bb_graph, 'isTerminal').keys():
nx.set_node_attributes(self.bb_graph, 'isTerminal', {mergedBB: True})
nx.set_node_attributes(self.bb_graph, {mergedBB: True}, 'isTerminal')

# Remove the edge
self.bb_graph.remove_edge(mergedBB, bb)

for successorBB in self.bb_graph.successors(bb):
for successorBB in list(self.bb_graph.successors(bb)):
# Get existing type
e_type = self.bb_graph.get_edge_data(bb, successorBB)['edge_type']

Expand Down
5 changes: 3 additions & 2 deletions utils/rendergraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def render_graph(bb_graph, filename):

nodedict = {}

for bb in bb_graph.nodes_iter():
for bb in bb_graph.nodes.iterkeys():
node = render_bb(bb, bb == entryblock, bb in returnblocks)
if bb == entryblock:
sub = pydotplus.Subgraph('sub', rank='source')
Expand All @@ -46,7 +46,8 @@ def render_graph(bb_graph, filename):
graph.add_node(node)
nodedict[bb] = node

for edge in bb_graph.edges_iter(data=True):
for edge in bb_graph.edges.iteritems():
edge = (edge[0][0], edge[0][1], edge[1])
src = nodedict[edge[0]]
dest = nodedict[edge[1]]
e_style = 'dashed' if edge[2]['edge_type'] == 'implicit' else 'solid'
Expand Down
14 changes: 8 additions & 6 deletions verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def verify_graph(bb_graph):
logger.error('The entry point basic block has an in degree of {}'.format(i_degree_entry))
raise Exception

for bb in bb_graph.nodes_iter():
for bb in bb_graph.nodes.iterkeys():
o_degree = bb_graph.out_degree(bb)
# A basic block can have 0,1 or 2 successors
if o_degree > 2:
Expand All @@ -41,11 +41,14 @@ def verify_graph(bb_graph):

# A basic block with an out-degree of 2 cannot have both out-edges of the explicit type, nor both of the implicit type
if o_degree == 2:
o_edges = bb_graph.out_edges(bb, data=True)
if o_edges[0][2]['edge_type'] == 'explicit' and o_edges[1][2]['edge_type'] == 'explicit':
o_edges = bb_graph.out_edges(bb, data=True).__iter__()
o_edges_zero = o_edges.next()
o_edges_one = o_edges.next()
print o_edges
if o_edges_zero[2]['edge_type'] == 'explicit' and o_edges_one[2]['edge_type'] == 'explicit':
logger.error('Basic block {} has both out edges of explicit type'.format(hex(id(bb))))
raise Exception
if o_edges[0][2]['edge_type'] == 'implicit' and o_edges[1][2]['edge_type'] == 'implicit':
if o_edges_zero[2]['edge_type'] == 'implicit' and o_edges_one[2]['edge_type'] == 'implicit':
logger.error('Basic block {} has both out edges of implicit type'.format(hex(id(bb))))
raise Exception

Expand All @@ -54,7 +57,7 @@ def verify_graph(bb_graph):
# If in degree is greater than zero
if i_degree > 0:
numImplicitEdges = 0
for edge in bb_graph.in_edges_iter(bb, data=True):
for edge in bb_graph.in_edges(bb, data=True):
if edge[2]['edge_type'] == 'implicit':
numImplicitEdges += 1

Expand All @@ -64,7 +67,6 @@ def verify_graph(bb_graph):

if i_degree == o_degree == 0:
logger.error('Orphaned block {} has no edges'.format(hex(id(bb))))

except Exception as ex:
print ex
return False
Expand Down