Skip to content

implements the superset disassembler #1630

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 31 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d8506ec
Snapshot of bap_superset_disasm building with dune
KennethAdamMiller May 2, 2025
eaa8a54
Update dune file to not reference plot_superset_cache
KennethAdamMiller May 2, 2025
eb1b768
Update with plugins/superset_disasm/dune
KennethAdamMiller May 2, 2025
69d5cd6
Update plugins/superset_disasm/dune with site stanza
KennethAdamMiller May 2, 2025
281d2e6
Update modules to import the correct dependencies
KennethAdamMiller May 2, 2025
c2bf267
Link bap-superset-disasm library to superset-disasm plugin
KennethAdamMiller May 2, 2025
56ec054
Add mli files for Abstract_ssa and Decision_trees, also add interface…
KennethAdamMiller May 7, 2025
4f98da2
Move cmdoptions from lib to plugin
KennethAdamMiller May 9, 2025
dd9f51e
Correct up plugins/superset_disasm/dune, add metadata and plot_supers…
KennethAdamMiller May 9, 2025
fc2b57a
Update lib/bap_superset_disasm/dune to correct public_name
KennethAdamMiller May 9, 2025
8cfe6f0
Fix ppx related issues by adding (preprocess (pps ppx_bap))
KennethAdamMiller May 9, 2025
12d4b83
Revert Sys to Stdlib.Sys to avoid compile errors
KennethAdamMiller May 9, 2025
64e9e77
Snapshot at fully buildinggit status
KennethAdamMiller May 9, 2025
47bc223
Remove old scripts
KennethAdamMiller May 12, 2025
08055f3
Remove metadata module and implement supersetd-graph-metrics command
KennethAdamMiller May 12, 2025
b1129bd
Cmdoptions is stuck with dune insisting it requires a stanza for no impl
KennethAdamMiller May 12, 2025
4b8a732
Ensure that superset disasm and graph metrics commands work harmoniously
KennethAdamMiller May 12, 2025
6da8549
Change command names for consistency
KennethAdamMiller May 12, 2025
c3c69b2
add new script for testing superset disasm using parallel
KennethAdamMiller May 12, 2025
f87faf5
Add new integration test and script for parallel processing binaries
KennethAdamMiller May 12, 2025
c9a8640
Add new find fn culprit flag
KennethAdamMiller May 14, 2025
3a4e46b
Add new print-fn-binaries flag to supersetd-graph-metrics command
KennethAdamMiller May 16, 2025
a3c6a06
Fix TrimLimitedClamped
KennethAdamMiller May 16, 2025
b834949
Disable troublesome invariants
KennethAdamMiller May 16, 2025
661b560
Enable TrimLimitedClamped in disasm_corpora.sh
KennethAdamMiller May 16, 2025
acf71e3
Correct bap superset disasm test library name
KennethAdamMiller May 16, 2025
c3873cd
Trim heuristics severely, drop liveness, features, abstract_ssa
KennethAdamMiller May 16, 2025
c75de31
Remove Fixpoint, Decision_trees, Grammar. Add documentation to heuris…
KennethAdamMiller May 16, 2025
7735f77
Restore grammar and fixpoint, used in plugin
KennethAdamMiller May 16, 2025
502a7db
Make sure analyses arguments can be supplied
KennethAdamMiller May 17, 2025
7e23620
Update analyses and heuristics args in disasm_corpora.sh
KennethAdamMiller May 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions bap-superset-disasm.opam
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
version: "dev"
synopsis: "A bap disassembler that converges on a minimal superset."
maintainer: ["Ivan Gotovchits <ivg@ieee.org>"]
authors: ["The BAP Team"]
license: "MIT"
homepage: "https://github.com/BinaryAnalysisPlatform/bap"
bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues"
depends: [
"dune" {>= "3.1"}
"core_kernel"
"bap"
"ppx_inline_test"
"graphlib"
"landmarks"
"bap-future"
"zmq"
"gnuplot"
"odoc" {with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"--promote-install-files=false"
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
["dune" "install" "-p" name "--create-install-files" name]
]
dev-repo: "git+https://github.com/BinaryAnalysisPlatform/bap.git"
18 changes: 18 additions & 0 deletions dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,24 @@
(ocaml (> 4.08.0))
(stdio (and (>= v0.14) (< v0.16)))))

; Package definition for the superset disassembler library and plugin.
; The opam file bap-superset-disasm.opam is generated from this stanza.
(package
(name bap-superset-disasm)
(synopsis "A bap disassembler that converges on a minimal superset.")
; Declares a site named plugins in the lib section of this package.
(sites
(lib plugins)
)
; NOTE(review): lib/bap_superset_disasm/dune also links bap-core-theory and
; bap-knowledge and preprocesses with ppx_bap; none of them is listed here.
; Confirm they are always pulled in transitively, or add them.
(depends
core_kernel
bap
ppx_inline_test
graphlib
landmarks
bap-future
zmq
gnuplot)
)


(package
(name bap-primus-support)
(synopsis "Provides essential Primus components")
Expand Down
9 changes: 9 additions & 0 deletions lib/bap_superset_disasm/bap_superset_disasm.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
(** Umbrella module of the superset disassembler library.

Each alias below re-exports one of the library's modules under the
same name, so they can all be reached through this module. *)
module Superset_impl = Superset_impl
module Superset = Superset
module Heuristics = Heuristics
module Invariants = Invariants
module Metrics = Metrics
module Report = Report
module Traverse = Traverse
module Trim = Trim

18 changes: 18 additions & 0 deletions lib/bap_superset_disasm/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
; Library stanza for the superset disassembler.
; Published as bap-superset-disasm; modules are not wrapped, so each one is
; visible at top level under its own name.
(library
(name bap_superset_disasm)
(public_name bap-superset-disasm)
(wrapped false)
(preprocess (pps ppx_bap))
; NOTE(review): ppx_inline_test is listed as a plain library and there is no
; (inline_tests) field in this stanza - confirm that this is intended.
(libraries
bap
bap-core-theory
bap-future
bap-knowledge
graphlib
ppx_inline_test
landmarks
zmq
gnuplot
)
)

27 changes: 27 additions & 0 deletions lib/bap_superset_disasm/fixpoint.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
open Core
open Bap.Std

(** [iterate rounds f superset] applies [f] to [superset] repeatedly and
    returns the last result.

    [f] is always applied at least once. For [rounds >= 1] it is applied
    exactly [rounds] times; a value of zero or less behaves like [1].

    @param rounds number of applications of [f] (clamped to at least one)
    @param f the analysis step to repeat
    @param superset the initial value fed to the first application *)
let iterate rounds f superset =
  let rec do_analysis round superset =
    (* [>=] instead of [=]: the counter starts at 1, so an equality test
       can never succeed when [rounds <= 0] and the loop would not end. *)
    if round >= rounds then superset
    else do_analysis (round + 1) (f superset) in
  do_analysis 1 (f superset)

(** [protect superset f] runs [f] on [superset] between a call-site
    tagging step and a cleanup step.

    In order: call sites are collected with
    [Heuristics.get_callsites ~threshold:0]; [Heuristics.tag_callsites]
    is run with a fresh address hash set; [f] is applied to the tagged
    superset; [Superset.Core.clear_each] is called on the result with
    that same hash set; finally the outcome of [Trim.run] is returned.

    NOTE(review): the hash set presumably collects the addresses touched
    by [tag_callsites] so they can be cleared afterwards - confirm
    against [Heuristics]. *)
let protect superset f =
  let tagged = Addr.Hash_set.create () in
  let callsites = Heuristics.get_callsites ~threshold:0 superset in
  let processed =
    Heuristics.tag_callsites tagged ~callsites superset |> f in
  Superset.Core.clear_each processed tagged;
  Trim.run processed

(** [converge superset heuristics feature_pmap] trims [superset] with
    [Trim.run] and then calls [Traverse.mark_descendent_bodies_at] on the
    trimmed superset for every key of [feature_pmap], sharing a single
    visited set across all calls. The trimmed superset is returned.

    [heuristics] is currently not used by the body; it is kept so the
    signature stays stable for callers. *)
let converge superset heuristics feature_pmap =
  let trimmed = Trim.run superset in
  let seen = Addr.Hash_set.create () in
  let mark_from addr =
    Traverse.mark_descendent_bodies_at trimmed ~visited:seen addr in
  Map.keys feature_pmap |> List.iter ~f:mark_from;
  trimmed

6 changes: 6 additions & 0 deletions lib/bap_superset_disasm/fixpoint.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
open Bap.Std


(** [iterate rounds f x] applies [f] repeatedly starting from [x] and
returns the last result. [f] is applied at least once; for
[rounds >= 1] it is applied exactly [rounds] times. *)
val iterate : int -> ('a -> 'a) -> 'a -> 'a

(** [protect superset f] tags call sites in [superset], applies [f],
clears the addresses recorded during tagging, and returns the result
of trimming the superset. *)
val protect : Superset_impl.t -> (Superset_impl.t -> Superset_impl.t) -> Superset_impl.t

(** [converge superset heuristics features] trims [superset], then marks
descendent bodies starting from every key of [features], and returns
the trimmed superset. The [heuristics] argument is accepted but not
used by the current implementation. *)
val converge : Superset_impl.t -> 'a -> 'b Addr.Map.t -> Superset_impl.t
118 changes: 118 additions & 0 deletions lib/bap_superset_disasm/grammar.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
open Core
open Bap.Std

(** [tag_by_traversal ?threshold superset] walks the superset from its
entry points, counts the branch addresses met along the way, and uses
the points where enough branches have accumulated as starting points
for marking conflicting addresses bad. [threshold] defaults to 8.
Returns [superset].

Original design note: the objective is to tag grammar structures
while traversing topologically, so that we converge on the
probability of recognizing a sequence intended by the compiler.
Once a recognition threshold is hit, we traverse forward from an
activation point and trim occlusive instructions. Grammars can be
recognized in several ways: loops are strongly connected
components, and sequences that branch must eventually rejoin at a
common point.

Steps performed by the body:
- visit the graph from [Superset.entries_of_isg] with [Traverse.visit],
keeping a running count and a stack of the addresses found in
[Superset.get_branches];
- whenever the running count is at least [threshold], record the
address stored at index [threshold] of that stack, if there is one,
in [tps];
- for every recorded address not already in [visited], run
[Traverse.with_descendents_at]; each address handed to its [~pre]
callback is added to [visited], and every address that
[Superset.Occlusion.with_data_of_insn] passes on is marked bad when
it is still a vertex of the graph;
- finally call [Superset.Core.clear_bad] on every address in
[visited]. *)
let tag_by_traversal ?(threshold=8) superset =
(* Shared visited set: handed to Traverse.visit below and extended by
the second pass. *)
let visited = Addr.Hash_set.create () in
(*let callsites = Superset.get_callsites ~threshold:6 superset in
let superset = tag_callsites visited ~callsites superset in
let superset = Invariants.tag_layer_violations superset in
let superset = Invariants.tag_branch_violations superset in*)
let entries = Superset.entries_of_isg superset in
let branches = Superset.get_branches superset in
(*let branches = identify_branches superset in*)
(*let branches = linear_branch_sweep superset entries in*)
(* Number of branch addresses currently counted on the walk. *)
let cur_total = ref 0 in
(* Stack of counted branch addresses, most recent first. *)
let positives = ref [] in
(* Address at which the current walk started, if any. *)
let entry = ref None in
(* Addresses selected as starting points for the second pass. *)
let tps = Addr.Hash_set.create () in
(* In the case that our current starting point, entry, is none, set *)
(* it to being the address of the lambda parameter, addr. Then, set *)
(* the current total number of recognized grammar items to zero, *)
(* as well as the positives since we're starting over *)
let pre addr =
if Option.is_none !entry then (
entry := Some(addr);
cur_total := 0;
positives := [];
);
if Hash_set.mem branches addr then (
cur_total := !cur_total + 1;
positives := addr :: !positives;
if !cur_total >= threshold then (
let open Option in
(* NOTE(review): index [threshold] only exists once the stack holds
threshold + 1 addresses, so nothing is recorded when the count
equals [threshold] exactly (assuming the stack length tracks the
count) - confirm that this boundary is intended. *)
ignore (List.nth !positives threshold >>|
(fun convergent_point ->
Hash_set.add tps convergent_point));
)
) in
(* Undo the bookkeeping of [pre]: forget the entry when leaving the
address the walk started from, and pop a counted branch. *)
let post addr =
entry := Option.value_map !entry ~default:!entry
~f:(fun e -> if Addr.(e = addr) then None else Some(e));
if Hash_set.mem branches addr then (
cur_total := !cur_total - 1;
match !positives with
| _ :: remaining -> positives := remaining
| [] -> ();
) in
Traverse.visit ~visited
~pre ~post superset entries;
(* Second pass: start from each recorded address that is not yet in
[visited]. *)
Hash_set.iter tps ~f:(fun tp ->
if not (Hash_set.mem visited tp) then (
Traverse.with_descendents_at superset tp ~pre:(fun tp ->
(* Only mark addresses that are still vertices of the graph. *)
let mark_bad addr =
if Superset.ISG.mem_vertex superset addr then (
Superset.Core.mark_bad superset addr
) in
Superset.Occlusion.with_data_of_insn superset tp ~f:mark_bad;
Hash_set.add visited tp;
) ;
)
);
(* Clear the bad mark on every address that ended up in [visited]. *)
Hash_set.iter visited
~f:(fun tp -> Superset.Core.clear_bad superset tp);
superset


(** [parents_of_insns superset component] returns the set of addresses
    that [Superset.ISG.ancestors] reports for at least one member of
    [component] and that are not themselves members of [component]. *)
let parents_of_insns superset component =
  let is_outside addr = not (Set.mem component addr) in
  Set.fold component ~init:Addr.Set.empty ~f:(fun outsiders insn ->
      Superset.ISG.ancestors superset insn
      |> List.filter ~f:is_outside
      |> List.fold ~init:outsiders ~f:Set.add)

(** [addrs_of_loops loops] flattens [loops], a list of address lists,
    into a single address set containing every address of every loop. *)
let addrs_of_loops loops =
  List.concat loops |> Addr.Set.of_list

(** [filter_loops ?min_size loops] keeps the loops whose length is
    strictly greater than [min_size] (default 20) and returns the set
    of all addresses they contain. *)
let filter_loops ?(min_size=20) loops =
  let is_large loop = List.length loop > min_size in
  loops |> List.filter ~f:is_large |> addrs_of_loops

(** [addrs_of_filtered_loops ?min_size superset] applies {!filter_loops}
    to the loops returned by [Superset.ISG.raw_loops superset].
    [min_size] defaults to 20. *)
let addrs_of_filtered_loops ?(min_size=20) superset =
  let loops = Superset.ISG.raw_loops superset in
  filter_loops ~min_size loops

(** [tag_loop_contradictions ?min_size superset] marks bad the addresses
    that conflict with instructions inside sufficiently large loops, and
    returns [superset].

    Rationale (from the original note): inside a loop body, instructions
    eventually fall through to themselves, which effectively triggers an
    invariant. Invariants work at too fine a grain to see conflicts that
    propagate through loops many instructions long, so loops of more
    than [min_size] addresses (default 20) are handled here.

    An address is marked bad when it is reported by
    [Superset.Occlusion.conflicts_within_insns] for the loop addresses,
    is an ancestor of a loop address, and is not a loop address
    itself. *)
let tag_loop_contradictions ?(min_size=20) superset =
  let loop_addrs = addrs_of_filtered_loops ~min_size superset in
  (* Ancestors of loop members that lie outside the loops. *)
  let outer_parents = parents_of_insns superset loop_addrs in
  let conflicts =
    Superset.Occlusion.conflicts_within_insns superset loop_addrs in
  (* Keep only conflicts that are outside parents; loop members are
     never removed. *)
  let doomed = Set.diff (Set.inter conflicts outer_parents) loop_addrs in
  Set.iter doomed ~f:(Superset.Core.mark_bad superset);
  superset

5 changes: 5 additions & 0 deletions lib/bap_superset_disasm/grammar.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
open Bap.Std

(** [tag_loop_contradictions ?min_size superset] marks bad the addresses
that conflict with instructions of loops containing more than
[min_size] addresses (default 20), restricted to ancestors of loop
members that are not loop members themselves. Returns [superset]. *)
val tag_loop_contradictions : ?min_size:int -> Superset_impl.t -> Superset_impl.t

(** [tag_by_traversal ?threshold superset] traverses the superset from
its entry points, counting branch addresses, and uses the points where
the count reaches [threshold] (default 8) as starting points for
marking conflicting addresses. Returns [superset]. *)
val tag_by_traversal : ?threshold:int -> Superset_impl.t -> Superset_impl.t

Loading
Loading