Skip to content

Graph/types #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 16 commits into
base: development
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,16 @@ flow analysis labels.
* macOS ≥ 10.15 or GNU / Linux (we recommend Ubuntu Linux ≥ 18.04).
* bazel ≥ 2.0
* Python ≥ 3.6
* MySQL client (N.B. this is not the full MySQL server, just the connector)
* On macOS: `brew install mysql-client`
* On Ubuntu: `sudo apt-get install libmysqlclient-dev`
* A Fortran compiler:
* On macOS: `brew cask install gfortran`
* (Ubuntu has one by default)
* (Optional) NVIDIA GPU with CUDA drivers for TensorFlow and PyTorch

Test that you have everything prepared by building and running the full test
suite:
Once you have the above requirements installed, test that everything is working
by building and running the full test suite:

```sh
$ bazel test //programl/...
Expand Down
Binary file modified programl/Documentation/arXiv.2003.10536/paper.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified programl/Documentation/assets/llvm2graph-1-ir.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
126 changes: 126 additions & 0 deletions programl/Documentation/assets/llvm2graph-5-types.dot
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Program graph illustration that includes a type node: a single `i32` type
// node is linked to every constant and variable node, in addition to the
// control, data and call edges between instruction nodes.
digraph main {
  margin=0;

  // Default attributes for the graph, its nodes and its edges.
  graph [
    fontsize=100,
    nodesep=0.2,
    ranksep=0.2,
  ];
  node [
    fontname=Inconsolata,
    fontsize=25,
    penwidth=2,
    margin=0,
  ];
  edge [
    fontname=Inconsolata,
    fontsize=22,
    arrowsize=.8,
    penwidth=1,
  ];

  // === Nodes ===
  external [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", label="[external]", width=2];

  // Types:
  i32 [shape=diamond, margin=0, style=filled, fillcolor="#CCCCCC", width=1, color="#CCCCCC", fontcolor="#222222", label="i32"];

  // Constants (the trailing comment gives the constant's value):
  const_0 [shape=octagon, margin=0, style=filled, fillcolor="#F4CCCC", width=1, color="#F4CCCC", fontcolor="#990000", label="val"]; // 0
  const_1 [shape=octagon, margin=0, style=filled, fillcolor="#F4CCCC", width=1, color="#F4CCCC", fontcolor="#990000", label="val"]; // 1
  const_minus_1 [shape=octagon, margin=0, style=filled, fillcolor="#F4CCCC", width=1, color="#F4CCCC", fontcolor="#990000", label="val"]; // -1
  const_minus_2 [shape=octagon, margin=0, style=filled, fillcolor="#F4CCCC", width=1, color="#F4CCCC", fontcolor="#990000", label="val"]; // -2

  // Instructions:
  inst_switch [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", width=2, label="switch"];
  inst_br [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", width=1, label="br"];
  phi [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", width=1, label="phi"];
  inst_add_minus_1 [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", width=1, label="add"];
  call_1 [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", width=1, label="call"];
  inst_add_minus_2 [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", width=1, label="add"];
  call_2 [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", width=1, label="call"];
  add_3 [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", width=1, label="add"];
  ret_2 [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", width=1, label="ret"];
  ret_1 [shape=box, style=filled, fillcolor="#C0DAFF", color="#C0DAFF", fontcolor="#345393", width=1, label="ret"];

  // Variables (the trailing comment gives the IR identifier):
  arg_0 [shape=ellipse, style=filled, fillcolor="#E99C9C", color="#E99C9C", width=1, fontcolor="#990000", label="var"]; // %0
  var_4 [shape=ellipse, style=filled, fillcolor="#E99C9C", color="#E99C9C", width=1, fontcolor="#990000", label="var"]; // %4
  var_5 [shape=ellipse, style=filled, fillcolor="#E99C9C", color="#E99C9C", width=1, fontcolor="#990000", label="var"]; // %5
  var_6 [shape=ellipse, style=filled, fillcolor="#E99C9C", color="#E99C9C", width=1, fontcolor="#990000", label="var"]; // %6
  var_7 [shape=ellipse, style=filled, fillcolor="#E99C9C", color="#E99C9C", width=1, fontcolor="#990000", label="var"]; // %7
  var_8 [shape=ellipse, style=filled, fillcolor="#E99C9C", color="#E99C9C", width=1, fontcolor="#990000", label="var"]; // %8
  var_10 [shape=ellipse, style=filled, fillcolor="#E99C9C", color="#E99C9C", width=1, fontcolor="#990000", label="var"]; // %10


  // === Edges ===

  // Control edges (high weight so they dominate the layout):
  inst_switch -> inst_add_minus_1 [color="#345393", weight=10, labelfontcolor="#345393", minlen=2];
  inst_switch -> phi [color="#345393", weight=10, labelfontcolor="#345393", minlen=2];
  inst_switch -> inst_br [color="#345393", weight=10, labelfontcolor="#345393", minlen=2];
  inst_br -> phi [color="#345393", weight=10];
  inst_add_minus_1 -> call_1 [color="#345393", weight=10];
  call_1 -> inst_add_minus_2 [color="#345393", weight=10];
  inst_add_minus_2 -> call_2 [color="#345393", weight=10];
  call_2 -> add_3 [color="#345393", weight=10];
  add_3 -> ret_2 [color="#345393", weight=10];
  phi -> ret_1 [color="#345393", weight=10];

  // Data edges:
  inst_add_minus_1 -> var_4 [color="#EA9999", labelfontcolor="#990000", weight=0];
  call_1 -> var_5 [color="#EA9999", labelfontcolor="#990000", weight=0];
  var_4 -> call_1 [color="#EA9999", labelfontcolor="#990000", weight=0];
  inst_add_minus_2 -> var_6 [color="#EA9999", labelfontcolor="#990000", weight=0];
  call_2 -> var_7 [color="#EA9999", labelfontcolor="#990000", weight=0];
  var_6 -> call_2 [color="#EA9999", labelfontcolor="#990000", weight=0];
  add_3 -> var_8 [color="#EA9999", labelfontcolor="#990000", weight=0];
  var_7 -> add_3 [color="#EA9999", labelfontcolor="#990000", weight=0];
  var_5 -> add_3 [color="#EA9999", labelfontcolor="#990000", weight=0];
  var_8 -> ret_2 [color="#EA9999", labelfontcolor="#990000", weight=0];
  phi -> var_10 [color="#EA9999", labelfontcolor="#990000", weight=0];
  var_10 -> ret_1 [color="#EA9999", labelfontcolor="#990000", weight=0];
  arg_0 -> inst_switch [color="#EA9999", labelfontcolor="#990000", weight=0];
  arg_0 -> inst_add_minus_1 [color="#EA9999", labelfontcolor="#990000", weight=0];
  arg_0 -> inst_add_minus_2 [color="#EA9999", labelfontcolor="#990000", weight=0];
  arg_0 -> phi [color="#EA9999", labelfontcolor="#990000", weight=0];

  // Data edges (constants):

  const_0 -> inst_switch [color="#EA9999", labelfontcolor="#990000"];
  const_1 -> inst_switch [color="#EA9999", labelfontcolor="#990000"];
  const_1 -> phi [color="#EA9999", labelfontcolor="#990000"];
  const_minus_1 -> inst_add_minus_1 [color="#EA9999", labelfontcolor="#990000"];
  const_minus_2 -> inst_add_minus_2 [color="#EA9999", labelfontcolor="#990000"];

  // Call edges
  external -> inst_switch [color="#5dba83", weight=0];
  ret_2 -> external [color="#5dba83", weight=0];
  ret_1 -> external [color="#5dba83", weight=0];
  ret_1 -> call_2 [color="#5dba83", weight=0];
  ret_1 -> call_1 [color="#5dba83", weight=0];
  call_1 -> inst_switch [color="#5dba83", weight=0];
  call_2 -> inst_switch [color="#5dba83", weight=0];
  ret_2 -> call_2 [color="#5dba83", weight=0];
  ret_2 -> call_1 [color="#5dba83", weight=0];

  // Type edges: the i32 type node points at every constant and variable.
  i32 -> const_0 [color="#AAAAAA", penwidth=3, weight=1];
  i32 -> const_1 [color="#AAAAAA", penwidth=3, weight=1];
  i32 -> const_minus_1 [color="#AAAAAA", penwidth=3, weight=1];
  i32 -> const_minus_2 [color="#AAAAAA", penwidth=3, weight=1];
  i32 -> arg_0 [color="#AAAAAA", penwidth=3, weight=1];
  i32 -> var_4 [color="#AAAAAA", penwidth=3, weight=1];
  i32 -> var_5 [color="#AAAAAA", penwidth=3, weight=1];
  i32 -> var_6 [color="#AAAAAA", penwidth=3, weight=1];
  i32 -> var_7 [color="#AAAAAA", penwidth=3, weight=1];
  i32 -> var_8 [color="#AAAAAA", penwidth=3, weight=1];
  i32 -> var_10 [color="#AAAAAA", penwidth=3, weight=1];

  // Layout: top-to-bottom, with selected nodes pinned to the same rank.
  // Node statements are separated with semicolons, as in the documented DOT
  // grammar, rather than with commas.
  rankdir = TB;
  {rank = same; inst_add_minus_1; phi; inst_br;}
  {rank = same; ret_1; call_1;}
  {rank = same; call_2; var_5; var_6;}
  {rank = same; add_3; var_7;}
  {rank = same; ret_2; var_8;}
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified programl/Documentation/assets/program_explorer.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 18 additions & 8 deletions programl/graph/format/graphviz_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ labm8::Status SerializeGraphVizToString(const ProgramGraph& graph,

// Determine the subgraph to add this node to.
boost::subgraph<GraphvizGraph>* dst = &external;
if (i && node.type() != Node::CONSTANT) {
if (i && (node.type() == Node::INSTRUCTION || node.type() == Node::VARIABLE)) {
dst = &functionGraphs[node.function()].get();
}

Expand Down Expand Up @@ -192,29 +192,33 @@ labm8::Status SerializeGraphVizToString(const ProgramGraph& graph,
}
labm8::TruncateWithEllipsis(text, kMaximumLabelLen);
attributes["label"] = text;
attributes["style"] = "filled";

// Set the node shape.
switch (node.type()) {
case Node::INSTRUCTION:
attributes["shape"] = "box";
attributes["style"] = "filled";
attributes["fillcolor"] = "#3c78d8";
attributes["fontcolor"] = "#ffffff";
break;
case Node::VARIABLE:
attributes["shape"] = "ellipse";
attributes["style"] = "filled";
attributes["fillcolor"] = "#f4cccc";
attributes["color"] = "#990000";
attributes["fontcolor"] = "#990000";
break;
case Node::CONSTANT:
attributes["shape"] = "diamond";
attributes["style"] = "filled";
attributes["shape"] = "octagon";
attributes["fillcolor"] = "#e99c9c";
attributes["color"] = "#990000";
attributes["fontcolor"] = "#990000";
break;
case Node::TYPE:
attributes["shape"] = "diamond";
attributes["fillcolor"] = "#cccccc";
attributes["color"] = "#cccccc";
attributes["fontcolor"] = "#222222";
break;
}
}

Expand Down Expand Up @@ -242,15 +246,21 @@ labm8::Status SerializeGraphVizToString(const ProgramGraph& graph,
attributes["color"] = "#65ae4d";
attributes["weight"] = "1";
break;
case Edge::TYPE:
attributes["color"] = "#aaaaaa";
attributes["weight"] = "1";
attributes["penwidth"] = "1.5";
break;
}

// Set the edge label.
if (edge.position()) {
// Position labels for control edge are drawn close to the originating
// instruction. For data edges, they are drawn closer to the consuming
// instruction.
// instruction. For control edges, they are drawn close to the branching
// instruction. For data and type edges, they are drawn close to the
// consuming node.
const string label =
edge.flow() == Edge::DATA ? "headlabel" : "taillabel";
edge.flow() == Edge::CONTROL ? "taillabel" : "headlabel";
attributes[label] = std::to_string(edge.position());
attributes["labelfontcolor"] = attributes["color"];
}
Expand Down
26 changes: 25 additions & 1 deletion programl/graph/program_graph_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace graph {

ProgramGraphBuilder::ProgramGraphBuilder() {
// Create the graph root node.
AddNode(Node::INSTRUCTION, "<root>");
AddNode(Node::INSTRUCTION, "[external]");
}

Module* ProgramGraphBuilder::AddModule(const string& name) {
Expand Down Expand Up @@ -67,6 +67,10 @@ Node* ProgramGraphBuilder::AddConstant(const string& text) {
return AddNode(Node::CONSTANT, text);
}

// Add a new node of type Node::TYPE with the given text and return a pointer
// to it. This forwards directly to AddNode().
Node* ProgramGraphBuilder::AddType(const string& text) {
return AddNode(Node::TYPE, text);
}

labm8::StatusOr<Edge*> ProgramGraphBuilder::AddControlEdge(int32_t position,
const Node* source,
const Node* target) {
Expand Down Expand Up @@ -143,6 +147,26 @@ labm8::StatusOr<Edge*> ProgramGraphBuilder::AddCallEdge(const Node* source,
return AddEdge(Edge::CALL, /*position=*/0, source, target);
}

// Add an Edge::TYPE edge from `source` to `target` at the given position.
//
// Returns an INVALID_ARGUMENT status, without adding an edge, if `source` is
// not a TYPE node or if `target` is an INSTRUCTION node. Both pointers must be
// non-null (enforced with DCHECK only).
labm8::StatusOr<Edge*> ProgramGraphBuilder::AddTypeEdge(int32_t position,
                                                        const Node* source,
                                                        const Node* target) {
  DCHECK(source) << "nullptr argument";
  DCHECK(target) << "nullptr argument";

  // A type edge must originate at a type node.
  const auto sourceType = source->type();
  if (sourceType != Node::TYPE) {
    return Status(labm8::error::Code::INVALID_ARGUMENT,
                  "Invalid source type ({}) for type edge. Expected type",
                  Node::Type_Name(sourceType));
  }

  // Any node kind except an instruction is accepted as the destination.
  const bool targetIsInstruction = target->type() == Node::INSTRUCTION;
  if (targetIsInstruction) {
    return Status(labm8::error::Code::INVALID_ARGUMENT,
                  "Invalid destination type (instruction) for type edge. "
                  "Expected {variable,constant,type}");
  }

  return AddEdge(Edge::TYPE, position, source, target);
}

labm8::StatusOr<ProgramGraph> ProgramGraphBuilder::Build() {
// Check that all nodes except the root are connected. The root is allowed to
// have no connections in the case where it is an empty graph.
Expand Down
11 changes: 10 additions & 1 deletion programl/graph/program_graph_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class ProgramGraphBuilder {

Node* AddConstant(const string& text);

// Add a node of type Node::TYPE with the given text.
Node* AddType(const string& text);

// Edge factories.
[[nodiscard]] labm8::StatusOr<Edge*> AddControlEdge(int32_t position,
const Node* source,
Expand All @@ -73,6 +75,10 @@ class ProgramGraphBuilder {
[[nodiscard]] labm8::StatusOr<Edge*> AddCallEdge(const Node* source,
const Node* target);

// Add a type edge from `source` to `target`. Returns an error status if
// `source` is not a type node, or if `target` is an instruction node.
[[nodiscard]] labm8::StatusOr<Edge*> AddTypeEdge(int32_t position,
const Node* source,
const Node* target);

const Node* GetRootNode() const { return &graph_.node(0); }

// Return the graph protocol buffer.
Expand All @@ -99,6 +105,9 @@ class ProgramGraphBuilder {
inline Edge* AddEdge(const Edge::Flow& flow, int32_t position,
const Node* source, const Node* target);

// Return a mutable pointer to the root node in the graph.
Node* GetMutableRootNode() { return graph_.mutable_node(0); }

// Return a mutable pointer to the graph protocol buffer.
ProgramGraph* GetMutableProgramGraph() { return &graph_; }

Expand All @@ -110,7 +119,7 @@ class ProgramGraphBuilder {
int32_t GetIndex(const Function* function);
int32_t GetIndex(const Node* node);

// Maps which covert store the index of objects in repeated field lists.
// Maps that store the index of objects in repeated field lists.
absl::flat_hash_map<Module*, int32_t> moduleIndices_;
absl::flat_hash_map<Function*, int32_t> functionIndices_;
absl::flat_hash_map<Node*, int32_t> nodeIndices_;
Expand Down
4 changes: 2 additions & 2 deletions programl/graph/py/program_graph_builder_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_empty_proto():
builder = program_graph_builder.ProgramGraphBuilder()
with test.Raises(ValueError) as e_ctx:
builder.Build()
assert "INSTRUCTION has no connections: `<root>`" == str(e_ctx.value)
assert "INSTRUCTION has no connections: `[external]`" == str(e_ctx.value)


def test_add_empty_module():
Expand Down Expand Up @@ -75,7 +75,7 @@ def test_linear_statement_control_flow():

assert len(builder.Build().node) == 3

assert builder.Build().node[builder.root].text == "<root>"
assert builder.Build().node[builder.root].text == "[external]"
assert builder.Build().node[builder.root].type == node_pb2.Node.INSTRUCTION

assert builder.Build().node[a].text == "a"
Expand Down
21 changes: 14 additions & 7 deletions programl/ir/llvm/inst2vec_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@
)


def NodeFullText(node: node_pb2.Node) -> str:
def NodeFullText(
graph: program_graph_pb2.ProgramGraph,
node: node_pb2.Node
) -> str:
"""Get the full text of a node, or an empty string if not set."""
if len(node.features.feature["full_text"].bytes_list.value):
return (
node.features.feature["full_text"].bytes_list.value[0].decode("utf-8")
)
return ""
idx = node.features.feature["llvm_string"].int64_list.value[0]
return graph.features.feature["strings"].bytes_list.value[idx].decode("utf-8")


class Inst2vecEncoder(object):
Expand Down Expand Up @@ -94,7 +94,7 @@ def Encode(
"""
# Gather the instruction texts to pre-process.
lines = [
[NodeFullText(node)]
[NodeFullText(proto, node)]
for node in proto.node
if node.type == node_pb2.Node.INSTRUCTION
]
Expand Down Expand Up @@ -122,6 +122,7 @@ def Encode(
# Add the node features.
var_embedding = self.dictionary["!IDENTIFIER"]
const_embedding = self.dictionary["!IMMEDIATE"]
type_embedding = self.dictionary["!IMMEDIATE"] # Types are immediates

text_index = 0
for node in proto.node:
Expand All @@ -143,6 +144,12 @@ def Encode(
node.features.feature["inst2vec_embedding"].int64_list.value.append(
const_embedding
)
elif node.type == node_pb2.Node.TYPE:
node.features.feature["inst2vec_embedding"].int64_list.value.append(
type_embedding
)
else:
raise TypeError(f"Unknown node type {node}")

proto.features.feature["inst2vec_annotated"].int64_list.value.append(1)
return proto
Expand Down
17 changes: 14 additions & 3 deletions programl/ir/llvm/inst2vec_encoder_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,21 @@ def AddVariable(self, full_text: str):

def Build(self):
proto = super(Inst2vecGraphBuilder, self).Build()

# Add the root node string feature.
proto.node[0].features.feature["llvm_string"].int64_list.value[:] = [0]

# Build the strings list.
strings_list = list(set(self.full_texts.values()))
proto.features.feature["strings"].bytes_list.value[:] = [
string.encode("utf-8") for string in strings_list
]

# Add the string indices.
for node, full_text in self.full_texts.items():
proto.node[node].features.feature["full_text"].bytes_list.value.append(
full_text.encode("utf-8")
)
idx = strings_list.index(full_text)
node_feature = proto.node[node].features.feature["llvm_string"]
node_feature.int64_list.value.append(idx)
return proto


Expand Down
Loading