Skip to content

Add initial cgra compiler #704

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler_gym/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def dataset_from_parsed_uri(self, uri: BenchmarkUri) -> Dataset:
key = self._dataset_key_from_uri(uri)

if key not in self._datasets:
print("datasets are ", str(self._datasets))
raise LookupError(f"Dataset not found: {key}")

return self._datasets[key]
Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ py_library(
deps = [
":compiler_env",
"//compiler_gym:config",
"//compiler_gym/envs/cgra",
"//compiler_gym/envs/gcc",
"//compiler_gym/envs/llvm",
"//compiler_gym/envs/loop_tool",
Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ cg_add_all_subdirs()
set(ENVS_DEPS
::compiler_env
compiler_gym::envs::gcc::gcc
compiler_gym::envs::cgra::cgra
compiler_gym::envs::loop_tool::loop_tool
)
if(COMPILER_GYM_ENABLE_LLVM_ENV)
Expand Down
2 changes: 2 additions & 0 deletions compiler_gym/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from compiler_gym import config
from compiler_gym.envs.compiler_env import CompilerEnv
from compiler_gym.envs.gcc import GccEnv
from compiler_gym.envs.cgra import CgraEnv

if config.enable_llvm_env:
from compiler_gym.envs.llvm.llvm_env import LlvmEnv # noqa: F401
Expand All @@ -18,6 +19,7 @@
"COMPILER_GYM_ENVS",
"CompilerEnv",
"GccEnv",
"CgraEnv",
"LoopToolEnv",
]

Expand Down
4 changes: 4 additions & 0 deletions compiler_gym/envs/cgra/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
__pycache__
env.sh
cart-pole.py
env
26 changes: 26 additions & 0 deletions compiler_gym/envs/cgra/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Lint fix: drop the unused "py_test" symbol from the load, sort srcs/deps
# lexicographically, and add trailing commas, per buildifier conventions.
load("@rules_python//python:defs.bzl", "py_library")

py_library(
    name = "cgra",
    srcs = [
        "DFG.py",
        "Operations.py",
        "__init__.py",
        "cgra_rewards.py",
    ],
    data = [
        "//compiler_gym/envs/cgra/service",
    ],
    visibility = ["//visibility:public"],
    deps = [
        "//compiler_gym/envs/cgra/datasets",
        "//compiler_gym/errors",
        "//compiler_gym/service:client_service_compiler_env",
        "//compiler_gym/service/runtime",  # Implicit dependency of service.
        "//compiler_gym/util",
    ],
)
25 changes: 25 additions & 0 deletions compiler_gym/envs/cgra/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) Facebook, Inc. and its affiliates.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this file needs updating to match the cgra BUILD file

#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

cg_add_all_subdirs()

# Reviewer-requested fix: this file was a verbatim copy of the gcc env's
# CMake config (NAME gcc, gcc.py sources).  Updated to mirror
# compiler_gym/envs/cgra/BUILD so the CMake and Bazel builds agree.
cg_py_library(
  NAME
    cgra
  SRCS
    "DFG.py"
    "Operations.py"
    "__init__.py"
    "cgra_rewards.py"
  DATA
    compiler_gym::envs::cgra::service::service
  DEPS
    compiler_gym::envs::cgra::datasets::datasets
    compiler_gym::errors::errors
    compiler_gym::service::client_service_compiler_env
    compiler_gym::service::runtime::runtime
    compiler_gym::util::util
  PUBLIC
)
206 changes: 206 additions & 0 deletions compiler_gym/envs/cgra/DFG.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
import json
from pathlib import Path
import random

from importlib_metadata import entry_points
from compiler_gym.service.proto import (
Benchmark
)
from typing import Optional, List
from compiler_gym.third_party.inst2vec import Inst2vecEncoder
import compiler_gym.third_party.llvm as llvm
from compiler_gym.envs.cgra.Operations import Operation, operation_from_name

class Edge(object):
    """A typed edge in the data-flow graph.

    Endpoints are not stored here; the DFG keeps connectivity in its
    adjacency structure, so an Edge only records its kind (e.g. 'data').
    """

    def __init__(self, type):
        # NOTE: parameter shadows the `type` builtin; name kept so the
        # caller-facing signature is unchanged.
        self.type = type

class Node(object):
    """A named node of the data-flow graph wrapping a single operation."""

    def __init__(self, name, operation):
        self.name = name
        self.operation = operation

    def __str__(self):
        # f-string formatting calls str() on the operation, matching the
        # original concatenation output exactly.
        return f"Node with name {self.name} and op {self.operation}"

class DFG(object):
    """A data-flow graph of named Nodes connected by typed Edges.

    A DFG can be built three ways:

      * from a JSON file on disk (``from_json``),
      * from a JSON string (``from_text``),
      * from a ``Benchmark`` proto, by running the external ``cgra-mapper``
        tool over the benchmark source and loading the JSON it emits.

    When none of those arguments are given the instance is deliberately left
    empty: ``generate_DFG`` constructs ``DFG()`` and fills in ``nodes``,
    ``adj``, ``edges``, ``entry_points`` and ``node_names`` directly.
    """

    def __init__(self, working_directory: Optional[Path] = None, benchmark: Optional[Benchmark] = None, from_json: Optional[Path] = None, from_text: Optional[str] = None):
        # Adapted from: https://github.com/facebookresearch/CompilerGym/blob/development/examples/loop_optimizations_service/service_py/loops_opt_service.py
        # self.inst2vec = _INST2VEC_ENCODER

        # Bug fix: this was never stored, but the benchmark branch below
        # reads self.working_directory.
        self.working_directory = working_directory

        if from_json is not None:
            self.load_dfg_from_json(from_json)
        elif from_text is not None:
            self.load_dfg_from_text(from_text)
        elif benchmark is not None:
            # Only re-create the JSON file if we aren't providing an existing
            # one.  The existing ones are mostly a debugging functionality.
            with open(self.working_directory / "benchmark.c", "wb") as f:
                f.write(benchmark.program.contents)

            # We use CGRA-Mapper to produce a DFG in JSON.
            # NOTE(review): run_command, self.src_path and self.dfg_path are
            # not defined anywhere in this module, and cgra-mapper must be on
            # $PATH -- this branch is dead code until those are wired up.
            run_command(
                ["cgra-mapper", self.src_path, self.dfg_path]
            )

            # Now, load in the DFG.
            self.load_dfg_from_json(self.dfg_path)
        # else: intentionally no error -- generate_DFG() relies on DFG()
        # producing an empty shell whose fields it assigns by hand.

    def __str__(self):
        return "nodes are: " + str(self.nodes) + " and edges are " + str(self.adj)

    def load_dfg_from_json(self, path):
        """Load the graph from a JSON file on disk."""
        with open(path, "r") as p:
            # Bug fix: json.loads() requires a string, so read the file
            # instead of handing load_dfg_from_text the file object itself.
            self.load_dfg_from_text(p.read())

    def load_dfg_from_text(self, text):
        """Populate nodes, edges and adjacency lists from a JSON string.

        Expected schema (assumed from usage -- TODO confirm against
        cgra-mapper output): top-level keys 'entry_points', 'nodes'
        (objects with 'name'/'operation') and 'edges' (objects with
        'type'/'from'/'to').
        """
        # json is imported at module level; the previous method-local
        # imports were redundant.
        f = json.loads(text)
        self.nodes = {}
        self.node_names = []
        self.edges = []
        self.adj = {}
        self.entry_points = f["entry_points"]

        # Build the nodes first so edges can refer to them.
        for node in f["nodes"]:
            self.nodes[node["name"]] = Node(node["name"], operation_from_name(node["operation"]))
            self.adj[node["name"]] = []
            self.node_names.append(node["name"])

        for edge in f["edges"]:
            self.edges.append(Edge(edge["type"]))

        # Build the adjacency lists (successors only):
        for edge in f["edges"]:
            self.adj[edge["from"]].append(edge["to"])

    def get_preds(self, node):
        """Return the predecessor Nodes of *node*.

        O(V) scan: the adjacency structure only records successors, so we
        have to walk every node's successor list.
        """
        preds = []
        for n in self.adj:
            if node.name in self.adj[n]:
                preds.append(self.nodes[n])
        return preds

    def get_succs(self, node):
        """Return the successor Nodes of *node*."""
        return [self.nodes[n] for n in self.adj[node.name]]

    # TODO -- fix this, because for a graph with multiple entry nodes,
    # this doesn't actually give the right answer :)
    # (should do in most cases)
    def bfs(self):
        """Yield Nodes in breadth-first order starting from the entry points.

        Each node is yielded at most once, even if reachable along several
        paths.  (Debug prints removed.)
        """
        to_explore = self.entry_points[:]
        seen = set()

        while to_explore:
            head = to_explore.pop(0)
            if head in seen:
                continue
            seen.add(head)
            yield self.nodes[head]

            # Enqueue this node's successors.
            to_explore += self.adj[head]

# Generate a test DFG using the operations in
# 'operations'.
def generate_DFG(operations: List[Operation], size, seed=0):
    """Generate a pseudo-random DFG of roughly *size* nodes for testing.

    Args:
        operations: pool of Operations to draw from.  Must contain at least
            one zero-input operation to seed the graph.
        size: target number of nodes.
        seed: RNG seed so generated graphs are reproducible.  The sequence
            of random calls is unchanged from the original implementation,
            so a given seed yields the same graph as before.

    Returns:
        A DFG built by filling in the fields of an empty ``DFG()``.

    Raises:
        ValueError: if *operations* has no zero-input operation (previously
        this surfaced as an opaque IndexError from random.choice).
    """
    random.seed(seed)
    # Start with some 0-input ops:
    start_ops = random.randint(1, min(size, 3))

    # Jump-start this --- in reality, these can be
    # phi nodes coming from previous tiers of the loop,
    # or variables coming from outside the loop.
    start_options = [op for op in operations if op.inputs == 0]
    if not start_options:
        raise ValueError(
            "generate_DFG requires at least one zero-input operation"
        )

    node_number = 0

    entry_points = []
    nodes = {}
    node_names = []
    nodes_list = []
    edges = []
    adj = {}

    # Keep track of outputs that we should probably use somewhere.
    unused_outputs = []
    for _ in range(start_ops):
        name = "node" + str(node_number)
        node_names.append(name)
        n = Node(name, random.choice(start_options))
        node_number += 1

        nodes[name] = n
        nodes_list.append(n)
        entry_points.append(name)
        unused_outputs.append(n)
        adj[name] = []

    while len(nodes) < size:
        # Generate a new node.
        operation = random.choice(operations)
        name = "node" + str(node_number)
        node_names.append(name)
        node_number += 1

        # Pick input nodes for it:
        inputs = []
        while len(inputs) < operation.inputs:
            # Select random nodes, biased towards the unused ones.
            if random.randint(0, 10) > 6 and len(unused_outputs) > 0:
                inputs.append(unused_outputs[0])
                unused_outputs = unused_outputs[1:]
            else:
                inputs.append(random.choice(nodes_list))
        # If the node has no arguments, then we should add it as an entry
        # point. --- TODO --- should we just skip this to avoid creating
        # graphs with too many constant loads?
        if operation.inputs == 0:
            entry_points.append(name)

        # Now create the edges.
        for inp in inputs:
            # Edge carries no endpoints -- connectivity lives in adj.
            edges.append(Edge("data"))
            adj[inp.name].append(name)

        this_node = Node(name, operation)
        nodes[name] = this_node
        nodes_list.append(this_node)
        unused_outputs.append(this_node)
        adj[name] = []

    res = DFG()
    res.adj = adj
    res.nodes = nodes
    res.entry_points = entry_points
    res.edges = edges
    res.node_names = node_names

    return res
Loading