Skip to content

Add initial cgra compiler #704

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler_gym/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def dataset_from_parsed_uri(self, uri: BenchmarkUri) -> Dataset:
key = self._dataset_key_from_uri(uri)

if key not in self._datasets:
print("datasets are ", str(self._datasets))
raise LookupError(f"Dataset not found: {key}")

return self._datasets[key]
Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ py_library(
deps = [
":compiler_env",
"//compiler_gym:config",
"//compiler_gym/envs/cgra",
"//compiler_gym/envs/gcc",
"//compiler_gym/envs/llvm",
"//compiler_gym/envs/loop_tool",
Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ cg_add_all_subdirs()
set(ENVS_DEPS
::compiler_env
compiler_gym::envs::gcc::gcc
compiler_gym::envs::cgra::cgra
compiler_gym::envs::loop_tool::loop_tool
)
if(COMPILER_GYM_ENABLE_LLVM_ENV)
Expand Down
2 changes: 2 additions & 0 deletions compiler_gym/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from compiler_gym import config
from compiler_gym.envs.compiler_env import CompilerEnv
from compiler_gym.envs.gcc import GccEnv
from compiler_gym.envs.cgra import CgraEnv

if config.enable_llvm_env:
from compiler_gym.envs.llvm.llvm_env import LlvmEnv # noqa: F401
Expand All @@ -18,6 +19,7 @@
"COMPILER_GYM_ENVS",
"CompilerEnv",
"GccEnv",
"CgraEnv",
"LoopToolEnv",
]

Expand Down
4 changes: 4 additions & 0 deletions compiler_gym/envs/cgra/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
__pycache__
env.sh
cart-pole.py
env
26 changes: 26 additions & 0 deletions compiler_gym/envs/cgra/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Lint fix: drop the unused "py_test" symbol from the load, sort srcs/deps
# lexicographically, and add trailing commas, per buildifier conventions.
load("@rules_python//python:defs.bzl", "py_library")

py_library(
    name = "cgra",
    srcs = [
        "DFG.py",
        "Operations.py",
        "__init__.py",
        "cgra_rewards.py",
    ],
    data = [
        "//compiler_gym/envs/cgra/service",
    ],
    visibility = ["//visibility:public"],
    deps = [
        "//compiler_gym/envs/cgra/datasets",
        "//compiler_gym/errors",
        "//compiler_gym/service:client_service_compiler_env",
        "//compiler_gym/service/runtime",  # Implicit dependency of service.
        "//compiler_gym/util",
    ],
)
25 changes: 25 additions & 0 deletions compiler_gym/envs/cgra/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) Facebook, Inc. and its affiliates.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this file needs updating to match the cgra BUILD file

#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

cg_add_all_subdirs()

# Reviewer-requested fix: this file was a verbatim copy of the gcc env's
# CMake config (NAME gcc, gcc.py sources).  Updated to mirror
# compiler_gym/envs/cgra/BUILD so the CMake and Bazel builds agree.
cg_py_library(
  NAME
    cgra
  SRCS
    "DFG.py"
    "Operations.py"
    "__init__.py"
    "cgra_rewards.py"
  DATA
    compiler_gym::envs::cgra::service::service
  DEPS
    compiler_gym::envs::cgra::datasets::datasets
    compiler_gym::errors::errors
    compiler_gym::service::client_service_compiler_env
    compiler_gym::service::runtime::runtime
    compiler_gym::util::util
  PUBLIC
)
206 changes: 206 additions & 0 deletions compiler_gym/envs/cgra/DFG.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
import json
from pathlib import Path
import random

from importlib_metadata import entry_points
from compiler_gym.service.proto import (
Benchmark
)
from typing import Optional, List
from compiler_gym.third_party.inst2vec import Inst2vecEncoder
import compiler_gym.third_party.llvm as llvm
from compiler_gym.envs.cgra.Operations import Operation, operation_from_name

class Edge(object):
    """A typed edge in the data-flow graph.

    Endpoints are not stored here; the DFG keeps connectivity in its
    adjacency structure, so an Edge only records its kind (e.g. 'data').
    """

    def __init__(self, type):
        # NOTE: parameter shadows the `type` builtin; name kept so the
        # caller-facing signature is unchanged.
        self.type = type

class Node(object):
    """A named node of the data-flow graph wrapping a single operation."""

    def __init__(self, name, operation):
        self.name = name
        self.operation = operation

    def __str__(self):
        # f-string formatting calls str() on the operation, matching the
        # original concatenation output exactly.
        return f"Node with name {self.name} and op {self.operation}"

class DFG(object):
    """A data-flow graph of named Nodes connected by typed Edges.

    A DFG can be built three ways:

      * from a JSON file on disk (``from_json``),
      * from a JSON string (``from_text``),
      * from a ``Benchmark`` proto, by running the external ``cgra-mapper``
        tool over the benchmark source and loading the JSON it emits.

    When none of those arguments are given the instance is deliberately left
    empty: ``generate_DFG`` constructs ``DFG()`` and fills in ``nodes``,
    ``adj``, ``edges``, ``entry_points`` and ``node_names`` directly.
    """

    def __init__(self, working_directory: Optional[Path] = None, benchmark: Optional[Benchmark] = None, from_json: Optional[Path] = None, from_text: Optional[str] = None):
        # Adapted from: https://github.com/facebookresearch/CompilerGym/blob/development/examples/loop_optimizations_service/service_py/loops_opt_service.py
        # self.inst2vec = _INST2VEC_ENCODER

        # Bug fix: this was never stored, but the benchmark branch below
        # reads self.working_directory.
        self.working_directory = working_directory

        if from_json is not None:
            self.load_dfg_from_json(from_json)
        elif from_text is not None:
            self.load_dfg_from_text(from_text)
        elif benchmark is not None:
            # Only re-create the JSON file if we aren't providing an existing
            # one.  The existing ones are mostly a debugging functionality.
            with open(self.working_directory / "benchmark.c", "wb") as f:
                f.write(benchmark.program.contents)

            # We use CGRA-Mapper to produce a DFG in JSON.
            # NOTE(review): run_command, self.src_path and self.dfg_path are
            # not defined anywhere in this module, and cgra-mapper must be on
            # $PATH -- this branch is dead code until those are wired up.
            run_command(
                ["cgra-mapper", self.src_path, self.dfg_path]
            )

            # Now, load in the DFG.
            self.load_dfg_from_json(self.dfg_path)
        # else: intentionally no error -- generate_DFG() relies on DFG()
        # producing an empty shell whose fields it assigns by hand.

    def __str__(self):
        return "nodes are: " + str(self.nodes) + " and edges are " + str(self.adj)

    def load_dfg_from_json(self, path):
        """Load the graph from a JSON file on disk."""
        with open(path, "r") as p:
            # Bug fix: json.loads() requires a string, so read the file
            # instead of handing load_dfg_from_text the file object itself.
            self.load_dfg_from_text(p.read())

    def load_dfg_from_text(self, text):
        """Populate nodes, edges and adjacency lists from a JSON string.

        Expected schema (assumed from usage -- TODO confirm against
        cgra-mapper output): top-level keys 'entry_points', 'nodes'
        (objects with 'name'/'operation') and 'edges' (objects with
        'type'/'from'/'to').
        """
        # json is imported at module level; the previous method-local
        # imports were redundant.
        f = json.loads(text)
        self.nodes = {}
        self.node_names = []
        self.edges = []
        self.adj = {}
        self.entry_points = f["entry_points"]

        # Build the nodes first so edges can refer to them.
        for node in f["nodes"]:
            self.nodes[node["name"]] = Node(node["name"], operation_from_name(node["operation"]))
            self.adj[node["name"]] = []
            self.node_names.append(node["name"])

        for edge in f["edges"]:
            self.edges.append(Edge(edge["type"]))

        # Build the adjacency lists (successors only):
        for edge in f["edges"]:
            self.adj[edge["from"]].append(edge["to"])

    def get_preds(self, node):
        """Return the predecessor Nodes of *node*.

        O(V) scan: the adjacency structure only records successors, so we
        have to walk every node's successor list.
        """
        preds = []
        for n in self.adj:
            if node.name in self.adj[n]:
                preds.append(self.nodes[n])
        return preds

    def get_succs(self, node):
        """Return the successor Nodes of *node*."""
        return [self.nodes[n] for n in self.adj[node.name]]

    # TODO -- fix this, because for a graph with multiple entry nodes,
    # this doesn't actually give the right answer :)
    # (should do in most cases)
    def bfs(self):
        """Yield Nodes in breadth-first order starting from the entry points.

        Each node is yielded at most once, even if reachable along several
        paths.  (Debug prints removed.)
        """
        to_explore = self.entry_points[:]
        seen = set()

        while to_explore:
            head = to_explore.pop(0)
            if head in seen:
                continue
            seen.add(head)
            yield self.nodes[head]

            # Enqueue this node's successors.
            to_explore += self.adj[head]

# Generate a test DFG using the operations in
# 'operations'.
def generate_DFG(operations: List[Operation], size, seed=0):
    """Generate a pseudo-random DFG of roughly *size* nodes for testing.

    Args:
        operations: pool of Operations to draw from.  Must contain at least
            one zero-input operation to seed the graph.
        size: target number of nodes.
        seed: RNG seed so generated graphs are reproducible.  The sequence
            of random calls is unchanged from the original implementation,
            so a given seed yields the same graph as before.

    Returns:
        A DFG built by filling in the fields of an empty ``DFG()``.

    Raises:
        ValueError: if *operations* has no zero-input operation (previously
        this surfaced as an opaque IndexError from random.choice).
    """
    random.seed(seed)
    # Start with some 0-input ops:
    start_ops = random.randint(1, min(size, 3))

    # Jump-start this --- in reality, these can be
    # phi nodes coming from previous tiers of the loop,
    # or variables coming from outside the loop.
    start_options = [op for op in operations if op.inputs == 0]
    if not start_options:
        raise ValueError(
            "generate_DFG requires at least one zero-input operation"
        )

    node_number = 0

    entry_points = []
    nodes = {}
    node_names = []
    nodes_list = []
    edges = []
    adj = {}

    # Keep track of outputs that we should probably use somewhere.
    unused_outputs = []
    for _ in range(start_ops):
        name = "node" + str(node_number)
        node_names.append(name)
        n = Node(name, random.choice(start_options))
        node_number += 1

        nodes[name] = n
        nodes_list.append(n)
        entry_points.append(name)
        unused_outputs.append(n)
        adj[name] = []

    while len(nodes) < size:
        # Generate a new node.
        operation = random.choice(operations)
        name = "node" + str(node_number)
        node_names.append(name)
        node_number += 1

        # Pick input nodes for it:
        inputs = []
        while len(inputs) < operation.inputs:
            # Select random nodes, biased towards the unused ones.
            if random.randint(0, 10) > 6 and len(unused_outputs) > 0:
                inputs.append(unused_outputs[0])
                unused_outputs = unused_outputs[1:]
            else:
                inputs.append(random.choice(nodes_list))
        # If the node has no arguments, then we should add it as an entry
        # point. --- TODO --- should we just skip this to avoid creating
        # graphs with too many constant loads?
        if operation.inputs == 0:
            entry_points.append(name)

        # Now create the edges.
        for inp in inputs:
            # Edge carries no endpoints -- connectivity lives in adj.
            edges.append(Edge("data"))
            adj[inp.name].append(name)

        this_node = Node(name, operation)
        nodes[name] = this_node
        nodes_list.append(this_node)
        unused_outputs.append(this_node)
        adj[name] = []

    res = DFG()
    res.adj = adj
    res.nodes = nodes
    res.entry_points = entry_points
    res.edges = edges
    res.node_names = node_names

    return res
Loading