rmosolgo · rmosolgo · Jan 6, 2021 · Dec 27, 2020 · Dec 27, 2020 · Dec 27, 2020
diff --git a/Gemfile b/Gemfile
@@ -7,6 +7,7 @@ gem 'bootsnap' # required by the Rails apps generated in tests
 gem 'ruby-prof', platform: :ruby
 gem 'pry'
 gem 'pry-stack_explorer', platform: :ruby
+gem 'graphql-batch'
 if RUBY_VERSION >= "2.4"
   gem 'pry-byebug'
 end

diff --git a/Rakefile b/Rakefile
@@ -98,6 +98,12 @@ namespace :bench do
     prepare_benchmark
     GraphQLBenchmark.profile_large_result
   end
+
+  desc "Compare GraphQL-Batch and GraphQL-Dataloader"
+  task :profile_batch_loaders do
+    prepare_benchmark # helper defined outside this hunk; presumably loads the benchmark code -- confirm
+    GraphQLBenchmark.profile_batch_loaders
+  end
 end
 
 namespace :test do

diff --git a/benchmark/batch_loading.rb b/benchmark/batch_loading.rb
@@ -0,0 +1,143 @@
+module BatchLoading
+  class GraphQLBatchSchema < GraphQL::Schema
+    DATA = [
+      { id: "1", name: "Bulls", player_ids: ["2", "3"] },
+      { id: "2", name: "Michael Jordan", team_id: "1" },
+      { id: "3", name: "Scottie Pippin", team_id: "1" },
+      { id: "4", name: "Braves", player_ids: ["5", "6"] },
+      { id: "5", name: "Chipper Jones", team_id: "4" },
+      { id: "6", name: "Tom Glavine", team_id: "4" },
+    ]
+
+    class DataLoader < GraphQL::Batch::Loader
+      def initialize(column: :id)
+        @column = column
+      end
+
+      def perform(keys)
+        keys.each do |key|
+          record = DATA.find { |d| d[@column] == key }
+          fulfill(key, record)
+        end
+      end
+    end
+
+    class Team < GraphQL::Schema::Object
+      field :name, String, null: false
+      field :players, "[BatchLoading::GraphQLBatchSchema::Player]", null: false
+
+      def players
+        DataLoader.load_many(object[:player_ids])
+      end
+    end
+
+    class Player < GraphQL::Schema::Object
+      field :name, String, null: false
+      field :team, Team, null: false
+
+      def team
+        DataLoader.load(object[:team_id])
+      end
+    end
+
+    class Query < GraphQL::Schema::Object
+      field :team, Team, null: true do
+        argument :name, String, required: true
+      end
+
+      def team(name:)
+        DataLoader.for(column: :name).load(name)
+      end
+    end
+
+    query(Query)
+    use GraphQL::Execution::Interpreter
+    use GraphQL::Analysis::AST
+    use GraphQL::Batch
+  end
+
+  class GraphQLDataloaderSchema < GraphQL::Schema
+    class DataSource < GraphQL::Dataloader::Source
+      def initialize(column: :id)
+        @column = column
+      end
+
+      def fetch(keys)
+        keys.map { |key|
+          d = GraphQLBatchSchema::DATA.find { |d| d[@column] == key }
+          # p [key, @column, d]
+          d
+        }
+      end
+    end
+
+    class Team < GraphQL::Schema::Object
+      field :name, String, null: false
+      field :players, "[BatchLoading::GraphQLDataloaderSchema::Player]", null: false
+
+      def players
+        dataloader.with(DataSource).load_all(object[:player_ids])
+      end
+    end
+
+    class Player < GraphQL::Schema::Object
+      field :name, String, null: false
+      field :team, Team, null: false
+
+      def team
+        dataloader.with(DataSource).load(object[:team_id])
+      end
+    end
+
+    class Query < GraphQL::Schema::Object
+      field :team, Team, null: true do
+        argument :name, String, required: true
+      end
+
+      def team(name:)
+        dataloader.with(DataSource, column: :name).load(name)
+      end
+    end
+
+    query(Query)
+    use GraphQL::Execution::Interpreter
+    use GraphQL::Analysis::AST
+    use GraphQL::Dataloader
+  end
+
+  class GraphQLNoBatchingSchema < GraphQL::Schema
+    DATA = GraphQLBatchSchema::DATA
+
+    class Team < GraphQL::Schema::Object
+      field :name, String, null: false
+      field :players, "[BatchLoading::GraphQLNoBatchingSchema::Player]", null: false
+
+      def players
+        object[:player_ids].map { |id| DATA.find { |d| d[:id] == id } }
+      end
+    end
+
+    class Player < GraphQL::Schema::Object
+      field :name, String, null: false
+      field :team, Team, null: false
+
+      def team
+        DATA.find { |d| d[:id] == object[:team_id] }
+      end
+    end
+
+    class Query < GraphQL::Schema::Object
+      field :team, Team, null: true do
+        argument :name, String, required: true
+      end
+
+      def team(name:)
+        DATA.find { |d| d[:name] == name }
+      end
+    end
+
+    query(Query)
+    use GraphQL::Execution::Interpreter
+    use GraphQL::Analysis::AST
+  end
+end
diff --git a/benchmark/run.rb b/benchmark/run.rb
@@ -5,6 +5,7 @@
 require "benchmark/ips"
 require "ruby-prof"
 require "memory_profiler"
+require "graphql/batch"
 
 module GraphQLBenchmark
   QUERY_STRING = GraphQL::Introspection::INTROSPECTION_QUERY
@@ -123,6 +124,7 @@ class Schema < GraphQL::Schema
       query QueryType
       use GraphQL::Execution::Interpreter
       use GraphQL::Analysis::AST
+      use GraphQL::Dataloader
     end
 
     ALL_FIELDS = GraphQL.parse <<-GRAPHQL
@@ -142,4 +144,71 @@ class Schema < GraphQL::Schema
       }
     GRAPHQL
   end
+
+  def self.profile_batch_loaders
+    require_relative "./batch_loading"
+    include BatchLoading
+
+    document = GraphQL.parse <<-GRAPHQL
+    {
+      braves: team(name: "Braves") { ...TeamFields }
+      bulls: team(name: "Bulls") { ...TeamFields }
+    }
+
+    fragment TeamFields on Team {
+      players {
+        team {
+          players {
+            team {
+              name
+            }
+          }
+        }
+      }
+    }
+    GRAPHQL
+    batch_result = GraphQLBatchSchema.execute(document: document).to_h
+    dataloader_result = GraphQLDataloaderSchema.execute(document: document).to_h
+    no_batch_result = GraphQLNoBatchingSchema.execute(document: document).to_h
+
+    results = [batch_result, dataloader_result, no_batch_result].uniq
+    if results.size > 1
+      puts "Batch result:"
+      pp batch_result
+      puts "Dataloader result:"
+      pp dataloader_result
+      puts "No-batch result:"
+      pp no_batch_result
+      raise "Got different results -- fix implementation before benchmarking."
+    end
+
+    Benchmark.ips do |x|
+      x.report("GraphQL::Batch") { GraphQLBatchSchema.execute(document: document) }
+      x.report("GraphQL::Dataloader") { GraphQLDataloaderSchema.execute(document: document) }
+      x.report("No Batching") { GraphQLNoBatchingSchema.execute(document: document) }
+
+      x.compare!
+    end
+
+    puts "========== GraphQL-Batch Memory =============="
+    report = MemoryProfiler.report do
+      GraphQLBatchSchema.execute(document: document)
+    end
+
+    report.pretty_print
+
+    puts "========== Dataloader Memory ================="
+    report = MemoryProfiler.report do
+      GraphQLDataloaderSchema.execute(document: document)
+    end
+
+    report.pretty_print
+
+    puts "========== No Batch Memory =============="
+    report = MemoryProfiler.report do
+      GraphQLNoBatchingSchema.execute(document: document)
+    end
+
+    report.pretty_print
+  end
 end
diff --git a/guides/dataloader/adopting.md b/guides/dataloader/adopting.md
@@ -0,0 +1,100 @@
+---
+layout: guide
+search: true
+section: Dataloader
+title: Dataloader vs. GraphQL-Batch
+desc: Comparing and Contrasting Batch Loading Options
+index: 3
+experimental: true
+---
+
+{{ "GraphQL::Dataloader" | api_doc }} solves the same problem as [`GraphQL::Batch`](https://github.com/shopify/graphql-batch). There are a few major differences between the modules:
+
+
+- __Concurrency Primitive:__ GraphQL-Batch uses `Promise`s from [`promise.rb`](https://github.com/lgierth/promise.rb); GraphQL::Dataloader uses Ruby's [`Fiber` API](https://ruby-doc.org/core-3.0.0/Fiber.html). These primitives dictate how batch loading code is written (see below for comparisons).
+- __Maturity:__ Frankly, GraphQL-Batch is about as old as GraphQL-Ruby, and it's been in production at Shopify, GitHub, and others for many years. GraphQL::Dataloader is new, and although Ruby has supported `Fiber`s since 1.9, they still aren't widely used.
+- __Scope:__ It's not currently possible to use `GraphQL::Dataloader` _outside_ GraphQL.
+
+The incentive in writing `GraphQL::Dataloader` was to leverage `Fiber`'s ability to _transparently_ pause and resume work, which removes the need for `Promise`s (and removes the resulting complexity in the code). Additionally, `GraphQL::Dataloader` should _eventually_ support Ruby 3.0's `Fiber.scheduler` API, which runs I/O in the background by default.
+
+## Comparison: Fetching a single object
+
+In this example, a single object is batch-loaded to satisfy a GraphQL field.
+
+- With __GraphQL-Batch__, you call a loader, which returns a `Promise`:
+
+  ```ruby
+  record_promise = Loaders::Record.load(1)
+  ```
+
+  Then, under the hood, GraphQL-Ruby manages the promise (using its `lazy_resolve` feature, upstreamed from GraphQL-Batch many years ago). GraphQL-Ruby will call `.sync` on it when no further execution is possible; `promise.rb` implements `Promise#sync` to execute the pending work.
+
+- With __GraphQL::Dataloader__, you get a source, then call `.load` on it, which may pause the current Fiber, but it returns the requested object.
+
+  ```ruby
+  dataloader.with(Sources::Record).load(1)
+  ```
+
+  Since the requested object is (eventually) returned from `.load`, nothing else is required.
+
+## Comparison: Fetching objects in sequence (dependent)
+
+In this example, one object is loaded, then another object is loaded _based on_ the first one.
+
+- With __GraphQL-Batch__, `.then { ... }` is used to join dependent code blocks:
+
+  ```ruby
+  Loaders::Record.load(1).then do |record|
+    Loaders::OtherRecord.load(record.other_record_id)
+  end
+  ```
+
+  That call returns a `Promise`, which is stored by GraphQL-Ruby, and finally `.sync`ed.
+
+- With __GraphQL::Dataloader__, `.load(...)` returns the requested object (after a potential `Fiber` pause), so no other method calls are necessary:
+
+  ```ruby
+  record = dataloader.with(Sources::Record).load(1)
+  dataloader.with(Sources::OtherRecord).load(record.other_record_id)
+  ```
+
+## Comparison: Fetching objects concurrently (independent)
+
+Sometimes, you need multiple _independent_ records to perform a calculation. Each record is loaded, then they're combined in some bit of work.
+
+- With __GraphQL-Batch__, `Promise.all(...)` is used to wait for several pending loads:
+
+  ```ruby
+  promise_1 = Loaders::Record.load(1)
+  promise_2 = Loaders::OtherRecord.load(2)
+  Promise.all([promise_1, promise_2]).then do |record, other_record|
+    do_something(record, other_record)
+  end
+  ```
+
+  If the objects are loaded from the same loader, then `.load_many` also works:
+
+  ```ruby
+  Loaders::Record.load_many([1, 2]).then do |record, other_record|
+    do_something(record, other_record)
+  end
+  ```
+
+- With __GraphQL::Dataloader__, each request is registered with `.request(...)` (which never pauses the Fiber), then data is loaded with `.load` (which will pause the Fiber as needed):
+
+  ```ruby
+  # first, make some requests
+  request_1 = dataloader.with(Sources::Record).request(1)
+  request_2 = dataloader.with(Sources::OtherRecord).request(2)
+  # then, load the objects and do something
+  record = request_1.load
+  other_record = request_2.load
+  do_something(record, other_record)
+  ```
+
+  If the objects come from the same `Source`, then `.load_all` will return the objects directly:
+
+  ```ruby
+  record, other_record = dataloader.with(Sources::Record).load_all([1, 2])
+  do_something(record, other_record)
+  ```
diff --git a/guides/dataloader/dataloader.md b/guides/dataloader/dataloader.md
@@ -0,0 +1,20 @@
+---
+layout: guide
+search: true
+section: Dataloader
+title: Dataloader
+desc: The Dataloader orchestrates Fibers and Sources
+index: 2
+experimental: true
+---
+
+{{ "GraphQL::Dataloader" | api_doc }} instances are created for each query (or multiplex) and they:
+
+- Cache {% internal_link "Source", "/dataloader/sources" %} instances for the duration of GraphQL execution
+- Run pending Fibers to resolve data requirements and continue GraphQL execution
+
+During a query, you can access the dataloader instance with:
+
+- {{ "GraphQL::Query::Context#dataloader" | api_doc }} (`context.dataloader`, anywhere that query context is available)
+- {{ "GraphQL::Schema::Object#dataloader" | api_doc }} (`dataloader` inside a resolver method)
+- {{ "GraphQL::Schema::Resolver#dataloader" | api_doc }} (`dataloader` inside `def resolve` of a Resolver, Mutation, or Subscription class.)