
Commit e37845e

fix sinkhorn2 bug for ReverseDiff (#130)
* fix sinkhorn2 bug for ReverseDiff
* remove unnecessary files and format
* fix typo
* change eps to be larger for autodiff tests
* incorporate @devmotion's fix instead and fix deps
* test to fix CI
* format
1 parent ab9bc76 commit e37845e
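The commit makes the entropy-regularized `sinkhorn2` cost differentiable with ReverseDiff as well as ForwardDiff. Below is a minimal sketch (not taken from the repository) of the call pattern this is meant to support; the marginals, cost matrix, and ε are placeholder data:

using OptimalTransport
using ReverseDiff
using Distances: SqEuclidean, pairwise
using LogExpFunctions: softmax

# placeholder problem data: uniform marginals and a squared-Euclidean cost matrix
μ = fill(1 / 10, 10)
ν = fill(1 / 20, 20)
C = pairwise(SqEuclidean(), rand(1, 10), rand(1, 20); dims=2)
ε = 0.05

# reverse-mode gradient of the regularized Sinkhorn cost w.r.t. the (log-)weights of ν
∇ = ReverseDiff.gradient(log.(ν)) do xs
    sinkhorn2(μ, softmax(xs), C, ε; regularization=true)
end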

File tree

4 files changed, +68 −63 lines


Project.toml

Lines changed: 2 additions & 1 deletion
@@ -30,6 +30,7 @@ julia = "1"
 
 [extras]
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 HCubature = "19dc6840-f33b-545b-b366-655c7e3ffd49"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 PythonOT = "3c485715-4278-42b2-9b5f-8f00e43c12ef"
@@ -39,4 +40,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Tulip = "6dd1b50a-3aae-11e9-10b5-ef983d2400fa"
 
 [targets]
-test = ["ForwardDiff", "Pkg", "PythonOT", "Random", "SafeTestsets", "Test", "Tulip", "HCubature"]
+test = ["ForwardDiff", "ReverseDiff", "Pkg", "PythonOT", "Random", "SafeTestsets", "Test", "Tulip", "HCubature"]

src/entropic/sinkhorn.jl

Lines changed: 1 addition & 1 deletion
@@ -202,7 +202,7 @@ function sinkhorn2(μ, ν, C, ε, alg::Sinkhorn; regularization=false, plan=noth
     end
     cost = if regularization
         dot_matwise(γ, C) .+
-        ε * reshape(sum(LogExpFunctions.xlogx, γ; dims=(1, 2)), size(γ)[3:end])
+        ε .* reshape(sum(LogExpFunctions.xlogx, γ; dims=(1, 2)), size(γ)[3:end])
     else
         dot_matwise(γ, C)
     end
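For reference, the patched line is the entropic term of the regularized cost, ⟨γ, C⟩ + ε ∑ᵢⱼ γᵢⱼ log γᵢⱼ; changing `*` to `.*` only turns the scaling into a broadcast, presumably so that the term composes correctly with the batched or AD-tracked arrays produced by `dot_matwise`. A standalone sketch of the non-batched quantity, using a hypothetical helper name not present in the package:

# hypothetical helper showing the quantity computed by the patched expression
# for a single transport plan γ and cost matrix C:
#   cost = ⟨γ, C⟩ + ε * Σ_ij γ_ij * log(γ_ij)
using LinearAlgebra: dot
using LogExpFunctions: xlogx

entropic_sinkhorn_cost(γ, C, ε) = dot(γ, C) + ε * sum(xlogx, γ)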

test/entropic/sinkhorn_gibbs.jl

Lines changed: 64 additions & 60 deletions
@@ -2,6 +2,7 @@ using OptimalTransport
 
 using Distances
 using ForwardDiff
+using ReverseDiff
 using LogExpFunctions
 using PythonOT: PythonOT
 
@@ -160,68 +161,71 @@ Random.seed!(100)
         # together. test against gradient computed using analytic formula of Proposition 2.3 of
         # Cuturi, Marco, and Gabriel Peyré. "A smoothed dual approach for variational Wasserstein problems." SIAM Journal on Imaging Sciences 9.1 (2016): 320-343.
         #
+        ε = 0.05 # use a larger ε to avoid having to do many iterations
         # target marginal
-        ∇ = ForwardDiff.gradient(log.(ν)) do xs
-            sinkhorn2(μ, softmax(xs), C, ε, SinkhornGibbs(); regularization=true)
+        for Diff in [ReverseDiff, ForwardDiff]
+            ∇ = Diff.gradient(log.(ν)) do xs
+                sinkhorn2(μ, softmax(xs), C, ε, SinkhornGibbs(); regularization=true)
+            end
+            ∇default = Diff.gradient(log.(ν)) do xs
+                sinkhorn2(μ, softmax(xs), C, ε; regularization=true)
+            end
+            @test ∇ == ∇default
+
+            solver = OptimalTransport.build_solver(μ, ν, C, ε, SinkhornGibbs())
+            OptimalTransport.solve!(solver)
+            # helper function
+            function dualvar_to_grad(x, ε)
+                x = -ε * log.(x)
+                x .-= sum(x) / size(x, 1)
+                return -x
+            end
+            ∇_ot = dualvar_to_grad(solver.cache.v, ε)
+            # chain rule because target measure parameterised by softmax
+            J_softmax = ForwardDiff.jacobian(log.(ν)) do xs
+                softmax(xs)
+            end
+            ∇analytic_target = J_softmax * ∇_ot
+            # check that gradient obtained by AD matches the analytic formula
+            @test ∇ ≈ ∇analytic_target rtol = 1e-6
+
+            # source marginal
+            ∇ = Diff.gradient(log.(μ)) do xs
+                sinkhorn2(softmax(xs), ν, C, ε, SinkhornGibbs(); regularization=true)
+            end
+            ∇default = Diff.gradient(log.(μ)) do xs
+                sinkhorn2(softmax(xs), ν, C, ε; regularization=true)
+            end
+            @test ∇ == ∇default
+
+            # check that gradient obtained by AD matches the analytic formula
+            solver = OptimalTransport.build_solver(μ, ν, C, ε, SinkhornGibbs())
+            OptimalTransport.solve!(solver)
+            J_softmax = ForwardDiff.jacobian(log.(μ)) do xs
+                softmax(xs)
+            end
+            ∇_ot = dualvar_to_grad(solver.cache.u, ε)
+            ∇analytic_source = J_softmax * ∇_ot
+            @test ∇ ≈ ∇analytic_source rtol = 1e-6
+
+            # both marginals
+            ∇ = Diff.gradient(log.(vcat(μ, ν))) do xs
+                sinkhorn2(
+                    softmax(xs[1:M]),
+                    softmax(xs[(M + 1):end]),
+                    C,
+                    ε,
+                    SinkhornGibbs();
+                    regularization=true,
+                )
+            end
+            ∇default = Diff.gradient(log.(vcat(μ, ν))) do xs
+                sinkhorn2(softmax(xs[1:M]), softmax(xs[(M + 1):end]), C, ε; regularization=true)
+            end
+            @test ∇ == ∇default
+            ∇analytic = vcat(∇analytic_source, ∇analytic_target)
+            @test ∇ ≈ ∇analytic rtol = 1e-6
         end
-        ∇default = ForwardDiff.gradient(log.(ν)) do xs
-            sinkhorn2(μ, softmax(xs), C, ε; regularization=true)
-        end
-        @test ∇ == ∇default
-
-        solver = OptimalTransport.build_solver(μ, ν, C, ε, SinkhornGibbs())
-        OptimalTransport.solve!(solver)
-        # helper function
-        function dualvar_to_grad(x, ε)
-            x = -ε * log.(x)
-            x .-= sum(x) / size(x, 1)
-            return -x
-        end
-        ∇_ot = dualvar_to_grad(solver.cache.v, ε)
-        # chain rule because target measure parameterised by softmax
-        J_softmax = ForwardDiff.jacobian(log.(ν)) do xs
-            softmax(xs)
-        end
-        ∇analytic_target = J_softmax * ∇_ot
-        # check that gradient obtained by AD matches the analytic formula
-        @test ∇ ≈ ∇analytic_target rtol = 1e-6
-
-        # source marginal
-        ∇ = ForwardDiff.gradient(log.(μ)) do xs
-            sinkhorn2(softmax(xs), ν, C, ε, SinkhornGibbs(); regularization=true)
-        end
-        ∇default = ForwardDiff.gradient(log.(μ)) do xs
-            sinkhorn2(softmax(xs), ν, C, ε; regularization=true)
-        end
-        @test ∇ == ∇default
-
-        # check that gradient obtained by AD matches the analytic formula
-        solver = OptimalTransport.build_solver(μ, ν, C, ε, SinkhornGibbs())
-        OptimalTransport.solve!(solver)
-        J_softmax = ForwardDiff.jacobian(log.(μ)) do xs
-            softmax(xs)
-        end
-        ∇_ot = dualvar_to_grad(solver.cache.u, ε)
-        ∇analytic_source = J_softmax * ∇_ot
-        @test ∇ ≈ ∇analytic_source rtol = 1e-6
-
-        # both marginals
-        ∇ = ForwardDiff.gradient(log.(vcat(μ, ν))) do xs
-            sinkhorn2(
-                softmax(xs[1:M]),
-                softmax(xs[(M + 1):end]),
-                C,
-                ε,
-                SinkhornGibbs();
-                regularization=true,
-            )
-        end
-        ∇default = ForwardDiff.gradient(log.(vcat(μ, ν))) do xs
-            sinkhorn2(softmax(xs[1:M]), softmax(xs[(M + 1):end]), C, ε; regularization=true)
-        end
-        @test ∇ == ∇default
-        ∇analytic = vcat(∇analytic_source, ∇analytic_target)
-        @test ∇ ≈ ∇analytic rtol = 1e-6
     end
 
     @testset "deprecations" begin
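The analytic check in this test relies on the dual characterization from the cited Cuturi & Peyré result: up to an additive constant (handled by the centering in `dualvar_to_grad`), the gradient of the entropy-regularized cost with respect to each marginal is the corresponding optimal dual potential, recovered here from the Sinkhorn scaling variables `solver.cache.u` and `solver.cache.v`. As a sketch of the relation the helper encodes,

\nabla_{\mu} \mathrm{OT}_\varepsilon(\mu,\nu) = f^\star = \varepsilon \log u, \qquad
\nabla_{\nu} \mathrm{OT}_\varepsilon(\mu,\nu) = g^\star = \varepsilon \log v
\quad \text{(each up to an additive constant).}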

test/exact.jl

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ Random.seed!(100)
 
     # compute OT plan
    γ = ot_plan(sqeuclidean, μ, ν)
-    x = randn()
+    x = 0
    @test γ(x) ≈ quantile(ν, cdf(μ, x))
 
    # compute OT cost
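The stabilized assertion evaluates the closed-form one-dimensional optimal transport map (the monotone rearrangement) at a fixed point rather than a random draw; `quantile(ν, cdf(μ, x))` is the composition of the target quantile function with the source CDF,

T(x) = F_{\nu}^{-1}\big(F_{\mu}(x)\big).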
