Commit 389d83d

convert dummy_solver

1 parent a8af8a4

2 files changed: +135 −32 lines

test/dummy_solver.jl

Lines changed: 129 additions & 29 deletions
@@ -1,56 +1,157 @@
+# non-allocating reshape
+# see https://github.com/JuliaLang/julia/issues/36313
+reshape_array(a, dims) = invoke(Base._reshape, Tuple{AbstractArray, typeof(dims)}, a, dims)
+
+mutable struct DummySolver{S} <: AbstractOptimizationSolver
+  x::S    # primal approximation
+  gx::S   # gradient of objective
+  y::S    # multiplier estimates
+  rhs::S  # right-hand side of Newton system
+  jval::S # flattened Jacobian
+  hval::S # flattened Hessian
+  wval::S # flattened augmented matrix
+  Δxy::S  # search direction
+end
+
+function DummySolver(nlp::AbstractNLPModel{T, S}) where {T, S <: AbstractVector{T}}
+  nvar, ncon = nlp.meta.nvar, nlp.meta.ncon
+  x = similar(nlp.meta.x0)
+  gx = similar(nlp.meta.x0)
+  y = similar(nlp.meta.y0)
+  rhs = similar(nlp.meta.x0, nvar + ncon)
+  jval = similar(nlp.meta.x0, ncon * nvar)
+  hval = similar(nlp.meta.x0, nvar * nvar)
+  wval = similar(nlp.meta.x0, (nvar + ncon) * (nvar + ncon))
+  Δxy = similar(nlp.meta.x0, nvar + ncon)
+  DummySolver{S}(x, gx, y, rhs, jval, hval, wval, Δxy)
+end
+
 function dummy_solver(
-  nlp::AbstractNLPModel;
-  x::AbstractVector = nlp.meta.x0,
-  atol::Real = sqrt(eps(eltype(x))),
-  rtol::Real = sqrt(eps(eltype(x))),
+  nlp::AbstractNLPModel{T, S},
+  args...;
+  kwargs...,
+) where {T, S <: AbstractVector{T}}
+  solver = DummySolver(nlp)
+  stats = GenericExecutionStats(nlp)
+  solve!(solver, nlp, stats, args...; kwargs...)
+end
+
+function SolverCore.solve!(
+  solver::DummySolver{S},
+  nlp::AbstractNLPModel{T, S},
+  stats::GenericExecutionStats;
+  callback = (args...) -> nothing,
+  x0::S = nlp.meta.x0,
+  atol::Real = sqrt(eps(T)),
+  rtol::Real = sqrt(eps(T)),
   max_eval::Int = 1000,
   max_time::Float64 = 30.0,
-)
+  verbose::Bool = true,
+) where {T, S <: AbstractVector{T}}
   start_time = time()
   elapsed_time = 0.0
 
   nvar, ncon = nlp.meta.nvar, nlp.meta.ncon
-  T = eltype(x)
+  x = solver.x .= x0
+  rhs = solver.rhs
+  dual = view(rhs, 1:nvar)
+  cx = view(rhs, (nvar + 1):(nvar + ncon))
+  gx = solver.gx
+  y = solver.y
+  jval = solver.jval
+  hval = solver.hval
+  wval = solver.wval
+  Δxy = solver.Δxy
+  nnzh = Int(nvar * (nvar + 1) / 2)
+  nnzh == nlp.meta.nnzh || error("solver assumes Hessian is dense")
+  nvar * ncon == nlp.meta.nnzj || error("solver assumes Jacobian is dense")
 
-  cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0)
-  gx = grad(nlp, x)
-  Jx = ncon > 0 ? Matrix(jac(nlp, x)) : zeros(T, 0, nvar)
-  y = -Jx' \ gx
-  Hxy = ncon > 0 ? hess(nlp, x, y) : hess(nlp, x)
+  grad!(nlp, x, gx)
+  dual .= gx
 
-  dual = gx + Jx' * y
+  # assume the model returns a dense Jacobian in column-major order
+  if ncon > 0
+    cons!(nlp, x, cx)
+    jac_coord!(nlp, x, jval)
+    Jx = reshape_array(jval, (ncon, nvar))
+    Jqr = qr(Jx')
 
-  iter = 0
+    # compute least-squares multipliers
+    # by solving Jx' y = -gx
+    gx .*= -1
+    ldiv!(y, Jqr, gx)
+
+    # update dual <- dual + Jx' * y
+    mul!(dual, Jx', y, one(T), one(T))
+  end
 
+  iter = 0
   ϵd = atol + rtol * norm(dual)
   ϵp = atol
 
   fx = obj(nlp, x)
-  @info log_header([:iter, :f, :c, :dual, :t, :x], [Int, T, T, T, Float64, Char])
-  @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, 'c'])
+  verbose && @info log_header([:iter, :f, :c, :dual, :t, :x], [Int, T, T, T, Float64, Char])
+  verbose && @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, 'c'])
   solved = norm(dual) < ϵd && norm(cx) < ϵp
   tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time
 
   while !(solved || tired)
-    Hxy = ncon > 0 ? hess(nlp, x, y) : hess(nlp, x)
-    W = Symmetric([Hxy zeros(T, nvar, ncon); Jx zeros(T, ncon, ncon)], :L)
-    Δxy = -W \ [dual; cx]
-    Δx = Δxy[1:nvar]
-    Δy = Δxy[(nvar + 1):end]
-    x += Δx
-    y += Δy
-
-    cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0)
-    gx = grad(nlp, x)
-    Jx = ncon > 0 ? Matrix(jac(nlp, x)) : zeros(T, 0, nvar)
-    dual = gx + Jx' * y
+    # assume the model returns a dense Hessian in column-major order
+    # NB: hess_coord!() only returns values in the lower triangle
+    hess_coord!(nlp, x, y, view(hval, 1:nnzh))
+
+    # rearrange nonzeros so they correspond to a dense nvar x nvar matrix
+    j = nvar * nvar
+    i = nnzh
+    k = 1
+    while i > nvar
+      for _ = 1:k
+        hval[j] = hval[i]
+        hval[i] = 0
+        j -= 1
+        i -= 1
+      end
+      j -= nvar - k
+      k += 1
+    end
+
+    # fill in augmented matrix
+    # W = [H J']
+    #     [J 0 ]
+    wval .= 0
+    Wxy = reshape_array(wval, (nvar + ncon, nvar + ncon))
+    Hxy = reshape_array(hval, (nvar, nvar))
+    Wxy[1:nvar, 1:nvar] .= Hxy
+    if ncon > 0
+      Wxy[(nvar + 1):(nvar + ncon), 1:nvar] .= Jx
+    end
+    LBL = factorize(Symmetric(Wxy, :L))
+
+    ldiv!(Δxy, LBL, rhs)
+    Δxy .*= -1
+    @views Δx = Δxy[1:nvar]
+    @views Δy = Δxy[(nvar + 1):(nvar + ncon)]
+    x .+= Δx
+    y .+= Δy
+
+    grad!(nlp, x, gx)
+    dual .= gx
+    if ncon > 0
+      cons!(nlp, x, cx)
+      jac_coord!(nlp, x, jval)
+      Jx = reshape_array(jval, (ncon, nvar))
+      Jqr = qr(Jx')
+      gx .*= -1
+      ldiv!(y, Jqr, gx)
+      mul!(dual, Jx', y, one(T), one(T))
+    end
     elapsed_time = time() - start_time
     solved = norm(dual) < ϵd && norm(cx) < ϵp
     tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time
 
     iter += 1
     fx = obj(nlp, x)
-    @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, 'd'])
+    verbose && @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, 'd'])
   end
 
   status = if solved
@@ -61,7 +162,6 @@ function dummy_solver(
     :max_eval
   end
 
-  stats = GenericExecutionStats(nlp)
   set_status!(stats, status)
   set_objective!(stats, fx)
   set_residuals!(stats, norm(cx), norm(dual))
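
A note on the `reshape_array` helper added at the top of this file: plain `reshape` on an `Array` allocates a fresh array object (the subject of the linked Julia issue), whereas invoking the generic `Base._reshape` fallback returns a lightweight wrapper that shares memory with its argument. A minimal sketch of the behavior (not part of the commit):

v = zeros(6)
A = reshape_array(v, (2, 3))  # wrapper sharing memory with v, no copy of the data
A[1, 1] = 5.0
v[1] == 5.0                   # true: writes go through to the flat vector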
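The trickiest new code is the loop that unpacks the Hessian: `hess_coord!` fills only the first nnzh = nvar * (nvar + 1) / 2 entries of `hval` with the lower triangle, packed column by column, and the loop walks backwards, spreading those entries into their column-major slots of a dense nvar × nvar matrix and zeroing the slots it vacates, so the strict upper triangle stays zero for `Symmetric(Wxy, :L)`. A standalone check of the index arithmetic for nvar = 3 (the helper name `unpack_lower!` is mine, not part of the commit):

# Spread a packed lower triangle into dense column-major storage, in place.
function unpack_lower!(hval, nvar)
  nnzh = nvar * (nvar + 1) ÷ 2
  j = nvar * nvar  # write cursor: last column-major slot
  i = nnzh         # read cursor: last packed entry
  k = 1            # lower-triangle entries in the current (last) column
  while i > nvar
    for _ = 1:k
      hval[j] = hval[i]
      hval[i] = 0
      j -= 1
      i -= 1
    end
    j -= nvar - k  # skip the strict upper-triangle slots of this column
    k += 1
  end
  hval
end

# packed lower triangle of a 3×3 Hessian, padded with zeros to nvar^2 entries
hval = [11.0, 21.0, 31.0, 22.0, 32.0, 33.0, 0.0, 0.0, 0.0]
reshape(unpack_lower!(hval, 3), 3, 3)  # == [11 0 0; 21 22 0; 31 32 33]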

test/test_stats.jl

Lines changed: 6 additions & 3 deletions
@@ -66,9 +66,10 @@ function test_stats()
   @testset "Testing Dummy Solver with multi-precision" begin
     for T in (Float16, Float32, Float64, BigFloat)
       nlp = ADNLPModel(x -> dot(x, x), ones(T, 2))
+      solver = DummySolver(nlp)
 
-      with_logger(NullLogger()) do
-        stats = dummy_solver(nlp)
+      stats = with_logger(NullLogger()) do
+        solve!(solver, nlp)
       end
       @test typeof(stats.objective) == T
       @test typeof(stats.dual_feas) == T
@@ -79,9 +80,11 @@ function test_stats()
       @test eltype(stats.multipliers_U) == T
 
       nlp = ADNLPModel(x -> dot(x, x), ones(T, 2), x -> [sum(x) - 1], T[0], T[0])
+      solver = DummySolver(nlp)
+      stats = GenericExecutionStats(nlp)
 
       with_logger(NullLogger()) do
-        stats = dummy_solver(nlp)
+        solve!(solver, nlp, stats)
       end
       @test typeof(stats.objective) == T
       @test typeof(stats.dual_feas) == T
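
For reference, the calling pattern the converted tests exercise: build the solver workspace and the output container once, then solve in place, so repeated runs reuse storage instead of reallocating. A minimal sketch, assuming `DummySolver`, `solve!`, and `GenericExecutionStats` are in scope as in the test suite, with `ADNLPModels` providing the toy model:

using ADNLPModels, LinearAlgebra

nlp = ADNLPModel(x -> dot(x, x), ones(2))  # unconstrained toy problem from the tests
solver = DummySolver(nlp)                  # workspace allocated once
stats = GenericExecutionStats(nlp)         # output container allocated once
solve!(solver, nlp, stats, verbose = false)
stats.status, stats.objective              # results were written into stats in place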
