From 14676d01aae703fc5d7f49e5e12199f51bcc0a1a Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Tue, 29 Apr 2025 14:52:20 +0200 Subject: [PATCH 01/33] preliminary solve! for TRDH --- src/TRDH_alg.jl | 240 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 239 insertions(+), 1 deletion(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index df7f41df..1a53951d 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -1,5 +1,243 @@ -export TRDH +export TRDH, TRDHSolver, solve! + +import SolverCore.solve! + +mutable struct TRDHSolver{ + T <: Real, + G <: ShiftedProximableFunction, + V <: AbstractVector{T}, + QN <: AbstractDiagonalQuasiNewtonOperator{T}, +} <: AbstractOptimizationSolver + xk::V + ∇fk::V + ∇fk⁻::V + mν∇fk::V + D::QN + ψ::G + xkn::V + s::V + dkσk::V + has_bnds::Bool + l_bound::V + u_bound::V + l_bound_m_x::V + u_bound_m_x::V +end + +function TRDHSolver( + reg_nlp::AbstractRegularizedNLPModel{T, V}; + D::Union{Nothing, AbstractDiagonalQuasiNewtonOperator} = nothing, +) where {T, V} + x0 = reg_nlp.model.meta.x0 + l_bound = reg_nlp.model.meta.lvar + u_bound = reg_nlp.model.meta.uvar + + xk = similar(x0) + ∇fk = similar(x0) + ∇fk⁻ = similar(x0) + mν∇fk = similar(x0) + xkn = similar(x0) + s = similar(x0) + dkσk = similar(x0) + has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) + if has_bnds + l_bound_m_x = similar(xk) + u_bound_m_x = similar(xk) + @. l_bound_m_x = l_bound - x0 + @. u_bound_m_x = u_bound - x0 + else + l_bound_m_x = similar(xk, 0) + u_bound_m_x = similar(xk, 0) + end + + ψ = + has_bnds ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : + shifted(reg_nlp.h, xk) + isnothing(D) && ( + D = + isa(reg_nlp.model, AbstractDiagonalQNModel) ? hess_op(reg_nlp.model, x0) : + SpectralGradient(T(1), reg_nlp.model.meta.nvar) + ) + + return TRDHSolver( + xk, + ∇fk, + ∇fk⁻, + mν∇fk, + D, + ψ, + xkn, + s, + dkσk, + has_bnds, + l_bound, + u_bound, + l_bound_m_x, + u_bound_m_x, + ) +end + +function SolverCore.solve!( + solver::TRDHSolver{T}, + reg_nlp::AbstractRegularizedNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = reg_nlp.model.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + neg_tol::T = eps(T)^(1 / 4), + verbose::Int = 0, + max_iter::Int = 10000, + max_time::Float64 = 30.0, + max_eval::Int = -1, + reduce_TR::Bool = true, + Δk::T = T(1), + η1::T = √√eps(T), + η2::T = T(0.9), + γ::T = T(3), + α::T = 1 / eps(T), + β::T = 1 / eps(T) +) where {T, V} + reset!(stats) + + # Retrieve workspace + selected = reg_nlp.selected + h = reg_nlp.h + nlp = reg_nlp.model + + xk = solver.xk .= x + + # Make sure ψ has the correct shift + shift!(solver.ψ, xk) + + ∇fk = solver.∇fk + ∇fk⁻ = solver.∇fk⁻ + mν∇fk = solver.mν∇fk + D = solver.D + dkσk = solver.dkσk + ψ = solver.ψ + xkn = solver.xkn + s = solver.s + has_bnds = solver.has_bnds + + if has_bnds + l_bound_m_x = solver.l_bound_m_x + u_bound_m_x = solver.u_bound_m_x + l_bound = solver.l_bound + u_bound = solver.u_bound + end + + # initialize parameters + improper = false + hk = @views h(xk[selected]) + if hk == Inf + verbose > 0 && @info "TRDH: finding initial guess where nonsmooth term is finite" + prox!(xk, h, xk, T(1)) + hk = @views h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "TRDH: found point where h has value" hk + end + improper = (hk == -Inf) + improper == true && @warn "TRDH: Improper term detected" + improper == true && return stats + + if verbose > 0 + @info log_header( + [:iter, :fx, :hx, :xi, :ρ, :Δ, :normx, :norms, :normD, :arrow], + [Int, T, T, T, T, T, T, T, T, Char], + hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere + :fx => "f(x)", + :hx => "h(x)", + :xi => "√(ξ/ν)", + :normx => "‖x‖", + :norms => "‖s‖", + :normD => "‖D‖", + :arrow => "TRDH", + ), + colsep = 1, + ) + end + + local ξ1::T + local ρk::T = zero(T) + + fk = obj(nlp, xk) + grad!(nlp, xk, ∇fk) + ∇fk⁻ .= ∇fk + DNorm = norm(D.d, Inf) + + ν = (α * Δk)/(DNorm + one(T)) + sqrt_ξ_νInv = one(T) + + @. mν∇fk = -ν * ∇fk + + set_iter!(stats, 0) + start_time = time() + set_time!(stats, 0.0) + set_objective!(stats, fk + hk) + set_solver_specific!(stats, :smooth_obj, fk) + set_solver_specific!(stats, :nonsmooth_obj, hk) + + φ1(d) = dot(∇fk, d) + mk1(d)::T = φ1(d) + ψ(d)::T #TODO put this in reduce_TR conditionals and see if the code compiles. + + φ(d) = begin + result = zero(T) + n = length(d) + @inbounds for i = 1:n + result += d[i]^2*D.d[i]/2 + ∇fk[i]*d[i] + end + return result + end + mk(d) = φ(d) + ψ(d) + + if reduce_TR + prox!(s, ψ, mν∇fk, ν) + mks = mk1(s) + + ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps() + sqrt_ξ_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) + solved = (ξ1 < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ_νInv ≤ atol) + (ξ1 < 0 && sqrt_ξ_νInv > neg_tol) && + error("R2DH: prox-gradient step should produce a decrease but ξ = $(ξ)") + atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative + else + if has_bnds + update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δk) + set_bounds!(ψ, l_bound_k, u_bound_k) + else + set_radius!(ψ, Δk) + end + + end + + + + set_status!( + stats, + get_status( + reg_nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + + + return stats + """ + + """ +end """ TRDH(nlp, h, χ, options; kwargs...) TRDH(f, ∇f!, h, options, x0) From 298d0b5d71c62a45cf4968de6a1b9cec6049a21e Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 4 May 2025 15:49:30 +0200 Subject: [PATCH 02/33] further solve! TRDH --- src/TRDH_alg.jl | 57 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 1a53951d..845729fd 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -27,10 +27,13 @@ end function TRDHSolver( reg_nlp::AbstractRegularizedNLPModel{T, V}; D::Union{Nothing, AbstractDiagonalQuasiNewtonOperator} = nothing, + χ = NormLinf(one(T)) ) where {T, V} x0 = reg_nlp.model.meta.x0 l_bound = reg_nlp.model.meta.lvar u_bound = reg_nlp.model.meta.uvar + l_bound_k = similar(l_bound) + u_bound_k = similar(u_bound) xk = similar(x0) ∇fk = similar(x0) @@ -40,19 +43,26 @@ function TRDHSolver( s = similar(x0) dkσk = similar(x0) has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) - if has_bnds - l_bound_m_x = similar(xk) - u_bound_m_x = similar(xk) - @. l_bound_m_x = l_bound - x0 - @. u_bound_m_x = u_bound - x0 + + is_subsolver = reg_nlp.h isa ShiftedProximableFunction # case TRDH is used as a subsolver + if is_subsolver + ψ = shifted(reg_nlp.h, xk) + @assert !has_bnds + l_bound = copy(ψ.l) + u_bound = copy(ψ.u) + @. l_bound_k = max(xk - one(T), l_bound) + @. u_bound_k = min(xk + one(T), u_bound) + has_bnds = true + set_bounds!(ψ, l_bound_k, u_bound_k) else - l_bound_m_x = similar(xk, 0) - u_bound_m_x = similar(xk, 0) + if has_bnds + @. l_bound_k = max(-one(T), l_bound - xk) + @. u_bound_k = min(one(T), u_bound - xk) + ψ = shifted(reg_nlp.h, xk, l_bound_k, u_bound_k, selected) + else + ψ = shifted(reg_nlp.h, xk, one(T), χ) + end end - - ψ = - has_bnds ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : - shifted(reg_nlp.h, xk) isnothing(D) && ( D = isa(reg_nlp.model, AbstractDiagonalQNModel) ? hess_op(reg_nlp.model, x0) : @@ -72,8 +82,8 @@ function TRDHSolver( has_bnds, l_bound, u_bound, - l_bound_m_x, - u_bound_m_x, + l_bound_k, + u_bound_k, ) end @@ -83,6 +93,7 @@ function SolverCore.solve!( stats::GenericExecutionStats{T, V}; callback = (args...) -> nothing, x::V = reg_nlp.model.meta.x0, + #χ = NormLinf(one(T)), atol::T = √eps(T), rtol::T = √eps(T), neg_tol::T = eps(T)^(1 / 4), @@ -182,6 +193,7 @@ function SolverCore.solve!( φ1(d) = dot(∇fk, d) mk1(d)::T = φ1(d) + ψ(d)::T #TODO put this in reduce_TR conditionals and see if the code compiles. + # model with diagonal Hessian φ(d) = begin result = zero(T) n = length(d) @@ -202,16 +214,21 @@ function SolverCore.solve!( (ξ1 < 0 && sqrt_ξ_νInv > neg_tol) && error("R2DH: prox-gradient step should produce a decrease but ξ = $(ξ)") atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative - else - if has_bnds - update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δk) - set_bounds!(ψ, l_bound_k, u_bound_k) - else - set_radius!(ψ, Δk) - end + end + return + Δ_effective = reduce_TR ? min(β * χ(s), Δk) : Δk + + # update radius + if has_bnds + update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δ_effective) + set_bounds!(ψ, l_bound_k, u_bound_k) + else + set_radius!(ψ, Δ_effective) end + iprox!(s, ψ, ∇fk, D) + set_status!( From 4b12fa96670c9d127ef44a3e572164d8374cdf94 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 6 Jun 2025 12:18:53 +0200 Subject: [PATCH 03/33] non-allocating, JSO-compliant TRDH --- src/TRDH_alg.jl | 193 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 167 insertions(+), 26 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 845729fd..7ecf00a7 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -7,6 +7,7 @@ mutable struct TRDHSolver{ G <: ShiftedProximableFunction, V <: AbstractVector{T}, QN <: AbstractDiagonalQuasiNewtonOperator{T}, + N } <: AbstractOptimizationSolver xk::V ∇fk::V @@ -14,9 +15,10 @@ mutable struct TRDHSolver{ mν∇fk::V D::QN ψ::G + χ::N xkn::V s::V - dkσk::V + dk::V has_bnds::Bool l_bound::V u_bound::V @@ -41,7 +43,7 @@ function TRDHSolver( mν∇fk = similar(x0) xkn = similar(x0) s = similar(x0) - dkσk = similar(x0) + dk = similar(x0) has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) is_subsolver = reg_nlp.h isa ShiftedProximableFunction # case TRDH is used as a subsolver @@ -76,9 +78,10 @@ function TRDHSolver( mν∇fk, D, ψ, + χ, xkn, s, - dkσk, + dk, has_bnds, l_bound, u_bound, @@ -88,7 +91,7 @@ function TRDHSolver( end function SolverCore.solve!( - solver::TRDHSolver{T}, + solver::TRDHSolver{T, G, V}, reg_nlp::AbstractRegularizedNLPModel{T, V}, stats::GenericExecutionStats{T, V}; callback = (args...) -> nothing, @@ -108,7 +111,7 @@ function SolverCore.solve!( γ::T = T(3), α::T = 1 / eps(T), β::T = 1 / eps(T) -) where {T, V} +) where {T, G, V} reset!(stats) # Retrieve workspace @@ -125,10 +128,11 @@ function SolverCore.solve!( ∇fk⁻ = solver.∇fk⁻ mν∇fk = solver.mν∇fk D = solver.D - dkσk = solver.dkσk + dk = solver.dk ψ = solver.ψ xkn = solver.xkn s = solver.s + χ = solver.χ has_bnds = solver.has_bnds if has_bnds @@ -152,6 +156,7 @@ function SolverCore.solve!( improper == true && @warn "TRDH: Improper term detected" improper == true && return stats + is_subsolver = h isa ShiftedProximableFunction # case TRDH is used as a subsolver if verbose > 0 @info log_header( [:iter, :fx, :hx, :xi, :ρ, :Δ, :normx, :norms, :normD, :arrow], @@ -176,6 +181,7 @@ function SolverCore.solve!( grad!(nlp, xk, ∇fk) ∇fk⁻ .= ∇fk + dk .= D.d DNorm = norm(D.d, Inf) ν = (α * Δk)/(DNorm + one(T)) @@ -190,20 +196,28 @@ function SolverCore.solve!( set_solver_specific!(stats, :smooth_obj, fk) set_solver_specific!(stats, :nonsmooth_obj, hk) - φ1(d) = dot(∇fk, d) - mk1(d)::T = φ1(d) + ψ(d)::T #TODO put this in reduce_TR conditionals and see if the code compiles. + # models + φ1 = let ∇fk = ∇fk + d -> dot(∇fk, d) + end + mk1 = let ψ = ψ + d -> φ1(d) + ψ(d)::T + end - # model with diagonal Hessian - φ(d) = begin - result = zero(T) - n = length(d) - @inbounds for i = 1:n - result += d[i]^2*D.d[i]/2 + ∇fk[i]*d[i] + φ = let ∇fk = ∇fk, dk = dk + d -> begin + result = zero(T) + n = length(d) + @inbounds for i = 1:n + result += d[i]^2 * dk[i] / 2 + ∇fk[i] * d[i] + end + result end - return result end - mk(d) = φ(d) + ψ(d) - + mk = let ψ = ψ + d -> φ(d) + ψ(d)::T + end + if reduce_TR prox!(s, ψ, mν∇fk, ν) mks = mk1(s) @@ -215,21 +229,28 @@ function SolverCore.solve!( error("R2DH: prox-gradient step should produce a decrease but ξ = $(ξ)") atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative end - return Δ_effective = reduce_TR ? min(β * χ(s), Δk) : Δk # update radius if has_bnds - update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δ_effective) - set_bounds!(ψ, l_bound_k, u_bound_k) + update_bounds!(l_bound_m_x, u_bound_m_x, is_subsolver, l_bound, u_bound, xk, Δ_effective) + set_bounds!(ψ, l_bound_m_x, u_bound_m_x) else set_radius!(ψ, Δ_effective) end - - iprox!(s, ψ, ∇fk, D) - + iprox!(s, ψ, ∇fk, dk) + ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() + sNorm = χ(s) + + if !reduce_TR + sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) + solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < atol) + (ξ < 0 && sqrt_ξ_νInv > neg_tol) && + error("TRDH: prox-gradient step should produce a decrease but ξ = $(ξ)") + atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative #TODO : this is redundant code with the other case of the test. + end set_status!( stats, @@ -249,12 +270,132 @@ function SolverCore.solve!( done = stats.status != :unknown + while !done - return stats - """ + stats.iter > 0 && iprox!(s, ψ, ∇fk, dk) + sNorm = χ(s) + + xkn .= xk .+ s + fkn = obj(nlp, xkn) + hkn = @views h(xkn[selected]) + + Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() + ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() + ρk = Δobj / ξ + + verbose > 0 && + stats.iter % verbose == 0 && + @info log_row( + Any[ + stats.iter, + fk, + hk, + sqrt_ξ_νInv, + ρk, + Δk, + χ(xk), + sNorm, + norm(D.d), + (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "="), + ], + colsep = 1, + ) + + if η1 ≤ ρk < Inf + xk .= xkn + if has_bnds + update_bounds!(l_bound_m_x, u_bound_m_x, is_subsolver, l_bound, u_bound, xk, Δk) + has_bnds && set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + end + fk = fkn + hk = hkn + shift!(ψ, xk) + grad!(nlp, xk, ∇fk) + @. ∇fk⁻ = ∇fk - ∇fk⁻ + push!(D, s, ∇fk⁻) # update QN operator + dk .= D.d + DNorm = norm(D.d, Inf) + ∇fk⁻ .= ∇fk + end + + if η2 ≤ ρk < Inf + Δk = max(Δk, γ * sNorm) + !has_bnds && set_radius!(ψ, Δk) + end + + if ρk < η1 || ρk == Inf + Δk = Δk / 2 + if has_bnds + update_bounds!(l_bound_m_x, u_bound_m_x, is_subsolver, l_bound, u_bound, xk, Δ_effective) + set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + else + set_radius!(ψ, Δ_effective) + end + end + + set_objective!(stats, fk + hk) + set_solver_specific!(stats, :smooth_obj, fk) + set_solver_specific!(stats, :nonsmooth_obj, hk) + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + + ν = reduce_TR ? (α * Δk)/(DNorm + one(T)) : α / (DNorm + one(T)) + mν∇fk .= -ν .* ∇fk + + if reduce_TR + prox!(s, ψ, mν∇fk, ν) + ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() + sqrt_ξ_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) + solved = (ξ1 < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ_νInv < atol) + (ξ1 < 0 && sqrt_ξ_νInv > neg_tol) && + error("TRDH: prox-gradient step should produce a decrease but ξ = $(ξ)") + + else + sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) + solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < atol) + (ξ < 0 && sqrt_ξ_νInv > neg_tol) && + error("TRDH: prox-gradient step should produce a decrease but ξ = $(ξ)") + end + + set_status!( + stats, + get_status( + reg_nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, + ), + ) - """ + callback(nlp, solver, stats) + + done = stats.status != :unknown + end + + if verbose > 0 && stats.status == :first_order + @info log_row( + Any[ + stats.iter, + fk, + hk, + sqrt_ξ_νInv, + ρk, + Δk, + χ(xk), + sNorm, + norm(D.d), + "", + ], + colsep = 1, + ) + @info "TRDH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)" + end end + """ TRDH(nlp, h, χ, options; kwargs...) TRDH(f, ∇f!, h, options, x0) From 2866da0d1a568099c693b967b966988d28b50869 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 6 Jun 2025 12:36:48 +0200 Subject: [PATCH 04/33] make TRDH equal when reduce_TR = false --- src/TRDH_alg.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 7ecf00a7..5f8a2aa5 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -226,7 +226,7 @@ function SolverCore.solve!( sqrt_ξ_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) solved = (ξ1 < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ_νInv ≤ atol) (ξ1 < 0 && sqrt_ξ_νInv > neg_tol) && - error("R2DH: prox-gradient step should produce a decrease but ξ = $(ξ)") + error("TR: prox-gradient step should produce a decrease but ξ = $(ξ)") atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative end @@ -272,15 +272,11 @@ function SolverCore.solve!( while !done - stats.iter > 0 && iprox!(s, ψ, ∇fk, dk) - sNorm = χ(s) - xkn .= xk .+ s fkn = obj(nlp, xkn) hkn = @views h(xkn[selected]) Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() - ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() ρk = Δobj / ξ verbose > 0 && @@ -349,8 +345,13 @@ function SolverCore.solve!( solved = (ξ1 < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ_νInv < atol) (ξ1 < 0 && sqrt_ξ_νInv > neg_tol) && error("TRDH: prox-gradient step should produce a decrease but ξ = $(ξ)") + end - else + iprox!(s, ψ, ∇fk, dk) + sNorm = χ(s) + + if !reduce_TR + ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < atol) (ξ < 0 && sqrt_ξ_νInv > neg_tol) && From 196edbc41a9ef21dbdefd4930b0d74dafc212b44 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 6 Jun 2025 14:08:48 +0200 Subject: [PATCH 05/33] TRDH function signatures --- src/TRDH_alg.jl | 484 +++++++++++++----------------------------------- 1 file changed, 124 insertions(+), 360 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 5f8a2aa5..d2b3f187 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -90,13 +90,135 @@ function TRDHSolver( ) end +""" + TRDH(reg_nlp; kwargs…) + TRDH(nlp, h, χ, options; kwargs...) + TRDH(f, ∇f!, h, options, x0) + +A trust-region method with diagonal Hessian approximation for the problem + + min f(x) + h(x) + +where f: ℝⁿ → ℝ has a Lipschitz-continuous Jacobian, and h: ℝⁿ → ℝ is +lower semi-continuous and proper. + +About each iterate xₖ, a step sₖ is computed as an approximate solution of + + min φ(s; xₖ) + ψ(s; xₖ) subject to ‖s‖ ≤ Δₖ + +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Dₖ s is a quadratic approximation of f about xₖ, +ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation +and Δₖ > 0 is the trust-region radius. + +### Arguments + +* `nlp::AbstractDiagonalQNModel`: a smooth optimization problem +* `h`: a regularizer such as those defined in ProximalOperators +* `χ`: a norm used to define the trust region in the form of a regularizer +* `options::ROSolverOptions`: a structure containing algorithmic parameters + +The objective and gradient of `nlp` will be accessed. + +In the second form, instead of `nlp`, the user may pass in + +* `f` a function such that `f(x)` returns the value of f at x +* `∇f!` a function to evaluate the gradient in place, i.e., such that `∇f!(g, x)` store ∇f(x) in `g` +* `x0::AbstractVector`: an initial guess. + +### Keyword arguments + +#TODO + +### Return values + +#TODO +""" +function TRDH( + nlp::AbstractDiagonalQNModel{T, V}, + h, + χ, + options::ROSolverOptions{T}; + kwargs..., +) where {T, V} + kwargs_dict = Dict(kwargs...) + selected = pop!(kwargs_dict, :selected, 1:(nlp.meta.nvar)) + x0 = pop!(kwargs_dict, :x0, nlp.meta.x0) + reg_nlp = RegularizedNLPModel(nlp, h, selected) + stats = TRDH( + reg_nlp; + x = x0, + D = nlp.op, + χ = χ, + atol = options.ϵa, + rtol = options.ϵr, + neg_tol = options.neg_tol, + verbose = options.verbose, + max_iter = options.maxIter, + max_time = options.maxTime, + reduce_TR = options.reduce_TR, + Δk = options.Δk, + η1 = options.η1, + η2 = options.η2, + γ = options.γ, + α = options.α, + β = options.β, + kwargs_dict..., + ) + return stats +end + +function TRDH( + f::F, + ∇f!::G, + h::H, + D::DQN, + options::ROSolverOptions{R}, + x0::AbstractVector{R}; + χ::X = NormLinf(one(R)), + selected::AbstractVector{<:Integer} = 1:length(x0), + kwargs..., +) where {R <: Real, F, G, H, DQN <: AbstractDiagonalQuasiNewtonOperator, X} + nlp = FirstOrderModel(f, ∇f!, x0) + reg_nlp = RegularizedNLPModel(nlp, h, selected) + stats = TRDH( + reg_nlp; + x = x0, + D = D, + χ = χ, + atol = options.ϵa, + rtol = options.ϵr, + neg_tol = options.neg_tol, + verbose = options.verbose, + max_iter = options.maxIter, + max_time = options.maxTime, + reduce_TR = options.reduce_TR, + Δk = options.Δk, + η1 = options.η1, + η2 = options.η2, + γ = options.γ, + α = options.α, + β = options.β, + kwargs_dict..., + ) + return stats +end + +function TRDH(reg_nlp::AbstractRegularizedNLPModel{T, V}; kwargs...) where {T, V} + kwargs_dict = Dict(kwargs...) + D = pop!(kwargs_dict, :D, nothing) + χ = pop!(kwargs_dict, :χ, NormLinf(one(T))) + solver = TRDHSolver(reg_nlp, D = D, χ = χ) + stats = RegularizedExecutionStats(reg_nlp) + solve!(solver, reg_nlp, stats; kwargs_dict...) + return stats +end + function SolverCore.solve!( solver::TRDHSolver{T, G, V}, reg_nlp::AbstractRegularizedNLPModel{T, V}, stats::GenericExecutionStats{T, V}; callback = (args...) -> nothing, x::V = reg_nlp.model.meta.x0, - #χ = NormLinf(one(T)), atol::T = √eps(T), rtol::T = √eps(T), neg_tol::T = eps(T)^(1 / 4), @@ -397,89 +519,6 @@ function SolverCore.solve!( end end -""" - TRDH(nlp, h, χ, options; kwargs...) - TRDH(f, ∇f!, h, options, x0) - -A trust-region method with diagonal Hessian approximation for the problem - - min f(x) + h(x) - -where f: ℝⁿ → ℝ has a Lipschitz-continuous Jacobian, and h: ℝⁿ → ℝ is -lower semi-continuous and proper. - -About each iterate xₖ, a step sₖ is computed as an approximate solution of - - min φ(s; xₖ) + ψ(s; xₖ) subject to ‖s‖ ≤ Δₖ - -where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Dₖ s is a quadratic approximation of f about xₖ, -ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation -and Δₖ > 0 is the trust-region radius. - -### Arguments - -* `nlp::AbstractDiagonalQNModel`: a smooth optimization problem -* `h`: a regularizer such as those defined in ProximalOperators -* `χ`: a norm used to define the trust region in the form of a regularizer -* `options::ROSolverOptions`: a structure containing algorithmic parameters - -The objective and gradient of `nlp` will be accessed. - -In the second form, instead of `nlp`, the user may pass in - -* `f` a function such that `f(x)` returns the value of f at x -* `∇f!` a function to evaluate the gradient in place, i.e., such that `∇f!(g, x)` store ∇f(x) in `g` -* `x0::AbstractVector`: an initial guess. - -### Keyword arguments - -* `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`) -* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`) - -### Return values - -* `xk`: the final iterate -* `Fobj_hist`: an array with the history of values of the smooth objective -* `Hobj_hist`: an array with the history of values of the nonsmooth objective -* `Complex_hist`: an array with the history of number of inner iterations. -""" -function TRDH( - nlp::AbstractDiagonalQNModel{R, S}, - h, - χ, - options::ROSolverOptions{R}; - kwargs..., -) where {R <: Real, S} - kwargs_dict = Dict(kwargs...) - x0 = pop!(kwargs_dict, :x0, nlp.meta.x0) - xk, k, outdict = TRDH( - x -> obj(nlp, x), - (g, x) -> grad!(nlp, x, g), - h, - hess_op(nlp, x0), - options, - x0; - χ = χ, - l_bound = nlp.meta.lvar, - u_bound = nlp.meta.uvar, - kwargs..., - ) - sqrt_ξ_νInv = outdict[:sqrt_ξ_νInv] - stats = GenericExecutionStats(nlp) - set_status!(stats, outdict[:status]) - set_solution!(stats, xk) - set_objective!(stats, outdict[:fk] + outdict[:hk]) - set_residuals!(stats, zero(eltype(xk)), sqrt_ξ_νInv) - set_iter!(stats, k) - set_time!(stats, outdict[:elapsed_time]) - set_solver_specific!(stats, :radius, outdict[:radius]) - set_solver_specific!(stats, :Fhist, outdict[:Fhist]) - set_solver_specific!(stats, :Hhist, outdict[:Hhist]) - set_solver_specific!(stats, :NonSmooth, outdict[:NonSmooth]) - set_solver_specific!(stats, :SubsolverCounter, outdict[:Chist]) - return stats -end - # update l_bound_k and u_bound_k function update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δ) if is_subsolver @@ -489,279 +528,4 @@ function update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk @. l_bound_k = max(-Δ, l_bound - xk) @. u_bound_k = min(Δ, u_bound - xk) end -end - -function TRDH( - f::F, - ∇f!::G, - h::H, - D::DQN, - options::ROSolverOptions{R}, - x0::AbstractVector{R}; - χ::X = NormLinf(one(R)), - selected::AbstractVector{<:Integer} = 1:length(x0), - kwargs..., -) where {R <: Real, F, G, H, DQN <: AbstractDiagonalQuasiNewtonOperator, X} - start_time = time() - elapsed_time = 0.0 - ϵ = options.ϵa - ϵr = options.ϵr - Δk = options.Δk - neg_tol = options.neg_tol - verbose = options.verbose - maxIter = options.maxIter - maxTime = options.maxTime - η1 = options.η1 - η2 = options.η2 - γ = options.γ - α = options.α - β = options.β - reduce_TR = options.reduce_TR - - local l_bound, u_bound - has_bnds = false - kw_keys = keys(kwargs) - if :l_bound in kw_keys - l_bound = kwargs[:l_bound] - has_bnds = has_bnds || any(l_bound .!= R(-Inf)) - end - if :u_bound in kw_keys - u_bound = kwargs[:u_bound] - has_bnds = has_bnds || any(u_bound .!= R(Inf)) - end - - if verbose == 0 - ptf = Inf - elseif verbose == 1 - ptf = round(maxIter / 10) - elseif verbose == 2 - ptf = round(maxIter / 100) - else - ptf = 1 - end - - # initialize parameters - xk = copy(x0) - hk = h(xk[selected]) - if hk == Inf - verbose > 0 && @info "TRDH: finding initial guess where nonsmooth term is finite" - prox!(xk, h, x0, one(eltype(x0))) - hk = h(xk[selected]) - hk < Inf || error("prox computation must be erroneous") - verbose > 0 && @debug "TRDH: found point where h has value" hk - end - hk == -Inf && error("nonsmooth term is not proper") - - xkn = similar(xk) - s = zero(xk) - l_bound_k = similar(xk) - u_bound_k = similar(xk) - is_subsolver = h isa ShiftedProximableFunction # case TRDH is used as a subsolver - if is_subsolver - ψ = shifted(h, xk) - @assert !has_bnds - l_bound = copy(ψ.l) - u_bound = copy(ψ.u) - @. l_bound_k = max(xk - Δk, l_bound) - @. u_bound_k = min(xk + Δk, u_bound) - has_bnds = true - set_bounds!(ψ, l_bound_k, u_bound_k) - else - if has_bnds - @. l_bound_k = max(-Δk, l_bound - xk) - @. u_bound_k = min(Δk, u_bound - xk) - ψ = shifted(h, xk, l_bound_k, u_bound_k, selected) - else - ψ = shifted(h, xk, Δk, χ) - end - end - - Fobj_hist = zeros(maxIter) - Hobj_hist = zeros(maxIter) - Complex_hist = zeros(Int, maxIter) - if verbose > 0 - #! format: off - if reduce_TR - @info @sprintf "%6s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "f(x)" "h(x)" "√(ξ1/ν)" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "‖Dₖ‖" "TRDH" - else - @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %7s %1s" "outer" "f(x)" "h(x)" "√(ξ/ν)" "ρ" "Δ" "‖x‖" "‖s‖" "‖Dₖ‖" "TRDH" - end - #! format: off - end - - local ξ1 - local ξ - k = 0 - - fk = f(xk) - ∇fk = similar(xk) - ∇f!(∇fk, xk) - ∇fk⁻ = copy(∇fk) - DNorm = norm(D.d, Inf) - ν = (α * Δk)/(DNorm + one(R)) - mν∇fk = -ν .* ∇fk - sqrt_ξ_νInv = one(R) - - optimal = false - tired = maxIter > 0 && k ≥ maxIter || elapsed_time > maxTime - - while !(optimal || tired) - k = k + 1 - elapsed_time = time() - start_time - Fobj_hist[k] = fk - Hobj_hist[k] = hk - - # model for prox-gradient step to update Δk if ||s|| is too small and ξ1 - φ1(d) = ∇fk' * d - mk1(d) = φ1(d) + ψ(d) - - if reduce_TR - prox!(s, ψ, mν∇fk, ν) - Complex_hist[k] += 1 - ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() - sqrt_ξ_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) - - if ξ1 ≥ 0 && k == 1 - ϵ += ϵr * sqrt_ξ_νInv # make stopping test absolute and relative - end - - if (ξ1 < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ_νInv < ϵ) - # the current xk is approximately first-order stationary - optimal = true - continue - end - - ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - end - - Δ_effective = reduce_TR ? min(β * χ(s), Δk) : Δk - # update radius - if has_bnds - update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δ_effective) - set_bounds!(ψ, l_bound_k, u_bound_k) - else - set_radius!(ψ, Δ_effective) - end - - # model with diagonal hessian - φ(d) = ∇fk' * d + (d' * (D.d .* d)) / 2 - mk(d) = φ(d) + ψ(d) - - iprox!(s, ψ, ∇fk, D) - - sNorm = χ(s) - xkn .= xk .+ s - fkn = f(xkn) - hkn = h(xkn[selected]) - hkn == -Inf && error("nonsmooth term is not proper") - - Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() - ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() - - if !reduce_TR - - sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) - if ξ ≥ 0 && k == 1 - ϵ += ϵr * sqrt_ξ_νInv # make stopping test absolute and relative - end - - if (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < ϵ) - # the current xk is approximately first-order stationary - optimal = true - continue - end - end - - if (ξ ≤ 0 || isnan(ξ)) - error("TRDH: failed to compute a step: ξ = $ξ") - end - - ρk = Δobj / ξ - - TR_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") - - if (verbose > 0) && (k % ptf == 0) - #! format: off - if reduce_TR - @info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv sqrt(ξ) ρk Δk χ(xk) sNorm norm(D.d) TR_stat - else - @info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv ρk Δk χ(xk) sNorm norm(D.d) TR_stat - end - #! format: on - end - - if η2 ≤ ρk < Inf - Δk = max(Δk, γ * sNorm) - !has_bnds && set_radius!(ψ, Δk) - end - - if η1 ≤ ρk < Inf - xk .= xkn - has_bnds && update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δk) - has_bnds && set_bounds!(ψ, l_bound_k, u_bound_k) - #update functions - fk = fkn - hk = hkn - shift!(ψ, xk) - ∇f!(∇fk, xk) - push!(D, s, ∇fk - ∇fk⁻) # update QN operator - DNorm = norm(D.d, Inf) - ∇fk⁻ .= ∇fk - end - - if ρk < η1 || ρk == Inf - Δk = Δk / 2 - has_bnds && update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δk) - has_bnds ? set_bounds!(ψ, l_bound_k, u_bound_k) : set_radius!(ψ, Δk) - end - - ν = reduce_TR ? (α * Δk)/(DNorm + one(R)) : α / (DNorm + one(R)) - mν∇fk .= -ν .* ∇fk - - tired = k ≥ maxIter || elapsed_time > maxTime - end - - if verbose > 0 - if k == 1 - @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk - elseif optimal - #! format: off - if reduce_TR - @info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt_ξ_νInv sqrt(ξ1) "" Δk χ(xk) χ(s) norm(D.d) - #! format: on - @info "TRDH: terminating with √(ξ1/ν) = $(sqrt_ξ_νInv)" - else - #! format: off - @info @sprintf "%6d %8.1e %8.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt_ξ_νInv "" Δk χ(xk) χ(s) norm(D.d) - #! format: on - @info "TRDH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)" - end - end - end - - !reduce_TR && (ξ1 = ξ) # for output dict - - status = if optimal - :first_order - elseif elapsed_time > maxTime - :max_time - elseif tired - :max_iter - else - :exception - end - outdict = Dict( - :Fhist => Fobj_hist[1:k], - :Hhist => Hobj_hist[1:k], - :Chist => Complex_hist[1:k], - :NonSmooth => h, - :status => status, - :fk => fk, - :hk => hk, - :sqrt_ξ_νInv => sqrt_ξ_νInv, - :elapsed_time => elapsed_time, - :radius => Δk, - ) - - return xk, k, outdict -end +end \ No newline at end of file From c294d57e48a02227775d4ef84baa5312cf7dead4 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Tue, 10 Jun 2025 14:53:15 +0200 Subject: [PATCH 06/33] TR JSO-struct --- src/TR_alg.jl | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index cf16c827..d33bbfa3 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -1,4 +1,92 @@ -export TR +export TR, TRSolver#, solve! + +import SolverCore.solve! + +mutable struct TRSolver{ + T <: Real, + G <: ShiftedProximableFunction, + V <: AbstractVector{T}, + QN <: AbstractDiagonalQuasiNewtonOperator{T}, + N, + ST <: AbstractOptimizationSolver, + PB <: AbstractRegularizedNLPModel +} <: AbstractOptimizationSolver + xk::V + ∇fk::V + ∇fk⁻::V + mν∇fk::V + D::QN + ψ::G + χ::N + xkn::V + s::V + has_bnds::Bool + l_bound::V + u_bound::V + l_bound_m_x::V + u_bound_m_x::V + subsolver::ST + subpb::PB + substats::GenericExecutionStats{T, V, V, T} +end + +function TRSolver( + reg_nlp::AbstractRegularizedNLPModel{T, V}, + χ::X, + subsolver = R2Solver, +) where {T, V, X} + x0 = reg_nlp.model.meta.x0 + l_bound = reg_nlp.model.meta.lvar + u_bound = reg_nlp.model.meta.uvar + + xk = similar(x0) + ∇fk = similar(x0) + ∇fk⁻ = similar(x0) + mν∇fk = similar(x0) + xkn = similar(x0) + s = similar(x0) + has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) + if has_bnds + l_bound_m_x = similar(xk) + u_bound_m_x = similar(xk) + @. l_bound_m_x = l_bound - x0 + @. u_bound_m_x = u_bound - x0 + else + l_bound_m_x = similar(xk, 0) + u_bound_m_x = similar(xk, 0) + end + m_fh_hist = fill(T(-Inf), m_monotone - 1) + + ψ = + has_bnds ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : + shifted(reg_nlp.h, xk) + + Bk = hess_op(reg_nlp.model, x0) + sub_nlp = R2NModel(Bk, ∇fk, T(1), x0) + subpb = RegularizedNLPModel(sub_nlp, ψ) + substats = RegularizedExecutionStats(subpb) + subsolver = subsolver(subpb) + + return R2NSolver{T, typeof(ψ), V, typeof(subsolver), typeof(subpb)}( + xk, + ∇fk, + ∇fk⁻, + mν∇fk, + ψ, + xkn, + s, + s1, + has_bnds, + l_bound, + u_bound, + l_bound_m_x, + u_bound_m_x, + m_fh_hist, + subsolver, + subpb, + substats, + ) +end """ TR(nlp, h, χ, options; kwargs...) From 6c43a5194798bf596928a26b92fa401211bc58d1 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Tue, 8 Jul 2025 17:46:16 -0400 Subject: [PATCH 07/33] fix minor bug in TRDH --- src/TRDH_alg.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index d2b3f187..8c89e1b8 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -55,12 +55,12 @@ function TRDHSolver( @. l_bound_k = max(xk - one(T), l_bound) @. u_bound_k = min(xk + one(T), u_bound) has_bnds = true - set_bounds!(ψ, l_bound_k, u_bound_k) + ψ = shifted(reg_nlp.h, xk, l_bound_k, u_bound_k, reg_nlp.selected) #fails else if has_bnds @. l_bound_k = max(-one(T), l_bound - xk) @. u_bound_k = min(one(T), u_bound - xk) - ψ = shifted(reg_nlp.h, xk, l_bound_k, u_bound_k, selected) + ψ = shifted(reg_nlp.h, xk, l_bound_k, u_bound_k, reg_nlp.selected) else ψ = shifted(reg_nlp.h, xk, one(T), χ) end @@ -161,8 +161,7 @@ function TRDH( η2 = options.η2, γ = options.γ, α = options.α, - β = options.β, - kwargs_dict..., + β = options.β ) return stats end From a6e9b2609acff008771b58ea74cfb6592d358d14 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Tue, 8 Jul 2025 17:46:32 -0400 Subject: [PATCH 08/33] add JSO compliant TR --- src/TR_alg.jl | 338 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 326 insertions(+), 12 deletions(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index d33bbfa3..c85e1724 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -1,4 +1,4 @@ -export TR, TRSolver#, solve! +export TR, TRSolver, solve! import SolverCore.solve! @@ -6,7 +6,6 @@ mutable struct TRSolver{ T <: Real, G <: ShiftedProximableFunction, V <: AbstractVector{T}, - QN <: AbstractDiagonalQuasiNewtonOperator{T}, N, ST <: AbstractOptimizationSolver, PB <: AbstractRegularizedNLPModel @@ -15,7 +14,6 @@ mutable struct TRSolver{ ∇fk::V ∇fk⁻::V mν∇fk::V - D::QN ψ::G χ::N xkn::V @@ -32,8 +30,8 @@ end function TRSolver( reg_nlp::AbstractRegularizedNLPModel{T, V}, - χ::X, - subsolver = R2Solver, + χ::X; + subsolver = R2Solver ) where {T, V, X} x0 = reg_nlp.model.meta.x0 l_bound = reg_nlp.model.meta.lvar @@ -55,33 +53,31 @@ function TRSolver( l_bound_m_x = similar(xk, 0) u_bound_m_x = similar(xk, 0) end - m_fh_hist = fill(T(-Inf), m_monotone - 1) ψ = - has_bnds ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : - shifted(reg_nlp.h, xk) + has_bnds || isa(subsolver, TRDHSolver) ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : + shifted(reg_nlp.h, xk, T(1), χ) Bk = hess_op(reg_nlp.model, x0) - sub_nlp = R2NModel(Bk, ∇fk, T(1), x0) + sub_nlp = R2NModel(Bk, ∇fk, zero(T), x0) #FIXME subpb = RegularizedNLPModel(sub_nlp, ψ) substats = RegularizedExecutionStats(subpb) subsolver = subsolver(subpb) - return R2NSolver{T, typeof(ψ), V, typeof(subsolver), typeof(subpb)}( + return TRSolver( xk, ∇fk, ∇fk⁻, mν∇fk, ψ, + χ, xkn, s, - s1, has_bnds, l_bound, u_bound, l_bound_m_x, u_bound_m_x, - m_fh_hist, subsolver, subpb, substats, @@ -276,9 +272,11 @@ function TR( subsolver_options.Δk = ∆_effective / 10 subsolver_options.ν = ν subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, f.meta.nvar),) : () + s, iter, outdict = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) end + # restore initial values of subsolver_options here so that it is not modified # if there is an error subsolver_options.ν = ν_subsolver @@ -380,3 +378,319 @@ function TR( set_solver_specific!(stats, :SubsolverCounter, Complex_hist[1:k]) return stats end + +function SolverCore.solve!( + solver::TRSolver{T, G, V}, + reg_nlp::AbstractRegularizedNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = reg_nlp.model.meta.x0, + atol::T = √eps(T), + sub_atol::T = √eps(T), + rtol::T = √eps(T), + neg_tol::T = eps(T)^(1 / 4), + verbose::Int = 0, + max_iter::Int = 10000, + max_time::Float64 = 30.0, + max_eval::Int = -1, + reduce_TR::Bool = true, + Δk::T = T(1), + η1::T = √√eps(T), + η2::T = T(0.9), + γ::T = T(3), + α::T = 1 / eps(T), + β::T = 1 / eps(T) +) where {T, G, V} + reset!(stats) + + # Retrieve workspace + selected = reg_nlp.selected + h = reg_nlp.h + nlp = reg_nlp.model + + xk = solver.xk .= x + + # Make sure ψ has the correct shift + shift!(solver.ψ, xk) + + ∇fk = solver.∇fk + ∇fk⁻ = solver.∇fk⁻ + mν∇fk = solver.mν∇fk + ψ = solver.ψ + xkn = solver.xkn + s = solver.s + χ = solver.χ + has_bnds = solver.has_bnds + + if has_bnds || isa(solver.subsolver, TRDHSolver) #TODO elsewhere ? + @. l_bound_m_x = l_bound - xk + @. u_bound_m_x = u_bound - xk + set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + else + set_radius!(ψ, Δk) + end + + # initialize parameters + improper = false + hk = @views h(xk[selected]) + if hk == Inf + verbose > 0 && @info "TR: finding initial guess where nonsmooth term is finite" + prox!(xk, h, xk, T(1)) + hk = @views h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "TR: found point where h has value" hk + end + improper = (hk == -Inf) + improper == true && @warn "TR: Improper term detected" + improper == true && return stats + + if verbose > 0 + @info log_header( + [:outer, :inner, :fx, :hx, :xi, :ρ, :Δ, :normx, :norms, :normB, :arrow], + [Int, Int, T, T, T, T, T, T, T, T, Char], + hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere + :fx => "f(x)", + :hx => "h(x)", + :xi => "√(ξ1/ν)", + :normx => "‖x‖", + :norms => "‖s‖", + :normB => "‖B‖", + :arrow => "TR", + ), + colsep = 1, + ) + end + + local ξ1::T + local ρk::T = zero(T) + + fk = obj(nlp, xk) + grad!(nlp, xk, ∇fk) + ∇fk⁻ .= ∇fk + + quasiNewtTest = isa(nlp, QuasiNewtonModel) + λmax::T = T(1) + solver.subpb.model.B = hess_op(nlp, xk) + + λmax, found_λ = opnorm(solver.subpb.model.B) + found_λ || error("operator norm computation failed") + + ν₁ = α * Δk / (1 + λmax * (α * Δk + 1)) + sqrt_ξ1_νInv = one(T) + + @. mν∇fk = -ν₁ * ∇fk + + set_iter!(stats, 0) + start_time = time() + set_time!(stats, 0.0) + set_objective!(stats, fk + hk) + set_solver_specific!(stats, :smooth_obj, fk) + set_solver_specific!(stats, :nonsmooth_obj, hk) + + # models + φ1 = let ∇fk = ∇fk + d -> dot(∇fk, d) + end + mk1 = let ψ = ψ, φ1 = φ1 + d -> φ1(d) + ψ(d) + end + + mk = let ψ = ψ, solver = solver + d -> obj(solver.subpb.model, d) + ψ(d)::T + end + + prox!(s, ψ, mν∇fk, ν₁) + ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() + ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + sqrt_ξ1_νInv = sqrt(ξ1 / ν₁) + + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) + (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && + error("TR: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative + sub_atol += rtol * sqrt_ξ1_νInv + + set_status!( + stats, + get_status( + reg_nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + + while !done + + sub_atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)) + ∆_effective = min(β * χ(s), Δk) + + if isa(solver.subsolver, TRDHSolver) #FIXME + solver.subsolver.D.d[1] = 1/ν₁ + solve!( + solver.subsolver, + solver.subpb, + solver.substats; + x = s, + atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)), + Δk = Δ_effective / 10 + ) + else + solve!( + solver.subsolver, + solver.subpb, + solver.substats; + x = s, + atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)), + ν = ν₁, + ) + end + + s .= solver.substats.solution + + xkn .= xk .+ s + fkn = obj(nlp, xkn) + hkn = @views h(xkn[selected]) + sNorm = χ(s) + + Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() + ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() + + if (ξ ≤ 0 || isnan(ξ)) + error("TR: failed to compute a step: ξ = $ξ") + end + + ρk = Δobj / ξ + + verbose > 0 && + stats.iter % verbose == 0 && + @info log_row( + Any[ + stats.iter, + solver.substats.iter, + fk, + hk, + sqrt_ξ1_νInv, + ρk, + Δk, + χ(xk), + sNorm, + λmax, + (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "="), + ], + colsep = 1, + ) + + if η1 ≤ ρk < Inf + xk .= xkn + ∆_effective = min(β * χ(s), Δk) + if has_bnds || isa(solver.subsolver, TRDHSolver) + @. l_bound_m_x = l_bound - xk + @. u_bound_m_x = u_bound - xk + set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + end + fk = fkn + hk = hkn + + shift!(ψ, xk) + grad!(nlp, xk, ∇fk) + + if quasiNewtTest + @. ∇fk⁻ = ∇fk - ∇fk⁻ + push!(nlp, s, ∇fk⁻) # update QN operator + end + + solver.subpb.model.B = hess_op(nlp, xk) + + λmax, found_λ = opnorm(solver.subpb.model.B) + found_λ || error("operator norm computation failed") + + ∇fk⁻ .= ∇fk + end + + if η2 ≤ ρk < Inf + Δk = max(Δk, γ * sNorm) + if !(has_bnds || isa(solver.subsolver, TRDHSolver)) + set_radius!(ψ, Δk) + end + end + + if η2 ≤ ρk < Inf + Δk = max(Δk, γ * sNorm) + !has_bnds && set_radius!(ψ, Δk) + end + + if ρk < η1 || ρk == Inf + Δk = Δk / 2 + if has_bnds || isa(solver.subsolver, TRDHSolver) #TODO elsewhere ? + @. l_bound_m_x = l_bound - xk + @. u_bound_m_x = u_bound - xk + set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + else + set_radius!(ψ, Δk) + end + end + + set_objective!(stats, fk + hk) + set_solver_specific!(stats, :smooth_obj, fk) + set_solver_specific!(stats, :nonsmooth_obj, hk) + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + + ν₁ = α * Δk / (1 + λmax * (α * Δk + 1)) + @. mν∇fk = -ν₁ * ∇fk + + prox!(s, ψ, mν∇fk, ν₁) + ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() + sqrt_ξ1_νInv = sqrt(ξ1 / ν₁) + + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) + (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && + error("TR: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + + set_status!( + stats, + get_status( + reg_nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + end + if verbose > 0 && stats.status == :first_order + @info log_row( + Any[ + stats.iter, + solver.substats.iter, + fk, + hk, + sqrt_ξ1_νInv, + ρk, + Δk, + χ(xk), + χ(s), + λmax, + "", + ], + colsep = 1, + ) + @info "TR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" + end +end From b737d55e2087ca601c08d5a3ebc0d9b19721fc4b Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Tue, 8 Jul 2025 18:21:13 -0400 Subject: [PATCH 09/33] solve bugs for TR with TRDH subsolver --- src/TRDH_alg.jl | 12 ++++++++---- src/TR_alg.jl | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 8c89e1b8..9c55d804 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -55,7 +55,7 @@ function TRDHSolver( @. l_bound_k = max(xk - one(T), l_bound) @. u_bound_k = min(xk + one(T), u_bound) has_bnds = true - ψ = shifted(reg_nlp.h, xk, l_bound_k, u_bound_k, reg_nlp.selected) #fails + set_bounds!(ψ, l_bound_k, u_bound_k) else if has_bnds @. l_bound_k = max(-one(T), l_bound - xk) @@ -161,7 +161,8 @@ function TRDH( η2 = options.η2, γ = options.γ, α = options.α, - β = options.β + β = options.β, + kwargs_dict... ) return stats end @@ -196,8 +197,7 @@ function TRDH( η2 = options.η2, γ = options.γ, α = options.α, - β = options.β, - kwargs_dict..., + β = options.β ) return stats end @@ -516,6 +516,10 @@ function SolverCore.solve!( ) @info "TRDH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)" end + + set_solution!(stats, xk) + + return stats end # update l_bound_k and u_bound_k diff --git a/src/TR_alg.jl b/src/TR_alg.jl index c85e1724..83ed322f 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -44,7 +44,7 @@ function TRSolver( xkn = similar(x0) s = similar(x0) has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) - if has_bnds + if has_bnds || subsolver == TRDHSolver l_bound_m_x = similar(xk) u_bound_m_x = similar(xk) @. l_bound_m_x = l_bound - x0 @@ -55,7 +55,7 @@ function TRSolver( end ψ = - has_bnds || isa(subsolver, TRDHSolver) ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : + has_bnds || subsolver == TRDHSolver ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : shifted(reg_nlp.h, xk, T(1), χ) Bk = hess_op(reg_nlp.model, x0) @@ -273,9 +273,9 @@ function TR( subsolver_options.ν = ν subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, f.meta.nvar),) : () - s, iter, outdict = with_logger(subsolver_logger) do - subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) - end + #s, iter, outdict = with_logger(subsolver_logger) do + s, iter, outdict = subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) + #end # restore initial values of subsolver_options here so that it is not modified # if there is an error @@ -423,6 +423,10 @@ function SolverCore.solve!( has_bnds = solver.has_bnds if has_bnds || isa(solver.subsolver, TRDHSolver) #TODO elsewhere ? + l_bound_m_x = solver.l_bound_m_x + u_bound_m_x = solver.u_bound_m_x + l_bound = solver.l_bound + u_bound = solver.u_bound @. l_bound_m_x = l_bound - xk @. u_bound_m_x = u_bound - xk set_bounds!(ψ, l_bound_m_x, u_bound_m_x) @@ -531,7 +535,6 @@ function SolverCore.solve!( while !done sub_atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)) - ∆_effective = min(β * χ(s), Δk) if isa(solver.subsolver, TRDHSolver) #FIXME solver.subsolver.D.d[1] = 1/ν₁ @@ -541,7 +544,7 @@ function SolverCore.solve!( solver.substats; x = s, atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)), - Δk = Δ_effective / 10 + Δk = min(β * χ(s), Δk) / 10 ) else solve!( @@ -625,7 +628,7 @@ function SolverCore.solve!( if η2 ≤ ρk < Inf Δk = max(Δk, γ * sNorm) - !has_bnds && set_radius!(ψ, Δk) + !(has_bnds || isa(solver.subsolver, TRDHSolver)) && set_radius!(ψ, Δk) end if ρk < η1 || ρk == Inf From 7fbfc5ce87730e1e7b7644dcd18c667287f11d21 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Wed, 9 Jul 2025 18:11:08 -0600 Subject: [PATCH 10/33] add doc and unit tests for TRDH --- src/TRDH_alg.jl | 77 +++++++++++++++++++++++++++++++-------------- test/test_allocs.jl | 13 +++++--- 2 files changed, 61 insertions(+), 29 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 9c55d804..082aba75 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -99,8 +99,8 @@ A trust-region method with diagonal Hessian approximation for the problem min f(x) + h(x) -where f: ℝⁿ → ℝ has a Lipschitz-continuous Jacobian, and h: ℝⁿ → ℝ is -lower semi-continuous and proper. +where f: ℝⁿ → ℝ has a Lipschitz-continuous gradient,, and h: ℝⁿ → ℝ is +lower semi-continuous, proper and prox-bounded. About each iterate xₖ, a step sₖ is computed as an approximate solution of @@ -110,28 +110,57 @@ where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Dₖ s is a quadratic a ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation and Δₖ > 0 is the trust-region radius. -### Arguments - -* `nlp::AbstractDiagonalQNModel`: a smooth optimization problem -* `h`: a regularizer such as those defined in ProximalOperators -* `χ`: a norm used to define the trust region in the form of a regularizer -* `options::ROSolverOptions`: a structure containing algorithmic parameters - -The objective and gradient of `nlp` will be accessed. - -In the second form, instead of `nlp`, the user may pass in - -* `f` a function such that `f(x)` returns the value of f at x -* `∇f!` a function to evaluate the gradient in place, i.e., such that `∇f!(g, x)` store ∇f(x) in `g` -* `x0::AbstractVector`: an initial guess. - -### Keyword arguments - -#TODO - -### Return values - -#TODO +For advanced usage, first define a solver "TRDHSolver" to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = TRDH(reg_nlp; D = nothing, χ = NormLinf(1)) + solve!(solver, reg_nlp) + + stats = RegularizedExecutionStats(reg_nlp) + solve!(solver, reg_nlp, stats) + +# Arguments +* `reg_nlp::AbstractRegularizedNLPModel{T, V}`: the problem to solve, see `RegularizedProblems.jl`, `NLPModels.jl`. + +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess; +- `atol::T = √eps(T)`: absolute tolerance; +- `rtol::T = √eps(T)`: relative tolerance; +- `neg_tol::T = eps(T)^(1 / 4)`: negative tolerance; +- `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); +- `max_time::Float64 = 30.0`: maximum time limit in seconds; +- `max_iter::Int = 10000`: maximum number of iterations; +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration; +- `Δk::T = T(1)`: initial value of the trust-region radius; +- `η1::T = √√eps(T)`: successful iteration threshold; +- `η2::T = T(0.9)`: very successful iteration threshold; +- `γ::T = T(3)`: trust-region radius parameter multiplier, Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful; +- `α::T = 1/eps(T)`: TODO +- `β::T = 1/eps(T)`: TODO +- `reduce_TR::Bool = True`: TODO +- `χ::F = NormLinf(1)`: norm used to define the trust-region;` +- `D::L = nothing`: diagonal quasi-Newton approximation used for the model φ. If nothing is provided and `reg_nlp.model` is not a diagonal quasi-Newton approximation, a spectral gradient approximation is used.` + +The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.xk`: current iterate; +- `solver.∇fk`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.solver_specific[:smooth_obj]`: current value of the smooth part of the objective function; + - `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention; + - `stats.elapsed_time`: elapsed time in seconds. """ function TRDH( nlp::AbstractDiagonalQNModel{T, V}, diff --git a/test/test_allocs.jl b/test/test_allocs.jl index accf0ccc..90d67499 100644 --- a/test/test_allocs.jl +++ b/test/test_allocs.jl @@ -42,18 +42,21 @@ end # Test non allocating solve! @testset "allocs" begin for (h, h_name) ∈ ((NormL0(λ), "l0"),) - for (solver, solver_name) ∈ ((:R2Solver, "R2"), (:R2DHSolver, "R2DH"), (:R2NSolver, "R2N")) + for (solver, solver_name) ∈ ((:R2Solver, "R2"), (:R2DHSolver, "R2DH"), (:R2NSolver, "R2N"), (:TRDHSolver, "TRDH"), (:TRSolver, "TR")) @testset "$(solver_name)" begin - solver_name == "R2N" && continue #FIXME + (solver_name == "R2N" || solver_name == "TR") && continue #FIXME reg_nlp = RegularizedNLPModel(LBFGSModel(bpdn), h) solver = eval(solver)(reg_nlp) stats = RegularizedExecutionStats(reg_nlp) - solver_name == "R2" && - @test @wrappedallocs(solve!(solver, reg_nlp, stats, ν = 1.0, atol = 1e-6, rtol = 1e-6)) == - 0 + solver_name == "R2" && @test @wrappedallocs( + solve!(solver, reg_nlp, stats, ν = 1.0, atol = 1e-6, rtol = 1e-6) + ) == 0 solver_name == "R2DH" && @test @wrappedallocs( solve!(solver, reg_nlp, stats, σk = 1.0, atol = 1e-6, rtol = 1e-6) ) == 0 + solver_name == "TRDH" && @test @wrappedallocs( + solve!(solver, reg_nlp, stats, atol = 1e-6, rtol = 1e-6) + ) == 0 @test stats.status == :first_order end end From 5f6164b83f8d6b40a28a098c3315828edaad7541 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Wed, 9 Jul 2025 18:39:48 -0600 Subject: [PATCH 11/33] solve unallocated memory issue --- src/TRDH_alg.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 082aba75..811f1276 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -52,8 +52,8 @@ function TRDHSolver( @assert !has_bnds l_bound = copy(ψ.l) u_bound = copy(ψ.u) - @. l_bound_k = max(xk - one(T), l_bound) - @. u_bound_k = min(xk + one(T), u_bound) + @. l_bound_k = max(x0 - one(T), l_bound) + @. u_bound_k = min(x0 + one(T), u_bound) has_bnds = true set_bounds!(ψ, l_bound_k, u_bound_k) else From 52117ed217f498cca75c07ff501a42c274fce6e1 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 10 Jul 2025 17:34:28 -0600 Subject: [PATCH 12/33] fix reproducibility in TR --- src/TR_alg.jl | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 83ed322f..e66b309e 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -134,7 +134,7 @@ function TR( χ::X, options::ROSolverOptions{R}; x0::AbstractVector{R} = f.meta.x0, - subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), + subsolver_logger::Logging.AbstractLogger = Logging.SimpleLogger(), subsolver = R2, subsolver_options = ROSolverOptions(ϵa = options.ϵa), selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), @@ -264,7 +264,7 @@ function TR( continue end - subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ_subsolver, min(1e-2, sqrt_ξ1_νInv)) + subsolver_options.ϵa = k == 2 ? 1.0e-5 : max(ϵ_subsolver, min(1e-2, sqrt_ξ1_νInv)) ∆_effective = min(β * χ(s), Δk) (has_bounds(f) || subsolver == TRDH) ? set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : @@ -273,9 +273,10 @@ function TR( subsolver_options.ν = ν subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, f.meta.nvar),) : () - #s, iter, outdict = with_logger(subsolver_logger) do - s, iter, outdict = subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) - #end + stats = subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) + + s = stats.solution + iter = stats.iter # restore initial values of subsolver_options here so that it is not modified # if there is an error @@ -283,7 +284,7 @@ function TR( subsolver_options.ϵa = ϵa_subsolver subsolver_options.Δk = Δk_subsolver - Complex_hist[k] = sum(outdict[:Chist]) + Complex_hist[k] = 1 sNorm = χ(s) xkn .= xk .+ s @@ -429,6 +430,8 @@ function SolverCore.solve!( u_bound = solver.u_bound @. l_bound_m_x = l_bound - xk @. u_bound_m_x = u_bound - xk + @. l_bound_m_x .= max.(l_bound_m_x, -Δk) + @. u_bound_m_x .= min.(u_bound_m_x, Δk) set_bounds!(ψ, l_bound_m_x, u_bound_m_x) else set_radius!(ψ, Δk) @@ -535,7 +538,18 @@ function SolverCore.solve!( while !done sub_atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)) - + ∆_effective = min(β * χ(s), Δk) + + if has_bnds || isa(solver.subsolver, TRDHSolver) #TODO elsewhere ? + @. l_bound_m_x = l_bound - xk + @. u_bound_m_x = u_bound - xk + @. l_bound_m_x .= max.(l_bound_m_x, -∆_effective) + @. u_bound_m_x .= min.(u_bound_m_x, ∆_effective) + set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + else + set_radius!(ψ, ∆_effective) + end + if isa(solver.subsolver, TRDHSolver) #FIXME solver.subsolver.D.d[1] = 1/ν₁ solve!( @@ -544,7 +558,7 @@ function SolverCore.solve!( solver.substats; x = s, atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)), - Δk = min(β * χ(s), Δk) / 10 + Δk = ∆_effective / 10 ) else solve!( @@ -633,13 +647,6 @@ function SolverCore.solve!( if ρk < η1 || ρk == Inf Δk = Δk / 2 - if has_bnds || isa(solver.subsolver, TRDHSolver) #TODO elsewhere ? - @. l_bound_m_x = l_bound - xk - @. u_bound_m_x = u_bound - xk - set_bounds!(ψ, l_bound_m_x, u_bound_m_x) - else - set_radius!(ψ, Δk) - end end set_objective!(stats, fk + hk) From 3358ff00e996ea5e51c683c96650b65865585b95 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 10 Jul 2025 18:19:38 -0600 Subject: [PATCH 13/33] unit tests for TR --- test/runtests.jl | 15 --------------- test/test_bounds.jl | 7 ------- 2 files changed, 22 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 1e8b28d4..1eddcad1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,14 +25,7 @@ for (mod, mod_name) ∈ ((x -> x, "exact"), (LSR1Model, "lsr1"), (LBFGSModel, "l out = solver(mod(bpdn), h, args..., options, x0 = x0) @test typeof(out.solution) == typeof(bpdn.meta.x0) @test length(out.solution) == bpdn.meta.nvar - @test typeof(out.solver_specific[:Fhist]) == typeof(out.solution) - @test typeof(out.solver_specific[:Hhist]) == typeof(out.solution) - @test typeof(out.solver_specific[:SubsolverCounter]) == Array{Int, 1} @test typeof(out.dual_feas) == eltype(out.solution) - @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:Hhist]) - @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:SubsolverCounter]) - @test obj(bpdn, out.solution) == out.solver_specific[:Fhist][end] - @test h(out.solution) == out.solver_specific[:Hhist][end] @test out.status == :first_order end end @@ -66,15 +59,7 @@ for (mod, mod_name) ∈ ((LSR1Model, "lsr1"), (LBFGSModel, "lbfgs")) TR_out = TR(mod(bpdn), h, NormL2(1.0), options, x0 = x0) @test typeof(TR_out.solution) == typeof(bpdn.meta.x0) @test length(TR_out.solution) == bpdn.meta.nvar - @test typeof(TR_out.solver_specific[:Fhist]) == typeof(TR_out.solution) - @test typeof(TR_out.solver_specific[:Hhist]) == typeof(TR_out.solution) - @test typeof(TR_out.solver_specific[:SubsolverCounter]) == Array{Int, 1} @test typeof(TR_out.dual_feas) == eltype(TR_out.solution) - @test length(TR_out.solver_specific[:Fhist]) == length(TR_out.solver_specific[:Hhist]) - @test length(TR_out.solver_specific[:Fhist]) == - length(TR_out.solver_specific[:SubsolverCounter]) - @test obj(bpdn, TR_out.solution) == TR_out.solver_specific[:Fhist][end] - @test h(TR_out.solution) == TR_out.solver_specific[:Hhist][end] @test TR_out.status == :first_order end end diff --git a/test/test_bounds.jl b/test/test_bounds.jl index 14407d60..04139abe 100644 --- a/test/test_bounds.jl +++ b/test/test_bounds.jl @@ -15,14 +15,7 @@ for (mod, mod_name) ∈ ((x -> x, "exact"), (LSR1Model, "lsr1"), (LBFGSModel, "l out = solver(mod(bpdn2), h, args..., options; x0 = x0) @test typeof(out.solution) == typeof(bpdn2.meta.x0) @test length(out.solution) == bpdn2.meta.nvar - @test typeof(out.solver_specific[:Fhist]) == typeof(out.solution) - @test typeof(out.solver_specific[:Hhist]) == typeof(out.solution) - @test typeof(out.solver_specific[:SubsolverCounter]) == Array{Int, 1} @test typeof(out.dual_feas) == eltype(out.solution) - @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:Hhist]) - @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:SubsolverCounter]) - @test obj(bpdn2, out.solution) == out.solver_specific[:Fhist][end] - @test h(out.solution) == out.solver_specific[:Hhist][end] @test out.status == :first_order end end From 836c02e9be48a5b507d3d9930cc8b9a83b9b6f2d Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 10 Jul 2025 18:20:18 -0600 Subject: [PATCH 14/33] TR function signatures --- src/TR_alg.jl | 274 +++++++------------------------------------------- 1 file changed, 34 insertions(+), 240 deletions(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index e66b309e..6bc0b696 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -29,8 +29,8 @@ mutable struct TRSolver{ end function TRSolver( - reg_nlp::AbstractRegularizedNLPModel{T, V}, - χ::X; + reg_nlp::AbstractRegularizedNLPModel{T, V}; + χ::X = NormLinf(one(T)), subsolver = R2Solver ) where {T, V, X} x0 = reg_nlp.model.meta.x0 @@ -138,245 +138,40 @@ function TR( subsolver = R2, subsolver_options = ROSolverOptions(ϵa = options.ϵa), selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), + kwargs... ) where {H, X, R} - start_time = time() - elapsed_time = 0.0 - # initialize passed options - ϵ = options.ϵa - ϵ_subsolver = subsolver_options.ϵa - ϵr = options.ϵr - Δk = options.Δk - verbose = options.verbose - maxIter = options.maxIter - maxTime = options.maxTime - η1 = options.η1 - η2 = options.η2 - γ = options.γ - α = options.α - θ = options.θ - β = options.β - - # store initial values of the subsolver_options fields that will be modified - ν_subsolver = subsolver_options.ν - ϵa_subsolver = subsolver_options.ϵa - Δk_subsolver = subsolver_options.Δk - - local l_bound, u_bound - if has_bounds(f) || subsolver == TRDH - l_bound = f.meta.lvar - u_bound = f.meta.uvar - end - - if verbose == 0 - ptf = Inf - elseif verbose == 1 - ptf = round(maxIter / 10) - elseif verbose == 2 - ptf = round(maxIter / 100) - else - ptf = 1 - end - - # initialize parameters - xk = copy(x0) - hk = h(xk[selected]) - if hk == Inf - verbose > 0 && @info "TR: finding initial guess where nonsmooth term is finite" - prox!(xk, h, x0, one(eltype(x0))) - hk = h(xk[selected]) - hk < Inf || error("prox computation must be erroneous") - verbose > 0 && @debug "TR: found point where h has value" hk - end - hk == -Inf && error("nonsmooth term is not proper") - - xkn = similar(xk) - s = zero(xk) - ψ = - (has_bounds(f) || subsolver == TRDH) ? - shifted(h, xk, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk), selected) : - shifted(h, xk, Δk, χ) - - Fobj_hist = zeros(maxIter) - Hobj_hist = zeros(maxIter) - Complex_hist = zeros(Int, maxIter) - if verbose > 0 - #! format: off - @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "‖Bₖ‖" "TR" - #! format: on - end - - local ξ1 - k = 0 - - fk = obj(f, xk) - ∇fk = grad(f, xk) - ∇fk⁻ = copy(∇fk) - - quasiNewtTest = isa(f, QuasiNewtonModel) - Bk = hess_op(f, xk) - - λmax, found_λ = opnorm(Bk) - found_λ || error("operator norm computation failed") - α⁻¹Δ⁻¹ = 1 / (α * Δk) - ν = 1 / (α⁻¹Δ⁻¹ + λmax * (α⁻¹Δ⁻¹ + 1)) - sqrt_ξ1_νInv = one(R) - - optimal = false - tired = k ≥ maxIter || elapsed_time > maxTime - - while !(optimal || tired) - k = k + 1 - elapsed_time = time() - start_time - Fobj_hist[k] = fk - Hobj_hist[k] = hk - - # model for first prox-gradient step and ξ1 - φ1(d) = ∇fk' * d - mk1(d) = φ1(d) + ψ(d) - - # model for subsequent prox-gradient steps and ξ - φ(d) = (d' * (Bk * d)) / 2 + ∇fk' * d - - ∇φ!(g, d) = begin - mul!(g, Bk, d) - g .+= ∇fk - g - end - - mk(d) = φ(d) + ψ(d) - - # Take first proximal gradient step s1 and see if current xk is nearly stationary. - # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)). - prox!(s, ψ, -ν * ∇fk, ν) - ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() - ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - sqrt_ξ1_νInv = sqrt(ξ1 / ν) - - if ξ1 ≥ 0 && k == 1 - ϵ_increment = ϵr * sqrt_ξ1_νInv - ϵ += ϵ_increment # make stopping test absolute and relative - ϵ_subsolver += ϵ_increment - end - - if sqrt_ξ1_νInv < ϵ - # the current xk is approximately first-order stationary - optimal = true - continue - end - - subsolver_options.ϵa = k == 2 ? 1.0e-5 : max(ϵ_subsolver, min(1e-2, sqrt_ξ1_νInv)) - ∆_effective = min(β * χ(s), Δk) - (has_bounds(f) || subsolver == TRDH) ? - set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : - set_radius!(ψ, ∆_effective) - subsolver_options.Δk = ∆_effective / 10 - subsolver_options.ν = ν - subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, f.meta.nvar),) : () - - stats = subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) - - s = stats.solution - iter = stats.iter - - # restore initial values of subsolver_options here so that it is not modified - # if there is an error - subsolver_options.ν = ν_subsolver - subsolver_options.ϵa = ϵa_subsolver - subsolver_options.Δk = Δk_subsolver - - Complex_hist[k] = 1 - - sNorm = χ(s) - xkn .= xk .+ s - fkn = obj(f, xkn) - hkn = h(xkn[selected]) - hkn == -Inf && error("nonsmooth term is not proper") - - Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() - ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() - - if (ξ ≤ 0 || isnan(ξ)) - error("TR: failed to compute a step: ξ = $ξ") - end - - ρk = Δobj / ξ - - TR_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") - - if (verbose > 0) && (k % ptf == 0) - #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk ∆_effective χ(xk) sNorm λmax TR_stat - #! format: on - end - - if η2 ≤ ρk < Inf - Δk = max(Δk, γ * sNorm) - !(has_bounds(f) || subsolver == TRDH) && set_radius!(ψ, Δk) - end - - if η1 ≤ ρk < Inf - xk .= xkn - (has_bounds(f) || subsolver == TRDH) && - set_bounds!(ψ, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk)) - - #update functions - fk = fkn - hk = hkn - shift!(ψ, xk) - ∇fk = grad(f, xk) - # grad!(f, xk, ∇fk) - if quasiNewtTest - push!(f, s, ∇fk - ∇fk⁻) - end - Bk = hess_op(f, xk) - λmax, found_λ = opnorm(Bk) - found_λ || error("operator norm computation failed") - ∇fk⁻ .= ∇fk - end - - if ρk < η1 || ρk == Inf - Δk = Δk / 2 - (has_bounds(f) || subsolver == TRDH) ? - set_bounds!(ψ, max.(-Δk, l_bound - xk), min.(Δk, u_bound - xk)) : set_radius!(ψ, Δk) - end - α⁻¹Δ⁻¹ = 1 / (α * Δk) - ν = 1 / (α⁻¹Δ⁻¹ + λmax * (α⁻¹Δ⁻¹ + 1)) - tired = k ≥ maxIter || elapsed_time > maxTime - end - - if verbose > 0 - if k == 1 - @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk - elseif optimal - #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" Δk χ(xk) χ(s) λmax - #! format: on - @info "TR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" - end - end - - status = if optimal - :first_order - elseif elapsed_time > maxTime - :max_time - elseif tired - :max_iter - else - :exception - end + reg_nlp = RegularizedNLPModel(f, h, selected) + stats = TR( + reg_nlp; + x = x0, + atol = options.ϵa, + sub_atol = subsolver_options.ϵa, + rtol = options.ϵr, + neg_tol = options.neg_tol, + verbose = options.verbose, + max_iter = options.maxIter, + max_time = options.maxTime, + Δk = options.Δk, + η1 = options.η1, + η2 = options.η2, + γ = options.γ, + α = options.α, + β = options.β, + kwargs... + ) + return stats +end - stats = GenericExecutionStats(f) - set_status!(stats, status) - set_solution!(stats, xk) - set_objective!(stats, fk + hk) - set_residuals!(stats, zero(eltype(xk)), sqrt_ξ1_νInv) - set_iter!(stats, k) - set_time!(stats, elapsed_time) - set_solver_specific!(stats, :radius, Δk) - set_solver_specific!(stats, :Fhist, Fobj_hist[1:k]) - set_solver_specific!(stats, :Hhist, Hobj_hist[1:k]) - set_solver_specific!(stats, :NonSmooth, h) - set_solver_specific!(stats, :SubsolverCounter, Complex_hist[1:k]) +function TR( + reg_nlp::AbstractRegularizedNLPModel{T, V}; + kwargs... +) where{T, V} + kwargs_dict = Dict(kwargs...) + subsolver = pop!(kwargs_dict, :subsolver, R2Solver) + χ = pop!(kwargs_dict, :χ, NormLinf(one(T))) + solver = TRSolver(reg_nlp, subsolver = subsolver, χ = χ) + stats = RegularizedExecutionStats(reg_nlp) + solve!(solver, reg_nlp, stats; kwargs_dict...) return stats end @@ -394,7 +189,6 @@ function SolverCore.solve!( max_iter::Int = 10000, max_time::Float64 = 30.0, max_eval::Int = -1, - reduce_TR::Bool = true, Δk::T = T(1), η1::T = √√eps(T), η2::T = T(0.9), From 2bd11efe8b8bb54fd4266f5f1b73a231e1b0ffd2 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 10 Jul 2025 18:21:04 -0600 Subject: [PATCH 15/33] add kwargs for TRDH --- src/TRDH_alg.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 811f1276..56b160e8 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -226,7 +226,8 @@ function TRDH( η2 = options.η2, γ = options.γ, α = options.α, - β = options.β + β = options.β, + kwargs... ) return stats end From c389274b4c688ac09ab63e56d8d00651eb385f81 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 11 Jul 2025 07:00:30 -0600 Subject: [PATCH 16/33] fix uninitalized memory in TRDH --- src/TRDH_alg.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 56b160e8..7f5132b4 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -58,8 +58,8 @@ function TRDHSolver( set_bounds!(ψ, l_bound_k, u_bound_k) else if has_bnds - @. l_bound_k = max(-one(T), l_bound - xk) - @. u_bound_k = min(one(T), u_bound - xk) + @. l_bound_k = max(-one(T), l_bound - x0) + @. u_bound_k = min(one(T), u_bound - x0) ψ = shifted(reg_nlp.h, xk, l_bound_k, u_bound_k, reg_nlp.selected) else ψ = shifted(reg_nlp.h, xk, one(T), χ) @@ -229,7 +229,7 @@ function TRDH( β = options.β, kwargs... ) - return stats + return stats.solution, stats.iter, stats end function TRDH(reg_nlp::AbstractRegularizedNLPModel{T, V}; kwargs...) where {T, V} From ad7990d9c594a848498ef8549f2ffae063a4922e Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 11 Jul 2025 16:38:41 -0600 Subject: [PATCH 17/33] update doc for TR --- src/TR_alg.jl | 81 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 6bc0b696..98756acb 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -85,14 +85,15 @@ function TRSolver( end """ + TR(reg_nlp; kwargs…) TR(nlp, h, χ, options; kwargs...) A trust-region method for the problem min f(x) + h(x) -where f: ℝⁿ → ℝ has a Lipschitz-continuous Jacobian, and h: ℝⁿ → ℝ is -lower semi-continuous and proper. +where f: ℝⁿ → ℝ has a Lipschitz-continuous gradient, and h: ℝⁿ → ℝ is +lower semi-continuous, proper and prox-bounded. About each iterate xₖ, a step sₖ is computed as an approximate solution of @@ -100,33 +101,57 @@ About each iterate xₖ, a step sₖ is computed as an approximate solution of where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Bₖ s is a quadratic approximation of f about xₖ, ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm and Δₖ > 0 is the trust-region radius. -The subproblem is solved inexactly by way of a first-order method such as the proximal-gradient -method or the quadratic regularization method. -### Arguments - -* `nlp::AbstractNLPModel`: a smooth optimization problem -* `h`: a regularizer such as those defined in ProximalOperators -* `χ`: a norm used to define the trust region in the form of a regularizer -* `options::ROSolverOptions`: a structure containing algorithmic parameters - -The objective, gradient and Hessian of `nlp` will be accessed. -The Hessian is accessed as an abstract operator and need not be the exact Hessian. - -### Keyword arguments - -* `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`) -* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver (default: the null logger) -* `subsolver`: the procedure used to compute a step (`PG`, `R2` or `TRDH`) -* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) -* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`). - -### Return values - -* `xk`: the final iterate -* `Fobj_hist`: an array with the history of values of the smooth objective -* `Hobj_hist`: an array with the history of values of the nonsmooth objective -* `Complex_hist`: an array with the history of number of inner iterations. +For advanced usage, first define a solver "TRSolver" to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = TR(reg_nlp; χ = NormLinf(1), subsolver = R2Solver) + solve!(solver, reg_nlp) + + stats = RegularizedExecutionStats(reg_nlp) + solve!(solver, reg_nlp, stats) + +# Arguments +* `reg_nlp::AbstractRegularizedNLPModel{T, V}`: the problem to solve, see `RegularizedProblems.jl`, `NLPModels.jl`. + +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess; +- `atol::T = √eps(T)`: absolute tolerance; +- `rtol::T = √eps(T)`: relative tolerance; +- `neg_tol::T = eps(T)^(1 / 4)`: negative tolerance; +- `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); +- `max_time::Float64 = 30.0`: maximum time limit in seconds; +- `max_iter::Int = 10000`: maximum number of iterations; +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration; +- `Δk::T = T(1)`: initial value of the trust-region radius; +- `η1::T = √√eps(T)`: successful iteration threshold; +- `η2::T = T(0.9)`: very successful iteration threshold; +- `γ::T = T(3)`: trust-region radius parameter multiplier, Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful; +- `α::T = 1/eps(T)`: TODO +- `β::T = 1/eps(T)`: TODO +- `χ::F = NormLinf(1)`: norm used to define the trust-region;` +- `subsolver::S = R2Solver`: subsolver used to solve the subproblem that appears at each iteration. + +The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.xk`: current iterate; +- `solver.∇fk`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.solver_specific[:smooth_obj]`: current value of the smooth part of the objective function; + - `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention; + - `stats.elapsed_time`: elapsed time in seconds. """ function TR( f::AbstractNLPModel, From ca4d66ca0700256de20e213ef96ee63c5d32f76c Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sat, 12 Jul 2025 09:47:50 -0600 Subject: [PATCH 18/33] fix reproducibility in TRDH --- src/TRDH_alg.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 7f5132b4..5fb5a807 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -476,7 +476,7 @@ function SolverCore.solve!( update_bounds!(l_bound_m_x, u_bound_m_x, is_subsolver, l_bound, u_bound, xk, Δ_effective) set_bounds!(ψ, l_bound_m_x, u_bound_m_x) else - set_radius!(ψ, Δ_effective) + set_radius!(ψ, Δk) end end @@ -499,6 +499,7 @@ function SolverCore.solve!( end iprox!(s, ψ, ∇fk, dk) + sNorm = χ(s) if !reduce_TR From 1abfe5794a054dfb476408026ef309b65176d63c Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sat, 12 Jul 2025 11:01:26 -0600 Subject: [PATCH 19/33] fix reproducibility with TR --- src/TR_alg.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 98756acb..b2de3d74 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -159,8 +159,7 @@ function TR( χ::X, options::ROSolverOptions{R}; x0::AbstractVector{R} = f.meta.x0, - subsolver_logger::Logging.AbstractLogger = Logging.SimpleLogger(), - subsolver = R2, + subsolver = R2Solver, subsolver_options = ROSolverOptions(ϵa = options.ϵa), selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), kwargs... @@ -182,6 +181,7 @@ function TR( γ = options.γ, α = options.α, β = options.β, + subsolver = R2Solver, kwargs... ) return stats @@ -252,8 +252,10 @@ function SolverCore.solve!( @. l_bound_m_x .= max.(l_bound_m_x, -Δk) @. u_bound_m_x .= min.(u_bound_m_x, Δk) set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x) else set_radius!(ψ, Δk) + set_radius!(solver.subsolver.ψ, Δk) end # initialize parameters @@ -365,7 +367,9 @@ function SolverCore.solve!( @. l_bound_m_x .= max.(l_bound_m_x, -∆_effective) @. u_bound_m_x .= min.(u_bound_m_x, ∆_effective) set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x) else + set_radius!(solver.subsolver.ψ, ∆_effective) set_radius!(ψ, ∆_effective) end @@ -432,6 +436,7 @@ function SolverCore.solve!( @. l_bound_m_x = l_bound - xk @. u_bound_m_x = u_bound - xk set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x) end fk = fkn hk = hkn From ea0eaa2e33487769358e24733828bc139eec9669 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sat, 12 Jul 2025 11:04:45 -0600 Subject: [PATCH 20/33] fix bound test for new TR signature --- test/test_bounds.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_bounds.jl b/test/test_bounds.jl index 04139abe..1b601b9a 100644 --- a/test/test_bounds.jl +++ b/test/test_bounds.jl @@ -1,5 +1,5 @@ const subsolver_options = deepcopy(options) -TR_TRDH(args...; kwargs...) = TR(args...; subsolver = TRDH, kwargs...) +TR_TRDH(args...; kwargs...) = TR(args...; subsolver = TRDHSolver, kwargs...) for (mod, mod_name) ∈ ((x -> x, "exact"), (LSR1Model, "lsr1"), (LBFGSModel, "lbfgs")) for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1")) From 5b2cc0ad557bea9907a34524919ef3803e6a8420 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sat, 12 Jul 2025 11:43:57 -0600 Subject: [PATCH 21/33] fix reproducibility with TR-TRDH --- src/TR_alg.jl | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index b2de3d74..24c90b12 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -159,7 +159,6 @@ function TR( χ::X, options::ROSolverOptions{R}; x0::AbstractVector{R} = f.meta.x0, - subsolver = R2Solver, subsolver_options = ROSolverOptions(ϵa = options.ϵa), selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), kwargs... @@ -181,7 +180,6 @@ function TR( γ = options.γ, α = options.α, β = options.β, - subsolver = R2Solver, kwargs... ) return stats @@ -429,12 +427,21 @@ function SolverCore.solve!( colsep = 1, ) + if η2 ≤ ρk < Inf + Δk = max(Δk, γ * sNorm) + if !(has_bnds || isa(solver.subsolver, TRDHSolver)) + set_radius!(ψ, Δk) + set_radius!(solver.subsolver.ψ, Δk) + end + end + if η1 ≤ ρk < Inf xk .= xkn - ∆_effective = min(β * χ(s), Δk) if has_bnds || isa(solver.subsolver, TRDHSolver) @. l_bound_m_x = l_bound - xk @. u_bound_m_x = u_bound - xk + @. l_bound_m_x .= max.(l_bound_m_x, -Δk) + @. u_bound_m_x .= min.(u_bound_m_x, Δk) set_bounds!(ψ, l_bound_m_x, u_bound_m_x) set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x) end @@ -447,6 +454,8 @@ function SolverCore.solve!( if quasiNewtTest @. ∇fk⁻ = ∇fk - ∇fk⁻ push!(nlp, s, ∇fk⁻) # update QN operator + #println(ψ) + #stats.iter == 1 && error("done") end solver.subpb.model.B = hess_op(nlp, xk) @@ -457,20 +466,19 @@ function SolverCore.solve!( ∇fk⁻ .= ∇fk end - if η2 ≤ ρk < Inf - Δk = max(Δk, γ * sNorm) - if !(has_bnds || isa(solver.subsolver, TRDHSolver)) - set_radius!(ψ, Δk) - end - end - - if η2 ≤ ρk < Inf - Δk = max(Δk, γ * sNorm) - !(has_bnds || isa(solver.subsolver, TRDHSolver)) && set_radius!(ψ, Δk) - end - if ρk < η1 || ρk == Inf Δk = Δk / 2 + if has_bnds || isa(solver.subsolver, TRDHSolver) + @. l_bound_m_x = l_bound - xk + @. u_bound_m_x = u_bound - xk + @. l_bound_m_x .= max.(l_bound_m_x, -Δk) + @. u_bound_m_x .= min.(u_bound_m_x, Δk) + set_bounds!(ψ, l_bound_m_x, u_bound_m_x) + set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x) + else + set_radius!(ψ, Δk) + set_radius!(solver.subsolver.ψ, Δk) + end end set_objective!(stats, fk + hk) From b77a5a322729b3f24a718d1d04a3ca93ae8f99eb Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sat, 12 Jul 2025 13:02:12 -0600 Subject: [PATCH 22/33] fix bug in TRDH --- src/TRDH_alg.jl | 6 ++++++ src/TR_alg.jl | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 5fb5a807..24aecea3 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -308,6 +308,12 @@ function SolverCore.solve!( improper == true && return stats is_subsolver = h isa ShiftedProximableFunction # case TRDH is used as a subsolver + + if is_subsolver + l_bound .= ψ.l + u_bound .= ψ.u + end + if verbose > 0 @info log_header( [:iter, :fx, :hx, :xi, :ρ, :Δ, :normx, :norms, :normD, :arrow], diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 24c90b12..cfe3d553 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -454,8 +454,6 @@ function SolverCore.solve!( if quasiNewtTest @. ∇fk⁻ = ∇fk - ∇fk⁻ push!(nlp, s, ∇fk⁻) # update QN operator - #println(ψ) - #stats.iter == 1 && error("done") end solver.subpb.model.B = hess_op(nlp, xk) From 820204695ff6eb547e1cea661ddebf470c946f22 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 8 Aug 2025 12:12:28 +0200 Subject: [PATCH 23/33] apply julia formatter --- src/TRDH_alg.jl | 90 +++++++++++++++++++++---------------------------- src/TR_alg.jl | 87 ++++++++++++++++++++--------------------------- 2 files changed, 75 insertions(+), 102 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 24aecea3..f6356a02 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -7,7 +7,7 @@ mutable struct TRDHSolver{ G <: ShiftedProximableFunction, V <: AbstractVector{T}, QN <: AbstractDiagonalQuasiNewtonOperator{T}, - N + N, } <: AbstractOptimizationSolver xk::V ∇fk::V @@ -29,7 +29,7 @@ end function TRDHSolver( reg_nlp::AbstractRegularizedNLPModel{T, V}; D::Union{Nothing, AbstractDiagonalQuasiNewtonOperator} = nothing, - χ = NormLinf(one(T)) + χ = NormLinf(one(T)), ) where {T, V} x0 = reg_nlp.model.meta.x0 l_bound = reg_nlp.model.meta.lvar @@ -191,7 +191,7 @@ function TRDH( γ = options.γ, α = options.α, β = options.β, - kwargs_dict... + kwargs_dict..., ) return stats end @@ -227,7 +227,7 @@ function TRDH( γ = options.γ, α = options.α, β = options.β, - kwargs... + kwargs..., ) return stats.solution, stats.iter, stats end @@ -261,10 +261,10 @@ function SolverCore.solve!( η2::T = T(0.9), γ::T = T(3), α::T = 1 / eps(T), - β::T = 1 / eps(T) + β::T = 1 / eps(T), ) where {T, G, V} reset!(stats) - + # Retrieve workspace selected = reg_nlp.selected h = reg_nlp.h @@ -363,18 +363,18 @@ function SolverCore.solve!( φ = let ∇fk = ∇fk, dk = dk d -> begin - result = zero(T) - n = length(d) - @inbounds for i = 1:n - result += d[i]^2 * dk[i] / 2 + ∇fk[i] * d[i] - end - result + result = zero(T) + n = length(d) + @inbounds for i = 1:n + result += d[i]^2 * dk[i] / 2 + ∇fk[i] * d[i] + end + result end end mk = let ψ = ψ - d -> φ(d) + ψ(d)::T + d -> φ(d) + ψ(d)::T end - + if reduce_TR prox!(s, ψ, mν∇fk, ν) mks = mk1(s) @@ -396,17 +396,17 @@ function SolverCore.solve!( else set_radius!(ψ, Δ_effective) end - + iprox!(s, ψ, ∇fk, dk) ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() sNorm = χ(s) if !reduce_TR - sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) - solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < atol) - (ξ < 0 && sqrt_ξ_νInv > neg_tol) && + sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) + solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < atol) + (ξ < 0 && sqrt_ξ_νInv > neg_tol) && error("TRDH: prox-gradient step should produce a decrease but ξ = $(ξ)") - atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative #TODO : this is redundant code with the other case of the test. + atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative #TODO : this is redundant code with the other case of the test. end set_status!( @@ -428,7 +428,6 @@ function SolverCore.solve!( done = stats.status != :unknown while !done - xkn .= xk .+ s fkn = obj(nlp, xkn) hkn = @views h(xkn[selected]) @@ -453,7 +452,7 @@ function SolverCore.solve!( ], colsep = 1, ) - + if η1 ≤ ρk < Inf xk .= xkn if has_bnds @@ -494,7 +493,7 @@ function SolverCore.solve!( ν = reduce_TR ? (α * Δk)/(DNorm + one(T)) : α / (DNorm + one(T)) mν∇fk .= -ν .* ∇fk - + if reduce_TR prox!(s, ψ, mν∇fk, ν) ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() @@ -503,13 +502,13 @@ function SolverCore.solve!( (ξ1 < 0 && sqrt_ξ_νInv > neg_tol) && error("TRDH: prox-gradient step should produce a decrease but ξ = $(ξ)") end - + iprox!(s, ψ, ∇fk, dk) sNorm = χ(s) if !reduce_TR - ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() + ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < atol) (ξ < 0 && sqrt_ξ_νInv > neg_tol) && @@ -517,40 +516,29 @@ function SolverCore.solve!( end set_status!( - stats, - get_status( - reg_nlp, - elapsed_time = stats.elapsed_time, - iter = stats.iter, - optimal = solved, - improper = improper, - max_eval = max_eval, - max_time = max_time, - max_iter = max_iter, + stats, + get_status( + reg_nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, ), ) - callback(nlp, solver, stats) + callback(nlp, solver, stats) - done = stats.status != :unknown + done = stats.status != :unknown end if verbose > 0 && stats.status == :first_order @info log_row( - Any[ - stats.iter, - fk, - hk, - sqrt_ξ_νInv, - ρk, - Δk, - χ(xk), - sNorm, - norm(D.d), - "", - ], - colsep = 1, - ) + Any[stats.iter, fk, hk, sqrt_ξ_νInv, ρk, Δk, χ(xk), sNorm, norm(D.d), ""], + colsep = 1, + ) @info "TRDH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)" end @@ -568,4 +556,4 @@ function update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk @. l_bound_k = max(-Δ, l_bound - xk) @. u_bound_k = min(Δ, u_bound - xk) end -end \ No newline at end of file +end diff --git a/src/TR_alg.jl b/src/TR_alg.jl index cfe3d553..6106aee2 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -8,7 +8,7 @@ mutable struct TRSolver{ V <: AbstractVector{T}, N, ST <: AbstractOptimizationSolver, - PB <: AbstractRegularizedNLPModel + PB <: AbstractRegularizedNLPModel, } <: AbstractOptimizationSolver xk::V ∇fk::V @@ -31,7 +31,7 @@ end function TRSolver( reg_nlp::AbstractRegularizedNLPModel{T, V}; χ::X = NormLinf(one(T)), - subsolver = R2Solver + subsolver = R2Solver, ) where {T, V, X} x0 = reg_nlp.model.meta.x0 l_bound = reg_nlp.model.meta.lvar @@ -55,7 +55,8 @@ function TRSolver( end ψ = - has_bnds || subsolver == TRDHSolver ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : + has_bnds || subsolver == TRDHSolver ? + shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : shifted(reg_nlp.h, xk, T(1), χ) Bk = hess_op(reg_nlp.model, x0) @@ -161,7 +162,7 @@ function TR( x0::AbstractVector{R} = f.meta.x0, subsolver_options = ROSolverOptions(ϵa = options.ϵa), selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), - kwargs... + kwargs..., ) where {H, X, R} reg_nlp = RegularizedNLPModel(f, h, selected) stats = TR( @@ -180,15 +181,12 @@ function TR( γ = options.γ, α = options.α, β = options.β, - kwargs... + kwargs..., ) return stats end -function TR( - reg_nlp::AbstractRegularizedNLPModel{T, V}; - kwargs... -) where{T, V} +function TR(reg_nlp::AbstractRegularizedNLPModel{T, V}; kwargs...) where {T, V} kwargs_dict = Dict(kwargs...) subsolver = pop!(kwargs_dict, :subsolver, R2Solver) χ = pop!(kwargs_dict, :χ, NormLinf(one(T))) @@ -217,10 +215,10 @@ function SolverCore.solve!( η2::T = T(0.9), γ::T = T(3), α::T = 1 / eps(T), - β::T = 1 / eps(T) + β::T = 1 / eps(T), ) where {T, G, V} reset!(stats) - + # Retrieve workspace selected = reg_nlp.selected h = reg_nlp.h @@ -355,7 +353,6 @@ function SolverCore.solve!( done = stats.status != :unknown while !done - sub_atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)) ∆_effective = min(β * χ(s), Δk) @@ -374,21 +371,21 @@ function SolverCore.solve!( if isa(solver.subsolver, TRDHSolver) #FIXME solver.subsolver.D.d[1] = 1/ν₁ solve!( - solver.subsolver, - solver.subpb, - solver.substats; - x = s, + solver.subsolver, + solver.subpb, + solver.substats; + x = s, atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)), - Δk = ∆_effective / 10 - ) - else + Δk = ∆_effective / 10, + ) + else solve!( - solver.subsolver, - solver.subpb, - solver.substats; - x = s, + solver.subsolver, + solver.subpb, + solver.substats; + x = s, atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)), - ν = ν₁, + ν = ν₁, ) end @@ -426,7 +423,7 @@ function SolverCore.solve!( ], colsep = 1, ) - + if η2 ≤ ρk < Inf Δk = max(Δk, γ * sNorm) if !(has_bnds || isa(solver.subsolver, TRDHSolver)) @@ -487,7 +484,7 @@ function SolverCore.solve!( ν₁ = α * Δk / (1 + λmax * (α * Δk + 1)) @. mν∇fk = -ν₁ * ∇fk - + prox!(s, ψ, mν∇fk, ν₁) ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() sqrt_ξ1_νInv = sqrt(ξ1 / ν₁) @@ -497,16 +494,16 @@ function SolverCore.solve!( error("TR: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") set_status!( - stats, - get_status( - reg_nlp, - elapsed_time = stats.elapsed_time, - iter = stats.iter, - optimal = solved, - improper = improper, - max_eval = max_eval, - max_time = max_time, - max_iter = max_iter, + stats, + get_status( + reg_nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, ), ) @@ -516,21 +513,9 @@ function SolverCore.solve!( end if verbose > 0 && stats.status == :first_order @info log_row( - Any[ - stats.iter, - solver.substats.iter, - fk, - hk, - sqrt_ξ1_νInv, - ρk, - Δk, - χ(xk), - χ(s), - λmax, - "", - ], - colsep = 1, - ) + Any[stats.iter, solver.substats.iter, fk, hk, sqrt_ξ1_νInv, ρk, Δk, χ(xk), χ(s), λmax, ""], + colsep = 1, + ) @info "TR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" end end From 08bc4d69030b8a763ec5fa39e800d8bc6aa07a6d Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 17 Aug 2025 11:32:59 +0200 Subject: [PATCH 24/33] implicit hessian update fix --- src/TR_alg.jl | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 6106aee2..1f09fa52 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -59,7 +59,7 @@ function TRSolver( shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : shifted(reg_nlp.h, xk, T(1), χ) - Bk = hess_op(reg_nlp.model, x0) + Bk = isa(reg_nlp.model, QuasiNewtonModel) ? hess_op(reg_nlp.model, xk) : hess_op!(reg_nlp.model, xk, similar(xk)) sub_nlp = R2NModel(Bk, ∇fk, zero(T), x0) #FIXME subpb = RegularizedNLPModel(sub_nlp, ψ) substats = RegularizedExecutionStats(subpb) @@ -94,7 +94,7 @@ A trust-region method for the problem min f(x) + h(x) where f: ℝⁿ → ℝ has a Lipschitz-continuous gradient, and h: ℝⁿ → ℝ is -lower semi-continuous, proper and prox-bounded. +lower semi-continuous and proper. About each iterate xₖ, a step sₖ is computed as an approximate solution of @@ -266,7 +266,6 @@ function SolverCore.solve!( end improper = (hk == -Inf) improper == true && @warn "TR: Improper term detected" - improper == true && return stats if verbose > 0 @info log_header( @@ -286,15 +285,14 @@ function SolverCore.solve!( end local ξ1::T - local ρk::T = zero(T) + local ρk = zero(T) fk = obj(nlp, xk) grad!(nlp, xk, ∇fk) ∇fk⁻ .= ∇fk quasiNewtTest = isa(nlp, QuasiNewtonModel) - λmax::T = T(1) - solver.subpb.model.B = hess_op(nlp, xk) + λmax = T(1) λmax, found_λ = opnorm(solver.subpb.model.B) found_λ || error("operator norm computation failed") @@ -324,6 +322,7 @@ function SolverCore.solve!( end prox!(s, ψ, mν∇fk, ν₁) + #println(solver.subpb.model.B*ones(length(xk))) ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") sqrt_ξ1_νInv = sqrt(ξ1 / ν₁) @@ -419,7 +418,7 @@ function SolverCore.solve!( χ(xk), sNorm, λmax, - (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "="), + (η2 ≤ ρk < Inf) ? '↗' : (ρk < η1 ? '↘' : '='), ], colsep = 1, ) @@ -453,8 +452,6 @@ function SolverCore.solve!( push!(nlp, s, ∇fk⁻) # update QN operator end - solver.subpb.model.B = hess_op(nlp, xk) - λmax, found_λ = opnorm(solver.subpb.model.B) found_λ || error("operator norm computation failed") From 03bd51658a2a320816c4fb8881cdbaf781f28f2c Mon Sep 17 00:00:00 2001 From: Maxence Gollier <134112149+MaxenceGollier@users.noreply.github.com> Date: Sun, 17 Aug 2025 11:36:15 +0200 Subject: [PATCH 25/33] update doc Co-authored-by: Dominique --- src/TR_alg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 1f09fa52..0a9174fb 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -118,7 +118,7 @@ For advanced usage, first define a solver "TRSolver" to preallocate the memory u - `x::V = nlp.meta.x0`: the initial guess; - `atol::T = √eps(T)`: absolute tolerance; - `rtol::T = √eps(T)`: relative tolerance; -- `neg_tol::T = eps(T)^(1 / 4)`: negative tolerance; +- `neg_tol::T = eps(T)^(1 / 4)`: negative tolerance (see stopping conditions below); - `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); - `max_time::Float64 = 30.0`: maximum time limit in seconds; - `max_iter::Int = 10000`: maximum number of iterations; From 120d07319c7779dbb2008cfb6c2ee91434bddbe4 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 17 Aug 2025 11:42:01 +0200 Subject: [PATCH 26/33] add callback docstring --- src/RegularizedOptimization.jl | 18 ++++++++++++++++++ src/TRDH_alg.jl | 18 ++---------------- src/TR_alg.jl | 18 ++---------------- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index 01778e86..ae5656bd 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -11,6 +11,24 @@ using LinearOperators, NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore using Percival: AugLagModel, update_y!, update_μ! +const callback_docstring = " +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.xk`: current iterate; +- `solver.∇fk`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.solver_specific[:smooth_obj]`: current value of the smooth part of the objective function; + - `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention; + - `stats.elapsed_time`: elapsed time in seconds. +" + include("utils.jl") include("input_struct.jl") include("PG_alg.jl") diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index f6356a02..665a0229 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -133,7 +133,7 @@ For advanced usage, first define a solver "TRDHSolver" to preallocate the memory - `Δk::T = T(1)`: initial value of the trust-region radius; - `η1::T = √√eps(T)`: successful iteration threshold; - `η2::T = T(0.9)`: very successful iteration threshold; -- `γ::T = T(3)`: trust-region radius parameter multiplier, Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful; +- `γ::T = T(3)`: trust-region radius parameter multiplier. Must satisfy `γ > 1`. The trust-region radius is updated as Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful; - `α::T = 1/eps(T)`: TODO - `β::T = 1/eps(T)`: TODO - `reduce_TR::Bool = True`: TODO @@ -146,21 +146,7 @@ The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback -The callback is called at each iteration. -The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. -Changing any of the input arguments will affect the subsequent iterations. -In particular, setting `stats.status = :user` will stop the algorithm. -All relevant information should be available in `nlp` and `solver`. -Notably, you can access, and modify, the following: -- `solver.xk`: current iterate; -- `solver.∇fk`: current gradient; -- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: - - `stats.iter`: current iteration counter; - - `stats.objective`: current objective function value; - - `stats.solver_specific[:smooth_obj]`: current value of the smooth part of the objective function; - - `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function; - - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention; - - `stats.elapsed_time`: elapsed time in seconds. +$(callback_docstring) """ function TRDH( nlp::AbstractDiagonalQNModel{T, V}, diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 0a9174fb..ca11e839 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -126,7 +126,7 @@ For advanced usage, first define a solver "TRSolver" to preallocate the memory u - `Δk::T = T(1)`: initial value of the trust-region radius; - `η1::T = √√eps(T)`: successful iteration threshold; - `η2::T = T(0.9)`: very successful iteration threshold; -- `γ::T = T(3)`: trust-region radius parameter multiplier, Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful; +- `γ::T = T(3)`: trust-region radius parameter multiplier. Must satisfy `γ > 1`. The trust-region radius is updated as Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful; - `α::T = 1/eps(T)`: TODO - `β::T = 1/eps(T)`: TODO - `χ::F = NormLinf(1)`: norm used to define the trust-region;` @@ -138,21 +138,7 @@ The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback -The callback is called at each iteration. -The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. -Changing any of the input arguments will affect the subsequent iterations. -In particular, setting `stats.status = :user` will stop the algorithm. -All relevant information should be available in `nlp` and `solver`. -Notably, you can access, and modify, the following: -- `solver.xk`: current iterate; -- `solver.∇fk`: current gradient; -- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: - - `stats.iter`: current iteration counter; - - `stats.objective`: current objective function value; - - `stats.solver_specific[:smooth_obj]`: current value of the smooth part of the objective function; - - `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function; - - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention; - - `stats.elapsed_time`: elapsed time in seconds. +$(callback_docstring) """ function TR( f::AbstractNLPModel, From fc63949e7b8c35e1d9dd4c571a5d5a87ef888ba7 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 17 Aug 2025 11:58:36 +0200 Subject: [PATCH 27/33] add update_bounds function --- src/RegularizedOptimization.jl | 11 +++++++++++ src/TRDH_alg.jl | 11 ----------- src/TR_alg.jl | 26 ++++++-------------------- 3 files changed, 17 insertions(+), 31 deletions(-) diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index ae5656bd..b6ef20ef 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -29,6 +29,17 @@ Notably, you can access, and modify, the following: - `stats.elapsed_time`: elapsed time in seconds. " +# update l_bound_k and u_bound_k +function update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δ) + if is_subsolver + @. l_bound_k = max(xk - Δ, l_bound) + @. u_bound_k = min(xk + Δ, u_bound) + else + @. l_bound_k = max(-Δ, l_bound - xk) + @. u_bound_k = min(Δ, u_bound - xk) + end +end + include("utils.jl") include("input_struct.jl") include("PG_alg.jl") diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 665a0229..df422d06 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -532,14 +532,3 @@ function SolverCore.solve!( return stats end - -# update l_bound_k and u_bound_k -function update_bounds!(l_bound_k, u_bound_k, is_subsolver, l_bound, u_bound, xk, Δ) - if is_subsolver - @. l_bound_k = max(xk - Δ, l_bound) - @. u_bound_k = min(xk + Δ, u_bound) - else - @. l_bound_k = max(-Δ, l_bound - xk) - @. u_bound_k = min(Δ, u_bound - xk) - end -end diff --git a/src/TR_alg.jl b/src/TR_alg.jl index ca11e839..985a7009 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -225,14 +225,9 @@ function SolverCore.solve!( has_bnds = solver.has_bnds if has_bnds || isa(solver.subsolver, TRDHSolver) #TODO elsewhere ? - l_bound_m_x = solver.l_bound_m_x - u_bound_m_x = solver.u_bound_m_x - l_bound = solver.l_bound - u_bound = solver.u_bound - @. l_bound_m_x = l_bound - xk - @. u_bound_m_x = u_bound - xk - @. l_bound_m_x .= max.(l_bound_m_x, -Δk) - @. u_bound_m_x .= min.(u_bound_m_x, Δk) + l_bound_m_x, u_bound_m_x = solver.l_bound_m_x, solver.u_bound_m_x + l_bound, u_bound = solver.l_bound, solver.u_bound + update_bounds!(l_bound_m_x, u_bound_m_x, false, l_bound, u_bound, xk, Δk) set_bounds!(ψ, l_bound_m_x, u_bound_m_x) set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x) else @@ -342,10 +337,7 @@ function SolverCore.solve!( ∆_effective = min(β * χ(s), Δk) if has_bnds || isa(solver.subsolver, TRDHSolver) #TODO elsewhere ? - @. l_bound_m_x = l_bound - xk - @. u_bound_m_x = u_bound - xk - @. l_bound_m_x .= max.(l_bound_m_x, -∆_effective) - @. u_bound_m_x .= min.(u_bound_m_x, ∆_effective) + update_bounds!(l_bound_m_x, u_bound_m_x, false, l_bound, u_bound, xk, Δk) set_bounds!(ψ, l_bound_m_x, u_bound_m_x) set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x) else @@ -420,10 +412,7 @@ function SolverCore.solve!( if η1 ≤ ρk < Inf xk .= xkn if has_bnds || isa(solver.subsolver, TRDHSolver) - @. l_bound_m_x = l_bound - xk - @. u_bound_m_x = u_bound - xk - @. l_bound_m_x .= max.(l_bound_m_x, -Δk) - @. u_bound_m_x .= min.(u_bound_m_x, Δk) + update_bounds!(l_bound_m_x, u_bound_m_x, false, l_bound, u_bound, xk, Δk) set_bounds!(ψ, l_bound_m_x, u_bound_m_x) set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x) end @@ -447,10 +436,7 @@ function SolverCore.solve!( if ρk < η1 || ρk == Inf Δk = Δk / 2 if has_bnds || isa(solver.subsolver, TRDHSolver) - @. l_bound_m_x = l_bound - xk - @. u_bound_m_x = u_bound - xk - @. l_bound_m_x .= max.(l_bound_m_x, -Δk) - @. u_bound_m_x .= min.(u_bound_m_x, Δk) + update_bounds!(l_bound_m_x, u_bound_m_x, false, l_bound, u_bound, xk, Δk) set_bounds!(ψ, l_bound_m_x, u_bound_m_x) set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x) else From c4f9b68b599aadddfd71213ba6ef7c4840406c35 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 17 Aug 2025 14:15:17 +0200 Subject: [PATCH 28/33] add subsolver logger --- src/TR_alg.jl | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 985a7009..c2d51dc3 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -193,6 +193,7 @@ function SolverCore.solve!( rtol::T = √eps(T), neg_tol::T = eps(T)^(1 / 4), verbose::Int = 0, + subsolver_logger::Logging.AbstractLogger = Logging.SimpleLogger(), max_iter::Int = 10000, max_time::Float64 = 30.0, max_eval::Int = -1, @@ -344,26 +345,27 @@ function SolverCore.solve!( set_radius!(solver.subsolver.ψ, ∆_effective) set_radius!(ψ, ∆_effective) end - - if isa(solver.subsolver, TRDHSolver) #FIXME - solver.subsolver.D.d[1] = 1/ν₁ - solve!( - solver.subsolver, - solver.subpb, - solver.substats; - x = s, - atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)), - Δk = ∆_effective / 10, - ) - else - solve!( - solver.subsolver, - solver.subpb, - solver.substats; - x = s, - atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)), - ν = ν₁, - ) + with_logger(subsolver_logger) do + if isa(solver.subsolver, TRDHSolver) #FIXME + solver.subsolver.D.d[1] = 1/ν₁ + solve!( + solver.subsolver, + solver.subpb, + solver.substats; + x = s, + atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)), + Δk = ∆_effective / 10, + ) + else + solve!( + solver.subsolver, + solver.subpb, + solver.substats; + x = s, + atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)), + ν = ν₁, + ) + end end s .= solver.substats.solution From 37d9bac1865c1018cabafb53bedf3c1583ec6d5c Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Wed, 20 Aug 2025 17:19:47 +0200 Subject: [PATCH 29/33] remove alpha and beta params from keyword arguments --- src/TRDH_alg.jl | 11 +++-------- src/TR_alg.jl | 9 +++------ 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index df422d06..8c1f1402 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -134,8 +134,6 @@ For advanced usage, first define a solver "TRDHSolver" to preallocate the memory - `η1::T = √√eps(T)`: successful iteration threshold; - `η2::T = T(0.9)`: very successful iteration threshold; - `γ::T = T(3)`: trust-region radius parameter multiplier. Must satisfy `γ > 1`. The trust-region radius is updated as Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful; -- `α::T = 1/eps(T)`: TODO -- `β::T = 1/eps(T)`: TODO - `reduce_TR::Bool = True`: TODO - `χ::F = NormLinf(1)`: norm used to define the trust-region;` - `D::L = nothing`: diagonal quasi-Newton approximation used for the model φ. If nothing is provided and `reg_nlp.model` is not a diagonal quasi-Newton approximation, a spectral gradient approximation is used.` @@ -175,8 +173,6 @@ function TRDH( η1 = options.η1, η2 = options.η2, γ = options.γ, - α = options.α, - β = options.β, kwargs_dict..., ) return stats @@ -211,8 +207,6 @@ function TRDH( η1 = options.η1, η2 = options.η2, γ = options.γ, - α = options.α, - β = options.β, kwargs..., ) return stats.solution, stats.iter, stats @@ -246,8 +240,6 @@ function SolverCore.solve!( η1::T = √√eps(T), η2::T = T(0.9), γ::T = T(3), - α::T = 1 / eps(T), - β::T = 1 / eps(T), ) where {T, G, V} reset!(stats) @@ -320,6 +312,9 @@ function SolverCore.solve!( local ξ1::T local ρk::T = zero(T) + α = 1 / eps(T) + β = 1 / eps(T) + fk = obj(nlp, xk) grad!(nlp, xk, ∇fk) ∇fk⁻ .= ∇fk diff --git a/src/TR_alg.jl b/src/TR_alg.jl index c2d51dc3..b2fb1361 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -127,8 +127,6 @@ For advanced usage, first define a solver "TRSolver" to preallocate the memory u - `η1::T = √√eps(T)`: successful iteration threshold; - `η2::T = T(0.9)`: very successful iteration threshold; - `γ::T = T(3)`: trust-region radius parameter multiplier. Must satisfy `γ > 1`. The trust-region radius is updated as Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful; -- `α::T = 1/eps(T)`: TODO -- `β::T = 1/eps(T)`: TODO - `χ::F = NormLinf(1)`: norm used to define the trust-region;` - `subsolver::S = R2Solver`: subsolver used to solve the subproblem that appears at each iteration. @@ -165,8 +163,6 @@ function TR( η1 = options.η1, η2 = options.η2, γ = options.γ, - α = options.α, - β = options.β, kwargs..., ) return stats @@ -201,8 +197,6 @@ function SolverCore.solve!( η1::T = √√eps(T), η2::T = T(0.9), γ::T = T(3), - α::T = 1 / eps(T), - β::T = 1 / eps(T), ) where {T, G, V} reset!(stats) @@ -269,6 +263,9 @@ function SolverCore.solve!( local ξ1::T local ρk = zero(T) + α = 1 / eps(T) + β = 1 / eps(T) + fk = obj(nlp, xk) grad!(nlp, xk, ∇fk) ∇fk⁻ .= ∇fk From 032a2b5b4241a362e71a875214b76a9370fb1da2 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Wed, 20 Aug 2025 18:04:18 +0200 Subject: [PATCH 30/33] add reduce_TR doc --- src/TRDH_alg.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 8c1f1402..a6ae8347 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -134,11 +134,12 @@ For advanced usage, first define a solver "TRDHSolver" to preallocate the memory - `η1::T = √√eps(T)`: successful iteration threshold; - `η2::T = T(0.9)`: very successful iteration threshold; - `γ::T = T(3)`: trust-region radius parameter multiplier. Must satisfy `γ > 1`. The trust-region radius is updated as Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful; -- `reduce_TR::Bool = True`: TODO +- `reduce_TR::Bool = true`: see explanation on the stopping criterion below; - `χ::F = NormLinf(1)`: norm used to define the trust-region;` - `D::L = nothing`: diagonal quasi-Newton approximation used for the model φ. If nothing is provided and `reg_nlp.model` is not a diagonal quasi-Newton approximation, a spectral gradient approximation is used.` The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. +Alternatively, if `reduce_TR = true`, then ξₖ₁ := f(xₖ) + h(xₖ) - φ(sₖ₁; xₖ) - ψ(sₖ₁; xₖ) is used instead of ξₖ, where sₖ₁ is the Cauchy point. # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. From d90ba89259c763e9a60e5c777f1ea807492494fd Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 8 Sep 2025 09:37:24 -0400 Subject: [PATCH 31/33] remove FirstOrderModel --- Project.toml | 6 ++++-- src/TRDH_alg.jl | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index e513061b..baf0d479 100644 --- a/Project.toml +++ b/Project.toml @@ -4,9 +4,11 @@ author = ["Robert Baraldi and Dominique Orban Date: Mon, 8 Sep 2025 09:45:06 -0400 Subject: [PATCH 32/33] remove commented code --- src/TR_alg.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index b2fb1361..6818a731 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -301,7 +301,6 @@ function SolverCore.solve!( end prox!(s, ψ, mν∇fk, ν₁) - #println(solver.subpb.model.B*ones(length(xk))) ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") sqrt_ξ1_νInv = sqrt(ξ1 / ν₁) From a7df86f664a437373cb2061df5e2c14b8201e630 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 8 Sep 2025 10:22:31 -0400 Subject: [PATCH 33/33] add ManualNLPModels dep --- src/RegularizedOptimization.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index b6ef20ef..81ffec57 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -8,7 +8,7 @@ using Arpack, ProximalOperators # dependencies from us using LinearOperators, - NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore + ManualNLPModels, NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore using Percival: AugLagModel, update_y!, update_μ! const callback_docstring = "