From 9c57f7f301d90a533da25f36921e609290c474a5 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Thu, 27 Mar 2025 15:08:13 -0400 Subject: [PATCH 01/12] Implement Line Search with Negative Curvature Detection --- src/cr.jl | 33 +++++++++++++++-------- src/krylov_workspaces.jl | 3 +++ test/test_cr.jl | 57 +++++++++++++++++++++++++++++++++------- 3 files changed, 72 insertions(+), 21 deletions(-) diff --git a/src/cr.jl b/src/cr.jl index c48004b03..e0b05ee5e 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -57,7 +57,7 @@ For an in-place variant that reuses memory across solves, see [`cr!`](@ref). * `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; * `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; * `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; -* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient); +* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If `linesearch` is true and nonpositive curvature is detected, the solution depends on the iteration: at iteration k = 0, the right-hand side is returned with `solver.npc_dir` as the preconditioned initial residual; at iteration k > 0, the solution from iteration k-1 is returned with `solver.npc_dir` as the most recent residual; * `γ`: tolerance to determine that the curvature of the quadratic model is nonpositive; * `atol`: absolute stopping tolerance based on the residual norm; * `rtol`: relative stopping tolerance based on the residual norm; @@ -133,6 +133,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema length(b) == n || error("Inconsistent problem size") linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0") (verbose > 0) && @printf(iostream, "CR: system of %d equations in %d variables\n", n, n) + (solver.warm_start && linesearch) && error("warm_start and linesearch cannot be used together") # Tests M = Iₙ MisI = (M === I) @@ -142,12 +143,16 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema ktypeof(b) == S || error("ktypeof(b) must be equal to $S") # Set up workspace - allocate_if(!MisI, workspace, :Mq, S, workspace.x) # The length of Mq is n - Δx, x, r, p, q, Ar, stats = workspace.Δx, workspace.x, workspace.r, workspace.p, workspace.q, workspace.Ar, workspace.stats - warm_start = workspace.warm_start + allocate_if(!MisI, solver, :Mq, S, solver.x) # The length of Mq is n + allocate_if(linesearch, solver, :npc_dir , S, solver.x) # The length of npc_dir is n + Δx, x, r, p, q, Ar, stats = solver.Δx, solver.x, solver.r, solver.p, solver.q, solver.Ar, solver.stats + warm_start = solver.warm_start rNorms, ArNorms = stats.residuals, stats.Aresiduals reset!(stats) - Mq = MisI ? q : workspace.Mq + Mq = MisI ? q : solver.Mq + if linesearch + npc_dir = solver.npc_dir + end # Initial state. kfill!(x, zero(FC)) @@ -158,7 +163,8 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema kcopy!(n, p, b) # p ← b end MisI && kcopy!(n, r, p) # r ← p - MisI || mulorldiv!(r, M, p, ldiv) + MisI || mulorldiv!(r, M, p, ldiv) + linesearch && kcopy!(n, npc_dir , r) # npc_dir ← r; contain the preconditioned initial residual rNorm = knorm_elliptic(n, r, p) # ‖r‖ history && push!(rNorms, rNorm) # Values of ‖r‖ @@ -219,6 +225,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema overtimed = false while ! (solved || tired || user_requested_exit || overtimed) + linesearch && kcopy!(n, npc_dir , r) # npc_dir ← r; contain the last residual if linesearch if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²) npcurv = true @@ -229,8 +236,9 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema stats.timer = start_time |> ktimer stats.status = "nonpositive curvature" iter == 0 && kcopy!(n, x, b) # x ← b - workspace.warm_start = false - return workspace + solver.warm_start = false + stats.indefinite = true + return solver end elseif pAp ≤ 0 && radius == 0 error("Indefinite system and no trust region") @@ -344,7 +352,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema kaxpy!(n, α, p, x) xNorm = knorm(n, x) - xNorm ≈ radius && (on_boundary = true) + xNorm ≈ radius > 0 && (on_boundary = true) kaxpy!(n, -α, Mq, r) # residual if MisI rNorm² = kdotr(n, r, r) @@ -410,12 +418,15 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema # Termination status tired && (status = "maximum number of iterations exceeded") - on_boundary && (status = "on trust-region boundary") - npcurv && (status = "nonpositive curvature") solved && (status = "solution good enough given atol and rtol") user_requested_exit && (status = "user-requested exit") overtimed && (status = "time limit exceeded") + npcurv && (status = "nonpositive curvature") + on_boundary && (status = "on trust-region boundary") + if npcurv && linesearch + stats.indefinite = true + end # Update x warm_start && kaxpy!(n, one(FC), Δx, x) workspace.warm_start = false diff --git a/src/krylov_workspaces.jl b/src/krylov_workspaces.jl index 5f6db3e98..112f10024 100644 --- a/src/krylov_workspaces.jl +++ b/src/krylov_workspaces.jl @@ -266,6 +266,7 @@ mutable struct CrWorkspace{T,FC,S} <: KrylovWorkspace{T,FC,S} Δx :: S x :: S r :: S + npc_dir :: S p :: S q :: S Ar :: S @@ -283,6 +284,7 @@ function CrWorkspace(kc::KrylovConstructor) Δx = similar(kc.vn_empty) x = similar(kc.vn) r = similar(kc.vn) + npc_dir = similar(kc.vn_empty) p = similar(kc.vn) q = similar(kc.vn) Ar = similar(kc.vn) @@ -298,6 +300,7 @@ function CrWorkspace(m::Integer, n::Integer, S::Type) Δx = S(undef, 0) x = S(undef, n) r = S(undef, n) + npc_dir = S(undef, 0) p = S(undef, n) q = S(undef, n) Ar = S(undef, n) diff --git a/test/test_cr.jl b/test/test_cr.jl index 102e182c9..69f93d4f4 100644 --- a/test/test_cr.jl +++ b/test/test_cr.jl @@ -11,6 +11,8 @@ resid = norm(r) / norm(b) @test(resid ≤ cr_tol) @test(stats.solved) + @test stats.indefinite == false + # Code coverage (x, stats) = cr(Matrix(A), b) @@ -64,23 +66,43 @@ # Test linesearch A, b = symmetric_indefinite(FC=FC) - x, stats = cr(A, b, linesearch=true) + solver = CrSolver(A, b) + cr!(solver, A, b, linesearch=true) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir @test stats.status == "nonpositive curvature" + @test stats.indefinite == true + # Verify that the returned direction indeed exhibits nonpositive curvature. + # For both real and complex cases, ensure to take the real part. + curvature = real(dot(npc_dir, A * npc_dir)) + @test curvature <= 0 + # Test Linesearch which would stop on the first call since A is negative definite A, b = symmetric_indefinite(FC=FC; shift = 5) - x, stats = cr(A, b, linesearch=true) + solver = CrSolver(A, b) + cr!(solver, A, b, linesearch=true) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir @test stats.status == "nonpositive curvature" @test stats.niter == 0 @test all(x .== b) @test stats.solved == true + @test stats.indefinite == true + curvature = real(dot(npc_dir, A * npc_dir)) + @test curvature <= 0 # Test when b^TAb=0 and linesearch is true A, b = system_zero_quad(FC=FC) - x, stats = cr(A, b, linesearch=true) + solver = CrSolver(A, b) + cr!(solver, A, b, linesearch=true) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir @test stats.status == "b is a zero-curvature direction" @test all(x .== b) @test stats.solved == true + @test real(dot(npc_dir, A * npc_dir)) ≈ 0.0 + + # Test if warm_start and linesearch are both true, it should throw an error + A, b = symmetric_indefinite(FC=FC) + @test_throws MethodError cr(A, b, warm_start = true, linesearch = true) # Test when b^TAb=0 and linesearch is false A, b = system_zero_quad(FC=FC) @@ -88,18 +110,33 @@ @test stats.status == "b is a zero-curvature direction" @test norm(x) == zero(FC) @test stats.solved == true - - - # test callback function + + # Test callback function A, b = symmetric_definite(FC=FC) workspace = CrWorkspace(A, b) tol = 1.0e-1 cb_n2 = TestCallbackN2(A, b, tol = tol) - cr!(workspace, A, b, callback = cb_n2) - @test workspace.stats.status == "user-requested exit" - @test cb_n2(workspace) + cr!(solver, A, b, callback = cb_n2) + @test solver.stats.status == "user-requested exit" + @test cb_n2(solver) + @test_throws TypeError cr(A, b, callback = solver -> "string", history = true) + + + # Test on trust-region boundary when radius > 0 + A, b = symmetric_indefinite(FC=FC, shift = 5) + solver = CrSolver(A, b) + cr!(solver, A, b, radius = one(Float64)) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir + @test stats.status == "on trust-region boundary" + @test norm(x) ≈ 1.0 + curvature = real(dot(x, A * x)) + @test curvature <= 0 + + # Test on trust-region boundary when radius = 1 and linesearch is true + A, b = symmetric_indefinite(FC=FC, shift = 5) + @test_throws ErrorException cr(A, b, radius = one(Float64), linesearch = true) + - @test_throws TypeError cr(A, b, callback = workspace -> "string", history = true) end end end From c88e50d4d504d2fa41f1885373fabcd01034cf76 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 28 Apr 2025 21:15:51 -0400 Subject: [PATCH 02/12] Update test/test_cr.jl Co-authored-by: Dominique --- test/test_cr.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_cr.jl b/test/test_cr.jl index 69f93d4f4..6e73904c7 100644 --- a/test/test_cr.jl +++ b/test/test_cr.jl @@ -13,7 +13,6 @@ @test(stats.solved) @test stats.indefinite == false - # Code coverage (x, stats) = cr(Matrix(A), b) From 71ccf698adc74229a8080963e7fd6a8e951ea3fe Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 5 May 2025 05:12:02 -0400 Subject: [PATCH 03/12] Update for 2 cases of -ve direction --- src/cr.jl | 23 ++++++++++++++++++----- src/krylov_stats.jl | 3 +++ test/test_cr.jl | 3 +-- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/cr.jl b/src/cr.jl index e0b05ee5e..5b500851d 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -57,7 +57,9 @@ For an in-place variant that reuses memory across solves, see [`cr!`](@ref). * `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; * `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; * `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; -* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If `linesearch` is true and nonpositive curvature is detected, the solution depends on the iteration: at iteration k = 0, the right-hand side is returned with `solver.npc_dir` as the preconditioned initial residual; at iteration k > 0, the solution from iteration k-1 is returned with `solver.npc_dir` as the most recent residual; +* `linesearch`: if `true` and nonpositive curvature is detected, behavior depends on the iteration. + – At k = 0, return the right-hand side with `solver.npc_dir` set to the preconditioned initial residual. + – At k > 0, return the solution from iteration k–1 with `solver.npc_dir` set to the detected nonpositive-curvature direction—either the previous search direction if the search-direction test fails, or the latest residual if the residual-curvature test fails. If both test fails, set `solver.npc_dir` to the current residual, update `solver.p` to the most recent search direction, and record the number of negative-curvature directions in `stats.num_neg_dir`; * `γ`: tolerance to determine that the curvature of the quadratic model is nonpositive; * `atol`: absolute stopping tolerance based on the residual norm; * `rtol`: relative stopping tolerance based on the residual norm; @@ -225,7 +227,6 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema overtimed = false while ! (solved || tired || user_requested_exit || overtimed) - linesearch && kcopy!(n, npc_dir , r) # npc_dir ← r; contain the last residual if linesearch if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²) npcurv = true @@ -235,6 +236,8 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema stats.inconsistent = false stats.timer = start_time |> ktimer stats.status = "nonpositive curvature" + kcopy!(n, npc_dir , r) # npc_dir ← r; contain the last residual + stats.num_neg_dir = 2 iter == 0 && kcopy!(n, x, b) # x ← b solver.warm_start = false stats.indefinite = true @@ -271,6 +274,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema else # case 2 (verbose > 0) && @printf(iostream, "r is a direction of nonpositive curvature: %8.1e\n", ρ) α = tr + #TODO what to do in this case? end else # q_p = q(x + α_p * p) - q(x) = -α_p * rᴴp + ½ (α_p)² * pᴴAp @@ -301,6 +305,10 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema elseif pAp > 0 && ρ < 0 npcurv = true (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e > 0 and rᴴAr = %8.1e < 0\n", pAp, ρ) + if linesearch + kcopy!(n, npc_dir , r) # npc_dir ← r; contain the last residual + stats.num_neg_dir = 1 + end # q_p is minimal for α_p = rᴴp / pᴴAp α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp) Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 @@ -317,6 +325,10 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema elseif pAp < 0 && ρ > 0 npcurv = true (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e < 0 and rᴴAr = %8.1e > 0\n", pAp, ρ) + if linesearch + kcopy!(n, npc_dir , p) # npc_dir ← r; contain the last residual + stats.num_neg_dir = 1 + end α = descent ? t1 : t2 tr = min(tr, rNorm² / ρ) Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 @@ -332,6 +344,10 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema elseif pAp < 0 && ρ < 0 npcurv = true + if linesearch + kcopy!(n, npc_dir , r) # npc_dir ← r; contain the last residual + stats.num_neg_dir = 2 + end (verbose > 0) && @printf(iostream, "negative curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) α = descent ? t1 : t2 Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 @@ -424,9 +440,6 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema npcurv && (status = "nonpositive curvature") on_boundary && (status = "on trust-region boundary") - if npcurv && linesearch - stats.indefinite = true - end # Update x warm_start && kaxpy!(n, one(FC), Δx, x) workspace.warm_start = false diff --git a/src/krylov_stats.jl b/src/krylov_stats.jl index 98e56f983..1ed8251d1 100644 --- a/src/krylov_stats.jl +++ b/src/krylov_stats.jl @@ -13,6 +13,7 @@ The fields are as follows: - `solved`: Indicates whether the solver successfully reached convergence (`true` if solved, `false` otherwise); - `inconsistent`: Flags whether the system was detected as inconsistent (i.e., when `b` is not in the range of `A`); - `indefinite`: Flags whether the system was detected as indefinite (i.e., when `A` is not positive definite); +- `num_neg_dir`: The number of negative curvature directions encountered during the solve; - `residuals`: A vector containing the residual norms at each iteration; - `Aresiduals`: A vector of `A'`-residual norms at each iteration; - `Acond`: An estimate of the condition number of matrix `A`. @@ -24,6 +25,7 @@ mutable struct SimpleStats{T} <: KrylovStats{T} solved :: Bool inconsistent :: Bool indefinite :: Bool + num_neg_dir :: Int residuals :: Vector{T} Aresiduals :: Vector{T} Acond :: Vector{T} @@ -42,6 +44,7 @@ function copyto!(dest :: SimpleStats, src :: SimpleStats) dest.solved = src.solved dest.inconsistent = src.inconsistent dest.indefinite = src.indefinite + dest.num_neg_dir = src.num_neg_dir dest.residuals = copy(src.residuals) dest.Aresiduals = copy(src.Aresiduals) dest.Acond = copy(src.Acond) diff --git a/test/test_cr.jl b/test/test_cr.jl index 6e73904c7..af03f363c 100644 --- a/test/test_cr.jl +++ b/test/test_cr.jl @@ -74,8 +74,7 @@ # For both real and complex cases, ensure to take the real part. curvature = real(dot(npc_dir, A * npc_dir)) @test curvature <= 0 - - + # Test Linesearch which would stop on the first call since A is negative definite A, b = symmetric_indefinite(FC=FC; shift = 5) solver = CrSolver(A, b) From 725f4b896e5ce2065ae1885e8363aafb8b8582e3 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Sat, 10 May 2025 10:39:20 -0400 Subject: [PATCH 04/12] Update src/cr.jl Co-authored-by: Dominique --- src/cr.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cr.jl b/src/cr.jl index 5b500851d..ac11765bf 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -57,9 +57,12 @@ For an in-place variant that reuses memory across solves, see [`cr!`](@ref). * `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; * `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; * `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; -* `linesearch`: if `true` and nonpositive curvature is detected, behavior depends on the iteration. - – At k = 0, return the right-hand side with `solver.npc_dir` set to the preconditioned initial residual. - – At k > 0, return the solution from iteration k–1 with `solver.npc_dir` set to the detected nonpositive-curvature direction—either the previous search direction if the search-direction test fails, or the latest residual if the residual-curvature test fails. If both test fails, set `solver.npc_dir` to the current residual, update `solver.p` to the most recent search direction, and record the number of negative-curvature directions in `stats.num_neg_dir`; +* `linesearch`: if `true` and nonpositive curvature is detected, the behavior depends on the iteration: + – at iteration k = 0, return the preconditioned initial search direction in `solver.npc_dir`; + – at iteration k > 0, + - if the residual from iteration k-1 is a nonpositive curvature direction but `solver.p` is not, the residual is stored in `stats.npc_dir` and `stats.num_neg_dir` is set to 1; + - if `solver.p` is a nonpositive curvature direction but the residual is not, `solver.p` is copied into `stats.npc_dir` and `stats.num_neg_dir` is set to 1; + - if both are nonnegative curvature directions, the residual is stored in `stats.npc_dir` and `stats.num_neg_dir` is set to 2. * `γ`: tolerance to determine that the curvature of the quadratic model is nonpositive; * `atol`: absolute stopping tolerance based on the residual norm; * `rtol`: relative stopping tolerance based on the residual norm; From bf626495d4718458cb943b21e13f78103241aa02 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Sat, 10 May 2025 13:45:16 -0400 Subject: [PATCH 05/12] updated based on the feedback --- src/block_krylov_workspaces.jl | 4 +- src/cr.jl | 67 +++++++++++--------- src/krylov_stats.jl | 6 +- src/krylov_workspaces.jl | 112 ++++++++++++++++----------------- test/test_cr.jl | 93 ++++++++++++++++++--------- test/test_stats.jl | 5 +- 6 files changed, 164 insertions(+), 123 deletions(-) diff --git a/src/block_krylov_workspaces.jl b/src/block_krylov_workspaces.jl index 62cdfb11e..5dd7d1d83 100644 --- a/src/block_krylov_workspaces.jl +++ b/src/block_krylov_workspaces.jl @@ -54,7 +54,7 @@ function BlockMinresWorkspace(m::Integer, n::Integer, p::Integer, SV::Type, SM:: τₖ₋₁ = SV(undef, p) SV = isconcretetype(SV) ? SV : typeof(τₖ₋₁) SM = isconcretetype(SM) ? SM : typeof(X) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = BlockMinresWorkspace{T,FC,SV,SM}(m, n, p, ΔX, X, P, Q, C, D, Φ, Vₖ₋₁, Vₖ, wₖ₋₂, wₖ₋₁, Hₖ₋₂, Hₖ₋₁, τₖ₋₂, τₖ₋₁, false, stats) return workspace end @@ -115,7 +115,7 @@ function BlockGmresWorkspace(m::Integer, n::Integer, p::Integer, SV::Type, SM::T τ = SV[SV(undef, p) for i = 1 : memory] SV = isconcretetype(SV) ? SV : typeof(τ) SM = isconcretetype(SM) ? SM : typeof(X) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = BlockGmresWorkspace{T,FC,SV,SM}(m, n, p, ΔX, X, W, P, Q, C, D, V, Z, R, H, τ, false, stats) return workspace end diff --git a/src/cr.jl b/src/cr.jl index ac11765bf..aeac4cea9 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -58,11 +58,11 @@ For an in-place variant that reuses memory across solves, see [`cr!`](@ref). * `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; * `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; * `linesearch`: if `true` and nonpositive curvature is detected, the behavior depends on the iteration: - – at iteration k = 0, return the preconditioned initial search direction in `solver.npc_dir`; + – at iteration k = 0, return the preconditioned initial search direction in `workspace.npc_dir`; – at iteration k > 0, - - if the residual from iteration k-1 is a nonpositive curvature direction but `solver.p` is not, the residual is stored in `stats.npc_dir` and `stats.num_neg_dir` is set to 1; - - if `solver.p` is a nonpositive curvature direction but the residual is not, `solver.p` is copied into `stats.npc_dir` and `stats.num_neg_dir` is set to 1; - - if both are nonnegative curvature directions, the residual is stored in `stats.npc_dir` and `stats.num_neg_dir` is set to 2. + - if the residual from iteration k-1 is a nonpositive curvature direction but `workspace.p` is not, the residual is stored in `stats.npc_dir` and `stats.npcCount` is set to 1; + - if `workspace.p` is a nonpositive curvature direction but the residual is not, `workspace.p` is copied into `stats.npc_dir` and `stats.npcCount` is set to 1; + - if both are nonnegative curvature directions, the residual is stored in `stats.npc_dir` and `stats.npcCount` is set to 2. * `γ`: tolerance to determine that the curvature of the quadratic model is nonpositive; * `atol`: absolute stopping tolerance based on the residual norm; * `rtol`: relative stopping tolerance based on the residual norm; @@ -138,7 +138,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema length(b) == n || error("Inconsistent problem size") linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0") (verbose > 0) && @printf(iostream, "CR: system of %d equations in %d variables\n", n, n) - (solver.warm_start && linesearch) && error("warm_start and linesearch cannot be used together") + (workspace.warm_start && linesearch) && error("warm_start and linesearch cannot be used together") # Tests M = Iₙ MisI = (M === I) @@ -148,15 +148,15 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema ktypeof(b) == S || error("ktypeof(b) must be equal to $S") # Set up workspace - allocate_if(!MisI, solver, :Mq, S, solver.x) # The length of Mq is n - allocate_if(linesearch, solver, :npc_dir , S, solver.x) # The length of npc_dir is n - Δx, x, r, p, q, Ar, stats = solver.Δx, solver.x, solver.r, solver.p, solver.q, solver.Ar, solver.stats - warm_start = solver.warm_start + allocate_if(!MisI, workspace, :Mq, S, workspace.x) # The length of Mq is n + allocate_if(linesearch, workspace, :npc_dir , S, workspace.x) # The length of npc_dir is n + Δx, x, r, p, q, Ar, stats = workspace.Δx, workspace.x, workspace.r, workspace.p, workspace.q, workspace.Ar, workspace.stats + warm_start = workspace.warm_start rNorms, ArNorms = stats.residuals, stats.Aresiduals reset!(stats) - Mq = MisI ? q : solver.Mq + Mq = MisI ? q : workspace.Mq if linesearch - npc_dir = solver.npc_dir + npc_dir = workspace.npc_dir end # Initial state. @@ -169,7 +169,6 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema end MisI && kcopy!(n, r, p) # r ← p MisI || mulorldiv!(r, M, p, ldiv) - linesearch && kcopy!(n, npc_dir , r) # npc_dir ← r; contain the preconditioned initial residual rNorm = knorm_elliptic(n, r, p) # ‖r‖ history && push!(rNorms, rNorm) # Values of ‖r‖ @@ -195,7 +194,11 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema stats.status = "b is a zero-curvature direction" history && push!(ArNorms, zero(T)) workspace.warm_start = false - linesearch && kcopy!(n, x, b) # x ← b + if linesearch + kcopy!(n, x, b) # x ← b + kcopy!(n, npc_dir, r) + stats.npcCount = 1 + end return workspace end @@ -231,7 +234,9 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema while ! (solved || tired || user_requested_exit || overtimed) if linesearch - if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²) + p_dir = (pAp ≤ γ * pNorm^2) + r_dir = (ρ ≤ γ * rNorm^2) + if p_dir || r_dir npcurv = true (verbose > 0) && @printf(iostream, "nonpositive curvature detected: pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) stats.solved = true @@ -239,12 +244,24 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema stats.inconsistent = false stats.timer = start_time |> ktimer stats.status = "nonpositive curvature" - kcopy!(n, npc_dir , r) # npc_dir ← r; contain the last residual - stats.num_neg_dir = 2 - iter == 0 && kcopy!(n, x, b) # x ← b - solver.warm_start = false + workspace.warm_start = false stats.indefinite = true - return solver + if iter == 0 + kcopy!(n, npc_dir, r) + stats.npcCount = 1 + else + if r_dir && !p_dir + kcopy!(n, npc_dir, r) + stats.npcCount = 1 + elseif p_dir && !r_dir + kcopy!(n, npc_dir, p) + stats.npcCount = 1 + else + kcopy!(n, npc_dir, r) + stats.npcCount = 2 + end + end + return workspace end elseif pAp ≤ 0 && radius == 0 error("Indefinite system and no trust region") @@ -308,10 +325,6 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema elseif pAp > 0 && ρ < 0 npcurv = true (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e > 0 and rᴴAr = %8.1e < 0\n", pAp, ρ) - if linesearch - kcopy!(n, npc_dir , r) # npc_dir ← r; contain the last residual - stats.num_neg_dir = 1 - end # q_p is minimal for α_p = rᴴp / pᴴAp α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp) Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 @@ -328,10 +341,6 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema elseif pAp < 0 && ρ > 0 npcurv = true (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e < 0 and rᴴAr = %8.1e > 0\n", pAp, ρ) - if linesearch - kcopy!(n, npc_dir , p) # npc_dir ← r; contain the last residual - stats.num_neg_dir = 1 - end α = descent ? t1 : t2 tr = min(tr, rNorm² / ρ) Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 @@ -347,10 +356,6 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema elseif pAp < 0 && ρ < 0 npcurv = true - if linesearch - kcopy!(n, npc_dir , r) # npc_dir ← r; contain the last residual - stats.num_neg_dir = 2 - end (verbose > 0) && @printf(iostream, "negative curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) α = descent ? t1 : t2 Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 diff --git a/src/krylov_stats.jl b/src/krylov_stats.jl index 1ed8251d1..6ce210015 100644 --- a/src/krylov_stats.jl +++ b/src/krylov_stats.jl @@ -13,7 +13,7 @@ The fields are as follows: - `solved`: Indicates whether the solver successfully reached convergence (`true` if solved, `false` otherwise); - `inconsistent`: Flags whether the system was detected as inconsistent (i.e., when `b` is not in the range of `A`); - `indefinite`: Flags whether the system was detected as indefinite (i.e., when `A` is not positive definite); -- `num_neg_dir`: The number of negative curvature directions encountered during the solve; +- `npcCount`: The number of negative curvature directions encountered during the solve; - `residuals`: A vector containing the residual norms at each iteration; - `Aresiduals`: A vector of `A'`-residual norms at each iteration; - `Acond`: An estimate of the condition number of matrix `A`. @@ -25,7 +25,7 @@ mutable struct SimpleStats{T} <: KrylovStats{T} solved :: Bool inconsistent :: Bool indefinite :: Bool - num_neg_dir :: Int + npcCount :: Int residuals :: Vector{T} Aresiduals :: Vector{T} Acond :: Vector{T} @@ -44,7 +44,7 @@ function copyto!(dest :: SimpleStats, src :: SimpleStats) dest.solved = src.solved dest.inconsistent = src.inconsistent dest.indefinite = src.indefinite - dest.num_neg_dir = src.num_neg_dir + dest.npcCount = src.npcCount dest.residuals = copy(src.residuals) dest.Aresiduals = copy(src.Aresiduals) dest.Acond = copy(src.Acond) diff --git a/src/krylov_workspaces.jl b/src/krylov_workspaces.jl index 112f10024..5088dabc4 100644 --- a/src/krylov_workspaces.jl +++ b/src/krylov_workspaces.jl @@ -92,7 +92,7 @@ function MinresWorkspace(kc::KrylovConstructor; window::Integer = 5) y = similar(kc.vn) v = similar(kc.vn_empty) err_vec = zeros(T, window) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = MinresWorkspace{T,FC,S}(m, n, Δx, x, r1, r2, npc_dir, w1, w2, y, v, err_vec, false, stats) return workspace end @@ -111,7 +111,7 @@ function MinresWorkspace(m::Integer, n::Integer, S::Type; window::Integer = 5) v = S(undef, 0) err_vec = zeros(T, window) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = MinresWorkspace{T,FC,S}(m, n, Δx, x, r1, r2, npc_dir, w1, w2, y, v, err_vec, false, stats) return workspace end @@ -162,7 +162,7 @@ function MinaresWorkspace(kc::KrylovConstructor) dₖ₋₂ = similar(kc.vn) dₖ₋₁ = similar(kc.vn) q = similar(kc.vn) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = MinaresWorkspace{T,FC,S}(m, n, Δx, vₖ, vₖ₊₁, x, wₖ₋₂, wₖ₋₁, dₖ₋₂, dₖ₋₁, q, false, stats) return workspace end @@ -180,7 +180,7 @@ function MinaresWorkspace(m::Integer, n::Integer, S::Type) dₖ₋₁ = S(undef, n) q = S(undef, n) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = MinaresWorkspace{T,FC,S}(m, n, Δx, vₖ, vₖ₊₁, x, wₖ₋₂, wₖ₋₁, dₖ₋₂, dₖ₋₁, q, false, stats) return workspace end @@ -225,7 +225,7 @@ function CgWorkspace(kc::KrylovConstructor) p = similar(kc.vn) Ap = similar(kc.vn) z = similar(kc.vn_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CgWorkspace{T,FC,S}(m, n, Δx, x, r, p, Ap, z, false, stats) return workspace end @@ -240,7 +240,7 @@ function CgWorkspace(m::Integer, n::Integer, S::Type) Ap = S(undef, n) z = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CgWorkspace{T,FC,S}(m, n, Δx, x, r, p, Ap, z, false, stats) return workspace end @@ -289,8 +289,8 @@ function CrWorkspace(kc::KrylovConstructor) q = similar(kc.vn) Ar = similar(kc.vn) Mq = similar(kc.vn_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") - workspace = CrWorkspace{T,FC,S}(m, n, Δx, x, r, p, q, Ar, Mq, false, stats) + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") + workspace = CrWorkspace{T,FC,S}(m, n, Δx, x, r, npc_dir, p, q, Ar, Mq, false, stats) return workspace end @@ -306,8 +306,8 @@ function CrWorkspace(m::Integer, n::Integer, S::Type) Ar = S(undef, n) Mq = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") - workspace = CrWorkspace{T,FC,S}(m, n, Δx, x, r, p, q, Ar, Mq, false, stats) + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") + workspace = CrWorkspace{T,FC,S}(m, n, Δx, x, r, npc_dir, p, q, Ar, Mq, false, stats) return workspace end @@ -357,7 +357,7 @@ function CarWorkspace(kc::KrylovConstructor) t = similar(kc.vn) u = similar(kc.vn) Mu = similar(kc.vn_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CarWorkspace{T,FC,S}(m, n, Δx, x, r, p, s, q, t, u, Mu, false, stats) return workspace end @@ -375,7 +375,7 @@ function CarWorkspace(m::Integer, n::Integer, S::Type) u = S(undef, n) Mu = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CarWorkspace{T,FC,S}(m, n, Δx, x, r, p, s, q, t, u, Mu, false, stats) return workspace end @@ -642,7 +642,7 @@ function MinresQlpWorkspace(kc::KrylovConstructor) x = similar(kc.vn) p = similar(kc.vn) vₖ = similar(kc.vn_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = MinresQlpWorkspace{T,FC,S}(m, n, Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats) return workspace end @@ -659,7 +659,7 @@ function MinresQlpWorkspace(m::Integer, n::Integer, S::Type) p = S(undef, n) vₖ = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = MinresQlpWorkspace{T,FC,S}(m, n, Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats) return workspace end @@ -715,7 +715,7 @@ function DqgmresWorkspace(kc::KrylovConstructor; memory::Integer = 20) c = Vector{T}(undef, memory) s = Vector{FC}(undef, memory) H = Vector{FC}(undef, memory+1) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = DqgmresWorkspace{T,FC,S}(m, n, Δx, x, t, z, w, P, V, c, s, H, false, stats) return workspace end @@ -735,7 +735,7 @@ function DqgmresWorkspace(m::Integer, n::Integer, S::Type; memory::Integer = 20) s = Vector{FC}(undef, memory) H = Vector{FC}(undef, memory+1) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = DqgmresWorkspace{T,FC,S}(m, n, Δx, x, t, z, w, P, V, c, s, H, false, stats) return workspace end @@ -789,7 +789,7 @@ function DiomWorkspace(kc::KrylovConstructor; memory::Integer = 20) V = S[similar(kc.vn) for i = 1 : memory] L = Vector{FC}(undef, memory-1) H = Vector{FC}(undef, memory) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = DiomWorkspace{T,FC,S}(m, n, Δx, x, t, z, w, P, V, L, H, false, stats) return workspace end @@ -808,7 +808,7 @@ function DiomWorkspace(m::Integer, n::Integer, S::Type; memory::Integer = 20) L = Vector{FC}(undef, memory-1) H = Vector{FC}(undef, memory) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = DiomWorkspace{T,FC,S}(m, n, Δx, x, t, z, w, P, V, L, H, false, stats) return workspace end @@ -859,7 +859,7 @@ function UsymlqWorkspace(kc::KrylovConstructor) vₖ₋₁ = similar(kc.vm) vₖ = similar(kc.vm) q = similar(kc.vm) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = UsymlqWorkspace{T,FC,S}(m, n, uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats) return workspace end @@ -877,7 +877,7 @@ function UsymlqWorkspace(m::Integer, n::Integer, S::Type) vₖ = S(undef, m) q = S(undef, m) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = UsymlqWorkspace{T,FC,S}(m, n, uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats) return workspace end @@ -930,7 +930,7 @@ function UsymqrWorkspace(kc::KrylovConstructor) uₖ₋₁ = similar(kc.vn) uₖ = similar(kc.vn) p = similar(kc.vn) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = UsymqrWorkspace{T,FC,S}(m, n, vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats) return workspace end @@ -949,7 +949,7 @@ function UsymqrWorkspace(m::Integer, n::Integer, S::Type) uₖ = S(undef, n) p = S(undef, n) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = UsymqrWorkspace{T,FC,S}(m, n, vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats) return workspace end @@ -1014,7 +1014,7 @@ function TricgWorkspace(kc::KrylovConstructor) Δy = similar(kc.vn_empty) uₖ = similar(kc.vn_empty) vₖ = similar(kc.vm_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = TricgWorkspace{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) return workspace end @@ -1039,7 +1039,7 @@ function TricgWorkspace(m::Integer, n::Integer, S::Type) uₖ = S(undef, 0) vₖ = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = TricgWorkspace{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) return workspace end @@ -1112,7 +1112,7 @@ function TrimrWorkspace(kc::KrylovConstructor) Δy = similar(kc.vn_empty) uₖ = similar(kc.vn_empty) vₖ = similar(kc.vm_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = TrimrWorkspace{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) return workspace end @@ -1141,7 +1141,7 @@ function TrimrWorkspace(m::Integer, n::Integer, S::Type) uₖ = S(undef, 0) vₖ = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = TrimrWorkspace{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) return workspace end @@ -1273,7 +1273,7 @@ function CgsWorkspace(kc::KrylovConstructor) ts = similar(kc.vn) yz = similar(kc.vn_empty) vw = similar(kc.vn_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CgsWorkspace{T,FC,S}(m, n, Δx, x, r, u, p, q, ts, yz, vw, false, stats) return workspace end @@ -1291,7 +1291,7 @@ function CgsWorkspace(m::Integer, n::Integer, S::Type) yz = S(undef, 0) vw = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CgsWorkspace{T,FC,S}(m, n, Δx, x, r, u, p, q, ts, yz, vw, false, stats) return workspace end @@ -1342,7 +1342,7 @@ function BicgstabWorkspace(kc::KrylovConstructor) qd = similar(kc.vn) yz = similar(kc.vn_empty) t = similar(kc.vn_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = BicgstabWorkspace{T,FC,S}(m, n, Δx, x, r, p, v, s, qd, yz, t, false, stats) return workspace end @@ -1360,7 +1360,7 @@ function BicgstabWorkspace(m::Integer, n::Integer, S::Type) yz = S(undef, 0) t = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = BicgstabWorkspace{T,FC,S}(m, n, Δx, x, r, p, v, s, qd, yz, t, false, stats) return workspace end @@ -1415,7 +1415,7 @@ function BilqWorkspace(kc::KrylovConstructor) d̅ = similar(kc.vn) t = similar(kc.vn_empty) s = similar(kc.vn_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = BilqWorkspace{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, t, s, false, stats) return workspace end @@ -1435,7 +1435,7 @@ function BilqWorkspace(m::Integer, n::Integer, S::Type) t = S(undef, 0) s = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = BilqWorkspace{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, t, s, false, stats) return workspace end @@ -1492,7 +1492,7 @@ function QmrWorkspace(kc::KrylovConstructor) wₖ₋₁ = similar(kc.vn) t = similar(kc.vn_empty) s = similar(kc.vn_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = QmrWorkspace{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, t, s, false, stats) return workspace end @@ -1513,7 +1513,7 @@ function QmrWorkspace(m::Integer, n::Integer, S::Type) t = S(undef, 0) s = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = QmrWorkspace{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, t, s, false, stats) return workspace end @@ -1638,7 +1638,7 @@ function CglsWorkspace(kc::KrylovConstructor) r = similar(kc.vm) q = similar(kc.vm) Mr = similar(kc.vm_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CglsWorkspace{T,FC,S}(m, n, x, p, s, r, q, Mr, stats) return workspace end @@ -1653,7 +1653,7 @@ function CglsWorkspace(m::Integer, n::Integer, S::Type) q = S(undef, m) Mr = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CglsWorkspace{T,FC,S}(m, n, x, p, s, r, q, Mr, stats) return workspace end @@ -1787,7 +1787,7 @@ function CrlsWorkspace(kc::KrylovConstructor) Ap = similar(kc.vm) s = similar(kc.vm) Ms = similar(kc.vm_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CrlsWorkspace{T,FC,S}(m, n, x, p, Ar, q, r, Ap, s, Ms, stats) return workspace end @@ -1804,7 +1804,7 @@ function CrlsWorkspace(m::Integer, n::Integer, S::Type) s = S(undef, m) Ms = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CrlsWorkspace{T,FC,S}(m, n, x, p, Ar, q, r, Ap, s, Ms, stats) return workspace end @@ -1850,7 +1850,7 @@ function CgneWorkspace(kc::KrylovConstructor) q = similar(kc.vm) s = similar(kc.vm_empty) z = similar(kc.vm_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CgneWorkspace{T,FC,S}(m, n, x, p, Aᴴz, r, q, s, z, stats) return workspace end @@ -1866,7 +1866,7 @@ function CgneWorkspace(m::Integer, n::Integer, S::Type) s = S(undef, 0) z = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CgneWorkspace{T,FC,S}(m, n, x, p, Aᴴz, r, q, s, z, stats) return workspace end @@ -1912,7 +1912,7 @@ function CrmrWorkspace(kc::KrylovConstructor) q = similar(kc.vm) Nq = similar(kc.vm_empty) s = similar(kc.vm_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CrmrWorkspace{T,FC,S}(m, n, x, p, Aᴴr, r, q, Nq, s, stats) return workspace end @@ -1928,7 +1928,7 @@ function CrmrWorkspace(m::Integer, n::Integer, S::Type) Nq = S(undef, 0) s = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CrmrWorkspace{T,FC,S}(m, n, x, p, Aᴴr, r, q, Nq, s, stats) return workspace end @@ -2046,7 +2046,7 @@ function LsqrWorkspace(kc::KrylovConstructor; window::Integer = 5) u = similar(kc.vm_empty) v = similar(kc.vn_empty) err_vec = zeros(T, window) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = LsqrWorkspace{T,FC,S}(m, n, x, Nv, Aᴴu, w, Mu, Av, u, v, err_vec, stats) return workspace end @@ -2064,7 +2064,7 @@ function LsqrWorkspace(m::Integer, n::Integer, S::Type; window::Integer = 5) v = S(undef, 0) err_vec = zeros(T, window) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = LsqrWorkspace{T,FC,S}(m, n, x, Nv, Aᴴu, w, Mu, Av, u, v, err_vec, stats) return workspace end @@ -2258,7 +2258,7 @@ function CraigWorkspace(kc::KrylovConstructor) u = similar(kc.vm_empty) v = similar(kc.vn_empty) w2 = similar(kc.vn_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CraigWorkspace{T,FC,S}(m, n, x, Nv, Aᴴu, y, w, Mu, Av, u, v, w2, stats) return workspace end @@ -2277,7 +2277,7 @@ function CraigWorkspace(m::Integer, n::Integer, S::Type) v = S(undef, 0) w2 = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CraigWorkspace{T,FC,S}(m, n, x, Nv, Aᴴu, y, w, Mu, Av, u, v, w2, stats) return workspace end @@ -2333,7 +2333,7 @@ function CraigmrWorkspace(kc::KrylovConstructor) u = similar(kc.vm_empty) v = similar(kc.vn_empty) q = similar(kc.vn_empty) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CraigmrWorkspace{T,FC,S}(m, n, x, Nv, Aᴴu, d, y, Mu, w, wbar, Av, u, v, q, stats) return workspace end @@ -2354,7 +2354,7 @@ function CraigmrWorkspace(m::Integer, n::Integer, S::Type) v = S(undef, 0) q = S(undef, 0) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = CraigmrWorkspace{T,FC,S}(m, n, x, Nv, Aᴴu, d, y, Mu, w, wbar, Av, u, v, q, stats) return workspace end @@ -2411,7 +2411,7 @@ function GmresWorkspace(kc::KrylovConstructor; memory::Integer = 20) s = Vector{FC}(undef, memory) z = Vector{FC}(undef, memory) R = Vector{FC}(undef, div(memory * (memory+1), 2)) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = GmresWorkspace{T,FC,S}(m, n, Δx, x, w, p, q, V, c, s, z, R, false, 0, stats) return workspace end @@ -2431,7 +2431,7 @@ function GmresWorkspace(m::Integer, n::Integer, S::Type; memory::Integer = 20) z = Vector{FC}(undef, memory) R = Vector{FC}(undef, div(memory * (memory+1), 2)) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = GmresWorkspace{T,FC,S}(m, n, Δx, x, w, p, q, V, c, s, z, R, false, 0, stats) return workspace end @@ -2488,7 +2488,7 @@ function FgmresWorkspace(kc::KrylovConstructor; memory::Integer = 20) s = Vector{FC}(undef, memory) z = Vector{FC}(undef, memory) R = Vector{FC}(undef, div(memory * (memory+1), 2)) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = FgmresWorkspace{T,FC,S}(m, n, Δx, x, w, q, V, Z, c, s, z, R, false, 0, stats) return workspace end @@ -2508,7 +2508,7 @@ function FgmresWorkspace(m::Integer, n::Integer, S::Type; memory::Integer = 20) z = Vector{FC}(undef, memory) R = Vector{FC}(undef, div(memory * (memory+1), 2)) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = FgmresWorkspace{T,FC,S}(m, n, Δx, x, w, q, V, Z, c, s, z, R, false, 0, stats) return workspace end @@ -2562,7 +2562,7 @@ function FomWorkspace(kc::KrylovConstructor; memory::Integer = 20) l = Vector{FC}(undef, memory) z = Vector{FC}(undef, memory) U = Vector{FC}(undef, div(memory * (memory+1), 2)) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = FomWorkspace{T,FC,S}(m, n, Δx, x, w, p, q, V, l, z, U, false, stats) return workspace end @@ -2581,7 +2581,7 @@ function FomWorkspace(m::Integer, n::Integer, S::Type; memory::Integer = 20) z = Vector{FC}(undef, memory) U = Vector{FC}(undef, div(memory * (memory+1), 2)) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = FomWorkspace{T,FC,S}(m, n, Δx, x, w, p, q, V, l, z, U, false, stats) return workspace end @@ -2649,7 +2649,7 @@ function GpmrWorkspace(kc::KrylovConstructor; memory::Integer = 20) gc = Vector{T}(undef, 4 * memory) zt = Vector{FC}(undef, 2 * memory) R = Vector{FC}(undef, memory * (2 * memory + 1)) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = GpmrWorkspace{T,FC,S}(m, n, wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats) return workspace end @@ -2675,7 +2675,7 @@ function GpmrWorkspace(m::Integer, n::Integer, S::Type; memory::Integer = 20) zt = Vector{FC}(undef, 2 * memory) R = Vector{FC}(undef, memory * (2 * memory + 1)) S = isconcretetype(S) ? S : typeof(x) - stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown") + stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown") workspace = GpmrWorkspace{T,FC,S}(m, n, wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats) return workspace end diff --git a/test/test_cr.jl b/test/test_cr.jl index af03f363c..532772274 100644 --- a/test/test_cr.jl +++ b/test/test_cr.jl @@ -1,9 +1,9 @@ @testset "cr" begin cr_tol = 1.0e-6 + γ_test = sqrt(eps(Float64)) for FC in (Float64, ComplexF64) @testset "Data Type: $FC" begin - # Symmetric and positive definite system. A, b = symmetric_definite(FC=FC) (x, stats) = cr(A, b) @@ -64,65 +64,100 @@ @test(stats.solved) # Test linesearch - A, b = symmetric_indefinite(FC=FC) - solver = CrSolver(A, b) - cr!(solver, A, b, linesearch=true) + # Iter=0: b^T A b = 0 → zero-curvature at k=0, with γ + A, b = system_zero_quad(FC=Float64) # ensures bᵀ A b == 0 + solver = CrWorkspace(A, b) + cr!(solver, A, b; linesearch=true, γ=γ_test) x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir - @test stats.status == "nonpositive curvature" - @test stats.indefinite == true - # Verify that the returned direction indeed exhibits nonpositive curvature. - # For both real and complex cases, ensure to take the real part. - curvature = real(dot(npc_dir, A * npc_dir)) - @test curvature <= 0 + @test stats.niter == 0 + @test stats.status == "b is a zero-curvature direction" + @test real(dot(npc_dir, A * npc_dir)) ≈ 0 + # Test when b^TAb=0 and linesearch is true, without γ + A, b = system_zero_quad(FC=FC) + solver = CrWorkspace(A, b) + cr!(solver, A, b, linesearch=true) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir + @test stats.status == "b is a zero-curvature direction" + @test all(x .== b) + @test stats.solved == true + @test real(dot(npc_dir, A * npc_dir)) ≈ 0.0 + # Test Linesearch which would stop on the first call since A is negative definite A, b = symmetric_indefinite(FC=FC; shift = 5) - solver = CrSolver(A, b) + solver = CrWorkspace(A, b) cr!(solver, A, b, linesearch=true) x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir @test stats.status == "nonpositive curvature" @test stats.niter == 0 - @test all(x .== b) @test stats.solved == true @test stats.indefinite == true curvature = real(dot(npc_dir, A * npc_dir)) @test curvature <= 0 - # Test when b^TAb=0 and linesearch is true - A, b = system_zero_quad(FC=FC) - solver = CrSolver(A, b) - cr!(solver, A, b, linesearch=true) - x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir - @test stats.status == "b is a zero-curvature direction" - @test all(x .== b) - @test stats.solved == true - @test real(dot(npc_dir, A * npc_dir)) ≈ 0.0 - - # Test if warm_start and linesearch are both true, it should throw an error - A, b = symmetric_indefinite(FC=FC) - @test_throws MethodError cr(A, b, warm_start = true, linesearch = true) - # Test when b^TAb=0 and linesearch is false A, b = system_zero_quad(FC=FC) x, stats = cr(A,b, linesearch=false) @test stats.status == "b is a zero-curvature direction" @test norm(x) == zero(FC) @test stats.solved == true + + # 2 negative curvature + A = FC[ 1.0 0.0; + 0.0 0.0 ] + b = ones(FC, 2) + solver = CrWorkspace(A, b) + cr!(solver, A, b; linesearch=true, γ=γ_test) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir + @test stats.npcCount == 2 + @test real(dot(npc_dir, A*npc_dir)) ≤ γ_test*norm(npc_dir)^2 + cr_tol + p1 = solver.p + @test real(dot(p1, A*p1)) < cr_tol + + # Only -p negative curvature + A = FC(-1.0)*I(2) + b = ones(FC, 2) + solver = CrWorkspace(A, b) + cr!(solver, A, b; linesearch=true, γ=γ_test) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir + @test stats.status == "nonpositive curvature" + @test stats.npcCount == 1 + @test real(dot(npc_dir, A*npc_dir)) ≤ cr_tol + p1 = solver.p + @test real(dot(p1, A*p1)) < 0 + # Warm-start + linesearch must error + A, b = symmetric_indefinite(FC=Float64) + @test_throws MethodError cr(A, b; warm_start=true, linesearch=true) + + # npc_dir is not a zero vector, however we don't have γ + A, b = symmetric_indefinite(FC=FC) + solver = CrWorkspace(A, b) + cr!(solver, A, b, linesearch=true) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir + @test stats.status == "nonpositive curvature" + @test stats.indefinite == true + # Verify that the returned direction indeed exhibits nonpositive curvature. + # For both real and complex cases, ensure to take the real part. + curvature = real(dot(npc_dir, A * npc_dir)) + @test curvature <= 0 + @test stats.npcCount == 2 + + # Test callback function A, b = symmetric_definite(FC=FC) workspace = CrWorkspace(A, b) tol = 1.0e-1 cb_n2 = TestCallbackN2(A, b, tol = tol) cr!(solver, A, b, callback = cb_n2) - @test solver.stats.status == "user-requested exit" + @test stats.status == "user-requested exit" @test cb_n2(solver) @test_throws TypeError cr(A, b, callback = solver -> "string", history = true) # Test on trust-region boundary when radius > 0 A, b = symmetric_indefinite(FC=FC, shift = 5) - solver = CrSolver(A, b) + solver = CrWorkspace(A, b) cr!(solver, A, b, radius = one(Float64)) x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir @test stats.status == "on trust-region boundary" @@ -137,4 +172,4 @@ end end -end +end \ No newline at end of file diff --git a/test/test_stats.jl b/test/test_stats.jl index 56e7224f7..d266a8920 100644 --- a/test/test_stats.jl +++ b/test/test_stats.jl @@ -1,6 +1,6 @@ @testset "stats" begin - stats = Krylov.SimpleStats(0, true, true, false ,Float64[1.0], Float64[2.0], Float64[], 1.234, "unknown") - stats2 = Krylov.SimpleStats(1, true, true, false, Float64[1.0], Float64[2.0], Float64[], 1.234, "unknown") + stats = Krylov.SimpleStats(0, true, true, false, 0,Float64[1.0], Float64[2.0], Float64[], 1.234, "unknown") + stats2 = Krylov.SimpleStats(1, true, true, false, 0,Float64[1.0], Float64[2.0], Float64[], 1.234, "unknown") copyto!(stats2, stats) io = IOBuffer() show(io, stats) @@ -11,6 +11,7 @@ solved: true inconsistent: true indefinite: false + npcCount: 0 residuals: [ 1.0e+00 ] Aresiduals: [ 2.0e+00 ] κ₂(A): [] From 7191100d040ccc042adf92e50362de3f99a1eb37 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 12 May 2025 16:50:26 -0400 Subject: [PATCH 06/12] Update src/krylov_stats.jl Co-authored-by: Dominique --- src/krylov_stats.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/krylov_stats.jl b/src/krylov_stats.jl index 6ce210015..1acc694eb 100644 --- a/src/krylov_stats.jl +++ b/src/krylov_stats.jl @@ -13,7 +13,7 @@ The fields are as follows: - `solved`: Indicates whether the solver successfully reached convergence (`true` if solved, `false` otherwise); - `inconsistent`: Flags whether the system was detected as inconsistent (i.e., when `b` is not in the range of `A`); - `indefinite`: Flags whether the system was detected as indefinite (i.e., when `A` is not positive definite); -- `npcCount`: The number of negative curvature directions encountered during the solve; +- `npcCount`: The number of nonpositive curvature directions encountered during the solve; - `residuals`: A vector containing the residual norms at each iteration; - `Aresiduals`: A vector of `A'`-residual norms at each iteration; - `Acond`: An estimate of the condition number of matrix `A`. From 31d991f706f8c02bd88ce30b6b9e6d1a1905f8da Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 12 May 2025 16:50:43 -0400 Subject: [PATCH 07/12] Update src/krylov_stats.jl Co-authored-by: Dominique --- src/krylov_stats.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/krylov_stats.jl b/src/krylov_stats.jl index 1acc694eb..da71643a0 100644 --- a/src/krylov_stats.jl +++ b/src/krylov_stats.jl @@ -44,7 +44,7 @@ function copyto!(dest :: SimpleStats, src :: SimpleStats) dest.solved = src.solved dest.inconsistent = src.inconsistent dest.indefinite = src.indefinite - dest.npcCount = src.npcCount + dest.npcCount = src.npcCount dest.residuals = copy(src.residuals) dest.Aresiduals = copy(src.Aresiduals) dest.Acond = copy(src.Acond) From 5aac7b6791e0b977b276b73b4a4e38bddf227d28 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 12 May 2025 16:50:58 -0400 Subject: [PATCH 08/12] Update src/cr.jl Co-authored-by: Dominique --- src/cr.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cr.jl b/src/cr.jl index aeac4cea9..61a9291ca 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -168,7 +168,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema kcopy!(n, p, b) # p ← b end MisI && kcopy!(n, r, p) # r ← p - MisI || mulorldiv!(r, M, p, ldiv) + MisI || mulorldiv!(r, M, p, ldiv) rNorm = knorm_elliptic(n, r, p) # ‖r‖ history && push!(rNorms, rNorm) # Values of ‖r‖ From a995dc29aa9936eb531471eb5ba253b8099c3efb Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 12 May 2025 16:52:19 -0400 Subject: [PATCH 09/12] Update test/test_cr.jl Co-authored-by: Dominique --- test/test_cr.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_cr.jl b/test/test_cr.jl index 532772274..65d71f733 100644 --- a/test/test_cr.jl +++ b/test/test_cr.jl @@ -64,7 +64,7 @@ @test(stats.solved) # Test linesearch - # Iter=0: b^T A b = 0 → zero-curvature at k=0, with γ + # Iter=0: bᵀ Ab = 0 → zero-curvature at k=0 A, b = system_zero_quad(FC=Float64) # ensures bᵀ A b == 0 solver = CrWorkspace(A, b) cr!(solver, A, b; linesearch=true, γ=γ_test) From ebc5e7bf949606b911c747a4b6784b2722cb75d7 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 12 May 2025 16:52:29 -0400 Subject: [PATCH 10/12] Update test/test_cr.jl Co-authored-by: Dominique --- test/test_cr.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_cr.jl b/test/test_cr.jl index 65d71f733..e3254488f 100644 --- a/test/test_cr.jl +++ b/test/test_cr.jl @@ -154,7 +154,6 @@ @test cb_n2(solver) @test_throws TypeError cr(A, b, callback = solver -> "string", history = true) - # Test on trust-region boundary when radius > 0 A, b = symmetric_indefinite(FC=FC, shift = 5) solver = CrWorkspace(A, b) From 1f3665c1afe8725268f289fe7b388d2f2fed5696 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 12 May 2025 16:53:15 -0400 Subject: [PATCH 11/12] Update test/test_cr.jl Co-authored-by: Dominique --- test/test_cr.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_cr.jl b/test/test_cr.jl index e3254488f..f360d37aa 100644 --- a/test/test_cr.jl +++ b/test/test_cr.jl @@ -67,7 +67,7 @@ # Iter=0: bᵀ Ab = 0 → zero-curvature at k=0 A, b = system_zero_quad(FC=Float64) # ensures bᵀ A b == 0 solver = CrWorkspace(A, b) - cr!(solver, A, b; linesearch=true, γ=γ_test) + cr!(solver, A, b; linesearch=true) x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir @test stats.niter == 0 @test stats.status == "b is a zero-curvature direction" From 711eab0b6eb9ddb5a01c87535d0c64ec490d813c Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 12 May 2025 18:27:03 -0400 Subject: [PATCH 12/12] updated to have npc_dir when radius>0 --- src/cr.jl | 64 ++++++++++++++++++++++++++-------- test/test_cr.jl | 91 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 112 insertions(+), 43 deletions(-) diff --git a/src/cr.jl b/src/cr.jl index 61a9291ca..f5d780212 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -56,7 +56,7 @@ For an in-place variant that reuses memory across solves, see [`cr!`](@ref). * `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; * `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; -* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; If `radius > 0` and nonpositive curvature is detected, the behavior depends on the iteration and follow similar logic as linesearch; * `linesearch`: if `true` and nonpositive curvature is detected, the behavior depends on the iteration: – at iteration k = 0, return the preconditioned initial search direction in `workspace.npc_dir`; – at iteration k > 0, @@ -150,12 +150,13 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema # Set up workspace allocate_if(!MisI, workspace, :Mq, S, workspace.x) # The length of Mq is n allocate_if(linesearch, workspace, :npc_dir , S, workspace.x) # The length of npc_dir is n + allocate_if(radius>0, workspace, :npc_dir , S, workspace.x) Δx, x, r, p, q, Ar, stats = workspace.Δx, workspace.x, workspace.r, workspace.p, workspace.q, workspace.Ar, workspace.stats warm_start = workspace.warm_start rNorms, ArNorms = stats.residuals, stats.Aresiduals reset!(stats) Mq = MisI ? q : workspace.Mq - if linesearch + if linesearch || radius > 0 npc_dir = workspace.npc_dir end @@ -195,7 +196,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema history && push!(ArNorms, zero(T)) workspace.warm_start = false if linesearch - kcopy!(n, x, b) # x ← b + kcopy!(n, x, p) # x ← M⁻¹ b kcopy!(n, npc_dir, r) stats.npcCount = 1 end @@ -234,9 +235,9 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema while ! (solved || tired || user_requested_exit || overtimed) if linesearch - p_dir = (pAp ≤ γ * pNorm^2) - r_dir = (ρ ≤ γ * rNorm^2) - if p_dir || r_dir + p_curv = (pAp ≤ γ * pNorm^2) + r_curv = (ρ ≤ γ * rNorm^2) + if p_curv || r_curv npcurv = true (verbose > 0) && @printf(iostream, "nonpositive curvature detected: pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) stats.solved = true @@ -250,15 +251,13 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema kcopy!(n, npc_dir, r) stats.npcCount = 1 else - if r_dir && !p_dir + if r_curv kcopy!(n, npc_dir, r) - stats.npcCount = 1 - elseif p_dir && !r_dir - kcopy!(n, npc_dir, p) - stats.npcCount = 1 - else - kcopy!(n, npc_dir, r) - stats.npcCount = 2 + stats.npcCount += 1 + end + if p_curv + stats.npcCount += 1 + r_curv || kcopy!(n, npc_dir, p) end end return workspace @@ -280,6 +279,14 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema if abspAp ≤ γ * pNorm * knorm(n, q) # pᴴAp ≃ 0 npcurv = true # nonpositive curvature + stats.indefinite = true + if iter == 0 + kcopy!(n, npc_dir, r) + stats.npcCount = 1 + else + stats.npcCount = 1 + kcopy!(n, npc_dir, p) + end (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e ≃ 0\n", pAp) if abspr ≤ γ * pNorm * rNorm # pᴴr ≃ 0 (verbose > 0) && @printf(iostream, "pᴴr = %8.1e ≃ 0, redefining p := r\n", pr) @@ -294,7 +301,10 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema else # case 2 (verbose > 0) && @printf(iostream, "r is a direction of nonpositive curvature: %8.1e\n", ρ) α = tr - #TODO what to do in this case? + if iter > 0 + stats.npcCount = 2 + kcopy!(n, npc_dir, r) + end end else # q_p = q(x + α_p * p) - q(x) = -α_p * rᴴp + ½ (α_p)² * pᴴAp @@ -324,6 +334,14 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema elseif pAp > 0 && ρ < 0 npcurv = true + stats.indefinite = true + if iter == 0 + kcopy!(n, npc_dir, r) + stats.npcCount = 1 + else + stats.npcCount = 1 + kcopy!(n, npc_dir, r) + end (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e > 0 and rᴴAr = %8.1e < 0\n", pAp, ρ) # q_p is minimal for α_p = rᴴp / pᴴAp α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp) @@ -340,6 +358,14 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema elseif pAp < 0 && ρ > 0 npcurv = true + stats.indefinite = true + if iter == 0 + kcopy!(n, npc_dir, r) + stats.npcCount = 1 + else + stats.npcCount = 1 + kcopy!(n, npc_dir, p) + end (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e < 0 and rᴴAr = %8.1e > 0\n", pAp, ρ) α = descent ? t1 : t2 tr = min(tr, rNorm² / ρ) @@ -356,6 +382,14 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema elseif pAp < 0 && ρ < 0 npcurv = true + stats.indefinite = true + if iter == 0 + kcopy!(n, npc_dir, r) + stats.npcCount = 1 + else + stats.npcCount = 2 + kcopy!(n, npc_dir, r) + end (verbose > 0) && @printf(iostream, "negative curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) α = descent ? t1 : t2 Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 diff --git a/test/test_cr.jl b/test/test_cr.jl index f360d37aa..2928b733b 100644 --- a/test/test_cr.jl +++ b/test/test_cr.jl @@ -1,6 +1,5 @@ @testset "cr" begin cr_tol = 1.0e-6 - γ_test = sqrt(eps(Float64)) for FC in (Float64, ComplexF64) @testset "Data Type: $FC" begin @@ -73,17 +72,7 @@ @test stats.status == "b is a zero-curvature direction" @test real(dot(npc_dir, A * npc_dir)) ≈ 0 - # Test when b^TAb=0 and linesearch is true, without γ - A, b = system_zero_quad(FC=FC) - solver = CrWorkspace(A, b) - cr!(solver, A, b, linesearch=true) - x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir - @test stats.status == "b is a zero-curvature direction" - @test all(x .== b) - @test stats.solved == true - @test real(dot(npc_dir, A * npc_dir)) ≈ 0.0 - - # Test Linesearch which would stop on the first call since A is negative definite + # Test Linesearch which would stop on the first call since A is indefinite A, b = symmetric_indefinite(FC=FC; shift = 5) solver = CrWorkspace(A, b) cr!(solver, A, b, linesearch=true) @@ -94,23 +83,36 @@ @test stats.indefinite == true curvature = real(dot(npc_dir, A * npc_dir)) @test curvature <= 0 - + @test stats.solved == true + # Test when b^TAb=0 and linesearch is false A, b = system_zero_quad(FC=FC) x, stats = cr(A,b, linesearch=false) @test stats.status == "b is a zero-curvature direction" @test norm(x) == zero(FC) @test stats.solved == true + @test stats.niter == 0 + + # test callback function + A, b = symmetric_definite(FC=FC) + workspace = CrWorkspace(A, b) + tol = 1.0e-1 + cb_n2 = TestCallbackN2(A, b, tol = tol) + cr!(workspace, A, b, callback = cb_n2) + @test workspace.stats.status == "user-requested exit" + @test cb_n2(workspace) + + @test_throws TypeError cr(A, b, callback = workspace -> "string", history = true) # 2 negative curvature A = FC[ 1.0 0.0; 0.0 0.0 ] b = ones(FC, 2) solver = CrWorkspace(A, b) - cr!(solver, A, b; linesearch=true, γ=γ_test) + cr!(solver, A, b; linesearch=true) x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir @test stats.npcCount == 2 - @test real(dot(npc_dir, A*npc_dir)) ≤ γ_test*norm(npc_dir)^2 + cr_tol + @test real(dot(npc_dir, A*npc_dir)) ≤ norm(npc_dir)^2 + cr_tol p1 = solver.p @test real(dot(p1, A*p1)) < cr_tol @@ -118,7 +120,7 @@ A = FC(-1.0)*I(2) b = ones(FC, 2) solver = CrWorkspace(A, b) - cr!(solver, A, b; linesearch=true, γ=γ_test) + cr!(solver, A, b; linesearch=true) x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir @test stats.status == "nonpositive curvature" @test stats.npcCount == 1 @@ -130,7 +132,7 @@ A, b = symmetric_indefinite(FC=Float64) @test_throws MethodError cr(A, b; warm_start=true, linesearch=true) - # npc_dir is not a zero vector, however we don't have γ + # npc_dir is not a zero vector A, b = symmetric_indefinite(FC=FC) solver = CrWorkspace(A, b) cr!(solver, A, b, linesearch=true) @@ -143,16 +145,50 @@ @test curvature <= 0 @test stats.npcCount == 2 - - # Test callback function - A, b = symmetric_definite(FC=FC) - workspace = CrWorkspace(A, b) - tol = 1.0e-1 - cb_n2 = TestCallbackN2(A, b, tol = tol) - cr!(solver, A, b, callback = cb_n2) - @test stats.status == "user-requested exit" - @test cb_n2(solver) - @test_throws TypeError cr(A, b, callback = solver -> "string", history = true) + # radius > 0 + # Iter=0: bᵀ Ab = 0 → zero-curvature at k=0 + A, b = system_zero_quad(FC=Float64) # ensures bᵀ A b == 0 + solver = CrWorkspace(A, b) + cr!(solver, A, b; radius = 10 * one(Float64)) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir + @test stats.niter == 0 + @test stats.status == "b is a zero-curvature direction" + @test real(dot(npc_dir, A * npc_dir)) ≈ 0 + + # Test stop since A is indefinite + A, b = symmetric_indefinite(FC=FC; shift = 5) + solver = CrWorkspace(A, b) + cr!(solver, A, b, radius = 10 * one(Float64)) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir + @test stats.status == "on trust-region boundary" + @test stats.solved == true + @test stats.indefinite == true + curvature = real(dot(npc_dir, A * npc_dir)) + @test curvature <= 0 + @test stats.solved == true + + # negative curvature + A = FC[ 1.0 0.0; + 0.0 0.0 ] + b = ones(FC, 2) + solver = CrWorkspace(A, b) + cr!(solver, A, b; radius = 10 * one(Float64)) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir + @test stats.npcCount == 1 + @test real(dot(npc_dir, A*npc_dir)) ≤ 0.0 + + # p negative curvature + A = FC(-1.0)*I(2) + b = ones(FC, 2) + solver = CrWorkspace(A, b) + cr!(solver, A, b; radius = 10 * one(Float64)) + x, stats, npc_dir = solver.x, solver.stats, solver.npc_dir + @test stats.status == "on trust-region boundary" + @test stats.npcCount == 1 + @test real(dot(npc_dir, A*npc_dir)) ≤ cr_tol + p1 = solver.p + @test real(dot(p1, A*p1)) < 0 + # Test on trust-region boundary when radius > 0 A, b = symmetric_indefinite(FC=FC, shift = 5) @@ -168,7 +204,6 @@ A, b = symmetric_indefinite(FC=FC, shift = 5) @test_throws ErrorException cr(A, b, radius = one(Float64), linesearch = true) - end end end \ No newline at end of file