Skip to content

implementation Line Search with Negative Curvature Detection with direction for CR #985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
4 changes: 2 additions & 2 deletions src/block_krylov_workspaces.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ function BlockMinresWorkspace(m::Integer, n::Integer, p::Integer, SV::Type, SM::
τₖ₋₁ = SV(undef, p)
SV = isconcretetype(SV) ? SV : typeof(τₖ₋₁)
SM = isconcretetype(SM) ? SM : typeof(X)
stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown")
stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown")
workspace = BlockMinresWorkspace{T,FC,SV,SM}(m, n, p, ΔX, X, P, Q, C, D, Φ, Vₖ₋₁, Vₖ, wₖ₋₂, wₖ₋₁, Hₖ₋₂, Hₖ₋₁, τₖ₋₂, τₖ₋₁, false, stats)
return workspace
end
Expand Down Expand Up @@ -115,7 +115,7 @@ function BlockGmresWorkspace(m::Integer, n::Integer, p::Integer, SV::Type, SM::T
τ = SV[SV(undef, p) for i = 1 : memory]
SV = isconcretetype(SV) ? SV : typeof(τ)
SM = isconcretetype(SM) ? SM : typeof(X)
stats = SimpleStats(0, false, false, false, T[], T[], T[], 0.0, "unknown")
stats = SimpleStats(0, false, false, false, 0, T[], T[], T[], 0.0, "unknown")
workspace = BlockGmresWorkspace{T,FC,SV,SM}(m, n, p, ΔX, X, W, P, Q, C, D, V, Z, R, H, τ, false, stats)
return workspace
end
Expand Down
82 changes: 74 additions & 8 deletions src/cr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,13 @@

* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient);
* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; If `radius > 0` and nonpositive curvature is detected, the behavior depends on the iteration and follow similar logic as linesearch;
* `linesearch`: if `true` and nonpositive curvature is detected, the behavior depends on the iteration:
– at iteration k = 0, return the preconditioned initial search direction in `workspace.npc_dir`;
– at iteration k > 0,
- if the residual from iteration k-1 is a nonpositive curvature direction but `workspace.p` is not, the residual is stored in `stats.npc_dir` and `stats.npcCount` is set to 1;
- if `workspace.p` is a nonpositive curvature direction but the residual is not, `workspace.p` is copied into `stats.npc_dir` and `stats.npcCount` is set to 1;
- if both are nonnegative curvature directions, the residual is stored in `stats.npc_dir` and `stats.npcCount` is set to 2.
* `γ`: tolerance to determine that the curvature of the quadratic model is nonpositive;
* `atol`: absolute stopping tolerance based on the residual norm;
* `rtol`: relative stopping tolerance based on the residual norm;
Expand Down Expand Up @@ -133,6 +138,7 @@
length(b) == n || error("Inconsistent problem size")
linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0")
(verbose > 0) && @printf(iostream, "CR: system of %d equations in %d variables\n", n, n)
(workspace.warm_start && linesearch) && error("warm_start and linesearch cannot be used together")

# Tests M = Iₙ
MisI = (M === I)
Expand All @@ -143,11 +149,16 @@

# Set up workspace
allocate_if(!MisI, workspace, :Mq, S, workspace.x) # The length of Mq is n
allocate_if(linesearch, workspace, :npc_dir , S, workspace.x) # The length of npc_dir is n
allocate_if(radius>0, workspace, :npc_dir , S, workspace.x)
Δx, x, r, p, q, Ar, stats = workspace.Δx, workspace.x, workspace.r, workspace.p, workspace.q, workspace.Ar, workspace.stats
warm_start = workspace.warm_start
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)
Mq = MisI ? q : workspace.Mq
if linesearch || radius > 0
npc_dir = workspace.npc_dir
end

# Initial state.
kfill!(x, zero(FC))
Expand Down Expand Up @@ -184,7 +195,11 @@
stats.status = "b is a zero-curvature direction"
history && push!(ArNorms, zero(T))
workspace.warm_start = false
linesearch && kcopy!(n, x, b) # x ← b
if linesearch
kcopy!(n, x, p) # x ← M⁻¹ b
kcopy!(n, npc_dir, r)
stats.npcCount = 1
end
return workspace
end

Expand Down Expand Up @@ -220,16 +235,31 @@

while ! (solved || tired || user_requested_exit || overtimed)
if linesearch
if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²)
p_curv = (pAp ≤ γ * pNorm^2)
r_curv = (ρ ≤ γ * rNorm^2)
if p_curv || r_curv
npcurv = true
(verbose > 0) && @printf(iostream, "nonpositive curvature detected: pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
stats.solved = true
stats.niter = iter
stats.inconsistent = false
stats.timer = start_time |> ktimer
stats.status = "nonpositive curvature"
iter == 0 && kcopy!(n, x, b) # x ← b
workspace.warm_start = false
stats.indefinite = true
if iter == 0
kcopy!(n, npc_dir, r)
stats.npcCount = 1
else
if r_curv
kcopy!(n, npc_dir, r)
stats.npcCount += 1
end
if p_curv
stats.npcCount += 1
r_curv || kcopy!(n, npc_dir, p)
end
end
return workspace
end
elseif pAp ≤ 0 && radius == 0
Expand All @@ -249,6 +279,14 @@

if abspAp ≤ γ * pNorm * knorm(n, q) # pᴴAp ≃ 0
npcurv = true # nonpositive curvature
stats.indefinite = true
if iter == 0
kcopy!(n, npc_dir, r)
stats.npcCount = 1

Check warning on line 285 in src/cr.jl

View check run for this annotation

Codecov / codecov/patch

src/cr.jl#L284-L285

Added lines #L284 - L285 were not covered by tests
else
stats.npcCount = 1
kcopy!(n, npc_dir, p)
end
(verbose > 0) && @printf(iostream, "pᴴAp = %8.1e ≃ 0\n", pAp)
if abspr ≤ γ * pNorm * rNorm # pᴴr ≃ 0
(verbose > 0) && @printf(iostream, "pᴴr = %8.1e ≃ 0, redefining p := r\n", pr)
Expand All @@ -263,6 +301,10 @@
else # case 2
(verbose > 0) && @printf(iostream, "r is a direction of nonpositive curvature: %8.1e\n", ρ)
α = tr
if iter > 0
stats.npcCount = 2
kcopy!(n, npc_dir, r)

Check warning on line 306 in src/cr.jl

View check run for this annotation

Codecov / codecov/patch

src/cr.jl#L304-L306

Added lines #L304 - L306 were not covered by tests
end
end
else
# q_p = q(x + α_p * p) - q(x) = -α_p * rᴴp + ½ (α_p)² * pᴴAp
Expand Down Expand Up @@ -292,6 +334,14 @@

elseif pAp > 0 && ρ < 0
npcurv = true
stats.indefinite = true
if iter == 0
kcopy!(n, npc_dir, r)
stats.npcCount = 1

Check warning on line 340 in src/cr.jl

View check run for this annotation

Codecov / codecov/patch

src/cr.jl#L337-L340

Added lines #L337 - L340 were not covered by tests
else
stats.npcCount = 1
kcopy!(n, npc_dir, r)

Check warning on line 343 in src/cr.jl

View check run for this annotation

Codecov / codecov/patch

src/cr.jl#L342-L343

Added lines #L342 - L343 were not covered by tests
end
(verbose > 0) && @printf(iostream, "pᴴAp = %8.1e > 0 and rᴴAr = %8.1e < 0\n", pAp, ρ)
# q_p is minimal for α_p = rᴴp / pᴴAp
α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp)
Expand All @@ -308,6 +358,14 @@

elseif pAp < 0 && ρ > 0
npcurv = true
stats.indefinite = true
if iter == 0
kcopy!(n, npc_dir, r)
stats.npcCount = 1

Check warning on line 364 in src/cr.jl

View check run for this annotation

Codecov / codecov/patch

src/cr.jl#L361-L364

Added lines #L361 - L364 were not covered by tests
else
stats.npcCount = 1
kcopy!(n, npc_dir, p)

Check warning on line 367 in src/cr.jl

View check run for this annotation

Codecov / codecov/patch

src/cr.jl#L366-L367

Added lines #L366 - L367 were not covered by tests
end
(verbose > 0) && @printf(iostream, "pᴴAp = %8.1e < 0 and rᴴAr = %8.1e > 0\n", pAp, ρ)
α = descent ? t1 : t2
tr = min(tr, rNorm² / ρ)
Expand All @@ -324,6 +382,14 @@

elseif pAp < 0 && ρ < 0
npcurv = true
stats.indefinite = true
if iter == 0
kcopy!(n, npc_dir, r)
stats.npcCount = 1
else
stats.npcCount = 2
kcopy!(n, npc_dir, r)

Check warning on line 391 in src/cr.jl

View check run for this annotation

Codecov / codecov/patch

src/cr.jl#L390-L391

Added lines #L390 - L391 were not covered by tests
end
(verbose > 0) && @printf(iostream, "negative curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
α = descent ? t1 : t2
Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
Expand All @@ -344,7 +410,7 @@

kaxpy!(n, α, p, x)
xNorm = knorm(n, x)
xNorm ≈ radius && (on_boundary = true)
xNorm ≈ radius > 0 && (on_boundary = true)
kaxpy!(n, -α, Mq, r) # residual
if MisI
rNorm² = kdotr(n, r, r)
Expand Down Expand Up @@ -410,11 +476,11 @@

# Termination status
tired && (status = "maximum number of iterations exceeded")
on_boundary && (status = "on trust-region boundary")
npcurv && (status = "nonpositive curvature")
solved && (status = "solution good enough given atol and rtol")
user_requested_exit && (status = "user-requested exit")
overtimed && (status = "time limit exceeded")
npcurv && (status = "nonpositive curvature")
on_boundary && (status = "on trust-region boundary")

# Update x
warm_start && kaxpy!(n, one(FC), Δx, x)
Expand Down
3 changes: 3 additions & 0 deletions src/krylov_stats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ The fields are as follows:
- `solved`: Indicates whether the solver successfully reached convergence (`true` if solved, `false` otherwise);
- `inconsistent`: Flags whether the system was detected as inconsistent (i.e., when `b` is not in the range of `A`);
- `indefinite`: Flags whether the system was detected as indefinite (i.e., when `A` is not positive definite);
- `npcCount`: The number of nonpositive curvature directions encountered during the solve;
- `residuals`: A vector containing the residual norms at each iteration;
- `Aresiduals`: A vector of `A'`-residual norms at each iteration;
- `Acond`: An estimate of the condition number of matrix `A`.
Expand All @@ -24,6 +25,7 @@ mutable struct SimpleStats{T} <: KrylovStats{T}
solved :: Bool
inconsistent :: Bool
indefinite :: Bool
npcCount :: Int
residuals :: Vector{T}
Aresiduals :: Vector{T}
Acond :: Vector{T}
Expand All @@ -42,6 +44,7 @@ function copyto!(dest :: SimpleStats, src :: SimpleStats)
dest.solved = src.solved
dest.inconsistent = src.inconsistent
dest.indefinite = src.indefinite
dest.npcCount = src.npcCount
dest.residuals = copy(src.residuals)
dest.Aresiduals = copy(src.Aresiduals)
dest.Acond = copy(src.Acond)
Expand Down
Loading
Loading