From 0dc986cc265371e021eace2aa2f853892d869419 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 28 Jan 2024 02:32:07 -0600 Subject: [PATCH] HZ: avoid convergence if bisected secant2 calls update, and update might switch to bisection in the U3 step. If we did bisect, the line-search progress check in step L2, which tests whether the interval is shrinking fast enough, is nonsensical (the interval might have shrunk over multiple iterations of bisection, but that is not an indication that the secant model was "working"). Consequently, this reports back whether bisection was engaged in `update`, and if so skips any kind of convergence assessment and does another iteration. Fixes #173. --- src/hagerzhang.jl | 74 +++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/src/hagerzhang.jl b/src/hagerzhang.jl index cf17dbd..9bc2940 100644 --- a/src/hagerzhang.jl +++ b/src/hagerzhang.jl @@ -203,10 +203,10 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, else # We'll still going downhill, expand the interval and try again. # Reaching this branch means that dphi_c < 0 and phi_c <= phi_0 + ϵ_k - # So cold = c has a lower objective than phi_0 up to epsilon. + # So cold = c has a lower objective than phi_0 up to epsilon. # This makes it a viable step to return if bracketing fails. - # Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c. + # Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c. # Going back to the loop with c = cold will only result in infinite cycling. # So returning (cold, phi_cold) and exiting the line search is the best move. 
cold = c @@ -266,7 +266,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, mayterminate[] = false # reset in case another initial guess is used next return a, values[ia] # lsr.value[ia] end - iswolfe, iA, iB = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display) + iswolfe, iA, iB, bisected = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display) if iswolfe mayterminate[] = false # reset in case another initial guess is used next return alphas[iA], values[iA] # lsr.value[iA] @@ -274,34 +274,39 @@ function (ls::HagerZhang)(ϕ, ϕdϕ, A = alphas[iA] B = alphas[iB] @assert B > A - if B - A < gamma * (b - a) - if display & LINESEARCH > 0 - println("Linesearch: secant succeeded") - end - if nextfloat(values[ia]) >= values[ib] && nextfloat(values[iA]) >= values[iB] - # It's so flat, secant didn't do anything useful, time to quit + if !bisected + if B - A < gamma * (b - a) if display & LINESEARCH > 0 - println("Linesearch: secant suggests it's flat") + println("Linesearch: secant succeeded") end - mayterminate[] = false # reset in case another initial guess is used next - return A, values[iA] + if nextfloat(values[ia]) >= values[ib] && nextfloat(values[iA]) >= values[iB] + # It's so flat, secant didn't do anything useful, time to quit + if display & LINESEARCH > 0 + println("Linesearch: secant suggests it's flat") + end + mayterminate[] = false # reset in case another initial guess is used next + return A, values[iA] + end + ia = iA + ib = iB + else + # Secant is converging too slowly, use bisection + if display & LINESEARCH > 0 + println("Linesearch: secant failed, using bisection") + end + c = (A + B) / convert(T, 2) + + phi_c, dphi_c = ϕdϕ(c) + @assert isfinite(phi_c) && isfinite(dphi_c) + push!(alphas, c) + push!(values, phi_c) + push!(slopes, dphi_c) + + ia, ib = update!(ϕdϕ, alphas, values, slopes, iA, iB, length(alphas), phi_lim, display) end + else ia = iA ib = iB - else - # Secant is converging too slowly, use bisection - if 
display & LINESEARCH > 0 - println("Linesearch: secant failed, using bisection") - end - c = (A + B) / convert(T, 2) - - phi_c, dphi_c = ϕdϕ(c) - @assert isfinite(phi_c) && isfinite(dphi_c) - push!(alphas, c) - push!(values, phi_c) - push!(slopes, dphi_c) - - ia, ib = update!(ϕdϕ, alphas, values, slopes, iA, iB, length(alphas), phi_lim, display) end iter += 1 end @@ -373,14 +378,15 @@ function secant2!(ϕdϕ, push!(slopes, dphi_c) ic = length(alphas) + bisected = false if satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma) if display & SECANT2 > 0 println("secant2: first c satisfied Wolfe conditions") end - return true, ic, ic + return true, ic, ic, bisected end - iA, iB = update!(ϕdϕ, alphas, values, slopes, ia, ib, ic, phi_lim, display) + iA, iB, bisected = update!(ϕdϕ, alphas, values, slopes, ia, ib, ic, phi_lim, display) if display & SECANT2 > 0 println("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic) end @@ -412,14 +418,14 @@ function secant2!(ϕdϕ, if display & SECANT2 > 0 println("secant2: second c satisfied Wolfe conditions") end - return true, ic, ic + return true, ic, ic, bisected end iA, iB = update!(ϕdϕ, alphas, values, slopes, iA, iB, ic, phi_lim, display) end if display & SECANT2 > 0 println("secant2 output: a = ", alphas[iA], ", b = ", alphas[iB]) end - return false, iA, iB + return false, iA, iB, bisected end # HZ, stages U0-U3 @@ -457,10 +463,10 @@ function update!(ϕdϕ, ", dphi_c = ", dphi_c) end if c < a || c > b - return ia, ib #, 0, 0 # it's out of the bracketing interval + return ia, ib, false #, 0, 0 # it's out of the bracketing interval end if dphi_c >= zeroT - return ia, ic #, 0, 0 # replace b with a closer point + return ia, ic, false #, 0, 0 # replace b with a closer point end # We know dphi_c < 0. However, phi may not be monotonic between a # and c, so check that the value is also smaller than phi_0. 
(It's @@ -468,11 +474,11 @@ function update!(ϕdϕ, # secure environment of alpha=0; that's why we didn't check this # above.) if phi_c <= phi_lim - return ic, ib#, 0, 0 # replace a + return ic, ib, false#, 0, 0 # replace a end # phi_c is bigger than phi_0, which implies that the minimum # lies between a and c. Find it via bisection. - return bisect!(ϕdϕ, alphas, values, slopes, ia, ic, phi_lim, display) + return (bisect!(ϕdϕ, alphas, values, slopes, ia, ic, phi_lim, display)..., true) end # HZ, stage U3 (with theta=0.5)