From 0dc986cc265371e021eace2aa2f853892d869419 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sun, 28 Jan 2024 02:32:07 -0600
Subject: [PATCH] HZ: avoid convergence if bisected

secant2 calls update, and update might switch to bisection in the U3 step. If we did bisect, the line-search progress
step L2 testing whether the interval is shrinking fast
enough is nonsensical (we might have shrunk multiple
iterations of bisection, but that is not an indication that the
secant model was "working").

Consequently, this reports back about whether bisection
was engaged in `update`, and if so skip any kind of
convergence assessment and do another iteration.

Fixes #173.
---
 src/hagerzhang.jl | 74 +++++++++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 34 deletions(-)

diff --git a/src/hagerzhang.jl b/src/hagerzhang.jl
index cf17dbd..9bc2940 100644
--- a/src/hagerzhang.jl
+++ b/src/hagerzhang.jl
@@ -203,10 +203,10 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
         else
             # We'll still going downhill, expand the interval and try again.
             # Reaching this branch means that dphi_c < 0 and phi_c <= phi_0 + ϵ_k
-            # So cold = c has a lower objective than phi_0 up to epsilon. 
+            # So cold = c has a lower objective than phi_0 up to epsilon.
             # This makes it a viable step to return if bracketing fails.
 
-            # Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c. 
+            # Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c.
             # Going back to the loop with c = cold will only result in infinite cycling.
             # So returning (cold, phi_cold) and exiting the line search is the best move.
             cold = c
@@ -266,7 +266,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
             mayterminate[] = false # reset in case another initial guess is used next
             return a, values[ia] # lsr.value[ia]
         end
-        iswolfe, iA, iB = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
+        iswolfe, iA, iB, bisected = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
         if iswolfe
             mayterminate[] = false # reset in case another initial guess is used next
             return alphas[iA], values[iA] # lsr.value[iA]
@@ -274,34 +274,39 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
         A = alphas[iA]
         B = alphas[iB]
         @assert B > A
-        if B - A < gamma * (b - a)
-            if display & LINESEARCH > 0
-                println("Linesearch: secant succeeded")
-            end
-            if nextfloat(values[ia]) >= values[ib] && nextfloat(values[iA]) >= values[iB]
-                # It's so flat, secant didn't do anything useful, time to quit
+        if !bisected
+            if B - A < gamma * (b - a)
                 if display & LINESEARCH > 0
-                    println("Linesearch: secant suggests it's flat")
+                    println("Linesearch: secant succeeded")
                 end
-                mayterminate[] = false # reset in case another initial guess is used next
-                return A, values[iA]
+                if nextfloat(values[ia]) >= values[ib] && nextfloat(values[iA]) >= values[iB]
+                    # It's so flat, secant didn't do anything useful, time to quit
+                    if display & LINESEARCH > 0
+                        println("Linesearch: secant suggests it's flat")
+                    end
+                    mayterminate[] = false # reset in case another initial guess is used next
+                    return A, values[iA]
+                end
+                ia = iA
+                ib = iB
+            else
+                # Secant is converging too slowly, use bisection
+                if display & LINESEARCH > 0
+                    println("Linesearch: secant failed, using bisection")
+                end
+                c = (A + B) / convert(T, 2)
+
+                phi_c, dphi_c = ϕdϕ(c)
+                @assert isfinite(phi_c) && isfinite(dphi_c)
+                push!(alphas, c)
+                push!(values, phi_c)
+                push!(slopes, dphi_c)
+
+                ia, ib = update!(ϕdϕ, alphas, values, slopes, iA, iB, length(alphas), phi_lim, display)
             end
+        else
             ia = iA
             ib = iB
-        else
-            # Secant is converging too slowly, use bisection
-            if display & LINESEARCH > 0
-                println("Linesearch: secant failed, using bisection")
-            end
-            c = (A + B) / convert(T, 2)
-
-            phi_c, dphi_c = ϕdϕ(c)
-            @assert isfinite(phi_c) && isfinite(dphi_c)
-            push!(alphas, c)
-            push!(values, phi_c)
-            push!(slopes, dphi_c)
-
-            ia, ib = update!(ϕdϕ, alphas, values, slopes, iA, iB, length(alphas), phi_lim, display)
         end
         iter += 1
     end
@@ -373,14 +378,15 @@ function secant2!(ϕdϕ,
     push!(slopes, dphi_c)
 
     ic = length(alphas)
+    bisected = false
     if satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma)
         if display & SECANT2 > 0
             println("secant2: first c satisfied Wolfe conditions")
         end
-        return true, ic, ic
+        return true, ic, ic, bisected
     end
 
-    iA, iB = update!(ϕdϕ, alphas, values, slopes, ia, ib, ic, phi_lim, display)
+    iA, iB, bisected = update!(ϕdϕ, alphas, values, slopes, ia, ib, ic, phi_lim, display)
     if display & SECANT2 > 0
         println("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic)
     end
@@ -412,14 +418,14 @@ function secant2!(ϕdϕ,
             if display & SECANT2 > 0
                 println("secant2: second c satisfied Wolfe conditions")
             end
-            return true, ic, ic
+            return true, ic, ic, bisected
         end
         iA, iB = update!(ϕdϕ, alphas, values, slopes, iA, iB, ic, phi_lim, display)
     end
     if display & SECANT2 > 0
         println("secant2 output: a = ", alphas[iA], ", b = ", alphas[iB])
     end
-    return false, iA, iB
+    return false, iA, iB, bisected
 end
 
 # HZ, stages U0-U3
@@ -457,10 +463,10 @@ function update!(ϕdϕ,
                 ", dphi_c = ", dphi_c)
     end
     if c < a || c > b
-        return ia, ib #, 0, 0  # it's out of the bracketing interval
+        return ia, ib, false #, 0, 0  # it's out of the bracketing interval
     end
     if dphi_c >= zeroT
-        return ia, ic #, 0, 0  # replace b with a closer point
+        return ia, ic, false #, 0, 0  # replace b with a closer point
     end
     # We know dphi_c < 0. However, phi may not be monotonic between a
     # and c, so check that the value is also smaller than phi_0.  (It's
@@ -468,11 +474,11 @@ function update!(ϕdϕ,
     # secure environment of alpha=0; that's why we didn't check this
     # above.)
     if phi_c <= phi_lim
-        return ic, ib#, 0, 0  # replace a
+        return ic, ib, false#, 0, 0  # replace a
     end
     # phi_c is bigger than phi_0, which implies that the minimum
     # lies between a and c. Find it via bisection.
-    return bisect!(ϕdϕ, alphas, values, slopes, ia, ic, phi_lim, display)
+    return (bisect!(ϕdϕ, alphas, values, slopes, ia, ic, phi_lim, display)..., true)
 end
 
 # HZ, stage U3 (with theta=0.5)