add diagonal PSB quasi-Newton update

dpo · dpo · commit 3f5445665af5 · 2022-10-24T14:11:50.000-04:00
diff --git a/src/DiagonalHessianApproximation.jl b/src/DiagonalHessianApproximation.jl
@@ -1,7 +1,14 @@
 export DiagonalQN, SpectralGradient
 
 """
-Implementation of the diagonal quasi-Newton approximation described in
+Implementation of the diagonal quasi-Newton approximations described in
+
+M. Zhu, J. L. Nazareth and H. Wolkowicz
+The Quasi-Cauchy Relation and Diagonal Updating.
+SIAM Journal on Optimization, vol. 9, number 4, pp. 1192-1204, 1999.
+https://doi.org/10.1137/S1052623498331793.
+
+and
 
 Andrei, N. 
 A diagonal quasi-Newton updating method for unconstrained optimization. 
@@ -23,6 +30,7 @@ mutable struct DiagonalQN{T <: Real, I <: Integer, V <: AbstractVector{T}, F} <:
   args5::Bool
   use_prod5!::Bool # true for 5-args mul! and for composite operators created with operators that use the 3-args mul!
   allocated5::Bool # true for 5-args mul!, false for 3-args mul! until the vectors are allocated
+  psb::Bool
 end
 
 """
@@ -34,11 +42,12 @@ positive definite.
 
 # Arguments
 
-- `d::AbstractVector`: initial diagonal approximation.
+- `d::AbstractVector`: initial diagonal approximation;
+- `psb::Bool`: if `true`, use the diagonal PSB update; if `false` (the default), use the Andrei update.
 """
-function DiagonalQN(d::AbstractVector{T}) where {T <: Real}
+function DiagonalQN(d::AbstractVector{T}, psb::Bool = false) where {T <: Real}
   prod = (res, v, α, β) -> mulSquareOpDiagonal!(res, d, v, α, β)
-  DiagonalQN(d, length(d), length(d), true, true, prod, prod, prod, 0, 0, 0, true, true, true)
+  DiagonalQN(d, length(d), length(d), true, true, prod, prod, prod, 0, 0, 0, true, true, true, psb)
 end
 
 # update function
@@ -49,18 +58,23 @@ function push!(
   s::V,
   y::V,
 ) where {T <: Real, I <: Integer, V <: AbstractVector{T}, F}
-  trA2 = zero(T)
-  for i in eachindex(s)
-    trA2 += s[i]^4
-  end
-  sT_s = dot(s, s)
-  sT_y = dot(s, y)
-  sT_B_s = sum(s[i]^2 * B.d[i] for i ∈ eachindex(s))
+  s2 = (si^2 for si ∈ s)
+  trA2 = dot(s2, s2)
   if trA2 == 0
     error("Cannot divide by zero and trA2 = 0")
   end
-  q = (sT_y + sT_s - sT_B_s) / trA2
-  B.d .+= q .* s .^ 2 .- 1
+  sT_y = dot(s, y)
+  sT_B_s = dot(s2, B.d)
+  q = sT_y - sT_B_s
+  if B.psb
+    q /= trA2
+    B.d .+= q .* s .^ 2
+  else
+    sT_s = dot(s, s)
+    q += sT_s
+    q /= trA2
+    B.d .+= q .* s .^ 2 .- 1
+  end
   return B
 end
 
diff --git a/test/test_diag.jl b/test/test_diag.jl
@@ -1,6 +1,6 @@
 # Points
 x0 = [-1.0, 1.0, -1.0]
-x1 = x0 + [1.0, 1.0, 1.0]
+x1 = x0 + [1.0, 0.0, 1.0]
 
 # Test functions
 # f(x) = x[1]^2 + x[2]^2 + x[3]^2
@@ -12,7 +12,7 @@ x1 = x0 + [1.0, 1.0, 1.0]
 # h(x) = x[1]^2 * x[2] * x[3]^3
 ∇h(x) = [2 * x[1] * x[2] * x[3]^3, x[1]^2 * x[3]^3, 3 * x[1]^2 * x[2] * x[3]^2]
 
-@testset "Weak secant equation" begin
+@testset "Weak secant equation for Andrei update" begin
   for grad_fun in (:∇f, :∇g, ∇h)
     grad = eval(grad_fun)
     s = x1 - x0
@@ -23,30 +23,44 @@ x1 = x0 + [1.0, 1.0, 1.0]
   end
 end
 
+@testset "Weak secant equation for PSB update" begin
+  for grad_fun in (:∇f, :∇g, :∇h)
+    grad = eval(grad_fun)
+    s = x1 - x0
+    y = grad(x1) - grad(x0)
+    B = DiagonalQN([1.0, -1.0, 1.0], true) # psb = true selects the diagonal PSB update
+    push!(B, s, y)
+    @test abs(dot(s, B * s) - dot(s, y)) <= 1e-10 # weak secant equation: sᵀ B s = sᵀ y
+  end
+end
+
 @testset "Hard coded test" begin
   for grad_fun in (:∇f, :∇g, :∇h)
     grad = eval(grad_fun)
     s = x1 - x0
     y = grad(x1) - grad(x0)
-    B = DiagonalQN([1.0, -1.0, 1.0])
-    if grad_fun == :∇f
-      Bref = [8 / 3, 8 / 3 - 2, 8 / 3]
-    elseif grad_fun == :∇g
-      Bref =
-        [1 + (sin(-1) - exp(-1)) / 3, -1 + (sin(-1) - exp(-1)) / 3, 1 + (sin(-1) - exp(-1)) / 3]
-    else
-      Bref = [-2 / 3, -2 / 3 - 2, -2 / 3]
+    for psb ∈ (false, true)
+      B = DiagonalQN([1.0, -1.0, 1.0], psb)
+      if grad_fun == :∇f
+        Bref = psb ? [2, -1, 2] : [2, -2, 2]
+      elseif grad_fun == :∇g
+        Bref =
+          psb ? [1 + (sin(-1) - exp(-1) - 1) / 2, -1, 1 + (sin(-1) - exp(-1) - 1) / 2] :
+          [(1 + sin(-1) - exp(-1)) / 2, -2, (1 + sin(-1) - exp(-1)) / 2]
+      else
+        Bref = psb ? [-5 / 2, -1, -5 / 2] : [-5 / 2, -2, -5 / 2]
+      end
+      push!(B, s, y)
+      @test norm(B.d - Bref) <= 1e-10
     end
-    push!(B, s, y)
-    @test norm(B.d - Bref) <= 1e-10
 
     B = SpectralGradient(1.0, 3)
     if grad_fun == :∇f
       Bref = 2
     elseif grad_fun == :∇g
-      Bref = 1 / 3 * (1 - exp(-1) + sin(-1))
+      Bref = (1 - exp(-1) + sin(-1)) / 2
     else
-      Bref = -4 / 3
+      Bref = -5 / 2
     end
     push!(B, s, y)
     @test abs(B.d - Bref) <= 1e-10
@@ -60,7 +74,10 @@ end
   u = similar(v)
   mul!(u, A, v)
   @test (@allocated mul!(u, A, v)) == 0
-  B = SpectralGradient(rand(), 5)
+  B = DiagonalQN(d, true)
   mul!(u, B, v)
   @test (@allocated mul!(u, B, v)) == 0
+  C = SpectralGradient(rand(), 5)
+  mul!(u, C, v)
+  @test (@allocated mul!(u, C, v)) == 0
 end