Skip to content

Commit 3f54456

Browse files
committed
add diagonal PSB quasi-Newton update
1 parent aef78b0 commit 3f54456

File tree

2 files changed

+59
-28
lines changed

2 files changed

+59
-28
lines changed

src/DiagonalHessianApproximation.jl

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
export DiagonalQN, SpectralGradient
22

33
"""
4-
Implementation of the diagonal quasi-Newton approximation described in
4+
Implementation of the diagonal quasi-Newton approximations described in
5+
6+
M. Zhu, J. L. Nazareth and H. Wolkowicz
7+
The Quasi-Cauchy Relation and Diagonal Updating.
8+
SIAM Journal on Optimization, vol. 9, number 4, pp. 1192-1204, 1999.
9+
https://doi.org/10.1137/S1052623498331793.
10+
11+
and
512
613
Andrei, N.
714
A diagonal quasi-Newton updating method for unconstrained optimization.
@@ -23,6 +30,7 @@ mutable struct DiagonalQN{T <: Real, I <: Integer, V <: AbstractVector{T}, F} <:
2330
args5::Bool
2431
use_prod5!::Bool # true for 5-args mul! and for composite operators created with operators that use the 3-args mul!
2532
allocated5::Bool # true for 5-args mul!, false for 3-args mul! until the vectors are allocated
33+
psb::Bool
2634
end
2735

2836
"""
@@ -34,11 +42,12 @@ positive definite.
3442
3543
# Arguments
3644
37-
- `d::AbstractVector`: initial diagonal approximation.
45+
- `d::AbstractVector`: initial diagonal approximation;
46+
- `psb::Bool`: whether to use the diagonal PSB update or the Andrei update.
3847
"""
39-
function DiagonalQN(d::AbstractVector{T}) where {T <: Real}
48+
function DiagonalQN(d::AbstractVector{T}, psb::Bool = false) where {T <: Real}
4049
prod = (res, v, α, β) -> mulSquareOpDiagonal!(res, d, v, α, β)
41-
DiagonalQN(d, length(d), length(d), true, true, prod, prod, prod, 0, 0, 0, true, true, true)
50+
DiagonalQN(d, length(d), length(d), true, true, prod, prod, prod, 0, 0, 0, true, true, true, psb)
4251
end
4352

4453
# update function
@@ -49,18 +58,23 @@ function push!(
4958
s::V,
5059
y::V,
5160
) where {T <: Real, I <: Integer, V <: AbstractVector{T}, F}
52-
trA2 = zero(T)
53-
for i in eachindex(s)
54-
trA2 += s[i]^4
55-
end
56-
sT_s = dot(s, s)
57-
sT_y = dot(s, y)
58-
sT_B_s = sum(s[i]^2 * B.d[i] for i eachindex(s))
61+
s2 = (si^2 for si s)
62+
trA2 = dot(s2, s2)
5963
if trA2 == 0
6064
error("Cannot divide by zero and trA2 = 0")
6165
end
62-
q = (sT_y + sT_s - sT_B_s) / trA2
63-
B.d .+= q .* s .^ 2 .- 1
66+
sT_y = dot(s, y)
67+
sT_B_s = dot(s2, B.d)
68+
q = sT_y - sT_B_s
69+
if B.psb
70+
q /= trA2
71+
B.d .+= q .* s .^ 2
72+
else
73+
sT_s = dot(s, s)
74+
q += sT_s
75+
q /= trA2
76+
B.d .+= q .* s .^ 2 .- 1
77+
end
6478
return B
6579
end
6680

test/test_diag.jl

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Points
22
x0 = [-1.0, 1.0, -1.0]
3-
x1 = x0 + [1.0, 1.0, 1.0]
3+
x1 = x0 + [1.0, 0.0, 1.0]
44

55
# Test functions
66
# f(x) = x[1]^2 + x[2]^2 + x[3]^2
@@ -12,7 +12,7 @@ x1 = x0 + [1.0, 1.0, 1.0]
1212
# h(x) = x[1]^2 * x[2] * x[3]^3
1313
∇h(x) = [2 * x[1] * x[2] * x[3]^3, x[1]^2 * x[3]^3, 3 * x[1]^2 * x[2] * x[3]^2]
1414

15-
@testset "Weak secant equation" begin
15+
@testset "Weak secant equation for Andrei update" begin
1616
for grad_fun in (:∇f, :∇g, ∇h)
1717
grad = eval(grad_fun)
1818
s = x1 - x0
@@ -23,30 +23,44 @@ x1 = x0 + [1.0, 1.0, 1.0]
2323
end
2424
end
2525

26+
@testset "Weak secant equation for PSB update" begin
27+
for grad_fun in (:∇f, :∇g, ∇h)
28+
grad = eval(grad_fun)
29+
s = x1 - x0
30+
y = grad(x1) - grad(x0)
31+
B = DiagonalQN([1.0, -1.0, 1.0], true)
32+
push!(B, s, y)
33+
@test abs(dot(s, B * s) - dot(s, y)) <= 1e-10
34+
end
35+
end
36+
2637
@testset "Hard coded test" begin
2738
for grad_fun in (:∇f, :∇g, :∇h)
2839
grad = eval(grad_fun)
2940
s = x1 - x0
3041
y = grad(x1) - grad(x0)
31-
B = DiagonalQN([1.0, -1.0, 1.0])
32-
if grad_fun == :∇f
33-
Bref = [8 / 3, 8 / 3 - 2, 8 / 3]
34-
elseif grad_fun == :∇g
35-
Bref =
36-
[1 + (sin(-1) - exp(-1)) / 3, -1 + (sin(-1) - exp(-1)) / 3, 1 + (sin(-1) - exp(-1)) / 3]
37-
else
38-
Bref = [-2 / 3, -2 / 3 - 2, -2 / 3]
42+
for psb (false, true)
43+
B = DiagonalQN([1.0, -1.0, 1.0], psb)
44+
if grad_fun == :∇f
45+
Bref = psb ? [2, -1, 2] : [2, -2, 2]
46+
elseif grad_fun == :∇g
47+
Bref =
48+
psb ? [1 + (sin(-1) - exp(-1) - 1) / 2, -1, 1 + (sin(-1) - exp(-1) - 1) / 2] :
49+
[(1 + sin(-1) - exp(-1)) / 2, -2, (1 + sin(-1) - exp(-1)) / 2]
50+
else
51+
Bref = psb ? [-5 / 2, -1, -5 / 2] : [-5 / 2, -2, -5 / 2]
52+
end
53+
push!(B, s, y)
54+
@test norm(B.d - Bref) <= 1e-10
3955
end
40-
push!(B, s, y)
41-
@test norm(B.d - Bref) <= 1e-10
4256

4357
B = SpectralGradient(1.0, 3)
4458
if grad_fun == :∇f
4559
Bref = 2
4660
elseif grad_fun == :∇g
47-
Bref = 1 / 3 * (1 - exp(-1) + sin(-1))
61+
Bref = (1 - exp(-1) + sin(-1)) / 2
4862
else
49-
Bref = -4 / 3
63+
Bref = -5 / 2
5064
end
5165
push!(B, s, y)
5266
@test abs(B.d - Bref) <= 1e-10
@@ -60,7 +74,10 @@ end
6074
u = similar(v)
6175
mul!(u, A, v)
6276
@test (@allocated mul!(u, A, v)) == 0
63-
B = SpectralGradient(rand(), 5)
77+
B = DiagonalQN(d, true)
6478
mul!(u, B, v)
6579
@test (@allocated mul!(u, B, v)) == 0
80+
C = SpectralGradient(rand(), 5)
81+
mul!(u, C, v)
82+
@test (@allocated mul!(u, C, v)) == 0
6683
end

0 commit comments

Comments
 (0)