Skip to content

Commit e10fb7a

Browse files
author
RAYNAUD Paul (raynaudp)
committed
add documentation
1 parent 3284ced commit e10fb7a

File tree

1 file changed

+41
-14
lines changed

1 file changed

+41
-14
lines changed

src/compressed_lbfgs.jl

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,33 @@ using CUDA
1212

1313
export CompressedLBFGS
1414

15+
"""
16+
CompressedLBFGS{T, M<:AbstractMatrix{T}, V<:AbstractVector{T}}
17+
18+
A LBFGS limited-memory operator.
19+
It represents a linear mapping ℝⁿ → ℝⁿ, built from at most `m` BFGS updates.
20+
This implementation considers the block-matrix representation of the BFGS (forward) update.
21+
It follows the algorithm described in [REPRESENTATIONS OF QUASI-NEWTON MATRICES AND THEIR USE IN LIMITED MEMORY METHODS](https://link.springer.com/article/10.1007/BF01582063) from Richard H. Byrd, Jorge Nocedal and Robert B. Schnabel (1994).
22+
This operator has several fields directly related to the block representation of the operator:
23+
- `m`: the maximal memory of the operator;
24+
- `n`: the dimension of the linear application;
25+
- `k`: the current number of stored updates;
26+
- `α`: scalar for `B₀ = α I`;
27+
- `Sₖ`: retains the last `k` vectors `s` from the updates parametrized by `(s,y)`;
28+
- `Yₖ`: retains the last `k` vectors `y` from the updates parametrized by `(s,y)`;
29+
- `Dₖ`: a diagonal matrix required to apply the operator and to form the matrix;
30+
- `Lₖ`: a lower triangular matrix required to apply the operator and to form the matrix.
31+
In addition to these structures, which are updated circularly once `k` reaches `m`, the operator keeps other intermediate data structures that are renewed at each update:
32+
- `chol_matrix`: a matrix required to store a Cholesky factorization of a Rᵏˣᵏ matrix;
33+
- `intermediate_1`: a R²ᵏˣ²ᵏ matrix;
34+
- `intermediate_2`: a R²ᵏˣ²ᵏ matrix;
35+
- `inverse_intermediate_1`: a R²ᵏˣ²ᵏ matrix;
36+
- `inverse_intermediate_2`: a R²ᵏˣ²ᵏ matrix;
37+
- `intermediary_vector`: a vector ∈ Rᵏ to store intermediate solutions;
38+
- `sol`: a vector ∈ Rᵏ to store intermediate solutions;
39+
- `intermediate_structure_updated`: indicates whether the intermediate structures are up to date.
40+
This implementation is designed to work either on CPU or GPU.
41+
"""
1542
mutable struct CompressedLBFGS{T, M<:AbstractMatrix{T}, V<:AbstractVector{T}}
1643
m::Int # memory of the operator
1744
n::Int # vector size
@@ -36,7 +63,13 @@ default_gpu() = CUDA.functional() ? true : false
3663
default_matrix_type(gpu::Bool, T::DataType) = gpu ? CuMatrix{T} : Matrix{T}
3764
default_vector_type(gpu::Bool, T::DataType) = gpu ? CuVector{T} : Vector{T}
3865

39-
function CompressedLBFGS(m::Int, n::Int; T=Float64, gpu=default_gpu(), M=default_matrix_type(gpu, T), V=default_vector_type(gpu, T))
66+
"""
67+
    CompressedLBFGS(n::Int; m::Int=5, T=Float64, gpu=default_gpu())
68+
69+
An implementation of a LBFGS operator (forward), representing an `n`×`n` linear mapping.
70+
It stores at most `m` BFGS updates, and selects suitable storage depending on whether it runs on a CPU or a GPU.
71+
"""
72+
function CompressedLBFGS(n::Int; m::Int=5, T=Float64, gpu=default_gpu(), M=default_matrix_type(gpu, T), V=default_vector_type(gpu, T))
4073
α = (T)(1)
4174
k = 0
4275
Sₖ = M(undef, n, m)
@@ -56,20 +89,16 @@ function CompressedLBFGS(m::Int, n::Int; T=Float64, gpu=default_gpu(), M=default
5689
end
5790

5891
function Base.push!(op::CompressedLBFGS{T,M,V}, s::V, y::V) where {T,M,V<:AbstractVector{T}}
59-
if op.k < op.m # still some place in structures
92+
if op.k < op.m # still some place in the structures
6093
op.k += 1
6194
op.Sₖ[:, op.k] .= s
6295
op.Yₖ[:, op.k] .= y
6396
op.Dₖ.diag[op.k] = dot(s, y)
6497
op.Lₖ.data[op.k, op.k] = 0
6598
for i in 1:op.k-1
66-
# op.Lₖ.data[op.k, i] = dot(s, op.Yₖ[:, i])
6799
op.Lₖ.data[op.k, i] = dot(op.Sₖ[:, op.k], op.Yₖ[:, i])
68100
end
69-
# the secan equation fails if this line is uncommented
70-
else # update matrix with circular shift
71-
println("else")
72-
# must be tested
101+
else # k == m, circularly update the intermediary structures
73102
op.Sₖ .= circshift(op.Sₖ, (0, -1))
74103
op.Yₖ .= circshift(op.Yₖ, (0, -1))
75104
op.Dₖ .= circshift(op.Dₖ, (-1, -1))
@@ -84,15 +113,13 @@ function Base.push!(op::CompressedLBFGS{T,M,V}, s::V, y::V) where {T,M,V<:Abstra
84113
end
85114
end
86115
end
87-
@show op.Lₖ
88-
@show op.Sₖ
89-
@show op.Yₖ
90-
@show op.Dₖ
116+
# secant equation fails if uncommented
91117
# op.α = dot(y,s)/dot(s,s)
92118
op.intermediate_structure_updated = false
93119
return op
94120
end
95121

122+
# Algorithm 3.2 (p15)
96123
# Theorem 2.3 (p6)
97124
function Base.Matrix(op::CompressedLBFGS{T,M,V}) where {T,M,V}
98125
B₀ = M(zeros(T, op.n, op.n))
@@ -112,6 +139,7 @@ function Base.Matrix(op::CompressedLBFGS{T,M,V}) where {T,M,V}
112139
return Bₖ
113140
end
114141

142+
# Algorithm 3.2 (p15)
115143
# step 4, Jₖ is computed only if needed
116144
function inverse_cholesky(op::CompressedLBFGS)
117145
view(op.chol_matrix, 1:op.k, 1:op.k) .= op.α .* (transpose(view(op.Sₖ, :, 1:op.k)) * view(op.Sₖ, :, 1:op.k)) .+ view(op.Lₖ, 1:op.k, 1:op.k) * inv(op.Dₖ[1:op.k, 1:op.k]) * transpose(view(op.Lₖ, 1:op.k, 1:op.k))
@@ -144,10 +172,10 @@ end
144172
function LinearAlgebra.mul!(Bv::V, op::CompressedLBFGS{T,M,V}, v::V) where {T,M,V<:AbstractVector{T}}
145173
# step 1-3 mainly done by Base.push!
146174

147-
# steps 4 and 6, in case the intermediary required structure are not up to date
175+
# steps 4 and 6, in case the intermediary structures required are not up to date
148176
(!op.intermediate_structure_updated) && (precompile_iterated_structure!(op))
149177

150-
# step 5, try views for mul!
178+
# step 5
151179
mul!(view(op.sol, 1:op.k), transpose(view(op.Yₖ, :, 1:op.k)), v)
152180
mul!(view(op.sol, op.k+1:2*op.k), transpose(view(op.Sₖ, :, 1:op.k)), v)
153181
# scal!(op.α, view(op.sol, op.k+1:2*op.k)) # more allocation, slower
@@ -159,7 +187,6 @@ function LinearAlgebra.mul!(Bv::V, op::CompressedLBFGS{T,M,V}, v::V) where {T,M,
159187

160188
# step 7
161189
# Bv .= op.α .* v .- (view(op.Yₖ, :,1:op.k) * view(op.sol, 1:op.k) .+ op.α .* view(op.Sₖ, :, 1:op.k) * view(op.sol, op.k+1:2*op.k))
162-
163190
mul!(Bv, view(op.Yₖ, :, 1:op.k), view(op.sol, 1:op.k))
164191
mul!(Bv, view(op.Sₖ, :, 1:op.k), view(op.sol, op.k+1:2*op.k), - op.α, (T)(-1))
165192
Bv .+= op.α .* v

0 commit comments

Comments
 (0)