@@ -10,25 +10,25 @@ Implemented by Paul Raynaud (supervised by Dominique Orban)
using LinearAlgebra, LinearAlgebra.BLAS
using CUDA

- export CompressedLBFGS
+ export CompressedLBFGSOperator

"""
-     CompressedLBFGS{T, M<:AbstractMatrix{T}, V<:AbstractVector{T}}
+     CompressedLBFGSOperator{T, M<:AbstractMatrix{T}, V<:AbstractVector{T}}

An LBFGS limited-memory operator.
- It represents a linear application Rⁿˣⁿ, considering at most `m` BFGS updates.
+ It represents a linear mapping from Rⁿ to Rⁿ, built from at most `mem` BFGS updates.
This implementation relies on the block-matrix representation of the BFGS (forward) update.
It follows the algorithm described in [REPRESENTATIONS OF QUASI-NEWTON MATRICES AND THEIR USE IN LIMITED MEMORY METHODS](https://link.springer.com/article/10.1007/BF01582063) from Richard H. Byrd, Jorge Nocedal and Robert B. Schnabel (1994).
This operator carries several fields directly related to the block representation of the operator:
- - `m`: the maximal memory of the operator;
+ - `mem`: the maximal memory of the operator;
- `n`: the dimension of the linear mapping;
- `k`: the current memory size of the operator;
- `α`: scalar for `B₀ = α I`;
- `Sₖ`: retains the `k` last vectors `s` of the updates parametrized by `(s,y)`;
- `Yₖ`: retains the `k` last vectors `y` of the updates parametrized by `(s,y)`;
- `Dₖ`: a diagonal matrix needed to apply the operator and to form the matrix;
- `Lₖ`: a lower triangular matrix needed to apply the operator and to form the matrix.
- In addition to this structures which are circurlarly update when `k` reaches `m`, we consider other intermediate data structures renew at each update:
+ In addition to these structures, which are updated circularly once `k` reaches `mem`, the operator keeps intermediate data structures renewed at each update:
- `chol_matrix`: a matrix storing the Cholesky factorization of an Rᵏˣᵏ matrix;
- `intermediate_1`: an R²ᵏˣ²ᵏ matrix;
- `intermediate_2`: an R²ᵏˣ²ᵏ matrix;
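For orientation (this paragraph is a restatement, not part of the file), the compact block representation that `Sₖ`, `Yₖ`, `Dₖ` and `Lₖ` encode is the one of Theorem 2.3 of the cited paper, specialized to `B₀ = αI`:

B_k = \alpha I - \begin{bmatrix} \alpha S_k & Y_k \end{bmatrix}
\begin{bmatrix} \alpha S_k^\top S_k & L_k \\ L_k^\top & -D_k \end{bmatrix}^{-1}
\begin{bmatrix} \alpha S_k^\top \\ Y_k^\top \end{bmatrix},
\qquad (D_k)_{ii} = s_i^\top y_i, \qquad
(L_k)_{ij} = \begin{cases} s_i^\top y_j & i > j \\ 0 & \text{otherwise.} \end{cases}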
@@ -38,24 +38,24 @@ In addition to this structures which are circurlarly update when `k` reaches `m`
- `sol`: a vector ∈ Rᵏ to store intermediate solutions;
This implementation is designed to work either on CPU or GPU.
"""
- mutable struct CompressedLBFGS{T, M<:AbstractMatrix{T}, V<:AbstractVector{T}}
- m::Int # memory of the operator
+ mutable struct CompressedLBFGSOperator{T, M<:AbstractMatrix{T}, V<:AbstractVector{T}}
+ mem::Int # memory of the operator
n::Int # vector size
- k::Int # k ≤ m, active memory of the operator
+ k::Int # k ≤ mem, active memory of the operator
α::T # B₀ = αI
Sₖ::M # gather all sₖ₋ₘ
Yₖ::M # gather all yₖ₋ₘ
- Dₖ::Diagonal{T,V} # m * m
- Lₖ::LowerTriangular{T,M} # m * m
+ Dₖ::Diagonal{T,V} # mem * mem
+ Lₖ::LowerTriangular{T,M} # mem * mem

chol_matrix::M # m * m
- intermediate_diagonal::Diagonal{T,V} # m * m
+ intermediate_diagonal::Diagonal{T,V} # mem * mem
intermediate_1::UpperTriangular{T,M} # 2m * 2m
intermediate_2::LowerTriangular{T,M} # 2m * 2m
inverse_intermediate_1::UpperTriangular{T,M} # 2m * 2m
inverse_intermediate_2::LowerTriangular{T,M} # 2m * 2m
intermediary_vector::V # 2m
- sol::V # m
+ sol::V # mem
end

default_gpu() = CUDA.functional() ? true : false
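The constructor further down relies on `default_matrix_type(gpu; T)` and `default_vector_type(gpu; T)`, which are not visible in this diff. A minimal sketch of what such helpers could look like, assuming they merely select CUDA or base array types (the bodies below are an assumption; only the call sites appear in the file):

# Assumed sketch, not shown in the diff: choose GPU or CPU storage types.
default_matrix_type(gpu::Bool; T::DataType = Float64) = gpu ? CUDA.CuMatrix{T} : Matrix{T}
default_vector_type(gpu::Bool; T::DataType = Float64) = gpu ? CUDA.CuVector{T} : Vector{T}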
@@ -67,53 +67,54 @@ function columnshift!(A::AbstractMatrix{T}; direction::Int=-1, indicemax::Int=si
return A
end

- function columnshift!(A::AbstractMatrix{T}; direction::Int=-1, indicemax::Int=size(A)[1]) where T
- map(i -> view(A,:,i+direction) .= view(A,:,i), 1-direction:indicemax)
- return A
+ function vectorshift!(v::AbstractVector{T}; direction::Int=-1, indicemax::Int=length(v)) where T
+ view(v, 1:indicemax+direction) .= view(v, 1-direction:indicemax)
+ return v
end

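A quick, illustrative use of the new `vectorshift!` helper: with the default `direction = -1` it shifts the first `indicemax` entries one slot to the left, in place; this is how the diagonal of `Dₖ` is recycled in `Base.push!` further down.

v = [1.0, 2.0, 3.0, 4.0]
vectorshift!(v; indicemax = 3)
# v is now [2.0, 3.0, 3.0, 4.0]: entries 2:3 moved into 1:2,
# and slot 3 is then overwritten by the newest value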
"""
-     CompressedLBFGS(n::Int; [T=Float64, m=5], gpu:Bool)
+     CompressedLBFGSOperator(n::Int; [T=Float64, mem=5], gpu::Bool)

An implementation of an LBFGS operator (forward), representing an `n × n` linear mapping.
It considers at most `mem` BFGS iterates, and adapts its storage depending on whether it runs on a CPU or a GPU.
"""
- function CompressedLBFGS(n::Int; m::Int=5, T=Float64, gpu=default_gpu(), M=default_matrix_type(gpu; T), V=default_vector_type(gpu; T))
+ function CompressedLBFGSOperator(n::Int; mem::Int=5, T=Float64, gpu=default_gpu(), M=default_matrix_type(gpu; T), V=default_vector_type(gpu; T))
α = (T)(1)
k = 0
- Sₖ = M(undef, n, m)
- Yₖ = M(undef, n, m)
- Dₖ = Diagonal(V(undef, m))
- Lₖ = LowerTriangular(M(undef, m, m))
+ Sₖ = M(undef, n, mem)
+ Yₖ = M(undef, n, mem)
+ Dₖ = Diagonal(V(undef, mem))
+ Lₖ = LowerTriangular(M(undef, mem, mem))
Lₖ .= (T)(0)

- chol_matrix = M(undef, m, m)
- intermediate_diagonal = Diagonal(V(undef, m))
- intermediate_1 = UpperTriangular(M(undef, 2*m, 2*m))
- intermediate_2 = LowerTriangular(M(undef, 2*m, 2*m))
- inverse_intermediate_1 = UpperTriangular(M(undef, 2*m, 2*m))
- inverse_intermediate_2 = LowerTriangular(M(undef, 2*m, 2*m))
- intermediary_vector = V(undef, 2*m)
- sol = V(undef, 2*m)
- return CompressedLBFGS{T,M,V}(m, n, k, α, Sₖ, Yₖ, Dₖ, Lₖ, chol_matrix, intermediate_diagonal, intermediate_1, intermediate_2, inverse_intermediate_1, inverse_intermediate_2, intermediary_vector, sol)
+ chol_matrix = M(undef, mem, mem)
+ intermediate_diagonal = Diagonal(V(undef, mem))
+ intermediate_1 = UpperTriangular(M(undef, 2*mem, 2*mem))
+ intermediate_2 = LowerTriangular(M(undef, 2*mem, 2*mem))
+ inverse_intermediate_1 = UpperTriangular(M(undef, 2*mem, 2*mem))
+ inverse_intermediate_2 = LowerTriangular(M(undef, 2*mem, 2*mem))
+ intermediary_vector = V(undef, 2*mem)
+ sol = V(undef, 2*mem)
+ return CompressedLBFGSOperator{T,M,V}(mem, n, k, α, Sₖ, Yₖ, Dₖ, Lₖ, chol_matrix, intermediate_diagonal, intermediate_1, intermediate_2, inverse_intermediate_1, inverse_intermediate_2, intermediary_vector, sol)
end

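A minimal usage sketch of the renamed constructor (CPU case with the default `Matrix`/`Vector` storage; `push!` and `mul!` are the methods defined below in this file):

using LinearAlgebra
n = 100
op = CompressedLBFGSOperator(n; mem = 5, gpu = false)  # B₀ = I (α = 1), k = 0
s = rand(n); y = rand(n)                               # these vectors satisfy sᵀy > 0
push!(op, s, y)                                        # store the pair (s, y); k becomes 1
Bv = zeros(n)
mul!(Bv, op, s)                                        # Bv ← B * s for the updated operator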
- function Base.push!(op::CompressedLBFGS{T,M,V}, s::V, y::V) where {T,M,V<:AbstractVector{T}}
- if op.k < op.m # still some place in the structures
+ function Base.push!(op::CompressedLBFGSOperator{T,M,V}, s::V, y::V) where {T,M,V<:AbstractVector{T}}
+ if op.k < op.mem # there is still room in the structures
op.k += 1
view(op.Sₖ, :, op.k) .= s
view(op.Yₖ, :, op.k) .= y
view(op.Dₖ.diag, op.k) .= dot(s, y)
mul!(view(op.Lₖ.data, op.k, 1:op.k-1), transpose(view(op.Yₖ, :, 1:op.k-1)), view(op.Sₖ, :, op.k))
- else # k == m update circurlarly the intermediary structures
+ else # k == mem, update the intermediary structures circularly
columnshift!(op.Sₖ; indicemax=op.k)
columnshift!(op.Yₖ; indicemax=op.k)
- op.Dₖ .= circshift(op.Dₖ, (-1, -1))
+ # op.Dₖ .= circshift(op.Dₖ, (-1, -1))
+ vectorshift!(op.Dₖ.diag; indicemax=op.k)
view(op.Sₖ, :, op.k) .= s
view(op.Yₖ, :, op.k) .= y
view(op.Dₖ.diag, op.k) .= dot(s, y)

- map(i -> view(op.Lₖ, i:op.m-1, i-1) .= view(op.Lₖ, i+1:op.m, i), 2:op.m)
+ map(i -> view(op.Lₖ, i:op.mem-1, i-1) .= view(op.Lₖ, i+1:op.mem, i), 2:op.mem)
mul!(view(op.Lₖ.data, op.k, 1:op.k-1), transpose(view(op.Yₖ, :, 1:op.k-1)), view(op.Sₖ, :, op.k))
end

⋮

# Algorithm 3.2 (p15)
# Theorem 2.3 (p6)
- function Base.Matrix(op::CompressedLBFGS{T,M,V}) where {T,M,V}
+ function Base.Matrix(op::CompressedLBFGSOperator{T,M,V}) where {T,M,V}
B₀ = M(zeros(T, op.n, op.n))
map(i -> B₀[i, i] = op.α, 1:op.n)

⋮

# Algorithm 3.2 (p15)
# step 4, Jₖ is computed only if needed
- function inverse_cholesky(op::CompressedLBFGS{T,M,V}) where {T,M,V}
+ function inverse_cholesky(op::CompressedLBFGSOperator{T,M,V}) where {T,M,V}
view(op.intermediate_diagonal.diag, 1:op.k) .= inv.(view(op.Dₖ.diag, 1:op.k))

mul!(view(op.inverse_intermediate_1, 1:op.k, 1:op.k), view(op.intermediate_diagonal, 1:op.k, 1:op.k), transpose(view(op.Lₖ, 1:op.k, 1:op.k)))
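For context on the two lines above: step 4 of Algorithm 3.2 in the cited paper needs the lower triangular Cholesky factor Jₖ of the k×k matrix (with B₀ = αI here)

\alpha S_k^\top S_k + L_k D_k^{-1} L_k^\top = J_k J_k^\top,

which is what `inverse_cholesky` assembles: the code first forms `Dₖ⁻¹` in `intermediate_diagonal`, then the product `Dₖ⁻¹ Lₖᵀ` as a first intermediate term.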
@@ -161,7 +162,7 @@ function inverse_cholesky(op::CompressedLBFGS{T,M,V}) where {T,M,V}
end

# step 6, must be improved
- function precompile_iterated_structure!(op::CompressedLBFGS)
+ function precompile_iterated_structure!(op::CompressedLBFGSOperator)
Jₖ = inverse_cholesky(op)

# constant update
@@ -186,7 +187,7 @@ function precompile_iterated_structure!(op::CompressedLBFGS)
end

# Algorithm 3.2 (p15)
- function LinearAlgebra.mul!(Bv::V, op::CompressedLBFGS{T,M,V}, v::V) where {T,M,V<:AbstractVector{T}}
+ function LinearAlgebra.mul!(Bv::V, op::CompressedLBFGSOperator{T,M,V}, v::V) where {T,M,V<:AbstractVector{T}}
# steps 1-4 and 6 are mainly done by Base.push!
# step 5
mul!(view(op.sol, 1:op.k), transpose(view(op.Yₖ, :, 1:op.k)), v)
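Since `Base.Matrix` materializes the dense operator, a cheap consistency check of this `mul!` method against it could look like the following (a CPU test sketch, not part of the file):

using LinearAlgebra, Test
op = CompressedLBFGSOperator(10; mem = 3, gpu = false)
for _ in 1:5                     # push more pairs than mem to also exercise the circular branch
  push!(op, rand(10), rand(10))
end
v = rand(10)
Bv = zeros(10)
mul!(Bv, op, v)
@test Bv ≈ Matrix(op) * v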