@@ -36,7 +36,6 @@ In addition to this structures which are circurlarly update when `k` reaches `m`
36
36
- `inverse_intermediate_2`: a R²ᵏˣ²ᵏ matrix;
37
37
- `intermediary_vector`: a vector ∈ Rᵏ to store intermediate solutions;
38
38
- `sol`: a vector ∈ Rᵏ to store intermediate solutions;
39
- - `intermediate_structure_updated`: inform if the intermediate structures are up-to-date or not.
40
39
This implementation is designed to work either on CPU or GPU.
41
40
"""
42
41
mutable struct CompressedLBFGS{T, M<: AbstractMatrix{T} , V<: AbstractVector{T} }
@@ -50,54 +49,58 @@ mutable struct CompressedLBFGS{T, M<:AbstractMatrix{T}, V<:AbstractVector{T}}
50
49
Lₖ:: LowerTriangular{T,M} # m * m
51
50
52
51
chol_matrix:: M # 2m * 2m
52
+ intermediate_diagonal:: Diagonal{T,V} # m * m
53
53
intermediate_1:: UpperTriangular{T,M} # 2m * 2m
54
54
intermediate_2:: LowerTriangular{T,M} # 2m * 2m
55
55
inverse_intermediate_1:: UpperTriangular{T,M} # 2m * 2m
56
56
inverse_intermediate_2:: LowerTriangular{T,M} # 2m * 2m
57
57
intermediary_vector:: V # 2m
58
58
sol:: V # m
59
- intermediate_structure_updated:: Bool
60
59
end
61
60
62
61
default_gpu () = CUDA. functional () ? true : false
63
- default_matrix_type (gpu:: Bool , T:: DataType ) = gpu ? CuMatrix{T} : Matrix{T}
64
- default_vector_type (gpu:: Bool , T:: DataType ) = gpu ? CuVector{T} : Vector{T}
62
+ default_matrix_type (gpu:: Bool ; T:: DataType = Float64) = gpu ? CuMatrix{T} : Matrix{T}
63
+ default_vector_type (gpu:: Bool ; T:: DataType = Float64) = gpu ? CuVector{T} : Vector{T}
64
+
65
+ function columnshift! (A:: AbstractMatrix{T} ; direction:: Int = - 1 , indicemax:: Int = size (A)[1 ]) where T
66
+ map (i-> view (A,:,i+ direction) .= view (A,:,i), 1 - direction: indicemax)
67
+ return A
68
+ end
65
69
66
70
"""
67
71
CompressedLBFGS(n::Int; [T=Float64, m=5], gpu:Bool)
68
72
69
73
A implementation of a LBFGS operator (forward), representing a `nxn` linear application.
70
74
It considers at most `k` BFGS iterates, and fit the architecture depending if it is launched on a CPU or a GPU.
71
75
"""
72
- function CompressedLBFGS (n:: Int ; m:: Int = 5 , T= Float64, gpu= default_gpu (), M= default_matrix_type (gpu, T), V= default_vector_type (gpu, T))
76
+ function CompressedLBFGS (n:: Int ; m:: Int = 5 , T= Float64, gpu= default_gpu (), M= default_matrix_type (gpu; T), V= default_vector_type (gpu; T))
73
77
α = (T)(1 )
74
78
k = 0
75
79
Sₖ = M (undef, n, m)
76
80
Yₖ = M (undef, n, m)
77
81
Dₖ = Diagonal (V (undef, m))
78
82
Lₖ = LowerTriangular (M (undef, m, m))
83
+ Lₖ .= (T)(0 )
79
84
80
85
chol_matrix = M (undef, m, m)
86
+ intermediate_diagonal = Diagonal (V (undef, m))
81
87
intermediate_1 = UpperTriangular (M (undef, 2 * m, 2 * m))
82
88
intermediate_2 = LowerTriangular (M (undef, 2 * m, 2 * m))
83
89
inverse_intermediate_1 = UpperTriangular (M (undef, 2 * m, 2 * m))
84
90
inverse_intermediate_2 = LowerTriangular (M (undef, 2 * m, 2 * m))
85
91
intermediary_vector = V (undef, 2 * m)
86
92
sol = V (undef, 2 * m)
87
- intermediate_structure_updated = false
88
- return CompressedLBFGS {T,M,V} (m, n, k, α, Sₖ, Yₖ, Dₖ, Lₖ, chol_matrix, intermediate_1, intermediate_2, inverse_intermediate_1, inverse_intermediate_2, intermediary_vector, sol, intermediate_structure_updated)
93
+ return CompressedLBFGS {T,M,V} (m, n, k, α, Sₖ, Yₖ, Dₖ, Lₖ, chol_matrix, intermediate_diagonal, intermediate_1, intermediate_2, inverse_intermediate_1, inverse_intermediate_2, intermediary_vector, sol)
89
94
end
90
95
91
96
function Base. push! (op:: CompressedLBFGS{T,M,V} , s:: V , y:: V ) where {T,M,V<: AbstractVector{T} }
92
97
if op. k < op. m # still some place in the structures
93
98
op. k += 1
94
- op. Sₖ[:, op. k] .= s
95
- op. Yₖ[:, op. k] .= y
96
- op. Dₖ. diag[op. k] = dot (s, y)
97
- op. Lₖ. data[op. k, op. k] = 0
98
- for i in 1 : op. k- 1
99
- op. Lₖ. data[op. k, i] = dot (op. Sₖ[:, op. k], op. Yₖ[:, i])
100
- end
99
+ view (op. Sₖ, :, op. k) .= s
100
+ view (op. Yₖ, :, op. k) .= y
101
+ view (op. Dₖ. diag, op. k) .= dot (s, y)
102
+ mul! (view (op. Lₖ. data, op. k, 1 : op. k- 1 ), transpose (view (op. Yₖ, :, 1 : op. k- 1 )), view (op. Sₖ, :, op. k) )
103
+
101
104
else # k == m update circurlarly the intermediary structures
102
105
op. Sₖ .= circshift (op. Sₖ, (0 , - 1 ))
103
106
op. Yₖ .= circshift (op. Yₖ, (0 , - 1 ))
@@ -109,13 +112,16 @@ function Base.push!(op::CompressedLBFGS{T,M,V}, s::V, y::V) where {T,M,V<:Abstra
109
112
# for the time being, reinstantiate completely the Lₖ matrix
110
113
for j in 1 : op. k
111
114
for i in 1 : j- 1
112
- op. Lₖ. data[j, i] = dot (op. Sₖ[ :, j], op. Yₖ[ :, i] )
115
+ op. Lₖ. data[j, i] = dot (view ( op. Sₖ, :, j), view ( op. Yₖ, :, i) )
113
116
end
114
117
end
115
118
end
119
+
120
+ # step 4 and 6
121
+ precompile_iterated_structure! (op)
122
+
116
123
# secant equation fails if uncommented
117
124
# op.α = dot(y,s)/dot(s,s)
118
- op. intermediate_structure_updated = false
119
125
return op
120
126
end
121
127
141
147
142
148
# Algorithm 3.2 (p15)
143
149
# step 4, Jₖ is computed only if needed
144
- function inverse_cholesky (op:: CompressedLBFGS )
145
- view (op. chol_matrix, 1 : op. k, 1 : op. k) .= op. α .* (transpose (view (op. Sₖ, :, 1 : op. k)) * view (op. Sₖ, :, 1 : op. k)) .+ view (op. Lₖ, 1 : op. k, 1 : op. k) * inv (Diagonal (op. Dₖ[1 : op. k, 1 : op. k])) * transpose (view (op. Lₖ, 1 : op. k, 1 : op. k))
150
+ function inverse_cholesky (op:: CompressedLBFGS{T,M,V} ) where {T,M,V}
151
+ view (op. intermediate_diagonal. diag, 1 : op. k) .= inv .(view (op. Dₖ. diag, 1 : op. k))
152
+
153
+ # view(op.Lₖ, 1:op.k, 1:op.k) * inv(Diagonal(op.Dₖ[1:op.k, 1:op.k])) * transpose(view(op.Lₖ, 1:op.k, 1:op.k))
154
+ mul! (view (op. inverse_intermediate_1, 1 : op. k, 1 : op. k), view (op. intermediate_diagonal, 1 : op. k, 1 : op. k), transpose (view (op. Lₖ, 1 : op. k, 1 : op. k)))
155
+ mul! (view (op. chol_matrix, 1 : op. k, 1 : op. k), view (op. Lₖ, 1 : op. k, 1 : op. k), view (op. inverse_intermediate_1, 1 : op. k, 1 : op. k))
156
+
157
+ # view(op.chol_matrix, 1:op.k, 1:op.k) .= op.α .* (transpose(view(op.Sₖ, :, 1:op.k)) * view(op.Sₖ, :, 1:op.k))
158
+ mul! (view (op. chol_matrix, 1 : op. k, 1 : op. k), transpose (view (op. Sₖ, :, 1 : op. k)), view (op. Sₖ, :, 1 : op. k), op. α, (T)(1 ))
159
+
160
+ # view(op.chol_matrix, 1:op.k, 1:op.k) .= op.α .* (transpose(view(op.Sₖ, :, 1:op.k)) * view(op.Sₖ, :, 1:op.k)) .+ view(op.Lₖ, 1:op.k, 1:op.k) * inv(Diagonal(op.Dₖ[1:op.k, 1:op.k])) * transpose(view(op.Lₖ, 1:op.k, 1:op.k))
161
+
146
162
cholesky! (Symmetric (view (op. chol_matrix, 1 : op. k, 1 : op. k)))
147
163
Jₖ = transpose (UpperTriangular (view (op. chol_matrix, 1 : op. k, 1 : op. k)))
148
164
return Jₖ
@@ -152,29 +168,32 @@ end
152
168
function precompile_iterated_structure! (op:: CompressedLBFGS )
153
169
Jₖ = inverse_cholesky (op)
154
170
155
- view (op. intermediate_1, 1 : op. k,1 : op. k) .= .- view (op. Dₖ, 1 : op. k, 1 : op. k)^ (1 / 2 )
156
- view (op. intermediate_1, 1 : op. k,op. k+ 1 : 2 * op. k) .= view (op. Dₖ, 1 : op. k, 1 : op. k)^ (- 1 / 2 ) * transpose (view (op. Lₖ, 1 : op. k, 1 : op. k))
171
+ # constant update
157
172
view (op. intermediate_1, op. k+ 1 : 2 * op. k, 1 : op. k) .= 0
158
- view (op. intermediate_1, op. k+ 1 : 2 * op. k, op. k+ 1 : 2 * op. k) .= transpose (Jₖ)
159
-
160
- view (op. intermediate_2, 1 : op. k, 1 : op. k) .= view (op. Dₖ, 1 : op. k, 1 : op. k)^ (1 / 2 )
161
173
view (op. intermediate_2, 1 : op. k, op. k+ 1 : 2 * op. k) .= 0
162
- view (op. intermediate_2 , op. k+ 1 : 2 * op. k, 1 : op. k) . = .- view (op . Lₖ, 1 : op. k, 1 : op . k) * view (op . Dₖ, 1 : op . k, 1 : op . k) ^ ( - 1 / 2 )
174
+ view (op. intermediate_1 , op. k+ 1 : 2 * op. k, op. k+ 1 : 2 * op. k) . = transpose (Jₖ )
163
175
view (op. intermediate_2, op. k+ 1 : 2 * op. k, op. k+ 1 : 2 * op. k) .= Jₖ
164
176
177
+ # updates related to D^(1/2)
178
+ view (op. intermediate_diagonal. diag, 1 : op. k) .= sqrt .(view (op. Dₖ. diag, 1 : op. k))
179
+ view (op. intermediate_1, 1 : op. k,1 : op. k) .= .- view (op. intermediate_diagonal, 1 : op. k, 1 : op. k)
180
+ view (op. intermediate_2, 1 : op. k, 1 : op. k) .= view (op. intermediate_diagonal, 1 : op. k, 1 : op. k)
181
+
182
+ # updates related to D^(-1/2)
183
+ view (op. intermediate_diagonal. diag, 1 : op. k) .= (x -> 1 / sqrt (x)). (view (op. Dₖ. diag, 1 : op. k))
184
+ mul! (view (op. intermediate_1, 1 : op. k,op. k+ 1 : 2 * op. k), view (op. intermediate_diagonal, 1 : op. k, 1 : op. k), transpose (view (op. Lₖ, 1 : op. k, 1 : op. k)))
185
+ # view(op.intermediate_1, 1:op.k,op.k+1:2*op.k) .= view(op.Dₖ, 1:op.k, 1:op.k)^(-1/2) * transpose(view(op.Lₖ, 1:op.k, 1:op.k))
186
+ mul! (view (op. intermediate_2, op. k+ 1 : 2 * op. k, 1 : op. k), view (op. Lₖ, 1 : op. k, 1 : op. k), view (op. intermediate_diagonal, 1 : op. k, 1 : op. k))
187
+ view (op. intermediate_2, op. k+ 1 : 2 * op. k, 1 : op. k) .= view (op. intermediate_2, op. k+ 1 : 2 * op. k, 1 : op. k) .* - 1
188
+ # view(op.intermediate_2, op.k+1:2*op.k, 1:op.k) .= .- view(op.Lₖ, 1:op.k, 1:op.k) * view(op.Dₖ, 1:op.k, 1:op.k)^(-1/2)
189
+
165
190
view (op. inverse_intermediate_1, 1 : 2 * op. k, 1 : 2 * op. k) .= inv (op. intermediate_1[1 : 2 * op. k, 1 : 2 * op. k])
166
191
view (op. inverse_intermediate_2, 1 : 2 * op. k, 1 : 2 * op. k) .= inv (op. intermediate_2[1 : 2 * op. k, 1 : 2 * op. k])
167
-
168
- op. intermediate_structure_updated = true
169
192
end
170
193
171
194
# Algorithm 3.2 (p15)
172
195
function LinearAlgebra. mul! (Bv:: V , op:: CompressedLBFGS{T,M,V} , v:: V ) where {T,M,V<: AbstractVector{T} }
173
- # step 1-3 mainly done by Base.push!
174
-
175
- # steps 4 and 6, in case the intermediary structures required are not up to date
176
- (! op. intermediate_structure_updated) && (precompile_iterated_structure! (op))
177
-
196
+ # step 1-4 and 6 mainly done by Base.push!
178
197
# step 5
179
198
mul! (view (op. sol, 1 : op. k), transpose (view (op. Yₖ, :, 1 : op. k)), v)
180
199
mul! (view (op. sol, op. k+ 1 : 2 * op. k), transpose (view (op. Sₖ, :, 1 : op. k)), v)
0 commit comments