quala 0.0.1a1
Quasi-Newton and other accelerators
lbfgs.hpp
Go to the documentation of this file.
1#pragma once
2
4#include <stdexcept>
5#include <type_traits>
6
7namespace quala {
8
9inline bool LBFGS::update_valid(const LBFGSParams &params, real_t yᵀs,
10 real_t sᵀs, real_t pᵀp) {
11 // Check if this L-BFGS update is accepted
12 if (sᵀs <= params.min_abs_s)
13 return false;
14 if (not std::isfinite(yᵀs))
15 return false;
16 real_t a_yᵀs = params.force_pos_def ? yᵀs : std::abs(yᵀs);
17 if (a_yᵀs <= params.min_div_fac * sᵀs)
18 return false;
19
20 // CBFGS condition: https://epubs.siam.org/doi/10.1137/S1052623499354242
21 if (params.cbfgs) {
22 const real_t α = params.cbfgs.α;
23 const real_t ϵ = params.cbfgs.ϵ;
24 // Condition: yᵀs / sᵀs >= ϵ ‖p‖^α
25 bool cbfgs_cond = a_yᵀs >= sᵀs * ϵ * std::pow(pᵀp, α / 2);
26 if (not cbfgs_cond)
27 return false;
28 }
29
30 return true;
31}
32
33template <class VecS, class VecY>
34inline bool LBFGS::update_sy(const anymat<VecS> &s, const anymat<VecY> &y,
35 real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced) {
36 real_t yᵀs = y.dot(s);
37 real_t ρ = 1 / yᵀs;
38 if (not forced) {
39 real_t sᵀs = s.squaredNorm();
40 if (not update_valid(params, yᵀs, sᵀs, pₙₑₓₜᵀpₙₑₓₜ))
41 return false;
42 }
43
44 // Store the new s and y vectors
45 sto.s(idx) = s;
46 sto.y(idx) = y;
47 sto.ρ(idx) = ρ;
48
49 // Increment the index in the circular buffer
50 idx = succ(idx);
51 full |= idx == 0;
52
53 return true;
54}
55
56inline bool LBFGS::update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, Sign sign,
57 bool forced) {
58 const auto s = xₙₑₓₜ - xₖ;
59 const auto y = (sign == Sign::Positive) ? pₙₑₓₜ - pₖ : pₖ - pₙₑₓₜ;
60 real_t pₙₑₓₜᵀpₙₑₓₜ = params.cbfgs ? pₙₑₓₜ.squaredNorm() : 0;
61 return update_sy(s, y, pₙₑₓₜᵀpₙₑₓₜ, forced);
62}
63
64inline bool LBFGS::apply(rvec q, real_t γ) {
65 // Only apply if we have previous vectors s and y
66 if (idx == 0 && not full)
67 return false;
68
69 // If the step size is negative, compute it as sᵀy/yᵀy
70 if (γ < 0) {
71 auto new_idx = pred(idx);
72 real_t yᵀy = y(new_idx).squaredNorm();
73 γ = 1 / (ρ(new_idx) * yᵀy);
74 }
75
76 foreach_rev([&](index_t i) {
77 α(i) = ρ(i) * s(i).dot(q);
78 q -= α(i) * y(i);
79 });
80
81 // r ← H₀ q
82 q *= γ;
83
84 foreach_fwd([&](index_t i) {
85 real_t β = ρ(i) * y(i).dot(q);
86 q -= (β - α(i)) * s(i);
87 });
88
89 return true;
90}
91
92template <class IndexVec>
93bool LBFGS::apply(rvec q, real_t γ, const IndexVec &J) {
94 // Only apply if we have previous vectors s and y
95 if (idx == 0 && not full)
96 return false;
97 const bool fullJ = q.size() == static_cast<index_t>(J.size());
98
99 if (params.cbfgs)
100 throw std::invalid_argument("CBFGS check not supported when using "
101 "masked version of LBFGS::apply()");
102
103 // Eigen 3.3.9 doesn't yet support indexing using a vector of indices
104 // so we'll have to do it manually.
105 // TODO: Abstract this away in an expression template / nullary expression?
106 // Or wait for Eigen update?
107 // Update: Eigen 3.4's indexing seems significantly slower, so the manual
108 // for loops stay for now.
109
110 // Dot product of two vectors, adding only the indices in set J
111 const auto dotJ = [&J, fullJ](const auto &a, const auto &b) {
112 if (fullJ) {
113 return a.dot(b);
114 } else {
115 real_t acc = 0;
116 for (auto j : J)
117 acc += a(j) * b(j);
118 return acc;
119 }
120 };
121 // y -= a x, scaling and subtracting only the indices in set J
122 const auto axmyJ = [&J, fullJ](real_t a, const auto &x, auto &y) {
123 if (fullJ) {
124 y -= a * x;
125 } else {
126 for (auto j : J)
127 y(j) -= a * x(j);
128 }
129 };
130 // x *= a, scaling only the indices in set J
131 const auto scalJ = [&J, fullJ](real_t a, auto &x) {
132 if (fullJ) {
133 x *= a;
134 } else {
135 for (auto j : J)
136 x(j) *= a;
137 }
138 };
139
140 foreach_rev([&](index_t i) {
141 // Recompute ρ, it depends on the index set J. Note that even if ρ was
142 // positive for the full vectors s and y, that's not necessarily the
143 // case for the smaller vectors s(J) and y(J).
144 real_t yᵀs = dotJ(s(i), y(i));
145 real_t sᵀs = dotJ(s(i), s(i));
146 ρ(i) = 1 / yᵀs;
147 // Check if we should include this pair of vectors
148 if (not update_valid(params, yᵀs, sᵀs, 0)) {
149 ρ(i) = NaN;
150 return;
151 }
152
153 α(i) = ρ(i) * dotJ(s(i), q); // αᵢ = ρᵢ〈sᵢ, q〉
154 axmyJ(α(i), y(i), q); // q -= αᵢ yᵢ
155
156 if (γ < 0) {
157 // Compute step size based on most recent valid yᵀs/yᵀy
158 real_t yᵀy = dotJ(y(i), y(i));
159 γ = 1 / (ρ(i) * yᵀy);
160 }
161 });
162
163 // If all ρ == 0, fail
164 if (γ < 0)
165 return false;
166
167 // r ← H₀ q
168 scalJ(γ, q); // q *= γ
169
170 foreach_fwd([&](index_t i) {
171 if (std::isnan(ρ(i)))
172 return;
173 real_t β = ρ(i) * dotJ(y(i), q); // βᵢ = ρᵢ〈yᵢ, q〉
174 axmyJ(β - α(i), s(i), q); // q -= (βᵢ - αᵢ) sᵢ
175 });
176
177 return true;
178}
179
180inline void LBFGS::reset() {
181 idx = 0;
182 full = false;
183}
184
185inline void LBFGS::resize(length_t n) {
186 if (params.memory < 1)
187 throw std::invalid_argument("LBFGS::Params::memory must be >= 1");
189 reset();
190}
191
192inline void LBFGSStorage::resize(length_t n, length_t history) {
193 sto.resize(n + 1, history * 2);
194}
195
196inline void LBFGS::scale_y(real_t factor) {
197 if (full) {
198 for (index_t i = 0; i < history(); ++i) {
199 y(i) *= factor;
200 ρ(i) *= 1 / factor;
201 }
202 } else {
203 for (index_t i = 0; i < idx; ++i) {
204 y(i) *= factor;
205 ρ(i) *= 1 / factor;
206 }
207 }
208}
209
210} // namespace quala
void foreach_rev(const F &fun) const
Iterate over the indices in the history buffer, newest first.
Definition: decl/lbfgs.hpp:158
static bool update_valid(const LBFGSParams &params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definitene...
Definition: lbfgs.hpp:9
auto s(index_t i)
Definition: decl/lbfgs.hpp:136
LBFGSStorage sto
Definition: decl/lbfgs.hpp:168
index_t succ(index_t i) const
Get the next index in the circular buffer of previous s and y vectors.
Definition: decl/lbfgs.hpp:129
index_t pred(index_t i) const
Get the previous index in the circular buffer of s and y vectors.
Definition: decl/lbfgs.hpp:131
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition: decl/lbfgs.hpp:125
bool apply(rvec q, real_t γ=-1)
Apply the inverse Hessian approximation to the given vector q.
Definition: lbfgs.hpp:64
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: decl/lbfgs.hpp:127
void foreach_fwd(const F &fun) const
Iterate over the indices in the history buffer, oldest first.
Definition: decl/lbfgs.hpp:147
void resize(length_t n)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.hpp:185
real_t & α(index_t i)
Definition: decl/lbfgs.hpp:142
real_t & ρ(index_t i)
Definition: decl/lbfgs.hpp:140
void reset()
Throw away the approximation and all previous vectors s and y.
Definition: lbfgs.hpp:180
Sign
The sign of the vectors passed to the LBFGS::update method.
Definition: decl/lbfgs.hpp:78
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition: lbfgs.hpp:196
bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, Sign sign=Sign::Positive, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₖ₊₁ and pₖ₊₁.
Definition: lbfgs.hpp:56
auto y(index_t i)
Definition: decl/lbfgs.hpp:138
bool update_sy(const anymat< VecS > &s, const anymat< VecY > &y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Update the inverse Hessian approximation using the new vectors sₖ = xₖ₊₁ - xₖ and yₖ = pₖ₊₁ - pₖ.
Definition: lbfgs.hpp:34
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
real_t min_abs_s
Reject update if .
Definition: decl/lbfgs.hpp:15
CBFGSParams cbfgs
Parameters in the cautious BFGS update condition.
Definition: decl/lbfgs.hpp:26
constexpr real_t NaN
Not a number.
Definition: vec.hpp:45
length_t memory
Length of the history to keep.
Definition: decl/lbfgs.hpp:11
index_t length_t
Default type for vector sizes.
Definition: vec.hpp:29
bool force_pos_def
If set to true, the inverse Hessian estimate should remain definite, i.e.
Definition: decl/lbfgs.hpp:33
Eigen::Index index_t
Default type for vector indices.
Definition: vec.hpp:27
double real_t
Default floating point type.
Definition: vec.hpp:8
Eigen::MatrixBase< Derived > anymat
Generic type for vector and matrix arguments.
Definition: vec.hpp:40
real_t min_div_fac
Reject update if .
Definition: decl/lbfgs.hpp:13
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
Parameters for the LBFGS class.
Definition: decl/lbfgs.hpp:9
real_t ϵ
Set to zero to disable CBFGS check.
Definition: decl/lbfgs.hpp:20
auto s(index_t i)
Definition: decl/lbfgs.hpp:56
void resize(length_t n, length_t history)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.hpp:192
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition: decl/lbfgs.hpp:52
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: decl/lbfgs.hpp:54
real_t & ρ(index_t i)
Definition: decl/lbfgs.hpp:60
auto y(index_t i)
Definition: decl/lbfgs.hpp:58