quala 0.0.1a1
Quasi-Newton and other accelerators
decl/lbfgs.hpp
Go to the documentation of this file.
1#pragma once
2
4#include <quala/util/vec.hpp>
5
6namespace quala {
7
8/// Parameters for the @ref LBFGS class.
9struct LBFGSParams {
10 /// Length of the history to keep.
12 /// Reject update if @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
14 /// Reject update if @f$ s^\top s \le \text{min_abs_s} @f$.
16 /// Cautious BFGS update.
17 /// @see @ref cbfgs
18 struct CBFGSParams {
20 real_t ϵ = 0; ///< Set to zero to disable CBFGS check.
21 explicit operator bool() const { return ϵ > 0; }
22 };
23 /// Parameters in the cautious BFGS update condition
24 /// @f[ \frac{y^\top s}{s^\top s} \ge \epsilon \| g \|^\alpha @f]
25 /// @see https://epubs.siam.org/doi/10.1137/S1052623499354242
27 /// If set to true, the inverse Hessian estimate should remain definite,
28 /// i.e. a check is performed that rejects the update if
29 /// @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
30 /// If set to false, just try to prevent a singular Hessian by rejecting the
31 /// update if
32 /// @f$ \left| y^\top s \right| \le \text{min_div_fac} \cdot s^\top s @f$.
33 bool force_pos_def = true;
34};
35
36/// Layout:
37/// ~~~
38/// ┌───── 2 m ─────┐
39/// ┌ ┌───┬───┬───┬───┐
40/// │ │ │ │ │ │
41/// │ │ s │ y │ s │ y │
42/// n+1 │ │ │ │ │ │
43/// │ ├───┼───┼───┼───┤
44/// │ │ ρ │ α │ ρ │ α │
45/// └ └───┴───┴───┴───┘
46/// ~~~
48 /// Re-allocate storage for a problem with a different size.
50
51 /// Get the size of the s and y vectors in the buffer.
52 length_t n() const { return sto.rows() - 1; }
53 /// Get the number of previous vectors s and y stored in the buffer.
54 length_t history() const { return sto.cols() / 2; }
55
56 auto s(index_t i) { return sto.col(2 * i).topRows(n()); }
57 auto s(index_t i) const { return sto.col(2 * i).topRows(n()); }
58 auto y(index_t i) { return sto.col(2 * i + 1).topRows(n()); }
59 auto y(index_t i) const { return sto.col(2 * i + 1).topRows(n()); }
60 real_t &ρ(index_t i) { return sto.coeffRef(n(), 2 * i); }
61 const real_t &ρ(index_t i) const { return sto.coeff(n(), 2 * i); }
62 real_t &α(index_t i) { return sto.coeffRef(n(), 2 * i + 1); }
63 const real_t &α(index_t i) const { return sto.coeff(n(), 2 * i + 1); }
64
65 using storage_t =
66 Eigen::Matrix<real_t, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>;
68};
69
70/// Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm
71/// @ingroup accelerators-grp
72class LBFGS {
73 public:
75
76 /// The sign of the vectors @f$ p @f$ passed to the @ref LBFGS::update
77 /// method.
78 enum class Sign {
79 Positive, ///< @f$ p \sim \nabla \psi(x) @f$
80 Negative, ///< @f$ p \sim -\nabla \psi(x) @f$
81 };
82
85
86 /// Check if the new vectors s and y allow for a valid BFGS update that
87 /// preserves the positive definiteness of the Hessian approximation.
88 static bool update_valid(const LBFGSParams &params, real_t yᵀs, real_t sᵀs,
89 real_t pᵀp);
90
91 /// Update the inverse Hessian approximation using the new vectors
92 /// sₖ = xₖ₊₁ - xₖ and yₖ = pₖ₊₁ - pₖ.
93 template <class VecS, class VecY>
94 bool update_sy(const anymat<VecS> &s, const anymat<VecY> &y,
95 real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced = false);
96
97 /// Update the inverse Hessian approximation using the new vectors xₖ₊₁
98 /// and pₖ₊₁.
99 bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ,
100 Sign sign = Sign::Positive, bool forced = false);
101
102 /// Apply the inverse Hessian approximation to the given vector q.
103 /// Initial inverse Hessian approximation is set to @f$ H_0 = \gamma I @f$.
104 /// If @p γ is negative, @f$ H_0 = \frac{s^\top y}{y^\top y} I @f$.
105 bool apply(rvec q, real_t γ = -1);
106
107 /// Apply the inverse Hessian approximation to the given vector q, applying
108 /// only the columns and rows of the Hessian in the index set J.
109 template <class IndexVec>
110 bool apply(rvec q, real_t γ, const IndexVec &J);
111
112 /// Throw away the approximation and all previous vectors s and y.
113 void reset();
114 /// Re-allocate storage for a problem with a different size. Causes
115 /// a @ref reset.
116 void resize(length_t n);
117
118 /// Scale the stored y vectors by the given factor.
119 void scale_y(real_t factor);
120
121 /// Get the parameters.
122 const Params &get_params() const { return params; }
123
124 /// Get the size of the s and y vectors in the buffer.
125 length_t n() const { return sto.n(); }
126 /// Get the number of previous vectors s and y stored in the buffer.
127 length_t history() const { return sto.history(); }
128 /// Get the next index in the circular buffer of previous s and y vectors.
129 index_t succ(index_t i) const { return i + 1 < history() ? i + 1 : 0; }
130 /// Get the previous index in the circular buffer of s and y vectors.
131 index_t pred(index_t i) const { return i > 0 ? i - 1 : history() - 1; }
132 /// Get the number of previous s and y vectors currently stored in the
133 /// buffer.
134 length_t current_history() const { return full ? history() : idx; }
135
136 auto s(index_t i) { return sto.s(i); }
137 auto s(index_t i) const { return sto.s(i); }
138 auto y(index_t i) { return sto.y(i); }
139 auto y(index_t i) const { return sto.y(i); }
140 real_t &ρ(index_t i) { return sto.ρ(i); }
141 const real_t &ρ(index_t i) const { return sto.ρ(i); }
142 real_t &α(index_t i) { return sto.α(i); }
143 const real_t &α(index_t i) const { return sto.α(i); }
144
145 /// Iterate over the indices in the history buffer, oldest first.
146 template <class F>
147 void foreach_fwd(const F &fun) const {
148 if (full)
149 for (index_t i = idx; i < history(); ++i)
150 fun(i);
151 if (idx)
152 for (index_t i = 0; i < idx; ++i)
153 fun(i);
154 }
155
156 /// Iterate over the indices in the history buffer, newest first.
157 template <class F>
158 void foreach_rev(const F &fun) const {
159 if (idx)
160 for (index_t i = idx; i-- > 0;)
161 fun(i);
162 if (full)
163 for (index_t i = history(); i-- > idx;)
164 fun(i);
165 }
166
167 private:
170 bool full = false;
172};
173
174} // namespace quala
Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm.
Definition: decl/lbfgs.hpp:72
void foreach_rev(const F &fun) const
Iterate over the indices in the history buffer, newest first.
Definition: decl/lbfgs.hpp:158
static bool update_valid(const LBFGSParams &params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definitene...
Definition: lbfgs.hpp:9
auto s(index_t i)
Definition: decl/lbfgs.hpp:136
auto s(index_t i) const
Definition: decl/lbfgs.hpp:137
LBFGSStorage sto
Definition: decl/lbfgs.hpp:168
length_t current_history() const
Get the number of previous s and y vectors currently stored in the buffer.
Definition: decl/lbfgs.hpp:134
index_t succ(index_t i) const
Get the next index in the circular buffer of previous s and y vectors.
Definition: decl/lbfgs.hpp:129
index_t pred(index_t i) const
Get the previous index in the circular buffer of s and y vectors.
Definition: decl/lbfgs.hpp:131
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition: decl/lbfgs.hpp:125
LBFGS(Params params, length_t n)
Definition: decl/lbfgs.hpp:84
LBFGS(Params params)
Definition: decl/lbfgs.hpp:83
const real_t & ρ(index_t i) const
Definition: decl/lbfgs.hpp:141
const Params & get_params() const
Get the parameters.
Definition: decl/lbfgs.hpp:122
bool apply(rvec q, real_t γ=-1)
Apply the inverse Hessian approximation to the given vector q.
Definition: lbfgs.hpp:64
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: decl/lbfgs.hpp:127
void foreach_fwd(const F &fun) const
Iterate over the indices in the history buffer, oldest first.
Definition: decl/lbfgs.hpp:147
void resize(length_t n)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.hpp:185
real_t & α(index_t i)
Definition: decl/lbfgs.hpp:142
real_t & ρ(index_t i)
Definition: decl/lbfgs.hpp:140
void reset()
Throw away the approximation and all previous vectors s and y.
Definition: lbfgs.hpp:180
auto y(index_t i) const
Definition: decl/lbfgs.hpp:139
Sign
The sign of the vectors passed to the LBFGS::update method.
Definition: decl/lbfgs.hpp:78
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition: lbfgs.hpp:196
bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, Sign sign=Sign::Positive, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₖ₊₁ and pₖ₊₁.
Definition: lbfgs.hpp:56
auto y(index_t i)
Definition: decl/lbfgs.hpp:138
const real_t & α(index_t i) const
Definition: decl/lbfgs.hpp:143
bool update_sy(const anymat< VecS > &s, const anymat< VecY > &y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Update the inverse Hessian approximation using the new vectors sₖ = xₖ₊₁ - xₖ and yₖ = pₖ₊₁ - pₖ.
Definition: lbfgs.hpp:34
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
real_t min_abs_s
Reject update if .
Definition: decl/lbfgs.hpp:15
CBFGSParams cbfgs
Parameters in the cautious BFGS update condition.
Definition: decl/lbfgs.hpp:26
length_t memory
Length of the history to keep.
Definition: decl/lbfgs.hpp:11
index_t length_t
Default type for vector sizes.
Definition: vec.hpp:29
bool force_pos_def
If set to true, the inverse Hessian estimate should remain definite, i.e.
Definition: decl/lbfgs.hpp:33
Eigen::Index index_t
Default type for vector indices.
Definition: vec.hpp:27
double real_t
Default floating point type.
Definition: vec.hpp:8
Eigen::MatrixBase< Derived > anymat
Generic type for vector and matrix arguments.
Definition: vec.hpp:40
real_t min_div_fac
Reject update if .
Definition: decl/lbfgs.hpp:13
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
Parameters for the LBFGS class.
Definition: decl/lbfgs.hpp:9
Cautious BFGS update.
Definition: decl/lbfgs.hpp:18
real_t ϵ
Set to zero to disable CBFGS check.
Definition: decl/lbfgs.hpp:20
auto s(index_t i)
Definition: decl/lbfgs.hpp:56
auto s(index_t i) const
Definition: decl/lbfgs.hpp:57
void resize(length_t n, length_t history)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.hpp:192
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition: decl/lbfgs.hpp:52
const real_t & ρ(index_t i) const
Definition: decl/lbfgs.hpp:61
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: decl/lbfgs.hpp:54
Eigen::Matrix< real_t, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor > storage_t
Definition: decl/lbfgs.hpp:66
real_t & α(index_t i)
Definition: decl/lbfgs.hpp:62
real_t & ρ(index_t i)
Definition: decl/lbfgs.hpp:60
auto y(index_t i) const
Definition: decl/lbfgs.hpp:59
auto y(index_t i)
Definition: decl/lbfgs.hpp:58
const real_t & α(index_t i) const
Definition: decl/lbfgs.hpp:63