PANOC-ALM  quadratic-penalty
Nonconvex constrained optimization
lbfgs.hpp
Go to the documentation of this file.
1 #pragma once
2 
4 #include <type_traits>
5 
6 namespace pa {
7 
9  real_t pᵀp) {
10  // Smallest number we want to divide by without overflow
11  const real_t min_divisor = std::sqrt(std::numeric_limits<real_t>::min());
12 
13  // Check if this L-BFGS update is accepted
14  if (not std::isfinite(yᵀs))
15  return false;
16  if (yᵀs < min_divisor)
17  return false;
18  if (sᵀs < min_divisor)
19  return false;
20 
21  // CBFGS condition: https://epubs.siam.org/doi/10.1137/S1052623499354242
22  real_t α = params.cbfgs.α;
23  real_t ϵ = params.cbfgs.ϵ;
24  // Condition: yᵀs / sᵀs >= ϵ ‖p‖^α
25  bool cbfgs_cond = yᵀs / sᵀs >= ϵ * std::pow(pᵀp, α / 2);
26  if (not cbfgs_cond)
27  return false;
28 
29  return true;
30 }
31 
32 inline bool LBFGS::update(crvec xₖ, crvec xₖ₊₁, crvec pₖ,
33  crvec pₖ₊₁, Sign sign, bool forced) {
34  const auto s = xₖ₊₁ - xₖ;
35  const auto y = sign == Sign::Positive ? pₖ₊₁ - pₖ : pₖ - pₖ₊₁;
36  real_t yᵀs = y.dot(s);
37  real_t ρ = 1 / yᵀs;
38  if (not forced) {
39  real_t sᵀs = s.squaredNorm();
40  real_t pᵀp = params.cbfgs.ϵ > 0 ? pₖ₊₁.squaredNorm() : 0;
41  if (not update_valid(params, yᵀs, sᵀs, pᵀp))
42  return false;
43  }
44 
45  // Store the new s and y vectors
46  this->s(idx) = s;
47  this->y(idx) = y;
48  this->ρ(idx) = ρ;
49 
50  // Increment the index in the circular buffer
51  idx = succ(idx);
52  full |= idx == 0;
53 
54  return true;
55 }
56 
57 template <class Vec>
58 bool LBFGS::apply(Vec &&q, real_t γ) {
59  // Only apply if we have previous vectors s and y
60  if (idx == 0 && not full)
61  return false;
62 
63  // If the step size is negative, compute it as sᵀy/yᵀy
64  if (γ < 0) {
65  auto new_idx = idx > 0 ? idx - 1 : history() - 1;
66  real_t yᵀy = y(new_idx).squaredNorm();
67  γ = 1. / (ρ(new_idx) * yᵀy);
68  }
69 
70  auto update1 = [&](size_t i) {
71  α(i) = ρ(i) * (s(i).dot(q));
72  q -= α(i) * y(i);
73  };
74  if (idx)
75  for (size_t i = idx; i-- > 0;)
76  update1(i);
77  if (full)
78  for (size_t i = history(); i-- > idx;)
79  update1(i);
80 
81  // r ← H₀ q
82  q *= γ;
83 
84  auto update2 = [&](size_t i) {
85  real_t β = ρ(i) * (y(i).dot(q));
86  q += (α(i) - β) * s(i);
87  };
88  if (full)
89  for (size_t i = idx; i < history(); ++i)
90  update2(i);
91  for (size_t i = 0; i < idx; ++i)
92  update2(i);
93 
94  return true;
95 }
96 
97 template <class Vec, class IndexVec>
98 bool LBFGS::apply(Vec &&q, real_t γ, const IndexVec &J) {
99  // Only apply if we have previous vectors s and y
100  if (idx == 0 && not full)
101  return false;
102  using Index = typename std::remove_reference_t<Vec>::Index;
103  bool fullJ = q.size() == Index(J.size());
104 
105  // Eigen 3.3.9 doesn't yet support indexing using a vector of indices
106  // so we'll have to do it manually
107  // TODO: Abstract this away in an expression template / nullary expression?
108  // Or wait for Eigen update?
109 
110  // Dot product of two vectors, adding only the indices in set J
111  auto dotJ = [&J, fullJ](const auto &a, const auto &b) {
112  if (fullJ) {
113  return a.dot(b);
114  } else {
115  real_t acc = 0;
116  for (auto j : J)
117  acc += a(j) * b(j);
118  return acc;
119  }
120  };
121 
122  auto update1 = [&](size_t i) {
123  // Recompute ρ, it depends on the index set J. Note that even if ρ was
124  // positive for the full vectors s and y, that's not necessarily the
125  // case for the smaller vectors s(J) and y(J).
126  if (not fullJ)
127  ρ(i) = 1. / dotJ(s(i), y(i));
128 
129  if (ρ(i) <= 0) // Reject negative ρ to ensure positive definiteness
130  return;
131 
132  α(i) = ρ(i) * dotJ(s(i), q);
133  if (fullJ)
134  q -= α(i) * y(i);
135  else
136  for (auto j : J)
137  q(j) -= α(i) * y(i)(j);
138 
139  if (γ < 0) {
140  // Compute step size based on most recent yᵀs/yᵀy > 0
141  real_t yᵀy = dotJ(y(i), y(i));
142  γ = 1. / (ρ(i) * yᵀy);
143  }
144  };
145  if (idx)
146  for (size_t i = idx; i-- > 0;)
147  update1(i);
148  if (full)
149  for (size_t i = history(); i-- > idx;)
150  update1(i);
151 
152  // If all ρ <= 0, fail
153  if (γ < 0)
154  return false;
155 
156  // r ← H₀ q
157  if (fullJ)
158  q *= γ;
159  else
160  for (auto j : J)
161  q(j) *= γ;
162 
163  auto update2 = [&](size_t i) {
164  if (ρ(i) <= 0)
165  return;
166  real_t β = ρ(i) * dotJ(y(i), q);
167  if (fullJ)
168  q += (α(i) - β) * s(i);
169  else
170  for (auto j : J)
171  q(j) += (α(i) - β) * s(i)(j);
172  };
173  if (full)
174  for (size_t i = idx; i < history(); ++i)
175  update2(i);
176  for (size_t i = 0; i < idx; ++i)
177  update2(i);
178 
179  return true;
180 }
181 
182 inline void LBFGS::reset() {
183  idx = 0;
184  full = false;
185 }
186 
187 inline void LBFGS::resize(size_t n) {
188  sto.resize(n + 1, params.memory * 2);
189  reset();
190 }
191 
192 inline void LBFGS::scale_y(real_t factor) {
193  if (full) {
194  for (size_t i = 0; i < history(); ++i) {
195  y(i) *= factor;
196  ρ(i) *= 1. / factor;
197  }
198  } else {
199  for (size_t i = 0; i < idx; ++i) {
200  y(i) *= factor;
201  ρ(i) *= 1. / factor;
202  }
203  }
204 }
205 
207  crvec p₀, crvec grad₀) {
208  lbfgs.resize(x₀.size());
209  (void)x̂₀;
210  (void)p₀;
211  (void)grad₀;
212 }
213 
214 inline bool PANOCDirection<LBFGS>::update(crvec xₖ, crvec xₖ₊₁,
215  crvec pₖ, crvec pₖ₊₁,
216  crvec grad_new, const Box &C,
217  real_t γ_new) {
218  (void)grad_new;
219  (void)C;
220  (void)γ_new;
221  return lbfgs.update(xₖ, xₖ₊₁, pₖ, pₖ₊₁, LBFGS::Sign::Negative);
222 }
223 
224 inline bool PANOCDirection<LBFGS>::apply(crvec xₖ, crvec x̂ₖ,
225  crvec pₖ, real_t γ, rvec qₖ) {
226  (void)xₖ;
227  (void)x̂ₖ;
228  qₖ = pₖ;
229  return lbfgs.apply(qₖ, γ);
230 }
231 
232 inline void PANOCDirection<LBFGS>::changed_γ(real_t γₖ, real_t old_γₖ) {
233  if (lbfgs.get_params().rescale_when_γ_changes)
234  lbfgs.scale_y(γₖ / old_γₖ);
235  else
236  lbfgs.reset();
237 }
238 
239 inline void PANOCDirection<LBFGS>::reset() { lbfgs.reset(); }
240 
241 inline std::string PANOCDirection<LBFGS>::get_name() const {
242  return lbfgs.get_name();
243 }
244 
246  return lbfgs.get_params();
247 }
248 
249 } // namespace pa
pa::LBFGS::update_valid
static bool update_valid(LBFGSParams params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definitene...
Definition: lbfgs.hpp:8
pa::LBFGS::y
auto y(size_t i)
Definition: decl/lbfgs.hpp:84
pa::LBFGS::sto
storage_t sto
Definition: decl/lbfgs.hpp:94
pa::LBFGS::n
size_t n() const
Get the size of the s and y vectors in the buffer.
Definition: decl/lbfgs.hpp:76
pa::LBFGS::Sign::Negative
@ Negative
pa::PANOCDirection::apply
static bool apply(DirectionProviderT &dp, crvec xₖ, crvec x̂ₖ, crvec pₖ, real_t γ, rvec qₖ)=delete
Apply the direction estimation in the current point.
lbfgs.hpp
pa::LBFGS::idx
size_t idx
Definition: decl/lbfgs.hpp:95
pa::rvec
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
pa::LBFGS::ρ
real_t & ρ(size_t i)
Definition: decl/lbfgs.hpp:86
pa::LBFGS::α
real_t & α(size_t i)
Definition: decl/lbfgs.hpp:88
pa::LBFGSParams::cbfgs
struct pa::LBFGSParams::@0 cbfgs
Parameters in the cautious BFGS update condition.
pa
Definition: alm.hpp:10
pa::LBFGS::s
auto s(size_t i)
Definition: decl/lbfgs.hpp:82
pa::LBFGS::reset
void reset()
Throw away the approximation and all previous vectors s and y.
Definition: lbfgs.hpp:182
pa::PANOCDirection::initialize
static void initialize(DirectionProviderT &dp, crvec x₀, crvec x̂₀, crvec p₀, crvec grad₀)=delete
pa::Box
Definition: box.hpp:7
pa::LBFGS::update
bool update(crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, Sign sign, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₖ₊₁ and pₖ₊₁.
Definition: lbfgs.hpp:32
pa::LBFGS::succ
size_t succ(size_t i) const
Get the next index in the circular buffer of previous s and y vectors.
Definition: decl/lbfgs.hpp:80
pa::LBFGS::scale_y
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition: lbfgs.hpp:192
pa::LBFGS::resize
void resize(size_t n)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.hpp:187
pa::crvec
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
panocpy.test.params
params
Definition: test.py:217
pa::LBFGSParams
Parameters for the LBFGS and SpecializedLBFGS classes.
Definition: decl/lbfgs.hpp:12
pa::PANOCDirection::update
static bool update(DirectionProviderT &dp, crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, crvec gradₖ₊₁, const Box &C, real_t γₖ₊₁)=delete
main.b
b
Definition: main.py:11
panocpy.test.C
C
Definition: test.py:204
pa::LBFGS::params
Params params
Definition: decl/lbfgs.hpp:97
pa::PANOCDirection
Definition: panoc-direction-update.hpp:8
pa::LBFGS::history
size_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: decl/lbfgs.hpp:78
pa::LBFGSParams::memory
unsigned memory
Length of the history to keep.
Definition: decl/lbfgs.hpp:14
pa::PANOCDirection::changed_γ
static void changed_γ(DirectionProviderT &dp, real_t γₖ, real_t old_γₖ)=delete
pa::LBFGS::full
bool full
Definition: decl/lbfgs.hpp:96
panocpy.test.n
int n
Definition: test.py:38
pa::LBFGS::apply
bool apply(Vec &&q, real_t γ)
Apply the inverse Hessian approximation to the given vector q.
Definition: lbfgs.hpp:58
pa::LBFGS::Sign
Sign
The sign of the vectors passed to the LBFGS::update method.
Definition: decl/lbfgs.hpp:35
pa::real_t
double real_t
Default floating point type.
Definition: vec.hpp:8
pa::LBFGS::Sign::Positive
@ Positive