PANOC-ALM  quadratic-penalty
Nonconvex constrained optimization
standalone/panoc.hpp
Go to the documentation of this file.
1 #pragma once
2 
7 #include <panoc-alm/util/box.hpp>
8 
9 #include <iomanip>
10 #include <iostream>
11 #include <stdexcept>
12 
13 namespace pa {
14 
15 namespace detail {
16 
19 template <class ObjFunT, class ObjGradFunT>
22  const ObjFunT &ψ,
24  const ObjGradFunT &grad_ψ,
26  crvec xₖ,
28  real_t ε,
30  real_t δ,
32  real_t L_min,
34  real_t L_max,
36  real_t &ψₖ,
38  rvec grad_ψₖ,
40  vec_allocator &alloc) {
41 
42  auto h = (xₖ * ε).cwiseAbs().cwiseMax(δ);
43  auto xh = alloc.alloc() = xₖ + h;
44  real_t norm_h = h.norm();
45  auto grad_ψxh = alloc.alloc();
46  // Calculate ∇ψ(x₀ + h)
47  grad_ψ(xh, grad_ψxh);
48  // Calculate ψ(xₖ), ∇ψ(x₀)
49  ψₖ = ψ(xₖ);
50  grad_ψ(xₖ, grad_ψₖ);
51 
52  // Estimate Lipschitz constant using finite differences
53  real_t L = (grad_ψxh - grad_ψₖ).norm() / norm_h;
54  alloc.free(xh, grad_ψxh);
55  return std::clamp(L, L_min, L_max);
56 }
57 
58 inline void calc_x̂(real_t γ,
59  crvec x,
60  const Box &C,
61  crvec grad_ψ,
62  rvec x̂,
63  rvec p
64 ) {
65  p = projected_gradient_step(C, γ, x, grad_ψ);
66  x̂ = x + p;
67 }
68 
81 template <class ObjFunT>
84  const ObjFunT &ψ,
86  const Box &C,
92  real_t rounding_tolerance,
96  real_t L_max,
98  crvec xₖ,
100  real_t ψₖ,
102  crvec grad_ψₖ,
104  rvec x̂ₖ,
106  rvec pₖ,
108  real_t &ψx̂ₖ,
110  real_t &norm_sq_pₖ,
112  real_t &grad_ψₖᵀpₖ,
114  real_t &Lₖ,
116  real_t &γₖ) {
117 
118  real_t old_γₖ = γₖ;
119  real_t margin = (1 + std::abs(ψₖ)) * rounding_tolerance;
120  while (ψx̂ₖ - ψₖ > grad_ψₖᵀpₖ + 0.5 * Lₖ * norm_sq_pₖ + margin) {
121  if (not(Lₖ * 2 <= L_max))
122  break;
123 
124  Lₖ *= 2;
125  γₖ /= 2;
126 
127  // Calculate x̂ₖ and pₖ (with new step size)
128  calc_x̂(γₖ, xₖ, C, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
129  // Calculate ∇ψ(xₖ)ᵀpₖ and ‖pₖ‖²
130  grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
131  norm_sq_pₖ = pₖ.squaredNorm();
132 
133  // Calculate ψ(x̂ₖ)
134  ψx̂ₖ = ψ(x̂ₖ);
135  }
136  return old_γₖ;
137 }
138 
139 inline void print_progress(unsigned k, real_t ψₖ, crvec grad_ψₖ, real_t pₖᵀpₖ,
140  real_t γₖ, real_t εₖ) {
141  std::cout << "[PANOC] " << std::setw(6) << k //
142  << ": ψ = " << std::setw(13) << ψₖ //
143  << ", ‖∇ψ‖ = " << std::setw(13) << grad_ψₖ.norm() //
144  << ", ‖p‖ = " << std::setw(13) << std::sqrt(pₖᵀpₖ) //
145  << ", γ = " << std::setw(13) << γₖ //
146  << ", εₖ = " << std::setw(13) << εₖ << "\r\n";
147 };
148 
149 } // namespace detail
150 
151 using std::chrono::duration_cast;
152 using std::chrono::microseconds;
153 
154 constexpr size_t panoc_min_alloc_size = 10;
155 
156 template <class ObjFunT, class ObjGradFunT, class DirectionT>
157 inline PANOCStats panoc_impl(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C,
158  rvec x, real_t ε, const PANOCParams &params,
159  vec_allocator &alloc,
160  DirectionT &direction_provider) {
161  auto start_time = std::chrono::steady_clock::now();
162  PANOCStats s;
163 
164  // Keep track of how many successive iterations didn't update the iterate
165  unsigned no_progress = 0;
166 
167  // Future parameters?
168  AtomicStopSignal stop_signal;
169 
170  // Estimate Lipschitz constant ---------------------------------------------
171 
172  real_t ψₖ, Lₖ;
173  auto grad_ψₖ = alloc.alloc();
174  auto xₖ = alloc.alloc() = x;
175  // Finite difference approximation of ∇²ψ in starting point
176  if (params.Lipschitz.L₀ <= 0) {
178  ψ, grad_ψ, xₖ, params.Lipschitz.ε, params.Lipschitz.δ, params.L_min,
179  params.L_max, ψₖ, grad_ψₖ, alloc);
180  }
181  // Initial Lipschitz constant provided by the user
182  else {
183  Lₖ = params.Lipschitz.L₀;
184  // Calculate ψ(xₖ), ∇ψ(x₀)
185  ψₖ = ψ(xₖ);
186  grad_ψ(xₖ, grad_ψₖ);
187  }
188  if (not std::isfinite(Lₖ)) {
189  s.status = SolverStatus::NotFinite;
190  return s;
191  }
192  real_t γₖ = params.Lipschitz.Lγ_factor / Lₖ;
193  real_t τ = NaN;
194 
195  // First projected gradient step -------------------------------------------
196 
197  auto x̂ₖ = alloc.alloc(), pₖ = alloc.alloc(), grad_̂ψₖ = alloc.alloc();
198  auto xₖ₊₁ = alloc.alloc(), qₖ = alloc.alloc(), grad_ψₖ₊₁ = alloc.alloc();
199  auto x̂ₖ₊₁ = alloc.alloc(), pₖ₊₁ = alloc.alloc();
200 
201  // Calculate x̂₀, p₀ (projected gradient step)
202  detail::calc_x̂(γₖ, xₖ, C, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
203  // Calculate ψ(x̂ₖ)
204  real_t ψx̂ₖ = ψ(x̂ₖ);
205  real_t grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
206  real_t pₖᵀpₖ = pₖ.squaredNorm();
207  // Compute forward-backward envelope
208  real_t φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
209 
210  // Main PANOC loop
211  // =========================================================================
212  for (unsigned k = 0; k <= params.max_iter; ++k) {
213 
214  // Quadratic upper bound -----------------------------------------------
215  if (k == 0 || params.update_lipschitz_in_linesearch == false) {
216  // Decrease step size until quadratic upper bound is satisfied
217  real_t old_γₖ = detail::descent_lemma(
218  ψ, C, params.quadratic_upperbound_tolerance_factor,
219  params.L_max, xₖ, ψₖ, grad_ψₖ,
220  /* in ⟹ out */ x̂ₖ, pₖ,
221  /* inout */ ψx̂ₖ, pₖᵀpₖ, grad_ψₖᵀpₖ, Lₖ, γₖ);
222  if (k > 0 && γₖ != old_γₖ) // Flush L-BFGS if γ changed
223  direction_provider.changed_γ(γₖ, old_γₖ);
224  else if (k == 0) // Initialize L-BFGS
225  direction_provider.initialize(xₖ, x̂ₖ, pₖ, grad_ψₖ);
226  if (γₖ != old_γₖ)
227  φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
228  }
229  // Calculate ∇ψ(x̂ₖ)
230  grad_ψ(x̂ₖ, grad_̂ψₖ);
231  // Check stop condition ------------------------------------------------
232  real_t εₖ = detail::calc_error_stop_crit(params.stop_crit, pₖ, γₖ, xₖ,
233  grad_̂ψₖ, grad_ψₖ, C);
234 
235  // Print progress
236  if (params.print_interval != 0 && k % params.print_interval == 0)
237  detail::print_progress(k, ψₖ, grad_ψₖ, pₖᵀpₖ, γₖ, εₖ);
238 
239  auto time_elapsed = std::chrono::steady_clock::now() - start_time;
240  auto stop_status = detail::check_all_stop_conditions(
241  params, time_elapsed, k, stop_signal, ε, εₖ, no_progress);
242  if (stop_status != SolverStatus::Unknown) {
243  x = std::move(x̂ₖ);
244  s.iterations = k;
245  s.ε = εₖ;
246  s.elapsed_time = duration_cast<microseconds>(time_elapsed);
247  s.status = stop_status;
248  alloc.free(grad_ψₖ, xₖ, x̂ₖ, pₖ, grad_̂ψₖ, xₖ₊₁, qₖ, grad_ψₖ₊₁, x̂ₖ₊₁,
249  pₖ₊₁);
250  return s;
251  }
252 
253  // Calculate quasi-Newton step -----------------------------------------
254  real_t step_size = -1;
255  if (params.lbfgs_stepsize == LBFGSStepSize::BasedOnGradientStepSize)
256  step_size = 1;
257  if (k > 0)
258  direction_provider.apply(xₖ, x̂ₖ, pₖ, step_size,
259  /* in ⟹ out */ qₖ);
260 
261  // Line search initialization ------------------------------------------
262  τ = 1;
263  real_t σₖγₖ⁻¹pₖᵀpₖ = (1 - γₖ * Lₖ) * pₖᵀpₖ / (2 * γₖ);
264  real_t φₖ₊₁, ψₖ₊₁, ψx̂ₖ₊₁, grad_ψₖ₊₁ᵀpₖ₊₁, pₖ₊₁ᵀpₖ₊₁;
265  real_t Lₖ₊₁, γₖ₊₁;
266  real_t ls_cond;
267  // TODO: make separate parameter
268  real_t margin =
269  (1 + std::abs(φₖ)) * params.quadratic_upperbound_tolerance_factor;
270 
271  // Make sure quasi-Newton step is valid
272  if (k == 0) {
273  τ = 0; // Always use prox step on first iteration
274  } else if (not qₖ.allFinite()) {
275  τ = 0;
276  ++s.lbfgs_failures;
277  direction_provider.reset(); // Is there anything else we can do?
278  }
279 
280  // Line search loop ----------------------------------------------------
281  do {
282  Lₖ₊₁ = Lₖ;
283  γₖ₊₁ = γₖ;
284 
285  // Calculate xₖ₊₁
286  if (τ / 2 < params.τ_min) { // line search failed
287  xₖ₊₁.swap(x̂ₖ); // → safe prox step
288  ψₖ₊₁ = ψx̂ₖ;
289  grad_ψₖ₊₁.swap(grad_̂ψₖ);
290  } else { // line search didn't fail (yet)
291  if (τ == 1) // → faster quasi-Newton step
292  xₖ₊₁ = xₖ + qₖ;
293  else
294  xₖ₊₁ = xₖ + (1 - τ) * pₖ + τ * qₖ;
295  // Calculate ψ(xₖ₊₁), ∇ψ(xₖ₊₁)
296  ψₖ₊₁ = ψ(xₖ₊₁);
297  grad_ψ(xₖ₊₁, grad_ψₖ₊₁);
298  }
299 
300  // Calculate x̂ₖ₊₁, pₖ₊₁ (projected gradient step in xₖ₊₁)
301  detail::calc_x̂(γₖ₊₁, xₖ₊₁, C, grad_ψₖ₊₁, /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁);
302  // Calculate ψ(x̂ₖ₊₁)
303  ψx̂ₖ₊₁ = ψ(x̂ₖ₊₁);
304 
305  // Quadratic upper bound -------------------------------------------
306  grad_ψₖ₊₁ᵀpₖ₊₁ = grad_ψₖ₊₁.dot(pₖ₊₁);
307  pₖ₊₁ᵀpₖ₊₁ = pₖ₊₁.squaredNorm();
308  real_t pₖ₊₁ᵀpₖ₊₁_ₖ = pₖ₊₁ᵀpₖ₊₁; // prox step with step size γₖ
309 
310  if (params.update_lipschitz_in_linesearch == true) {
311  // Decrease step size until quadratic upper bound is satisfied
312  (void)detail::descent_lemma(
313  ψ, C, params.quadratic_upperbound_tolerance_factor,
314  params.L_max, xₖ₊₁, ψₖ₊₁, grad_ψₖ₊₁,
315  /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁,
316  /* inout */ ψx̂ₖ₊₁, pₖ₊₁ᵀpₖ₊₁, grad_ψₖ₊₁ᵀpₖ₊₁, Lₖ₊₁, γₖ₊₁);
317  }
318 
319  // Compute forward-backward envelope
320  φₖ₊₁ = ψₖ₊₁ + 1 / (2 * γₖ₊₁) * pₖ₊₁ᵀpₖ₊₁ + grad_ψₖ₊₁ᵀpₖ₊₁;
321  // Compute line search condition
322  ls_cond = φₖ₊₁ - (φₖ - σₖγₖ⁻¹pₖᵀpₖ);
323  if (params.alternative_linesearch_cond)
324  ls_cond -= (0.5 / γₖ₊₁ - 0.5 / γₖ) * pₖ₊₁ᵀpₖ₊₁_ₖ;
325 
326  τ /= 2;
327  } while (ls_cond > margin && τ >= params.τ_min);
328 
329  // If τ < τ_min the line search failed and we accepted the prox step
330  if (τ < params.τ_min && k != 0) {
332  τ = 0;
333  }
334  if (k != 0) {
335  s.count_τ += 1;
336  s.sum_τ += τ * 2;
337  s.τ_1_accepted += τ * 2 == 1;
338  }
339 
340  // Update L-BFGS -------------------------------------------------------
341  if (γₖ != γₖ₊₁) // Flush L-BFGS if γ changed
342  direction_provider.changed_γ(γₖ₊₁, γₖ);
343 
344  s.lbfgs_rejected += not direction_provider.update(xₖ, xₖ₊₁, pₖ, pₖ₊₁,
345  grad_ψₖ₊₁, C, γₖ₊₁);
346 
347  // Check if we made any progress
348  if (no_progress > 0 || k % params.max_no_progress == 0)
349  no_progress = xₖ == xₖ₊₁ ? no_progress + 1 : 0;
350 
351  // Advance step --------------------------------------------------------
352  Lₖ = Lₖ₊₁;
353  γₖ = γₖ₊₁;
354 
355  ψₖ = ψₖ₊₁;
356  ψx̂ₖ = ψx̂ₖ₊₁;
357  φₖ = φₖ₊₁;
358 
359  xₖ.swap(xₖ₊₁);
360  x̂ₖ.swap(x̂ₖ₊₁);
361  pₖ.swap(pₖ₊₁);
362  grad_ψₖ.swap(grad_ψₖ₊₁);
363  grad_ψₖᵀpₖ = grad_ψₖ₊₁ᵀpₖ₊₁;
364  pₖᵀpₖ = pₖ₊₁ᵀpₖ₊₁;
365  }
366  throw std::logic_error("[PANOC] loop error");
367 }
368 
369 template <class DirectionProviderT = LBFGS, class ObjFunT, class ObjGradFunT>
370 PANOCStats panoc(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C, rvec x,
371  real_t ε, const PANOCParams &params,
373  vec_allocator &alloc) {
374  if (alloc.size() < panoc_min_alloc_size)
375  throw std::logic_error("Allocator too small, should be at least " +
376  std::to_string(panoc_min_alloc_size));
377  const auto n = x.size();
378  if (alloc.vector_size() != n)
379  throw std::logic_error("Allocator size mismatch");
380 
381  return panoc_impl(ψ, grad_ψ, C, x, ε, params, alloc, direction);
382 }
383 
384 template <class DirectionProviderT = LBFGS, class ObjFunT, class ObjGradFunT>
385 PANOCStats panoc(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C, rvec x,
386  real_t ε, const PANOCParams &params,
388  vec_allocator alloc{panoc_min_alloc_size, x.size()};
389  return panoc_impl(ψ, grad_ψ, C, x, ε, params, alloc, direction);
390 }
391 
392 } // namespace pa
pa::PANOCStats::τ_1_accepted
unsigned τ_1_accepted
Definition: inner/decl/panoc.hpp:59
pa::vec_allocator
Definition: alloc.hpp:13
pa::detail::print_progress
void print_progress(unsigned k, real_t ψₖ, crvec grad_ψₖ, real_t pₖᵀpₖ, real_t γₖ, real_t εₖ)
Definition: standalone/panoc.hpp:139
pa::AtomicStopSignal
Definition: atomic_stop_signal.hpp:7
pa::detail::calc_x̂
void calc_x̂(const ProblemT &prob, real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p)
Definition: panoc-helpers.hpp:271
pa::rvec
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
pa::NaN
constexpr real_t NaN
Not a number.
Definition: vec.hpp:28
pa::vec_allocator::alloc
auto alloc()
Definition: alloc.hpp:83
pa::PANOCStats::iterations
unsigned iterations
Definition: inner/decl/panoc.hpp:55
atomic_stop_signal.hpp
panocpy.test.ε
int ε
Definition: test.py:71
box.hpp
pa::vec_allocator::vector_size
Eigen::Index vector_size() const
Definition: alloc.hpp:112
pa::PANOCStats::count_τ
unsigned count_τ
Definition: inner/decl/panoc.hpp:60
pa::panoc_impl
PANOCStats panoc_impl(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C, rvec x, real_t ε, const PANOCParams &params, vec_allocator &alloc, DirectionT &direction_provider)
Definition: standalone/panoc.hpp:157
pa::panoc
PANOCStats panoc(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C, rvec x, real_t ε, const PANOCParams &params, PANOCDirection< DirectionProviderT > direction, vec_allocator &alloc)
Definition: standalone/panoc.hpp:370
pa::detail::projected_gradient_step
auto projected_gradient_step(const Box &C, real_t γ, crvec x, crvec grad_ψ)
Projected gradient step.
Definition: panoc-helpers.hpp:259
pa::PANOCStats::status
SolverStatus status
Definition: inner/decl/panoc.hpp:52
pa::PANOCParams
Tuning parameters for the PANOC algorithm.
Definition: inner/decl/panoc.hpp:20
pa
Definition: alm.hpp:10
panocpy.test.x
x
Definition: test.py:40
pa::PANOCStats::lbfgs_rejected
unsigned lbfgs_rejected
Definition: inner/decl/panoc.hpp:58
pa::detail::check_all_stop_conditions
SolverStatus check_all_stop_conditions(const ParamsT &params, DurationT time_elapsed, unsigned iteration, const AtomicStopSignal &stop_signal, real_t ε, real_t εₖ, unsigned no_progress)
Check all stop conditions (required tolerance reached, out of time, maximum number of iterations exce...
Definition: panoc-helpers.hpp:469
pa::Box
Definition: box.hpp:7
pa::panoc_min_alloc_size
constexpr size_t panoc_min_alloc_size
Definition: standalone/panoc.hpp:154
panoc-helpers.hpp
pa::detail::calc_error_stop_crit
real_t calc_error_stop_crit(PANOCStopCrit crit, crvec pₖ, real_t γ, crvec xₖ, crvec grad_̂ψₖ, crvec grad_ψₖ, const Box &C)
Definition: panoc-helpers.hpp:284
pa::PANOCStats::ε
real_t ε
Definition: inner/decl/panoc.hpp:53
panocpy.test.L
int L
Definition: test.py:47
pa::detail::descent_lemma
real_t descent_lemma(const Problem &problem, real_t rounding_tolerance, real_t L_max, crvec xₖ, real_t ψₖ, crvec grad_ψₖ, crvec y, crvec Σ, rvec x̂ₖ, rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &norm_sq_pₖ, real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ)
Increase the estimate of the Lipschitz constant of the objective gradient and decrease the step size ...
Definition: panoc-helpers.hpp:327
pa::crvec
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
panocpy.test.params
params
Definition: test.py:217
panoc.hpp
pa::vec_allocator::size
size_t size() const
Definition: alloc.hpp:109
panocpy.test.C
C
Definition: test.py:204
pa::PANOCStats::sum_τ
real_t sum_τ
Definition: inner/decl/panoc.hpp:61
pa::PANOCStats
Definition: inner/decl/panoc.hpp:51
pa::detail::initial_lipschitz_estimate
real_t initial_lipschitz_estimate(const Problem &problem, crvec xₖ, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, real_t &ψ, rvec grad_ψ, rvec work_n1, rvec work_n2, rvec work_n3, rvec work_m)
Estimate the Lipschitz constant of the gradient using finite differences.
Definition: panoc-helpers.hpp:574
pa::PANOCDirection
Definition: panoc-direction-update.hpp:8
panocpy.test.p
p
Definition: test.py:57
panocpy.test.n
int n
Definition: test.py:38
pa::PANOCStats::linesearch_failures
unsigned linesearch_failures
Definition: inner/decl/panoc.hpp:56
pa::PANOCStats::elapsed_time
std::chrono::microseconds elapsed_time
Definition: inner/decl/panoc.hpp:54
pa::real_t
double real_t
Default floating point type.
Definition: vec.hpp:8
alloc.hpp
pa::vec_allocator::free
void free(rvec v)
Definition: alloc.hpp:94
pa::PANOCStats::lbfgs_failures
unsigned lbfgs_failures
Definition: inner/decl/panoc.hpp:57