PANOC-ALM main
Nonconvex constrained optimization
panoc-helpers.hpp
Go to the documentation of this file.
1#pragma once
2
7
8#include <stdexcept>
9
10namespace pa::detail {
11
16inline real_t calc_ψ_ŷ(const Problem &p,
17 crvec x,
18 crvec y,
19 crvec Σ,
20 rvec ŷ
21) {
22 if (p.m == 0) /* [[unlikely]] */
23 return p.f(x);
24
25 // g(x)
26 p.g(x, ŷ);
27 // ζ = g(x) + Σ⁻¹y
28 ŷ += Σ.asDiagonal().inverse() * y;
29 // d = ζ - Π(ζ, D)
30 ŷ = projecting_difference(ŷ, p.D);
31 // dᵀŷ, ŷ = Σ d
32 real_t dᵀŷ = 0;
33 for (unsigned i = 0; i < p.m; ++i) {
34 dᵀŷ += ŷ(i) * Σ(i) * ŷ(i); // TODO: vectorize
35 ŷ(i) = Σ(i) * ŷ(i);
36 }
37 // ψ(x) = f(x) + ½ dᵀŷ
38 real_t ψ = p.f(x) + 0.5 * dᵀŷ;
39
40 return ψ;
41}
42
44inline void calc_grad_ψ_from_ŷ(const Problem &p,
45 crvec x,
46 crvec ŷ,
47 rvec grad_ψ,
48 rvec work_n
49) {
50 // ∇ψ = ∇f(x) + ∇g(x) ŷ
51 p.grad_f(x, grad_ψ);
52 if (p.m != 0) /* [[likely]] */ {
53 p.grad_g_prod(x, ŷ, work_n);
54 grad_ψ += work_n;
55 }
56}
57
63 crvec x,
64 crvec y,
65 crvec Σ,
66 rvec grad_ψ,
67 rvec work_n,
68 rvec work_m
69) {
70 // ψ(x) = f(x) + ½ dᵀŷ
71 real_t ψ = calc_ψ_ŷ(p, x, y, Σ, work_m);
72 // ∇ψ = ∇f(x) + ∇g(x) ŷ
73 calc_grad_ψ_from_ŷ(p, x, work_m, grad_ψ, work_n);
74 return ψ;
75}
76
79inline void calc_grad_ψ(const Problem &p,
80 crvec x,
81 crvec y,
82 crvec Σ,
83 rvec grad_ψ,
84 rvec work_n,
85 rvec work_m
86) {
87 if (p.m == 0) /* [[unlikely]] */
88 return p.grad_f(x, grad_ψ);
89
90 // g(x)
91 p.g(x, work_m);
92 // ζ = g(x) + Σ⁻¹y
93 work_m += (y.array() / Σ.array()).matrix();
94 // d = ζ - Π(ζ, D)
95 work_m = projecting_difference(work_m, p.D);
96 // ŷ = Σ d
97 work_m = Σ.asDiagonal() * work_m;
98
99 // ∇ψ = ∇f(x) + ∇g(x) ŷ
100 p.grad_f(x, grad_ψ);
101 p.grad_g_prod(x, work_m, work_n);
102 grad_ψ += work_n;
103}
104
107inline void calc_err_z(const Problem &p,
108 crvec x̂,
109 crvec y,
110 crvec Σ,
111 rvec err_z
112) {
113 // g(x̂)
114 p.g(x̂, err_z);
115 // ζ = g(x̂) + Σ⁻¹y
116 // ẑ = Π(ζ, D)
117 // g(x) - ẑ
118 err_z = err_z - project(err_z + Σ.asDiagonal().inverse() * y, p.D);
119 // TODO: catastrophic cancellation?
120}
121
130inline auto
132 real_t γ,
133 crvec x,
134 crvec grad_ψ
135) {
136 using binary_real_f = real_t (*)(real_t, real_t);
137 return (-γ * grad_ψ)
138 .binaryExpr(C.lowerbound - x, binary_real_f(std::fmax))
139 .binaryExpr(C.upperbound - x, binary_real_f(std::fmin));
140}
141
142inline void calc_x̂(const Problem &prob,
143 real_t γ,
144 crvec x,
145 crvec grad_ψ,
146 rvec x̂,
147 rvec p
148) {
149 p = projected_gradient_step(prob.C, γ, x, grad_ψ);
150 x̂ = x + p;
151}
152
154 switch (crit) {
155 case PANOCStopCrit::ApproxKKT: [[fallthrough]];
156 case PANOCStopCrit::ApproxKKT2: return true;
157 case PANOCStopCrit::ProjGradNorm: [[fallthrough]];
158 case PANOCStopCrit::ProjGradNorm2: [[fallthrough]];
159 case PANOCStopCrit::ProjGradUnitNorm: [[fallthrough]];
160 case PANOCStopCrit::ProjGradUnitNorm2: [[fallthrough]];
161 case PANOCStopCrit::FPRNorm: [[fallthrough]];
162 case PANOCStopCrit::FPRNorm2: return false;
163 case PANOCStopCrit::Ipopt: return true;
164 }
165 throw std::out_of_range("Invalid PANOCStopCrit");
166}
167
170 const Box &C,
171 PANOCStopCrit crit,
172 crvec pₖ,
173 real_t γ,
174 crvec xₖ,
175 crvec x̂ₖ,
176 crvec ŷₖ,
177 crvec grad_ψₖ,
178 crvec grad_̂ψₖ
179) {
180 switch (crit) {
182 auto err = (1 / γ) * pₖ + (grad_ψₖ - grad_̂ψₖ);
183 // These parentheses ^^^ ^^^ are important to
184 // prevent catastrophic cancellation when the step is small
185 return vec_util::norm_inf(err);
186 }
188 auto err = (1 / γ) * pₖ + (grad_ψₖ - grad_̂ψₖ);
189 // These parentheses ^^^ ^^^ are important to
190 // prevent catastrophic cancellation when the step is small
191 return err.norm();
192 }
194 return vec_util::norm_inf(pₖ);
195 }
197 return pₖ.norm();
198 }
200 return vec_util::norm_inf(
201 projected_gradient_step(C, 1, xₖ, grad_ψₖ));
202 }
204 return projected_gradient_step(C, 1, xₖ, grad_ψₖ).norm();
205 }
207 return vec_util::norm_inf(pₖ) / γ;
208 }
210 return pₖ.norm() / γ;
211 }
213 auto err =
214 vec_util::norm_inf(projected_gradient_step(C, 1, x̂ₖ, grad_̂ψₖ));
215 auto n = 2 * (ŷₖ.size() + x̂ₖ.size());
216 if (n == 0)
217 return err;
218 auto C_lagr_mult =
219 vec_util::norm_1(projecting_difference(x̂ₖ - grad_̂ψₖ, C));
220 auto D_lagr_mult = vec_util::norm_1(ŷₖ);
221 const real_t s_max = 100;
222 real_t s_d =
223 std::max(s_max, (C_lagr_mult + D_lagr_mult) / n) / s_max;
224 return err / s_d;
225 }
226 }
227 throw std::out_of_range("Invalid PANOCStopCrit");
228}
229
244 const Problem &problem,
250 real_t rounding_tolerance,
254 real_t L_max,
256 crvec xₖ,
258 real_t ψₖ,
260 crvec grad_ψₖ,
262 crvec y,
264 crvec Σ,
266 rvec x̂ₖ,
268 rvec pₖ,
270 rvec ŷx̂ₖ,
272 real_t &ψx̂ₖ,
274 real_t &norm_sq_pₖ,
276 real_t &grad_ψₖᵀpₖ,
278 real_t &Lₖ,
280 real_t &γₖ) {
281
282 real_t old_γₖ = γₖ;
283 real_t margin = (1 + std::abs(ψₖ)) * rounding_tolerance;
284 while (ψx̂ₖ - ψₖ > grad_ψₖᵀpₖ + 0.5 * Lₖ * norm_sq_pₖ + margin) {
285 if (not(Lₖ * 2 <= L_max))
286 break;
287
288 Lₖ *= 2;
289 γₖ /= 2;
290
291 // Calculate x̂ₖ and pₖ (with new step size)
292 calc_x̂(problem, γₖ, xₖ, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
293 // Calculate ∇ψ(xₖ)ᵀpₖ and ‖pₖ‖²
294 grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
295 norm_sq_pₖ = pₖ.squaredNorm();
296
297 // Calculate ψ(x̂ₖ) and ŷ(x̂ₖ)
298 ψx̂ₖ = calc_ψ_ŷ(problem, x̂ₖ, y, Σ, /* in ⟹ out */ ŷx̂ₖ);
299 }
300 return old_γₖ;
301}
302
306template <class ParamsT, class DurationT>
309 const ParamsT &params,
311 DurationT time_elapsed,
313 unsigned iteration,
315 const AtomicStopSignal &stop_signal,
317 real_t ε,
319 real_t εₖ,
321 unsigned no_progress) {
322
323 bool out_of_time = time_elapsed > params.max_time;
324 bool out_of_iter = iteration == params.max_iter;
325 bool interrupted = stop_signal.stop_requested();
326 bool not_finite = not std::isfinite(εₖ);
327 bool conv = εₖ <= ε;
328 bool max_no_progress = no_progress > params.max_no_progress;
329 return conv ? SolverStatus::Converged
330 : out_of_time ? SolverStatus::MaxTime
331 : out_of_iter ? SolverStatus::MaxIter
332 : not_finite ? SolverStatus::NotFinite
333 : max_no_progress ? SolverStatus::NoProgress
334 : interrupted ? SolverStatus::Interrupted
336}
337
344 const Problem &problem,
346 crvec xₖ,
348 crvec ŷxₖ,
350 crvec y,
352 crvec Σ,
354 rvec g,
356 mat &H,
358 rvec work_n) {
359
360 // Compute the Hessian of the Lagrangian
361 problem.hess_L(xₖ, ŷxₖ, H);
362 // Compute the Hessian of the augmented Lagrangian
363 problem.g(xₖ, g);
364 for (vec::Index i = 0; i < problem.m; ++i) {
365 real_t ζ = g(i) + y(i) / Σ(i);
366 bool inactive =
367 problem.D.lowerbound(i) < ζ && ζ < problem.D.upperbound(i);
368 if (not inactive) {
369 problem.grad_gi(xₖ, i, work_n);
370 H += work_n * Σ(i) * work_n.transpose();
371 }
372 }
373}
374
381 const Problem &problem,
383 crvec xₖ,
385 crvec y,
387 crvec Σ,
389 crvec grad_ψ,
391 crvec v,
393 rvec Hv,
395 rvec work_n1,
397 rvec work_n2,
399 rvec work_m) {
400
401 real_t cbrt_ε = std::cbrt(std::numeric_limits<real_t>::epsilon());
402 real_t h = cbrt_ε * (1 + xₖ.norm());
403 rvec xₖh = work_n1;
404 xₖh = xₖ + h * v;
405 calc_grad_ψ(problem, xₖh, y, Σ, Hv, work_n2, work_m);
406 Hv -= grad_ψ;
407 Hv /= h;
408}
409
414 const Problem &problem,
416 crvec xₖ,
418 crvec y,
420 crvec Σ,
422 real_t ε,
424 real_t δ,
426 real_t L_min,
428 real_t L_max,
430 real_t &ψ,
432 rvec grad_ψ,
434 rvec work_n1,
436 rvec work_n2,
438 rvec work_n3,
440 rvec work_m) {
441
442 auto h = (xₖ * ε).cwiseAbs().cwiseMax(δ);
443 work_n1 = xₖ + h;
444 real_t norm_h = h.norm();
445 // Calculate ∇ψ(x₀ + h)
446 calc_grad_ψ(problem, work_n1, y, Σ, /* in ⟹ out */ work_n2, work_n3,
447 work_m);
448 // Calculate ψ(xₖ), ∇ψ(x₀)
449 ψ = calc_ψ_grad_ψ(problem, xₖ, y, Σ, /* in ⟹ out */ grad_ψ, work_n1,
450 work_m);
451
452 // Estimate Lipschitz constant using finite differences
453 real_t L = (work_n2 - grad_ψ).norm() / norm_h;
454 return std::clamp(L, L_min, L_max);
455}
456
461 const Problem &problem,
463 crvec xₖ,
465 crvec y,
467 crvec Σ,
469 real_t ε,
471 real_t δ,
473 real_t L_min,
475 real_t L_max,
477 rvec grad_ψ,
479 rvec work_n1,
481 rvec work_n2,
483 rvec work_n3,
485 rvec work_m) {
486
487 auto h = (xₖ * ε).cwiseAbs().cwiseMax(δ);
488 work_n1 = xₖ + h;
489 real_t norm_h = h.norm();
490 // Calculate ∇ψ(x₀ + h)
491 calc_grad_ψ(problem, work_n1, y, Σ, /* in ⟹ out */ work_n2, work_n3,
492 work_m);
493 // Calculate ∇ψ(x₀)
494 calc_grad_ψ(problem, xₖ, y, Σ, /* in ⟹ out */ grad_ψ, work_n1, work_m);
495
496 // Estimate Lipschitz constant using finite differences
497 real_t L = (work_n2 - grad_ψ).norm() / norm_h;
498 return std::clamp(L, L_min, L_max);
499}
500
501} // namespace pa::detail
problem
Definition: main.py:16
H
Definition: main.py:8
bool stop_crit_requires_grad_̂ψₖ(PANOCStopCrit crit)
void calc_err_z(const Problem &p, crvec x̂, crvec y, crvec Σ, rvec err_z)
Calculate the error between ẑ and g(x).
void calc_augmented_lagrangian_hessian_prod_fd(const Problem &problem, crvec xₖ, crvec y, crvec Σ, crvec grad_ψ, crvec v, rvec Hv, rvec work_n1, rvec work_n2, rvec work_m)
Compute the Hessian matrix of the augmented Lagrangian function multiplied by the given vector,...
void calc_grad_ψ(const Problem &p, crvec x, crvec y, crvec Σ, rvec grad_ψ, rvec work_n, rvec work_m)
Calculate the gradient ∇ψ(x).
real_t calc_ψ_ŷ(const Problem &p, crvec x, crvec y, crvec Σ, rvec ŷ)
Calculate both ψ(x) and the vector ŷ that can later be used to compute ∇ψ.
auto projected_gradient_step(const Box &C, real_t γ, crvec x, crvec grad_ψ)
Projected gradient step.
void calc_grad_ψ_from_ŷ(const Problem &p, crvec x, crvec ŷ, rvec grad_ψ, rvec work_n)
Calculate ∇ψ(x) using ŷ.
real_t initial_lipschitz_estimate(const Problem &problem, crvec xₖ, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, real_t &ψ, rvec grad_ψ, rvec work_n1, rvec work_n2, rvec work_n3, rvec work_m)
Estimate the Lipschitz constant of the gradient using finite differences.
SolverStatus check_all_stop_conditions(const ParamsT &params, DurationT time_elapsed, unsigned iteration, const AtomicStopSignal &stop_signal, real_t ε, real_t εₖ, unsigned no_progress)
Check all stop conditions (required tolerance reached, out of time, maximum number of iterations exce...
real_t calc_ψ_grad_ψ(const Problem &p, crvec x, crvec y, crvec Σ, rvec grad_ψ, rvec work_n, rvec work_m)
Calculate both ψ(x) and its gradient ∇ψ(x).
void calc_x̂(const Problem &prob, real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p)
real_t descent_lemma(const Problem &problem, real_t rounding_tolerance, real_t L_max, crvec xₖ, real_t ψₖ, crvec grad_ψₖ, crvec y, crvec Σ, rvec x̂ₖ, rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &norm_sq_pₖ, real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ)
Increase the estimate of the Lipschitz constant of the objective gradient and decrease the step size ...
real_t calc_error_stop_crit(const Box &C, PANOCStopCrit crit, crvec pₖ, real_t γ, crvec xₖ, crvec x̂ₖ, crvec ŷₖ, crvec grad_ψₖ, crvec grad_̂ψₖ)
Compute the ε from the stopping criterion, see PANOCStopCrit.
void calc_augmented_lagrangian_hessian(const Problem &problem, crvec xₖ, crvec ŷxₖ, crvec y, crvec Σ, rvec g, mat &H, rvec work_n)
Compute the Hessian matrix of the augmented Lagrangian function.
real_t norm_1(const Vec &v)
Get the 1-norm of the given vector.
Definition: vec.hpp:49
real_t norm_inf(const Vec &v)
Get the maximum or infinity-norm of the given vector.
Definition: vec.hpp:42
@ ProjGradUnitNorm
∞-norm of the projected gradient with unit step size:
@ ProjGradNorm
∞-norm of the projected gradient with step size γ:
@ Ipopt
The stopping criterion used by Ipopt, see https://link.springer.com/article/10.1007/s10107-004-0559-y...
@ FPRNorm2
2-norm of fixed point residual:
@ ProjGradNorm2
2-norm of the projected gradient with step size γ:
@ ApproxKKT
Find an ε-approximate KKT point in the ∞-norm:
@ FPRNorm
∞-norm of fixed point residual:
@ ApproxKKT2
Find an ε-approximate KKT point in the 2-norm:
@ ProjGradUnitNorm2
2-norm of the projected gradient with unit step size:
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
auto project(const Vec &v, const Box &box)
Project a vector onto a box.
Definition: box.hpp:15
SolverStatus
Exit status of a numerical solver such as ALM or PANOC.
Definition: solverstatus.hpp:7
@ Interrupted
Solver was interrupted by the user.
@ Unknown
Initial value.
@ MaxTime
Maximum allowed execution time exceeded.
@ NoProgress
No progress was made in the last iteration.
@ MaxIter
Maximum number of iterations exceeded.
@ Converged
Converged and reached given tolerance.
@ NotFinite
Intermediate results were infinite or not-a-number.
realmat mat
Default type for matrices.
Definition: vec.hpp:20
double real_t
Default floating point type.
Definition: vec.hpp:8
auto projecting_difference(const Vec &v, const Box &box)
Get the difference between the given vector and its projection.
Definition: box.hpp:28
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
Definition: box.hpp:7
int L
Definition: test.py:49
int Σ
Definition: test.py:72
int n
Definition: test.py:40
int ε
Definition: test.py:73
Problem description for minimization problems.