#pragma once

#include <panoc-alm/inner/decl/panoc-stop-crit.hpp>
#include <panoc-alm/util/atomic_stop_signal.hpp>
#include <panoc-alm/util/problem.hpp>
#include <panoc-alm/util/solverstatus.hpp>

#include <algorithm>
#include <cmath>
#include <limits>
#include <stdexcept>

namespace pa::detail {

/// Calculate both ψ(x) and the vector ŷ that can later be used to compute ∇ψ.
/// @f[ \psi(x^k) = f(x^k) + \frac{1}{2}
/// \text{dist}_\Sigma^2\left(g(x^k) + \Sigma^{-1}y,\;D\right) @f]
/// @f[ \hat{y} = \Sigma\left(g(x^k) + \Sigma^{-1}y
/// - \Pi_D\left(g(x^k) + \Sigma^{-1}y\right)\right) @f]
inline real_t calc_ψ_ŷ(const Problem &p, ///< [in]  Problem description
                       crvec x,          ///< [in]  Decision variable @f$ x @f$
                       crvec y,          ///< [in]  Lagrange multipliers @f$ y @f$
                       crvec Σ,          ///< [in]  Penalty weights @f$ \Sigma @f$
                       rvec ŷ            ///< [out] @f$ \hat{y} @f$
) {
    if (p.m == 0) /* [[unlikely]] */
        return p.f(x);

    // g(x)
    p.g(x, ŷ);
    // ζ = g(x) + Σ⁻¹y
    ŷ += Σ.asDiagonal().inverse() * y;
    // d = ζ - Π(ζ, D)
    ŷ = projecting_difference(ŷ, p.D);
    // dᵀŷ, ŷ = Σ d
    real_t dᵀŷ = 0;
    for (unsigned i = 0; i < p.m; ++i) {
        dᵀŷ += ŷ(i) * Σ(i) * ŷ(i); // TODO: vectorize
        ŷ(i) = Σ(i) * ŷ(i);
    }
    // ψ(x) = f(x) + ½ dᵀŷ
    real_t ψ = p.f(x) + 0.5 * dᵀŷ;

    return ψ;
}
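
// Worked example (hypothetical values, not part of the API): take a single
// constraint g(x) = x with D = (-∞, 0], Σ = (10), y = (2), evaluated at x = 1:
//   ζ = g(x) + Σ⁻¹y = 1 + 0.2 = 1.2
//   d = ζ - Π(ζ, D) = 1.2 - 0 = 1.2
//   ŷ = Σ d = 12,  dᵀŷ = 1.2 · 12 = 14.4
//   ψ(x) = f(1) + ½ · 14.4 = f(1) + 7.2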

/// Calculate ∇ψ(x) using ŷ.
inline void calc_grad_ψ_from_ŷ(const Problem &p, ///< [in]  Problem description
                               crvec x,          ///< [in]  Decision variable @f$ x @f$
                               crvec ŷ,          ///< [in]  @f$ \hat{y} @f$
                               rvec grad_ψ,      ///< [out] @f$ \nabla \psi(x) @f$
                               rvec work_n       ///<       Dimension n
) {
    // ∇ψ = ∇f(x) + ∇g(x) ŷ
    p.grad_f(x, grad_ψ);
    if (p.m != 0) /* [[likely]] */ {
        p.grad_g_prod(x, ŷ, work_n);
        grad_ψ += work_n;
    }
}

/// Calculate both ψ(x) and its gradient ∇ψ(x).
/// @f[ \psi(x^k) = f(x^k) + \frac{1}{2}
/// \text{dist}_\Sigma^2\left(g(x^k) + \Sigma^{-1}y,\;D\right) @f]
/// @f[ \nabla \psi(x) = \nabla f(x) + \nabla g(x)\ \hat{y}(x) @f]
inline real_t calc_ψ_grad_ψ(const Problem &p, ///< [in]  Problem description
                            crvec x,          ///< [in]  Decision variable @f$ x @f$
                            crvec y,          ///< [in]  Lagrange multipliers @f$ y @f$
                            crvec Σ,          ///< [in]  Penalty weights @f$ \Sigma @f$
                            rvec grad_ψ,      ///< [out] @f$ \nabla \psi(x) @f$
                            rvec work_n,      ///<       Dimension n
                            rvec work_m       ///<       Dimension m
) {
    // ψ(x) = f(x) + ½ dᵀŷ
    real_t ψ = calc_ψ_ŷ(p, x, y, Σ, work_m);
    // ∇ψ = ∇f(x) + ∇g(x) ŷ
    calc_grad_ψ_from_ŷ(p, x, work_m, grad_ψ, work_n);
    return ψ;
}

/// Calculate the gradient ∇ψ(x).
/// @f[ \nabla \psi(x) = \nabla f(x) + \nabla g(x)\ \hat{y}(x) @f]
inline void calc_grad_ψ(const Problem &p, ///< [in]  Problem description
                        crvec x,          ///< [in]  Decision variable @f$ x @f$
                        crvec y,          ///< [in]  Lagrange multipliers @f$ y @f$
                        crvec Σ,          ///< [in]  Penalty weights @f$ \Sigma @f$
                        rvec grad_ψ,      ///< [out] @f$ \nabla \psi(x) @f$
                        rvec work_n,      ///<       Dimension n
                        rvec work_m       ///<       Dimension m
) {
    if (p.m == 0) /* [[unlikely]] */
        return p.grad_f(x, grad_ψ);

    // g(x)
    p.g(x, work_m);
    // ζ = g(x) + Σ⁻¹y
    work_m += (y.array() / Σ.array()).matrix();
    // d = ζ - Π(ζ, D)
    work_m = projecting_difference(work_m, p.D);
    // ŷ = Σ d
    work_m = Σ.asDiagonal() * work_m;

    // ∇ψ = ∇f(x) + ∇g(x) ŷ
    p.grad_f(x, grad_ψ);
    p.grad_g_prod(x, work_m, work_n);
    grad_ψ += work_n;
}

/// Calculate the error between ẑ and g(x).
/// @f[ \hat{z}^k = \Pi_D\left(g(x^k) + \Sigma^{-1}y\right) @f]
inline void calc_err_z(const Problem &p, ///< [in]  Problem description
                       crvec x̂,          ///< [in]  Decision variable @f$ \hat{x} @f$
                       crvec y,          ///< [in]  Lagrange multipliers @f$ y @f$
                       crvec Σ,          ///< [in]  Penalty weights @f$ \Sigma @f$
                       rvec err_z        ///< [out] @f$ g(\hat{x}) - \hat{z} @f$
) {
    // g(x̂)
    p.g(x̂, err_z);
    // ζ = g(x̂) + Σ⁻¹y
    // ẑ = Π(ζ, D)
    // g(x̂) - ẑ
    err_z = err_z - project(err_z + Σ.asDiagonal().inverse() * y, p.D);
    // TODO: catastrophic cancellation?
}

/**
 * Projected gradient step
 * @f[ \begin{aligned}
 * \hat{x}^k &= T_{\gamma^k}\left(x^k\right) \\
 * &= \Pi_C\left(x^k - \gamma^k \nabla \psi(x^k)\right) \\
 * p^k &= \hat{x}^k - x^k \\
 * \end{aligned} @f]
 */
inline auto
projected_gradient_step(const Box &C, ///< [in] Set to project onto
                        real_t γ,     ///< [in] Step size
                        crvec x,      ///< [in] Decision variable @f$ x @f$
                        crvec grad_ψ  ///< [in] @f$ \nabla \psi(x^k) @f$
) {
    using binary_real_f = real_t (*)(real_t, real_t);
    return (-γ * grad_ψ)
        .binaryExpr(C.lowerbound - x, binary_real_f(std::fmax))
        .binaryExpr(C.upperbound - x, binary_real_f(std::fmin));
}
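
// The expression above evaluates p = Π_C(x - γ∇ψ(x)) - x as a component-wise
// clamp of -γ∇ψ(x) between C.lowerbound - x and C.upperbound - x, avoiding a
// temporary for x̂. A minimal standalone sketch of the same identity with
// plain Eigen types and made-up values:
//
//   #include <Eigen/Dense>
//   Eigen::Vector2d x(0.9, 0.0), grad(-4.0, -0.5);
//   Eigen::Vector2d lb(-1.0, -1.0), ub(1.0, 1.0);
//   double γ = 0.5;
//   Eigen::Vector2d p = (-γ * grad).cwiseMax(lb - x).cwiseMin(ub - x);
//   // p = (0.1, 0.25): the first component is clamped at the upper bound,
//   // the second takes the full gradient step. Identical to
//   // (x - γ * grad).cwiseMax(lb).cwiseMin(ub) - x, i.e. Π_C(x - γ∇ψ) - x.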

inline void calc_x̂(const Problem &prob, ///< [in]  Problem description
                   real_t γ,            ///< [in]  Step size
                   crvec x,             ///< [in]  Decision variable @f$ x @f$
                   crvec grad_ψ,        ///< [in]  @f$ \nabla \psi(x^k) @f$
                   rvec x̂,              ///< [out] @f$ \hat{x}^k = T_{\gamma^k}(x^k) @f$
                   rvec p               ///< [out] @f$ \hat{x}^k - x^k @f$
) {
    p = projected_gradient_step(prob.C, γ, x, grad_ψ);
    x̂ = x + p;
}

inline bool stop_crit_requires_grad_̂ψₖ(PANOCStopCrit crit) {
    switch (crit) {
        case PANOCStopCrit::ApproxKKT: [[fallthrough]];
        case PANOCStopCrit::ApproxKKT2: return true;
        case PANOCStopCrit::ProjGradNorm: [[fallthrough]];
        case PANOCStopCrit::ProjGradNorm2: [[fallthrough]];
        case PANOCStopCrit::ProjGradUnitNorm: [[fallthrough]];
        case PANOCStopCrit::ProjGradUnitNorm2: [[fallthrough]];
        case PANOCStopCrit::FPRNorm: [[fallthrough]];
        case PANOCStopCrit::FPRNorm2: return false;
        case PANOCStopCrit::Ipopt: return true;
    }
    throw std::out_of_range("Invalid PANOCStopCrit");
}

/// Compute the ε from the stopping criterion, see @ref PANOCStopCrit.
inline real_t calc_error_stop_crit(
    const Box &C,       ///< [in] Box constraints on x
    PANOCStopCrit crit, ///< [in] What stopping criterion to use
    crvec pₖ,           ///< [in] Projected gradient step @f$ \hat x^k - x^k @f$
    real_t γ,           ///< [in] Step size
    crvec xₖ,           ///< [in] Current iterate
    crvec x̂ₖ,           ///< [in] Current iterate after projected gradient step
    crvec ŷₖ,           ///< [in] Candidate Lagrange multipliers
    crvec grad_ψₖ,      ///< [in] Gradient in @f$ x^k @f$
    crvec grad_̂ψₖ       ///< [in] Gradient in @f$ \hat x^k @f$
) {
    switch (crit) {
        case PANOCStopCrit::ApproxKKT: {
            // The parentheses around (grad_ψₖ - grad_̂ψₖ) are important to
            // prevent catastrophic cancellation when the step is small
            auto err = (1 / γ) * pₖ + (grad_ψₖ - grad_̂ψₖ);
            return vec_util::norm_inf(err);
        }
        case PANOCStopCrit::ApproxKKT2: {
            // Same grouping as above, to prevent catastrophic cancellation
            auto err = (1 / γ) * pₖ + (grad_ψₖ - grad_̂ψₖ);
            return err.norm();
        }
        case PANOCStopCrit::ProjGradNorm: {
            return vec_util::norm_inf(pₖ);
        }
        case PANOCStopCrit::ProjGradNorm2: {
            return pₖ.norm();
        }
        case PANOCStopCrit::ProjGradUnitNorm: {
            return vec_util::norm_inf(
                projected_gradient_step(C, 1, xₖ, grad_ψₖ));
        }
        case PANOCStopCrit::ProjGradUnitNorm2: {
            return projected_gradient_step(C, 1, xₖ, grad_ψₖ).norm();
        }
        case PANOCStopCrit::FPRNorm: {
            return vec_util::norm_inf(pₖ) / γ;
        }
        case PANOCStopCrit::FPRNorm2: {
            return pₖ.norm() / γ;
        }
        case PANOCStopCrit::Ipopt: {
            auto err =
                vec_util::norm_inf(projected_gradient_step(C, 1, x̂ₖ, grad_̂ψₖ));
            auto n = 2 * (ŷₖ.size() + x̂ₖ.size());
            if (n == 0)
                return err;
            auto C_lagr_mult =
                vec_util::norm_1(projecting_difference(x̂ₖ - grad_̂ψₖ, C));
            auto D_lagr_mult = vec_util::norm_1(ŷₖ);
            const real_t s_max = 100;
            real_t s_d =
                std::max(s_max, (C_lagr_mult + D_lagr_mult) / n) / s_max;
            return err / s_d;
        }
    }
    throw std::out_of_range("Invalid PANOCStopCrit");
}
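
// Why the grouping in ApproxKKT matters: a numeric sketch with made-up
// double-precision values:
//
//   double γ = 1e-4, p = 1e-12;  // so (1/γ)·p = 1e-8
//   double g1 = 1e8, g2 = 1e8;   // ∇ψ(xₖ) ≈ ∇ψ(x̂ₖ) near a fixed point
//   double grouped   = (1 / γ) * p + (g1 - g2); // = 1e-8, the true residual
//   double ungrouped = ((1 / γ) * p + g1) - g2; // = 0: adding 1e-8 to 1e8
//                                               // rounds it away entirely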

/// Increase the estimate of the Lipschitz constant of the objective gradient
/// and decrease the step size until the quadratic upper bound (descent lemma)
/// is satisfied:
/// @f[ \psi(x + p) \le \psi(x) + \nabla\psi(x)^\top p + \frac{L}{2} \|p\|^2 @f]
/// The projected gradient iterate @f$ \hat x^k @f$ and step @f$ p^k @f$ are
/// updated with the new step size @f$ \gamma^k @f$, and so are other quantities
/// that depend on them, such as @f$ \nabla\psi(x^k)^\top p^k @f$ and
/// @f$ \|p^k\|^2 @f$. The intermediate vector @f$ \hat y(x^k) @f$ can be used
/// to compute the gradient @f$ \nabla\psi(\hat x^k) @f$ or to update the
/// Lagrange multipliers.
///
/// @return The original step size, before it was reduced by this function.
inline real_t descent_lemma(
    /// [in]    Problem description
    const Problem &problem,
    /// [in]    Tolerance used to ignore rounding errors when the function
    ///         @f$ \psi(x) @f$ is relatively flat or the step size is very
    ///         small, which could cause @f$ \psi(x^k) < \psi(\hat x^k) @f$,
    ///         which is mathematically impossible but could occur in finite
    ///         precision floating point arithmetic.
    real_t rounding_tolerance,
    /// [in]    Maximum allowed Lipschitz constant estimate (prevents an
    ///         infinite loop if the function or derivatives are discontinuous,
    ///         and keeps the step size bounded away from zero).
    real_t L_max,
    /// [in]    Current iterate @f$ x^k @f$
    crvec xₖ,
    /// [in]    Objective function @f$ \psi(x^k) @f$
    real_t ψₖ,
    /// [in]    Gradient of objective @f$ \nabla\psi(x^k) @f$
    crvec grad_ψₖ,
    /// [in]    Lagrange multipliers @f$ y @f$
    crvec y,
    /// [in]    Penalty weights @f$ \Sigma @f$
    crvec Σ,
    /// [out]   Projected gradient iterate @f$ \hat x^k @f$
    rvec x̂ₖ,
    /// [out]   Projected gradient step @f$ p^k @f$
    rvec pₖ,
    /// [out]   Intermediate vector @f$ \hat y(x^k) @f$
    rvec ŷx̂ₖ,
    /// [inout] Objective function @f$ \psi(\hat x^k) @f$
    real_t &ψx̂ₖ,
    /// [inout] Squared norm of the step @f$ \left\| p^k \right\|^2 @f$
    real_t &norm_sq_pₖ,
    /// [inout] Gradient of objective times step @f$ \nabla\psi(x^k)^\top p^k@f$
    real_t &grad_ψₖᵀpₖ,
    /// [inout] Lipschitz constant estimate @f$ L_{\nabla\psi}^k @f$
    real_t &Lₖ,
    /// [inout] Step size @f$ \gamma^k @f$
    real_t &γₖ) {

    real_t old_γₖ = γₖ;
    real_t margin = (1 + std::abs(ψₖ)) * rounding_tolerance;
    while (ψx̂ₖ - ψₖ > grad_ψₖᵀpₖ + 0.5 * Lₖ * norm_sq_pₖ + margin) {
        if (not(Lₖ * 2 <= L_max))
            break;

        Lₖ *= 2;
        γₖ /= 2;

        // Calculate x̂ₖ and pₖ (with new step size)
        calc_x̂(problem, γₖ, xₖ, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
        // Calculate ∇ψ(xₖ)ᵀpₖ and ‖pₖ‖²
        grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
        norm_sq_pₖ = pₖ.squaredNorm();

        // Calculate ψ(x̂ₖ) and ŷ(x̂ₖ)
        ψx̂ₖ = calc_ψ_ŷ(problem, x̂ₖ, y, Σ, /* in ⟹ out */ ŷx̂ₖ);
    }
    return old_γₖ;
}
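
// Sanity check (a sketch, assuming an unconstrained quadratic with L_max large
// enough): for ψ(x) = ½·c·x² the bound holds with equality at L = c, because
//   ψ(x + p) - ψ(x) - ψ'(x)·p = ½·c·p²  exactly, for every p.
// Starting from Lₖ = c/4, the loop doubles twice (c/4 → c/2 → c), halving γₖ
// each time, after which the condition holds up to the rounding margin.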

/// Check all stop conditions (required tolerance reached, out of time,
/// maximum number of iterations exceeded, interrupted by user,
/// infinite iterate, no progress made)
template <class ParamsT, class DurationT>
inline SolverStatus check_all_stop_conditions(
    /// [in] Parameters including `max_iter`, `max_time` and `max_no_progress`
    const ParamsT &params,
    /// [in] Time elapsed since the start of the algorithm
    DurationT time_elapsed,
    /// [in] The current iteration number
    unsigned iteration,
    /// [in] A stop signal for the user to interrupt the algorithm
    const AtomicStopSignal &stop_signal,
    /// [in] Desired primal tolerance
    real_t ε,
    /// [in] Tolerance of the current iterate
    real_t εₖ,
    /// [in] The number of successive iterations no progress was made
    unsigned no_progress) {

    bool out_of_time     = time_elapsed > params.max_time;
    bool out_of_iter     = iteration == params.max_iter;
    bool interrupted     = stop_signal.stop_requested();
    bool not_finite      = not std::isfinite(εₖ);
    bool conv            = εₖ <= ε;
    bool max_no_progress = no_progress > params.max_no_progress;
    return conv              ? SolverStatus::Converged
           : out_of_time     ? SolverStatus::MaxTime
           : out_of_iter     ? SolverStatus::MaxIter
           : not_finite      ? SolverStatus::NotFinite
           : max_no_progress ? SolverStatus::NoProgress
           : interrupted     ? SolverStatus::Interrupted
                             : SolverStatus::Unknown;
}
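
// Typical call site (a schematic sketch, not a verbatim excerpt from a solver):
//
//   auto time_elapsed = std::chrono::steady_clock::now() - start_time;
//   auto stop_status  = check_all_stop_conditions(params, time_elapsed, k,
//                                                 stop_signal, ε, εₖ,
//                                                 no_progress);
//   if (stop_status != SolverStatus::Unknown)
//       break; // Unknown is the only status that means "keep iterating"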

/// Compute the Hessian matrix of the augmented Lagrangian function
/// @f[ \nabla^2_{xx} L_\Sigma(x, y) =
/// \Big. \nabla_{xx}^2 L(x, y) \Big|_{\big(x,\, \hat y(x, y)\big)}
/// + \sum_{i\in\mathcal{I}} \Sigma_i\,\nabla g_i(x) \nabla g_i(x)^\top @f]
inline void calc_augmented_lagrangian_hessian(
    /// [in] Problem description
    const Problem &problem,
    /// [in] Current iterate @f$ x^k @f$
    crvec xₖ,
    /// [in] Intermediate vector @f$ \hat y(x^k) @f$
    crvec ŷxₖ,
    /// [in] Lagrange multipliers @f$ y @f$
    crvec y,
    /// [in] Penalty weights @f$ \Sigma @f$
    crvec Σ,
    /// [out] The constraint values @f$ g(x^k) @f$
    rvec g,
    /// [out] Hessian matrix @f$ H(x, y) @f$
    mat &H,
    /// Dimension n
    rvec work_n) {

    // Compute the Hessian of the Lagrangian
    problem.hess_L(xₖ, ŷxₖ, H);
    // Compute the Hessian of the augmented Lagrangian
    problem.g(xₖ, g);
    for (vec::Index i = 0; i < problem.m; ++i) {
        real_t ζ = g(i) + y(i) / Σ(i);
        bool inactive =
            problem.D.lowerbound(i) < ζ && ζ < problem.D.upperbound(i);
        if (not inactive) {
            problem.grad_gi(xₖ, i, work_n);
            H += work_n * Σ(i) * work_n.transpose();
        }
    }
}

/// Compute the Hessian matrix of the augmented Lagrangian function multiplied
/// by the given vector, using finite differences.
/// @f[ \nabla^2_{xx} L_\Sigma(x, y)\, v \approx
/// \frac{\nabla_x L_\Sigma(x+hv, y) - \nabla_x L_\Sigma(x, y)}{h} @f]
inline void calc_augmented_lagrangian_hessian_prod_fd(
    /// [in] Problem description
    const Problem &problem,
    /// [in] Current iterate @f$ x^k @f$
    crvec xₖ,
    /// [in] Lagrange multipliers @f$ y @f$
    crvec y,
    /// [in] Penalty weights @f$ \Sigma @f$
    crvec Σ,
    /// [in] Gradient @f$ \nabla \psi(x^k) @f$
    crvec grad_ψ,
    /// [in] Vector to multiply with the Hessian
    crvec v,
    /// [out] Hessian-vector product
    rvec Hv,
    /// Dimension n
    rvec work_n1,
    /// Dimension n
    rvec work_n2,
    /// Dimension m
    rvec work_m) {

    real_t cbrt_ε = std::cbrt(std::numeric_limits<real_t>::epsilon());
    real_t h = cbrt_ε * (1 + xₖ.norm());
    rvec xₖh = work_n1;
    xₖh = xₖ + h * v;
    calc_grad_ψ(problem, xₖh, y, Σ, Hv, work_n2, work_m);
    Hv -= grad_ψ;
    Hv /= h;
}
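
// Finite-difference check (a standalone sketch with plain Eigen types and
// made-up values; for a quadratic ψ the forward difference is exact up to
// roundoff):
//
//   #include <Eigen/Dense>
//   Eigen::Matrix2d A;
//   A << 2, 1,
//        1, 3;                 // ∇ψ(x) = A·x, so ∇²ψ = A
//   Eigen::Vector2d x(1, -1), v(0, 1);
//   double h = 1e-6 * (1 + x.norm());
//   Eigen::Vector2d Hv = (A * (x + h * v) - A * x) / h;
//   // Hv ≈ A·v = (1, 3)ᵀ, because the gradient of a quadratic is linear.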

/// Estimate the Lipschitz constant of the gradient @f$ \nabla \psi @f$ using
/// finite differences.
inline real_t initial_lipschitz_estimate(
    /// [in] Problem description
    const Problem &problem,
    /// [in] Current iterate @f$ x^k @f$
    crvec xₖ,
    /// [in] Lagrange multipliers @f$ y @f$
    crvec y,
    /// [in] Penalty weights @f$ \Sigma @f$
    crvec Σ,
    /// [in] Finite difference step size relative to xₖ
    real_t ε,
    /// [in] Minimum absolute finite difference step size
    real_t δ,
    /// [in] Minimum allowed Lipschitz estimate.
    real_t L_min,
    /// [in] Maximum allowed Lipschitz estimate.
    real_t L_max,
    /// [out] @f$ \psi(x^k) @f$
    real_t &ψ,
    /// [out] Gradient @f$ \nabla \psi(x^k) @f$
    rvec grad_ψ,
    /// Dimension n
    rvec work_n1,
    /// Dimension n
    rvec work_n2,
    /// Dimension n
    rvec work_n3,
    /// Dimension m
    rvec work_m) {

    auto h = (xₖ * ε).cwiseAbs().cwiseMax(δ);
    work_n1 = xₖ + h;
    real_t norm_h = h.norm();
    // Calculate ∇ψ(x₀ + h)
    calc_grad_ψ(problem, work_n1, y, Σ, /* in ⟹ out */ work_n2, work_n3,
                work_m);
    // Calculate ψ(x₀) and ∇ψ(x₀)
    ψ = calc_ψ_grad_ψ(problem, xₖ, y, Σ, /* in ⟹ out */ grad_ψ, work_n1,
                      work_m);

    // Estimate the Lipschitz constant using finite differences
    real_t L = (work_n2 - grad_ψ).norm() / norm_h;
    return std::clamp(L, L_min, L_max);
}
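
// A note on accuracy (an illustrative estimate, not a guarantee): for a
// quadratic ψ with Hessian H, the estimate reduces to ‖H·h‖ / ‖h‖, which lies
// between the extreme eigenvalues of H and only reaches the true Lipschitz
// constant λ_max(H) when h aligns with the dominant eigenvector. E.g.
// H = diag(1, 100) and h ∝ (1, 1)ᵀ give √10001 / √2 ≈ 70.7 < 100, hence the
// clamp to [L_min, L_max] and the later corrections by descent_lemma.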

/// Estimate the Lipschitz constant of the gradient @f$ \nabla \psi @f$ using
/// finite differences.
inline real_t initial_lipschitz_estimate(
    /// [in] Problem description
    const Problem &problem,
    /// [in] Current iterate @f$ x^k @f$
    crvec xₖ,
    /// [in] Lagrange multipliers @f$ y @f$
    crvec y,
    /// [in] Penalty weights @f$ \Sigma @f$
    crvec Σ,
    /// [in] Finite difference step size relative to xₖ
    real_t ε,
    /// [in] Minimum absolute finite difference step size
    real_t δ,
    /// [in] Minimum allowed Lipschitz estimate.
    real_t L_min,
    /// [in] Maximum allowed Lipschitz estimate.
    real_t L_max,
    /// [out] Gradient @f$ \nabla \psi(x^k) @f$
    rvec grad_ψ,
    /// Dimension n
    rvec work_n1,
    /// Dimension n
    rvec work_n2,
    /// Dimension n
    rvec work_n3,
    /// Dimension m
    rvec work_m) {

    auto h = (xₖ * ε).cwiseAbs().cwiseMax(δ);
    work_n1 = xₖ + h;
    real_t norm_h = h.norm();
    // Calculate ∇ψ(x₀ + h)
    calc_grad_ψ(problem, work_n1, y, Σ, /* in ⟹ out */ work_n2, work_n3,
                work_m);
    // Calculate ∇ψ(x₀)
    calc_grad_ψ(problem, xₖ, y, Σ, /* in ⟹ out */ grad_ψ, work_n1, work_m);

    // Estimate the Lipschitz constant using finite differences
    real_t L = (work_n2 - grad_ψ).norm() / norm_h;
    return std::clamp(L, L_min, L_max);
}

} // namespace pa::detail