Line data Source code
1 : #pragma once
2 :
3 : #include <panoc-alm/inner/decl/panoc.hpp>
4 : #include <panoc-alm/inner/detail/panoc-helpers.hpp>
5 : #include <panoc-alm/util/alloc.hpp>
6 : #include <panoc-alm/util/atomic_stop_signal.hpp>
7 : #include <panoc-alm/util/box.hpp>
8 :
9 : #include <iomanip>
10 : #include <iostream>
11 : #include <stdexcept>
12 :
13 : namespace pa {
14 :
15 : namespace detail {
16 :
17 : /// Estimate the Lipschitz constant of the gradient @f$ \nabla \psi @f$ using
18 : /// finite differences.
19 : template <class ObjFunT, class ObjGradFunT>
20 10001 : real_t initial_lipschitz_estimate(
21 : /// [in] Objective
22 : const ObjFunT &ψ,
23 : /// [in] Gradient of
24 : const ObjGradFunT &grad_ψ,
25 : /// [in] Current iterate @f$ x^k @f$
26 : crvec xₖ,
27 : /// [in] Finite difference step size relative to xₖ
28 : real_t ε,
29 : /// [in] Minimum absolute finite difference step size
30 : real_t δ,
31 : /// [in] Minimum allowed Lipschitz estimate.
32 : real_t L_min,
33 : /// [in] Maximum allowed Lipschitz estimate.
34 : real_t L_max,
35 : /// [out] @f$ \psi(x^k) @f$
36 : real_t &ψₖ,
37 : /// [out] Gradient @f$ \nabla \psi(x^k) @f$
38 : rvec grad_ψₖ,
39 : /// [in]
40 : vec_allocator &alloc) {
41 :
42 10001 : auto h = (xₖ * ε).cwiseAbs().cwiseMax(δ);
43 10001 : auto xh = alloc.alloc() = xₖ + h;
44 10001 : real_t norm_h = h.norm();
45 10001 : auto grad_ψxh = alloc.alloc();
46 : // Calculate ∇ψ(x₀ + h)
47 10001 : grad_ψ(xh, grad_ψxh);
48 : // Calculate ψ(xₖ), ∇ψ(x₀)
49 10001 : ψₖ = ψ(xₖ);
50 10001 : grad_ψ(xₖ, grad_ψₖ);
51 :
52 : // Estimate Lipschitz constant using finite differences
53 10001 : real_t L = (grad_ψxh - grad_ψₖ).norm() / norm_h;
54 10001 : alloc.free(xh, grad_ψxh);
55 10001 : return std::clamp(L, L_min, L_max);
56 10001 : }
57 :
58 520052 : inline void calc_x̂(real_t γ, ///< [in] Step size
59 : crvec x, ///< [in] Decision variable @f$ x @f$
60 : const Box &C, ///< [in] Box constraints @f$ C @f$
61 : crvec grad_ψ, ///< [in] @f$ \nabla \psi(x^k) @f$
62 : rvec x̂, ///< [out] @f$ \hat{x}^k = T_{\gamma^k}(x^k) @f$
63 : rvec p ///< [out] @f$ \hat{x}^k - x^k @f$
64 : ) {
65 520052 : p = projected_gradient_step(C, γ, x, grad_ψ);
66 520052 : x̂ = x + p;
67 520052 : }
68 :
69 : /// Increase the estimate of the Lipschitz constant of the objective gradient
70 : /// and decrease the step size until quadratic upper bound or descent lemma is
71 : /// satisfied:
72 : /// @f[ \psi(x + p) \le \psi(x) + \nabla\psi(x)^\top p + \frac{L}{2} \|p\|^2 @f]
73 : /// The projected gradient iterate @f$ \hat x^k @f$ and step @f$ p^k @f$ are
74 : /// updated with the new step size @f$ \gamma^k @f$, and so are other quantities
75 : /// that depend on them, such as @f$ \nabla\psi(x^k)^\top p^k @f$ and
76 : /// @f$ \|p\|^2 @f$. The intermediate vector @f$ \hat y(x^k) @f$ can be used to
77 : /// compute the gradient @f$ \nabla\psi(\hat x^k) @f$ or to update the Lagrange
78 : /// multipliers.
79 : ///
80 : /// @return The original step size, before it was reduced by this function.
81 : template <class ObjFunT>
82 280028 : real_t descent_lemma(
83 : /// [in] Objective.
84 : const ObjFunT &ψ,
85 : /// [in] Box constraints.
86 : const Box &C,
87 : /// [in] Tolerance used to ignore rounding errors when the function
88 : /// @f$ \psi(x) @f$ is relatively flat or the step size is very
89 : /// small, which could cause @f$ \psi(x^k) < \psi(\hat x^k) @f$,
90 : /// which is mathematically impossible but could occur in finite
91 : /// precision floating point arithmetic.
92 : real_t rounding_tolerance,
93 : /// [in] Maximum allowed Lipschitz constant estimate (prevents infinite
94 : /// loop if function or derivatives are discontinuous, and keeps
95 : /// step size bounded away from zero).
96 : real_t L_max,
97 : /// [in] Current iterate @f$ x^k @f$
98 : crvec xₖ,
99 : /// [in] Objective function @f$ \psi(x^k) @f$
100 : real_t ψₖ,
101 : /// [in] Gradient of objective @f$ \nabla\psi(x^k) @f$
102 : crvec grad_ψₖ,
103 : /// [out] Projected gradient iterate @f$ \hat x^k @f$
104 : rvec x̂ₖ,
105 : /// [out] Projected gradient step @f$ p^k @f$
106 : rvec pₖ,
107 : /// [inout] Objective function @f$ \psi(\hat x^k) @f$
108 : real_t &ψx̂ₖ,
109 : /// [inout] Squared norm of the step @f$ \left\| p^k \right\|^2 @f$
110 : real_t &norm_sq_pₖ,
111 : /// [inout] Gradient of objective times step @f$ \nabla\psi(x^k)^\top p^k@f$
112 : real_t &grad_ψₖᵀpₖ,
113 : /// [inout] Lipschitz constant estimate @f$ L_{\nabla\psi}^k @f$
114 : real_t &Lₖ,
115 : /// [inout] Step size @f$ \gamma^k @f$
116 : real_t &γₖ) {
117 :
118 280028 : real_t old_γₖ = γₖ;
119 280028 : real_t margin = (1 + std::abs(ψₖ)) * rounding_tolerance;
120 520052 : while (ψx̂ₖ - ψₖ > grad_ψₖᵀpₖ + 0.5 * Lₖ * norm_sq_pₖ + margin) {
121 240024 : if (not(Lₖ * 2 <= L_max))
122 0 : break;
123 :
124 240024 : Lₖ *= 2;
125 240024 : γₖ /= 2;
126 :
127 : // Calculate x̂ₖ and pₖ (with new step size)
128 240024 : calc_x̂(γₖ, xₖ, C, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
129 : // Calculate ∇ψ(xₖ)ᵀpₖ and ‖pₖ‖²
130 240024 : grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
131 240024 : norm_sq_pₖ = pₖ.squaredNorm();
132 :
133 : // Calculate ψ(x̂ₖ)
134 240024 : ψx̂ₖ = ψ(x̂ₖ);
135 : }
136 560056 : return old_γₖ;
137 280028 : }
138 :
139 0 : inline void print_progress(unsigned k, real_t ψₖ, crvec grad_ψₖ, real_t pₖᵀpₖ,
140 : real_t γₖ, real_t εₖ) {
141 0 : std::cout << "[PANOC] " << std::setw(6) << k //
142 0 : << ": ψ = " << std::setw(13) << ψₖ //
143 0 : << ", ‖∇ψ‖ = " << std::setw(13) << grad_ψₖ.norm() //
144 0 : << ", ‖p‖ = " << std::setw(13) << std::sqrt(pₖᵀpₖ) //
145 0 : << ", γ = " << std::setw(13) << γₖ //
146 0 : << ", εₖ = " << std::setw(13) << εₖ << "\r\n";
147 0 : };
148 :
149 : } // namespace detail
150 :
151 : using std::chrono::duration_cast;
152 : using std::chrono::microseconds;
153 :
154 : constexpr size_t panoc_min_alloc_size = 10;
155 :
156 : template <class ObjFunT, class ObjGradFunT, class DirectionT>
157 10001 : inline PANOCStats panoc_impl(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C,
158 : rvec x, real_t ε, const PANOCParams ¶ms,
159 : vec_allocator &alloc,
160 : DirectionT &direction_provider) {
161 10001 : auto start_time = std::chrono::steady_clock::now();
162 10001 : PANOCStats s;
163 :
164 : // Keep track of how many successive iterations didn't update the iterate
165 10001 : unsigned no_progress = 0;
166 :
167 : // Future parameters?
168 10001 : AtomicStopSignal stop_signal;
169 :
170 : // Estimate Lipschitz constant ---------------------------------------------
171 :
172 10001 : real_t ψₖ, Lₖ;
173 10001 : auto grad_ψₖ = alloc.alloc();
174 10001 : auto xₖ = alloc.alloc() = x;
175 : // Finite difference approximation of ∇²ψ in starting point
176 10001 : if (params.Lipschitz.L₀ <= 0) {
177 20002 : Lₖ = detail::initial_lipschitz_estimate(
178 10001 : ψ, grad_ψ, xₖ, params.Lipschitz.ε, params.Lipschitz.δ, params.L_min,
179 10001 : params.L_max, ψₖ, grad_ψₖ, alloc);
180 10001 : }
181 : // Initial Lipschitz constant provided by the user
182 : else {
183 0 : Lₖ = params.Lipschitz.L₀;
184 : // Calculate ψ(xₖ), ∇ψ(x₀)
185 0 : ψₖ = ψ(xₖ);
186 0 : grad_ψ(xₖ, grad_ψₖ);
187 : }
188 10001 : if (not std::isfinite(Lₖ)) {
189 0 : s.status = SolverStatus::NotFinite;
190 0 : return s;
191 : }
192 10001 : real_t γₖ = params.Lipschitz.Lγ_factor / Lₖ;
193 10001 : real_t τ = NaN;
194 :
195 : // First projected gradient step -------------------------------------------
196 :
197 10001 : auto x̂ₖ = alloc.alloc(), pₖ = alloc.alloc(), grad_̂ψₖ = alloc.alloc();
198 10001 : auto xₖ₊₁ = alloc.alloc(), qₖ = alloc.alloc(), grad_ψₖ₊₁ = alloc.alloc();
199 10001 : auto x̂ₖ₊₁ = alloc.alloc(), pₖ₊₁ = alloc.alloc();
200 :
201 : // Calculate x̂₀, p₀ (projected gradient step)
202 10001 : detail::calc_x̂(γₖ, xₖ, C, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
203 : // Calculate ψ(x̂ₖ)
204 10001 : real_t ψx̂ₖ = ψ(x̂ₖ);
205 10001 : real_t grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
206 10001 : real_t pₖᵀpₖ = pₖ.squaredNorm();
207 : // Compute forward-backward envelope
208 10001 : real_t φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
209 :
210 : // Main PANOC loop
211 : // =========================================================================
212 210021 : for (unsigned k = 0; k <= params.max_iter; ++k) {
213 :
214 : // Quadratic upper bound -----------------------------------------------
215 200020 : if (k == 0 || params.update_lipschitz_in_linesearch == false) {
216 : // Decrease step size until quadratic upper bound is satisfied
217 30003 : real_t old_γₖ = detail::descent_lemma(
218 10001 : ψ, C, params.quadratic_upperbound_tolerance_factor,
219 10001 : params.L_max, xₖ, ψₖ, grad_ψₖ,
220 10001 : /* in ⟹ out */ x̂ₖ, pₖ,
221 : /* inout */ ψx̂ₖ, pₖᵀpₖ, grad_ψₖᵀpₖ, Lₖ, γₖ);
222 10001 : if (k > 0 && γₖ != old_γₖ) // Flush L-BFGS if γ changed
223 0 : direction_provider.changed_γ(γₖ, old_γₖ);
224 10001 : else if (k == 0) // Initialize L-BFGS
225 10001 : direction_provider.initialize(xₖ, x̂ₖ, pₖ, grad_ψₖ);
226 10001 : if (γₖ != old_γₖ)
227 0 : φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
228 10001 : }
229 : // Calculate ∇ψ(x̂ₖ)
230 200020 : grad_ψ(x̂ₖ, grad_̂ψₖ);
231 : // Check stop condition ------------------------------------------------
232 600060 : real_t εₖ = detail::calc_error_stop_crit(
233 200020 : C, params.stop_crit, pₖ, γₖ, xₖ, x̂ₖ, vec(), grad_ψₖ, grad_̂ψₖ);
234 :
235 : // Print progress
236 200020 : if (params.print_interval != 0 && k % params.print_interval == 0)
237 0 : detail::print_progress(k, ψₖ, grad_ψₖ, pₖᵀpₖ, γₖ, εₖ);
238 :
239 200020 : auto time_elapsed = std::chrono::steady_clock::now() - start_time;
240 400040 : auto stop_status = detail::check_all_stop_conditions(
241 200020 : params, time_elapsed, k, stop_signal, ε, εₖ, no_progress);
242 200020 : if (stop_status != SolverStatus::Unknown) {
243 10001 : x = std::move(x̂ₖ);
244 10001 : s.iterations = k;
245 10001 : s.ε = εₖ;
246 10001 : s.elapsed_time = duration_cast<microseconds>(time_elapsed);
247 10001 : s.status = stop_status;
248 10001 : alloc.free(grad_ψₖ, xₖ, x̂ₖ, pₖ, grad_̂ψₖ, xₖ₊₁, qₖ, grad_ψₖ₊₁, x̂ₖ₊₁,
249 : pₖ₊₁);
250 10001 : return s;
251 : }
252 :
253 : // Calculate quasi-Newton step -----------------------------------------
254 190019 : real_t step_size = -1;
255 190019 : if (params.lbfgs_stepsize == LBFGSStepSize::BasedOnGradientStepSize)
256 0 : step_size = 1;
257 190019 : if (k > 0)
258 180018 : direction_provider.apply(xₖ, x̂ₖ, pₖ, step_size,
259 180018 : /* in ⟹ out */ qₖ);
260 :
261 : // Line search initialization ------------------------------------------
262 190019 : τ = 1;
263 190019 : real_t σₖγₖ⁻¹pₖᵀpₖ = (1 - γₖ * Lₖ) * pₖᵀpₖ / (2 * γₖ);
264 190019 : real_t φₖ₊₁, ψₖ₊₁, ψx̂ₖ₊₁, grad_ψₖ₊₁ᵀpₖ₊₁, pₖ₊₁ᵀpₖ₊₁;
265 190019 : real_t Lₖ₊₁, γₖ₊₁;
266 190019 : real_t ls_cond;
267 : // TODO: make separate parameter
268 190019 : real_t margin =
269 190019 : (1 + std::abs(φₖ)) * params.quadratic_upperbound_tolerance_factor;
270 :
271 : // Make sure quasi-Newton step is valid
272 190019 : if (k == 0) {
273 10001 : τ = 0; // Always use prox step on first iteration
274 190019 : } else if (not qₖ.allFinite()) {
275 0 : τ = 0;
276 0 : ++s.lbfgs_failures;
277 0 : direction_provider.reset(); // Is there anything else we can do?
278 0 : }
279 :
280 : // Line search loop ----------------------------------------------------
281 190019 : do {
282 270027 : Lₖ₊₁ = Lₖ;
283 270027 : γₖ₊₁ = γₖ;
284 :
285 : // Calculate xₖ₊₁
286 270027 : if (τ / 2 < params.τ_min) { // line search failed
287 20002 : xₖ₊₁.swap(x̂ₖ); // → safe prox step
288 20002 : ψₖ₊₁ = ψx̂ₖ;
289 20002 : grad_ψₖ₊₁.swap(grad_̂ψₖ);
290 20002 : } else { // line search didn't fail (yet)
291 250025 : if (τ == 1) // → faster quasi-Newton step
292 180018 : xₖ₊₁ = xₖ + qₖ;
293 : else
294 70007 : xₖ₊₁ = xₖ + (1 - τ) * pₖ + τ * qₖ;
295 : // Calculate ψ(xₖ₊₁), ∇ψ(xₖ₊₁)
296 250025 : ψₖ₊₁ = ψ(xₖ₊₁);
297 250025 : grad_ψ(xₖ₊₁, grad_ψₖ₊₁);
298 : }
299 :
300 : // Calculate x̂ₖ₊₁, pₖ₊₁ (projected gradient step in xₖ₊₁)
301 270027 : detail::calc_x̂(γₖ₊₁, xₖ₊₁, C, grad_ψₖ₊₁, /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁);
302 : // Calculate ψ(x̂ₖ₊₁)
303 270027 : ψx̂ₖ₊₁ = ψ(x̂ₖ₊₁);
304 :
305 : // Quadratic upper bound -------------------------------------------
306 270027 : grad_ψₖ₊₁ᵀpₖ₊₁ = grad_ψₖ₊₁.dot(pₖ₊₁);
307 270027 : pₖ₊₁ᵀpₖ₊₁ = pₖ₊₁.squaredNorm();
308 270027 : real_t pₖ₊₁ᵀpₖ₊₁_ₖ = pₖ₊₁ᵀpₖ₊₁; // prox step with step size γₖ
309 :
310 270027 : if (params.update_lipschitz_in_linesearch == true) {
311 : // Decrease step size until quadratic upper bound is satisfied
312 540054 : (void)detail::descent_lemma(
313 270027 : ψ, C, params.quadratic_upperbound_tolerance_factor,
314 270027 : params.L_max, xₖ₊₁, ψₖ₊₁, grad_ψₖ₊₁,
315 270027 : /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁,
316 : /* inout */ ψx̂ₖ₊₁, pₖ₊₁ᵀpₖ₊₁, grad_ψₖ₊₁ᵀpₖ₊₁, Lₖ₊₁, γₖ₊₁);
317 270027 : }
318 :
319 : // Compute forward-backward envelope
320 270027 : φₖ₊₁ = ψₖ₊₁ + 1 / (2 * γₖ₊₁) * pₖ₊₁ᵀpₖ₊₁ + grad_ψₖ₊₁ᵀpₖ₊₁;
321 : // Compute line search condition
322 270027 : ls_cond = φₖ₊₁ - (φₖ - σₖγₖ⁻¹pₖᵀpₖ);
323 270027 : if (params.alternative_linesearch_cond)
324 0 : ls_cond -= (0.5 / γₖ₊₁ - 0.5 / γₖ) * pₖ₊₁ᵀpₖ₊₁_ₖ;
325 :
326 270027 : τ /= 2;
327 270027 : } while (ls_cond > margin && τ >= params.τ_min);
328 :
329 : // If τ < τ_min the line search failed and we accepted the prox step
330 190019 : if (τ < params.τ_min && k != 0) {
331 10001 : ++s.linesearch_failures;
332 10001 : τ = 0;
333 10001 : }
334 190019 : if (k != 0) {
335 180018 : s.count_τ += 1;
336 180018 : s.sum_τ += τ * 2;
337 180018 : s.τ_1_accepted += τ * 2 == 1;
338 180018 : }
339 :
340 : // Update L-BFGS -------------------------------------------------------
341 190019 : if (γₖ != γₖ₊₁) // Flush L-BFGS if γ changed
342 0 : direction_provider.changed_γ(γₖ₊₁, γₖ);
343 :
344 380038 : s.lbfgs_rejected += not direction_provider.update(xₖ, xₖ₊₁, pₖ, pₖ₊₁,
345 190019 : grad_ψₖ₊₁, C, γₖ₊₁);
346 :
347 : // Check if we made any progress
348 190019 : if (no_progress > 0 || k % params.max_no_progress == 0)
349 20002 : no_progress = xₖ == xₖ₊₁ ? no_progress + 1 : 0;
350 :
351 : // Advance step --------------------------------------------------------
352 190019 : Lₖ = Lₖ₊₁;
353 190019 : γₖ = γₖ₊₁;
354 :
355 190019 : ψₖ = ψₖ₊₁;
356 190019 : ψx̂ₖ = ψx̂ₖ₊₁;
357 190019 : φₖ = φₖ₊₁;
358 :
359 190019 : xₖ.swap(xₖ₊₁);
360 190019 : x̂ₖ.swap(x̂ₖ₊₁);
361 190019 : pₖ.swap(pₖ₊₁);
362 190019 : grad_ψₖ.swap(grad_ψₖ₊₁);
363 190019 : grad_ψₖᵀpₖ = grad_ψₖ₊₁ᵀpₖ₊₁;
364 190019 : pₖᵀpₖ = pₖ₊₁ᵀpₖ₊₁;
365 200020 : }
366 0 : throw std::logic_error("[PANOC] loop error");
367 10001 : }
368 :
369 : template <class DirectionProviderT = LBFGS, class ObjFunT, class ObjGradFunT>
370 10001 : PANOCStats panoc(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C, rvec x,
371 : real_t ε, const PANOCParams ¶ms,
372 : PANOCDirection<DirectionProviderT> direction,
373 : vec_allocator &alloc) {
374 10001 : if (alloc.size() < panoc_min_alloc_size)
375 0 : throw std::logic_error("Allocator too small, should be at least " +
376 0 : std::to_string(panoc_min_alloc_size));
377 10001 : const auto n = x.size();
378 10001 : if (alloc.vector_size() != n)
379 0 : throw std::logic_error("Allocator size mismatch");
380 :
381 10001 : return panoc_impl(ψ, grad_ψ, C, x, ε, params, alloc, direction);
382 10001 : }
383 :
384 : template <class DirectionProviderT = LBFGS, class ObjFunT, class ObjGradFunT>
385 : PANOCStats panoc(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C, rvec x,
386 : real_t ε, const PANOCParams ¶ms,
387 : PANOCDirection<DirectionProviderT> direction) {
388 : vec_allocator alloc{panoc_min_alloc_size, x.size()};
389 : return panoc_impl(ψ, grad_ψ, C, x, ε, params, alloc, direction);
390 : }
391 :
392 : } // namespace pa
|