Line data Source code
1 : #pragma once
2 :
3 : #include <panoc-alm/inner/decl/panoc.hpp>
4 : #include <panoc-alm/inner/detail/panoc-helpers.hpp>
5 : #include <panoc-alm/inner/directions/decl/panoc-direction-update.hpp>
6 :
7 : #include <cassert>
8 : #include <cmath>
9 : #include <iomanip>
10 : #include <iostream>
11 : #include <stdexcept>
12 :
13 : namespace pa {
14 :
15 : using std::chrono::duration_cast;
16 : using std::chrono::microseconds;
17 :
18 : template <class DirectionProviderT>
19 0 : std::string PANOCSolver<DirectionProviderT>::get_name() const {
20 0 : return "PANOCSolver<" + direction_provider.get_name() + ">";
21 0 : }
22 :
23 : template <class DirectionProviderT>
24 : typename PANOCSolver<DirectionProviderT>::Stats
25 31 : PANOCSolver<DirectionProviderT>::operator()(
26 : /// [in] Problem description
27 : const Problem &problem,
28 : /// [in] Constraint weights @f$ \Sigma @f$
29 : crvec Σ,
30 : /// [in] Tolerance @f$ \varepsilon @f$
31 : real_t ε,
32 : /// [in] Overwrite @p x, @p y and @p err_z even if not converged
33 : bool always_overwrite_results,
34 : /// [inout] Decision variable @f$ x @f$
35 : rvec x,
36 : /// [inout] Lagrange multipliers @f$ y @f$
37 : rvec y,
38 : /// [out] Slack variable error @f$ g(x) - z @f$
39 : rvec err_z) {
40 :
41 31 : auto start_time = std::chrono::steady_clock::now();
42 31 : Stats s;
43 :
44 31 : const auto n = problem.n;
45 31 : const auto m = problem.m;
46 :
47 : // Allocate vectors, init L-BFGS -------------------------------------------
48 :
49 : // TODO: the L-BFGS objects and vectors allocate on each iteration of ALM,
50 : // and there are more vectors than strictly necessary.
51 :
52 31 : bool need_grad_̂ψₖ = detail::stop_crit_requires_grad_̂ψₖ(params.stop_crit);
53 :
54 62 : vec xₖ = x, // Value of x at the beginning of the iteration
55 31 : x̂ₖ(n), // Value of x after a projected gradient step
56 31 : xₖ₊₁(n), // xₖ for next iteration
57 31 : x̂ₖ₊₁(n), // x̂ₖ for next iteration
58 31 : ŷx̂ₖ(m), // ŷ(x̂ₖ) = Σ (g(x̂ₖ) - ẑₖ)
59 31 : ŷx̂ₖ₊₁(m), // ŷ(x̂ₖ) for next iteration
60 31 : pₖ(n), // Projected gradient step pₖ = x̂ₖ - xₖ
61 31 : pₖ₊₁(n), // Projected gradient step pₖ₊₁ = x̂ₖ₊₁ - xₖ₊₁
62 31 : qₖ(n), // Newton step Hₖ pₖ
63 31 : grad_ψₖ(n), // ∇ψ(xₖ)
64 31 : grad_̂ψₖ(need_grad_̂ψₖ ? n : 0), // ∇ψ(x̂ₖ)
65 31 : grad_ψₖ₊₁(n); // ∇ψ(xₖ₊₁)
66 :
67 31 : vec work_n(n), work_m(m);
68 :
69 : // Keep track of how many successive iterations didn't update the iterate
70 31 : unsigned no_progress = 0;
71 :
72 : // Helper functions --------------------------------------------------------
73 :
74 : // Wrappers for helper functions that automatically pass along any arguments
75 : // that are constant within PANOC (for readability in the main algorithm)
76 890 : auto calc_ψ_ŷ = [&problem, &y, &Σ](crvec x, rvec ŷ) {
77 859 : return detail::calc_ψ_ŷ(problem, x, y, Σ, ŷ);
78 0 : };
79 811 : auto calc_ψ_grad_ψ = [&problem, &y, &Σ, &work_n, &work_m](crvec x,
80 : rvec grad_ψ) {
81 780 : return detail::calc_ψ_grad_ψ(problem, x, y, Σ, grad_ψ, work_n, work_m);
82 0 : };
83 610 : auto calc_grad_ψ_from_ŷ = [&problem, &work_n](crvec x, crvec ŷ,
84 : rvec grad_ψ) {
85 579 : detail::calc_grad_ψ_from_ŷ(problem, x, ŷ, grad_ψ, work_n);
86 579 : };
87 890 : auto calc_x̂ = [&problem](real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p) {
88 859 : detail::calc_x̂(problem, γ, x, grad_ψ, x̂, p);
89 859 : };
90 62 : auto calc_err_z = [&problem, &y, &Σ](crvec x̂, rvec err_z) {
91 31 : detail::calc_err_z(problem, x̂, y, Σ, err_z);
92 31 : };
93 890 : auto descent_lemma = [this, &problem, &y,
94 : &Σ](crvec xₖ, real_t ψₖ, crvec grad_ψₖ, rvec x̂ₖ,
95 : rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &pₖᵀpₖ,
96 : real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ) {
97 1718 : return detail::descent_lemma(
98 859 : problem, params.quadratic_upperbound_tolerance_factor, params.L_max,
99 859 : xₖ, ψₖ, grad_ψₖ, y, Σ, x̂ₖ, pₖ, ŷx̂ₖ, ψx̂ₖ, pₖᵀpₖ, grad_ψₖᵀpₖ, Lₖ, γₖ);
100 0 : };
101 31 : auto print_progress = [&](unsigned k, real_t ψₖ, crvec grad_ψₖ,
102 : real_t pₖᵀpₖ, real_t γₖ, real_t εₖ) {
103 0 : std::cout << "[PANOC] " << std::setw(6) << k
104 0 : << ": ψ = " << std::setw(13) << ψₖ
105 0 : << ", ‖∇ψ‖ = " << std::setw(13) << grad_ψₖ.norm()
106 0 : << ", ‖p‖ = " << std::setw(13) << std::sqrt(pₖᵀpₖ)
107 0 : << ", γ = " << std::setw(13) << γₖ
108 0 : << ", εₖ = " << std::setw(13) << εₖ << "\r\n";
109 0 : };
110 :
111 : // Estimate Lipschitz constant ---------------------------------------------
112 :
113 31 : real_t ψₖ, Lₖ;
114 : // Finite difference approximation of ∇²ψ in starting point
115 31 : if (params.Lipschitz.L₀ <= 0) {
116 62 : Lₖ = detail::initial_lipschitz_estimate(
117 31 : problem, xₖ, y, Σ, params.Lipschitz.ε, params.Lipschitz.δ,
118 31 : params.L_min, params.L_max,
119 31 : /* in ⟹ out */ ψₖ, grad_ψₖ, x̂ₖ, grad_ψₖ₊₁, work_n, work_m);
120 31 : }
121 : // Initial Lipschitz constant provided by the user
122 : else {
123 0 : Lₖ = params.Lipschitz.L₀;
124 : // Calculate ψ(xₖ), ∇ψ(x₀)
125 0 : ψₖ = calc_ψ_grad_ψ(xₖ, /* in ⟹ out */ grad_ψₖ);
126 : }
127 31 : if (not std::isfinite(Lₖ)) {
128 0 : s.status = SolverStatus::NotFinite;
129 0 : return s;
130 : }
131 31 : real_t γₖ = params.Lipschitz.Lγ_factor / Lₖ;
132 31 : real_t τ = NaN;
133 :
134 : // First projected gradient step -------------------------------------------
135 :
136 : // Calculate x̂₀, p₀ (projected gradient step)
137 31 : calc_x̂(γₖ, xₖ, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
138 : // Calculate ψ(x̂ₖ) and ŷ(x̂ₖ)
139 31 : real_t ψx̂ₖ = calc_ψ_ŷ(x̂ₖ, /* in ⟹ out */ ŷx̂ₖ);
140 31 : real_t grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
141 31 : real_t pₖᵀpₖ = pₖ.squaredNorm();
142 : // Compute forward-backward envelope
143 31 : real_t φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
144 :
145 : // Main PANOC loop
146 : // =========================================================================
147 610 : for (unsigned k = 0; k <= params.max_iter; ++k) {
148 :
149 : // Quadratic upper bound -----------------------------------------------
150 579 : if (k == 0 || params.update_lipschitz_in_linesearch == false) {
151 : // Decrease step size until quadratic upper bound is satisfied
152 62 : real_t old_γₖ =
153 62 : descent_lemma(xₖ, ψₖ, grad_ψₖ,
154 31 : /* in ⟹ out */ x̂ₖ, pₖ, ŷx̂ₖ,
155 : /* inout */ ψx̂ₖ, pₖᵀpₖ, grad_ψₖᵀpₖ, Lₖ, γₖ);
156 31 : if (k > 0 && γₖ != old_γₖ) // Flush L-BFGS if γ changed
157 0 : direction_provider.changed_γ(γₖ, old_γₖ);
158 31 : else if (k == 0) // Initialize L-BFGS
159 31 : direction_provider.initialize(xₖ, x̂ₖ, pₖ, grad_ψₖ);
160 31 : if (γₖ != old_γₖ)
161 13 : φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
162 31 : }
163 : // Calculate ∇ψ(x̂ₖ)
164 579 : if (need_grad_̂ψₖ)
165 579 : calc_grad_ψ_from_ŷ(x̂ₖ, ŷx̂ₖ, /* in ⟹ out */ grad_̂ψₖ);
166 :
167 : // Check stop condition ------------------------------------------------
168 1737 : real_t εₖ = detail::calc_error_stop_crit(
169 579 : problem.C, params.stop_crit, pₖ, γₖ, xₖ, x̂ₖ, ŷx̂ₖ, grad_ψₖ, grad_̂ψₖ);
170 :
171 : // Print progress
172 579 : if (params.print_interval != 0 && k % params.print_interval == 0)
173 0 : print_progress(k, ψₖ, grad_ψₖ, pₖᵀpₖ, γₖ, εₖ);
174 579 : if (progress_cb)
175 0 : progress_cb({k, xₖ, pₖ, pₖᵀpₖ, x̂ₖ, φₖ, ψₖ, grad_ψₖ, ψx̂ₖ, grad_̂ψₖ,
176 0 : Lₖ, γₖ, τ, εₖ, Σ, y, problem, params});
177 :
178 579 : auto time_elapsed = std::chrono::steady_clock::now() - start_time;
179 1158 : auto stop_status = detail::check_all_stop_conditions(
180 579 : params, time_elapsed, k, stop_signal, ε, εₖ, no_progress);
181 579 : if (stop_status != SolverStatus::Unknown) {
182 : // TODO: We could cache g(x) and ẑ, but would that faster?
183 : // It saves 1 evaluation of g per ALM iteration, but requires
184 : // many extra stores in the inner loops of PANOC.
185 : // TODO: move the computation of ẑ and g(x) to ALM?
186 31 : if (stop_status == SolverStatus::Converged ||
187 0 : stop_status == SolverStatus::Interrupted ||
188 0 : always_overwrite_results) {
189 31 : calc_err_z(x̂ₖ, /* in ⟹ out */ err_z);
190 31 : x = std::move(x̂ₖ);
191 31 : y = std::move(ŷx̂ₖ);
192 31 : }
193 31 : s.iterations = k;
194 31 : s.ε = εₖ;
195 31 : s.elapsed_time = duration_cast<microseconds>(time_elapsed);
196 31 : s.status = stop_status;
197 31 : return s;
198 : }
199 :
200 : // Calculate quasi-Newton step -----------------------------------------
201 548 : real_t step_size =
202 548 : params.lbfgs_stepsize == LBFGSStepSize::BasedOnGradientStepSize
203 : ? 1
204 : : -1;
205 548 : if (k > 0)
206 520 : direction_provider.apply(xₖ, x̂ₖ, pₖ, step_size,
207 520 : /* in ⟹ out */ qₖ);
208 :
209 : // Line search initialization ------------------------------------------
210 548 : τ = 1;
211 548 : real_t σₖγₖ⁻¹pₖᵀpₖ = (1 - γₖ * Lₖ) * pₖᵀpₖ / (2 * γₖ);
212 548 : real_t φₖ₊₁, ψₖ₊₁, ψx̂ₖ₊₁, grad_ψₖ₊₁ᵀpₖ₊₁, pₖ₊₁ᵀpₖ₊₁;
213 548 : real_t Lₖ₊₁, γₖ₊₁;
214 548 : real_t ls_cond;
215 : // TODO: make separate parameter
216 1096 : real_t margin =
217 548 : (1 + std::abs(φₖ)) * params.quadratic_upperbound_tolerance_factor;
218 :
219 : // Make sure quasi-Newton step is valid
220 548 : if (k == 0) {
221 28 : τ = 0; // Always use prox step on first iteration
222 548 : } else if (not qₖ.allFinite()) {
223 0 : τ = 0;
224 0 : ++s.lbfgs_failures;
225 0 : direction_provider.reset(); // Is there anything else we can do?
226 0 : }
227 :
228 : // Line search loop ----------------------------------------------------
229 548 : do {
230 828 : Lₖ₊₁ = Lₖ;
231 828 : γₖ₊₁ = γₖ;
232 :
233 : // Calculate xₖ₊₁
234 828 : if (τ / 2 < params.τ_min) { // line search failed
235 48 : xₖ₊₁.swap(x̂ₖ); // → safe prox step
236 48 : ψₖ₊₁ = ψx̂ₖ;
237 48 : if (need_grad_̂ψₖ)
238 48 : grad_ψₖ₊₁.swap(grad_̂ψₖ);
239 : else
240 0 : calc_grad_ψ_from_ŷ(xₖ₊₁, ŷx̂ₖ, /* in ⟹ out */ grad_ψₖ₊₁);
241 48 : } else { // line search didn't fail (yet)
242 780 : if (τ == 1) // → faster quasi-Newton step
243 520 : xₖ₊₁ = xₖ + qₖ;
244 : else
245 260 : xₖ₊₁ = xₖ + (1 - τ) * pₖ + τ * qₖ;
246 : // Calculate ψ(xₖ₊₁), ∇ψ(xₖ₊₁)
247 780 : ψₖ₊₁ = calc_ψ_grad_ψ(xₖ₊₁, /* in ⟹ out */ grad_ψₖ₊₁);
248 : }
249 :
250 : // Calculate x̂ₖ₊₁, pₖ₊₁ (projected gradient step in xₖ₊₁)
251 828 : calc_x̂(γₖ₊₁, xₖ₊₁, grad_ψₖ₊₁, /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁);
252 : // Calculate ψ(x̂ₖ₊₁) and ŷ(x̂ₖ₊₁)
253 828 : ψx̂ₖ₊₁ = calc_ψ_ŷ(x̂ₖ₊₁, /* in ⟹ out */ ŷx̂ₖ₊₁);
254 :
255 : // Quadratic upper bound -------------------------------------------
256 828 : grad_ψₖ₊₁ᵀpₖ₊₁ = grad_ψₖ₊₁.dot(pₖ₊₁);
257 828 : pₖ₊₁ᵀpₖ₊₁ = pₖ₊₁.squaredNorm();
258 828 : real_t pₖ₊₁ᵀpₖ₊₁_ₖ = pₖ₊₁ᵀpₖ₊₁; // prox step with step size γₖ
259 :
260 828 : if (params.update_lipschitz_in_linesearch == true) {
261 : // Decrease step size until quadratic upper bound is satisfied
262 1656 : (void)descent_lemma(xₖ₊₁, ψₖ₊₁, grad_ψₖ₊₁,
263 828 : /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁, ŷx̂ₖ₊₁,
264 : /* inout */ ψx̂ₖ₊₁, pₖ₊₁ᵀpₖ₊₁,
265 : grad_ψₖ₊₁ᵀpₖ₊₁, Lₖ₊₁, γₖ₊₁);
266 828 : }
267 :
268 : // Compute forward-backward envelope
269 828 : φₖ₊₁ = ψₖ₊₁ + 1 / (2 * γₖ₊₁) * pₖ₊₁ᵀpₖ₊₁ + grad_ψₖ₊₁ᵀpₖ₊₁;
270 : // Compute line search condition
271 828 : ls_cond = φₖ₊₁ - (φₖ - σₖγₖ⁻¹pₖᵀpₖ);
272 828 : if (params.alternative_linesearch_cond)
273 0 : ls_cond -= (0.5 / γₖ₊₁ - 0.5 / γₖ) * pₖ₊₁ᵀpₖ₊₁_ₖ;
274 :
275 828 : τ /= 2;
276 828 : } while (ls_cond > margin && τ >= params.τ_min);
277 :
278 : // If τ < τ_min the line search failed and we accepted the prox step
279 548 : if (τ < params.τ_min && k != 0) {
280 20 : ++s.linesearch_failures;
281 20 : τ = 0;
282 20 : }
283 548 : if (k != 0) {
284 520 : s.count_τ += 1;
285 520 : s.sum_τ += τ * 2;
286 520 : s.τ_1_accepted += τ * 2 == 1;
287 520 : }
288 :
289 : // Update L-BFGS -------------------------------------------------------
290 548 : if (γₖ != γₖ₊₁) // Flush L-BFGS if γ changed
291 9 : direction_provider.changed_γ(γₖ₊₁, γₖ);
292 :
293 1096 : s.lbfgs_rejected += not direction_provider.update(
294 548 : xₖ, xₖ₊₁, pₖ, pₖ₊₁, grad_ψₖ₊₁, problem.C, γₖ₊₁);
295 :
296 : // Check if we made any progress
297 548 : if (no_progress > 0 || k % params.max_no_progress == 0)
298 69 : no_progress = xₖ == xₖ₊₁ ? no_progress + 1 : 0;
299 :
300 : // Advance step --------------------------------------------------------
301 548 : Lₖ = Lₖ₊₁;
302 548 : γₖ = γₖ₊₁;
303 :
304 548 : ψₖ = ψₖ₊₁;
305 548 : ψx̂ₖ = ψx̂ₖ₊₁;
306 548 : φₖ = φₖ₊₁;
307 :
308 548 : xₖ.swap(xₖ₊₁);
309 548 : x̂ₖ.swap(x̂ₖ₊₁);
310 548 : ŷx̂ₖ.swap(ŷx̂ₖ₊₁);
311 548 : pₖ.swap(pₖ₊₁);
312 548 : grad_ψₖ.swap(grad_ψₖ₊₁);
313 548 : grad_ψₖᵀpₖ = grad_ψₖ₊₁ᵀpₖ₊₁;
314 548 : pₖᵀpₖ = pₖ₊₁ᵀpₖ₊₁;
315 579 : }
316 0 : throw std::logic_error("[PANOC] loop error");
317 31 : }
318 :
319 : } // namespace pa
|