Line data Source code
1 : #pragma once 2 : 3 : #include <panoc-alm/util/box.hpp> 4 : #include <panoc-alm/util/vec.hpp> 5 : 6 : #include <panoc-alm/inner/directions/decl/lbfgs-fwd.hpp> 7 : #include <panoc-alm/inner/directions/decl/panoc-direction-update.hpp> 8 : 9 : namespace pa { 10 : 11 : /// Parameters for the @ref LBFGS and @ref SpecializedLBFGS classes. 12 20 : struct LBFGSParams { 13 : /// Length of the history to keep. 14 10 : unsigned memory = 10; 15 10 : struct { 16 10 : real_t α = 1; 17 10 : real_t ϵ = 0; 18 : } 19 : /// Parameters in the cautious BFGS update condition 20 : /// @f[ \frac{y^\top s}{s^\top s} \ge \epsilon \| g \|^\alpha @f] 21 : /// @see https://epubs.siam.org/doi/10.1137/S1052623499354242 22 : cbfgs; 23 : 24 10 : bool rescale_when_γ_changes = false; 25 : }; 26 : 27 : /// Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm 28 : /// @ingroup grp_PANOCDirectionProviders 29 10029 : class LBFGS { 30 : public: 31 : using Params = LBFGSParams; 32 : 33 : /// The sign of the vectors @f$ p @f$ passed to the @ref LBFGS::update 34 : /// method. 35 : enum class Sign { 36 : Positive, ///< @f$ p \sim \nabla \psi(x) @f$ 37 : Negative, ///< @f$ p \sim -\nabla \psi(x) @f$ 38 : }; 39 : 40 30030 : LBFGS(Params params) : params(params) {} 41 3 : LBFGS(Params params, size_t n) : params(params) { resize(n); } 42 : 43 : /// Check if the new vectors s and y allow for a valid BFGS update that 44 : /// preserves the positive definiteness of the Hessian approximation. 45 : static bool update_valid(LBFGSParams params, real_t yᵀs, real_t sᵀs, 46 : real_t pᵀp); 47 : 48 : /// Update the inverse Hessian approximation using the new vectors xₖ₊₁ 49 : /// and pₖ₊₁. 50 : bool update(crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, 51 : Sign sign, bool forced = false); 52 : 53 : /// Apply the inverse Hessian approximation to the given vector q. 54 : template <class Vec> 55 : bool apply(Vec &&q, real_t γ); 56 : 57 : /// Apply the inverse Hessian approximation to the given vector q, applying 58 : /// only the columns and rows of the Hessian in the index set J. 59 : template <class Vec, class IndexVec> 60 : bool apply(Vec &&q, real_t γ, const IndexVec &J); 61 : 62 : /// Throw away the approximation and all previous vectors s and y. 63 : void reset(); 64 : /// Re-allocate storage for a problem with a different size. Causes 65 : /// a @ref reset. 66 : void resize(size_t n); 67 : 68 : /// Scale the stored y vectors by the given factor. 69 : void scale_y(real_t factor); 70 : 71 0 : std::string get_name() const { return "LBFGS"; } 72 : 73 22 : const Params &get_params() const { return params; } 74 : 75 : /// Get the size of the s and y vectors in the buffer. 76 29615296 : size_t n() const { return sto.rows() - 1; } 77 : /// Get the number of previous vectors s and y stored in the buffer. 78 349326 : size_t history() const { return sto.cols() / 2; } 79 : /// Get the next index in the circular buffer of previous s and y vectors. 80 341434 : size_t succ(size_t i) const { return i + 1 < history() ? i + 1 : 0; } 81 : 82 6534606 : auto s(size_t i) { return sto.col(2 * i).topRows(n()); } 83 : auto s(size_t i) const { return sto.col(2 * i).topRows(n()); } 84 6895466 : auto y(size_t i) { return sto.col(2 * i + 1).topRows(n()); } 85 : auto y(size_t i) const { return sto.col(2 * i + 1).topRows(n()); } 86 6895466 : real_t &ρ(size_t i) { return sto.coeffRef(n(), 2 * i); } 87 : const real_t &ρ(size_t i) const { return sto.coeff(n(), 2 * i); } 88 9289758 : real_t &α(size_t i) { return sto.coeffRef(n(), 2 * i + 1); } 89 : const real_t &α(size_t i) const { return sto.coeff(n(), 2 * i + 1); } 90 : 91 : private: 92 : using storage_t = Eigen::Matrix<real_t, Eigen::Dynamic, Eigen::Dynamic>; 93 : 94 : storage_t sto; 95 10011 : size_t idx = 0; 96 10011 : bool full = false; 97 : Params params; 98 : }; 99 : 100 : template <> 101 10028 : struct PANOCDirection<LBFGS> { 102 : LBFGS lbfgs; 103 20020 : PANOCDirection(const LBFGSParams ¶ms) : lbfgs(params) {} 104 : PANOCDirection(const LBFGS &lbfgs) : lbfgs(lbfgs) {} 105 : PANOCDirection(LBFGS &&lbfgs) : lbfgs(std::move(lbfgs)) {} 106 : 107 : void initialize(crvec x₀, crvec x̂₀, crvec p₀, 108 : crvec grad₀); 109 : bool update(crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, 110 : crvec grad_new, const Box &C, real_t γ_new); 111 : bool apply(crvec xₖ, crvec x̂ₖ, crvec pₖ, real_t γ, rvec qₖ); 112 : void changed_γ(real_t γₖ, real_t old_γₖ); 113 : void reset(); 114 : std::string get_name() const; 115 : LBFGSParams get_params() const; 116 : }; 117 : 118 : } // namespace pa