0.0.1a1/Doxygen/decl_2lbfgs_8hpp_source.html

#pragma once


#include <quala/decl/lbfgs-fwd.hpp>

#include <quala/util/vec.hpp>


namespace quala {


/// Tuning parameters for the @ref LBFGS class.
struct LBFGSParams {
    /// How many entries of the history to keep.
    length_t memory{10};
    /// The update is rejected if
    /// @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
    real_t min_div_fac{1e-10};
    /// The update is rejected if @f$ s^\top s \le \text{min_abs_s} @f$.
    real_t min_abs_s{1e-32};

    /// Cautious BFGS update.
    /// @see @ref cbfgs
    struct CBFGSParams {
        real_t α{1};
        real_t ϵ{0}; ///< A value of zero disables the CBFGS check.
        /// Evaluates to true if and only if ϵ is strictly positive.
        explicit operator bool() const { return 0 < ϵ; }
    };
    /// Parameters of the cautious BFGS update condition
    /// @f[ \frac{y^\top s}{s^\top s} \ge \epsilon \| g \|^\alpha @f]
    /// @see https://epubs.siam.org/doi/10.1137/S1052623499354242
    CBFGSParams cbfgs;

    /// When true, the inverse Hessian estimate should remain definite:
    /// the update is rejected if
    /// @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
    /// When false, only a singular Hessian is guarded against, by rejecting
    /// the update if
    /// @f$ \left| y^\top s \right| \le \text{min_div_fac} \cdot s^\top s @f$.
    bool force_pos_def{true};
};


/// Layout:

/// ~~~

///       ┌───── 2 m ─────┐

///     ┌ ┌───┬───┬───┬───┐

///     │ │   │   │   │   │

///     │ │ s │ y │ s │ y │

/// n+1 │ │   │   │   │   │

///     │ ├───┼───┼───┼───┤

///     │ │ ρ │ α │ ρ │ α │

///     └ └───┴───┴───┴───┘

/// ~~~

struct LBFGSStorage {

    /// Re-allocate storage for a problem with a different size.

    void resize(length_t n, length_t history);


    /// Get the size of the s and y vectors in the buffer.

    length_t n() const { return sto.rows() - 1; }

    /// Get the number of previous vectors s and y stored in the buffer.

    length_t history() const { return sto.cols() / 2; }


    auto s(index_t i) { return sto.col(2 * i).topRows(n()); }

    auto s(index_t i) const { return sto.col(2 * i).topRows(n()); }

    auto y(index_t i) { return sto.col(2 * i + 1).topRows(n()); }

    auto y(index_t i) const { return sto.col(2 * i + 1).topRows(n()); }

    real_t &ρ(index_t i) { return sto.coeffRef(n(), 2 * i); }

    const real_t &ρ(index_t i) const { return sto.coeff(n(), 2 * i); }

    real_t &α(index_t i) { return sto.coeffRef(n(), 2 * i + 1); }

    const real_t &α(index_t i) const { return sto.coeff(n(), 2 * i + 1); }


    using storage_t =

        Eigen::Matrix<real_t, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>;

    storage_t sto;

};


/// Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm

/// @ingroup accelerators-grp

class LBFGS {

  public:

    using Params = LBFGSParams;


    /// The sign of the vectors @f$ p @f$ passed to the @ref LBFGS::update

    /// method.

    enum class Sign {

        Positive, ///< @f$ p \sim \nabla \psi(x) @f$

        Negative, ///< @f$ p \sim -\nabla \psi(x) @f$

    };


    LBFGS(Params params) : params(params) {}

    LBFGS(Params params, length_t n) : params(params) { resize(n); }


    /// Check if the new vectors s and y allow for a valid BFGS update that

    /// preserves the positive definiteness of the Hessian approximation.

    static bool update_valid(const LBFGSParams &params, real_t yᵀs, real_t sᵀs,

                             real_t pᵀp);


    /// Update the inverse Hessian approximation using the new vectors

    /// sₖ = xₖ₊₁ - xₖ and yₖ = pₖ₊₁ - pₖ.

    template <class VecS, class VecY>

    bool update_sy(const anymat<VecS> &s, const anymat<VecY> &y,

                   real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced = false);


    /// Update the inverse Hessian approximation using the new vectors xₖ₊₁

    /// and pₖ₊₁.

    bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ,

                Sign sign = Sign::Positive, bool forced = false);


    /// Apply the inverse Hessian approximation to the given vector q.

    /// Initial inverse Hessian approximation is set to @f$ H_0 = \gamma I @f$.

    /// If @p γ is negative, @f$ H_0 = \frac{s^\top y}{y^\top y} I @f$.

    bool apply(rvec q, real_t γ = -1);


    /// Apply the inverse Hessian approximation to the given vector q, applying

    /// only the columns and rows of the Hessian in the index set J.

    template <class IndexVec>

    bool apply(rvec q, real_t γ, const IndexVec &J);


    /// Throw away the approximation and all previous vectors s and y.

    void reset();

    /// Re-allocate storage for a problem with a different size. Causes

    /// a @ref reset.

    void resize(length_t n);


    /// Scale the stored y vectors by the given factor.

    void scale_y(real_t factor);


    /// Get the parameters.

    const Params &get_params() const { return params; }


    /// Get the size of the s and y vectors in the buffer.

    length_t n() const { return sto.n(); }

    /// Get the number of previous vectors s and y stored in the buffer.

    length_t history() const { return sto.history(); }

    /// Get the next index in the circular buffer of previous s and y vectors.

    index_t succ(index_t i) const { return i + 1 < history() ? i + 1 : 0; }

    /// Get the previous index in the circular buffer of s and y vectors.

    index_t pred(index_t i) const { return i > 0 ? i - 1 : history() - 1; }

    /// Get the number of previous s and y vectors currently stored in the

    /// buffer.

    length_t current_history() const { return full ? history() : idx; }


    auto s(index_t i) { return sto.s(i); }

    auto s(index_t i) const { return sto.s(i); }

    auto y(index_t i) { return sto.y(i); }

    auto y(index_t i) const { return sto.y(i); }

    real_t &ρ(index_t i) { return sto.ρ(i); }

    const real_t &ρ(index_t i) const { return sto.ρ(i); }

    real_t &α(index_t i) { return sto.α(i); }

    const real_t &α(index_t i) const { return sto.α(i); }


    /// Iterate over the indices in the history buffer, oldest first.

    template <class F>

    void foreach_fwd(const F &fun) const {

        if (full)

            for (index_t i = idx; i < history(); ++i)

                fun(i);

        if (idx)

            for (index_t i = 0; i < idx; ++i)

                fun(i);

    }


    /// Iterate over the indices in the history buffer, newest first.

    template <class F>

    void foreach_rev(const F &fun) const {

        if (idx)

            for (index_t i = idx; i-- > 0;)

                fun(i);

        if (full)

            for (index_t i = history(); i-- > idx;)

                fun(i);

    }


  private:

    LBFGSStorage sto;

    index_t idx = 0;

    bool full   = false;

    Params params;

};


} // namespace quala

quala::LBFGS
Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm.
Definition: decl/lbfgs.hpp:72

quala::LBFGS::params
Params params
Definition: decl/lbfgs.hpp:171

quala::LBFGS::foreach_rev
void foreach_rev(const F &fun) const
Iterate over the indices in the history buffer, newest first.
Definition: decl/lbfgs.hpp:158

quala::LBFGS::update_valid
static bool update_valid(const LBFGSParams &params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definiteness of the Hessian approximation.
Definition: lbfgs.hpp:9

quala::LBFGS::s
auto s(index_t i)
Definition: decl/lbfgs.hpp:136

quala::LBFGS::full
bool full
Definition: decl/lbfgs.hpp:170

quala::LBFGS::s
auto s(index_t i) const
Definition: decl/lbfgs.hpp:137

quala::LBFGS::sto
LBFGSStorage sto
Definition: decl/lbfgs.hpp:168

quala::LBFGS::current_history
length_t current_history() const
Get the number of previous s and y vectors currently stored in the buffer.
Definition: decl/lbfgs.hpp:134

quala::LBFGS::succ
index_t succ(index_t i) const
Get the next index in the circular buffer of previous s and y vectors.
Definition: decl/lbfgs.hpp:129

quala::LBFGS::pred
index_t pred(index_t i) const
Get the previous index in the circular buffer of s and y vectors.
Definition: decl/lbfgs.hpp:131

quala::LBFGS::n
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition: decl/lbfgs.hpp:125

quala::LBFGS::LBFGS
LBFGS(Params params, length_t n)
Definition: decl/lbfgs.hpp:84

quala::LBFGS::LBFGS
LBFGS(Params params)
Definition: decl/lbfgs.hpp:83

quala::LBFGS::ρ
const real_t & ρ(index_t i) const
Definition: decl/lbfgs.hpp:141

quala::LBFGS::get_params
const Params & get_params() const
Get the parameters.
Definition: decl/lbfgs.hpp:122

quala::LBFGS::apply
bool apply(rvec q, real_t γ=-1)
Apply the inverse Hessian approximation to the given vector q.
Definition: lbfgs.hpp:64

quala::LBFGS::history
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: decl/lbfgs.hpp:127

quala::LBFGS::foreach_fwd
void foreach_fwd(const F &fun) const
Iterate over the indices in the history buffer, oldest first.
Definition: decl/lbfgs.hpp:147

quala::LBFGS::idx
index_t idx
Definition: decl/lbfgs.hpp:169

quala::LBFGS::resize
void resize(length_t n)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.hpp:185

quala::LBFGS::α
real_t & α(index_t i)
Definition: decl/lbfgs.hpp:142

quala::LBFGS::ρ
real_t & ρ(index_t i)
Definition: decl/lbfgs.hpp:140

quala::LBFGS::reset
void reset()
Throw away the approximation and all previous vectors s and y.
Definition: lbfgs.hpp:180

quala::LBFGS::y
auto y(index_t i) const
Definition: decl/lbfgs.hpp:139

quala::LBFGS::Sign
Sign
The sign of the vectors $p$ passed to the LBFGS::update method.
Definition: decl/lbfgs.hpp:78

quala::LBFGS::Sign::Positive
@ Positive

quala::LBFGS::Sign::Negative
@ Negative

quala::LBFGS::scale_y
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition: lbfgs.hpp:196

quala::LBFGS::update
bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, Sign sign=Sign::Positive, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₖ₊₁ and pₖ₊₁.
Definition: lbfgs.hpp:56

quala::LBFGS::y
auto y(index_t i)
Definition: decl/lbfgs.hpp:138

quala::LBFGS::α
const real_t & α(index_t i) const
Definition: decl/lbfgs.hpp:143

quala::LBFGS::update_sy
bool update_sy(const anymat< VecS > &s, const anymat< VecY > &y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Update the inverse Hessian approximation using the new vectors sₖ = xₖ₊₁ - xₖ and yₖ = pₖ₊₁ - pₖ.
Definition: lbfgs.hpp:34

lbfgs-fwd.hpp

quala
Definition: anderson-acceleration.hpp:6

quala::crvec
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18

quala::LBFGSParams::min_abs_s
real_t min_abs_s
Reject update if $s^\top s \le \text{min_abs_s}$.
Definition: decl/lbfgs.hpp:15

quala::LBFGSParams::cbfgs
CBFGSParams cbfgs
Parameters in the cautious BFGS update condition.
Definition: decl/lbfgs.hpp:26

quala::LBFGSParams::memory
length_t memory
Length of the history to keep.
Definition: decl/lbfgs.hpp:11

quala::length_t
index_t length_t
Default type for vector sizes.
Definition: vec.hpp:29

quala::LBFGSParams::force_pos_def
bool force_pos_def
If set to true, the inverse Hessian estimate should remain definite, i.e. a check is performed that rejects the update if $y^\top s \le \text{min_div_fac} \cdot s^\top s$.
Definition: decl/lbfgs.hpp:33

quala::index_t
Eigen::Index index_t
Default type for vector indices.
Definition: vec.hpp:27

quala::real_t
double real_t
Default floating point type.
Definition: vec.hpp:8

quala::anymat
Eigen::MatrixBase< Derived > anymat
Generic type for vector and matrix arguments.
Definition: vec.hpp:40

quala::LBFGSParams::min_div_fac
real_t min_div_fac
Reject update if $y^\top s \le \text{min_div_fac} \cdot s^\top s$.
Definition: decl/lbfgs.hpp:13

quala::rvec
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16

quala::LBFGSParams
Parameters for the LBFGS class.
Definition: decl/lbfgs.hpp:9

quala::LBFGSParams::CBFGSParams
Cautious BFGS update.
Definition: decl/lbfgs.hpp:18

quala::LBFGSParams::CBFGSParams::α
real_t α
Definition: decl/lbfgs.hpp:19

quala::LBFGSParams::CBFGSParams::ϵ
real_t ϵ
Set to zero to disable CBFGS check.
Definition: decl/lbfgs.hpp:20

quala::LBFGSStorage
Layout:
Definition: decl/lbfgs.hpp:47

quala::LBFGSStorage::s
auto s(index_t i)
Definition: decl/lbfgs.hpp:56

quala::LBFGSStorage::s
auto s(index_t i) const
Definition: decl/lbfgs.hpp:57

quala::LBFGSStorage::resize
void resize(length_t n, length_t history)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.hpp:192

quala::LBFGSStorage::n
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition: decl/lbfgs.hpp:52

quala::LBFGSStorage::ρ
const real_t & ρ(index_t i) const
Definition: decl/lbfgs.hpp:61

quala::LBFGSStorage::history
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: decl/lbfgs.hpp:54

quala::LBFGSStorage::storage_t
Eigen::Matrix< real_t, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor > storage_t
Definition: decl/lbfgs.hpp:66

quala::LBFGSStorage::α
real_t & α(index_t i)
Definition: decl/lbfgs.hpp:62

quala::LBFGSStorage::ρ
real_t & ρ(index_t i)
Definition: decl/lbfgs.hpp:60

quala::LBFGSStorage::y
auto y(index_t i) const
Definition: decl/lbfgs.hpp:59

quala::LBFGSStorage::sto
storage_t sto
Definition: decl/lbfgs.hpp:67

quala::LBFGSStorage::y
auto y(index_t i)
Definition: decl/lbfgs.hpp:58

quala::LBFGSStorage::α
const real_t & α(index_t i) const
Definition: decl/lbfgs.hpp:63

vec.hpp