Detailed Description

Batched linear algebra operations.

Topics
	FLOP counts
	Theoretical floating point operation counts for linear algebra operations.

Compression of masks containing zeros
template<index_t N = 8, simdifiable VA, simdifiable VS, simdifiable VAo, simdifiable VSo>
index_t	batmat::linalg::compress_masks (VA &&Ain, VS &&Sin, VAo &&Aout, VSo &&Sout)
template<index_t N = 8, simdifiable VS>
index_t	batmat::linalg::compress_masks_count (VS &&Sin)
template<index_t N = 8, simdifiable VA, simdifiable VS, simdifiable VAo>
index_t	batmat::linalg::compress_masks_sqrt (VA &&Ain, VS &&Sin, VAo &&Aout)
template<index_t N = 8, simdifiable VA, simdifiable VS, simdifiable VAo, simdifiable VSo>
index_t	batmat::linalg::compress_masks_sqrt (VA &&Ain, VS &&Sin, VAo &&Aout, VSo &&Sout)

Copying and filling batches of matrices
template<simdifiable VA, simdifiable VB, rotate_opt... Opts>
void	batmat::linalg::copy (VA &&A, VB &&B, Opts... opts)
	B = A.
template<MatrixStructure S, simdifiable VA, simdifiable VB, rotate_opt... Opts>
void	batmat::linalg::copy (Structured< VA, S > A, Structured< VB, S > B, Opts... opts)
	B = A.
template<simdifiable VB>
void	batmat::linalg::fill (simdified_value_t< VB > a, VB &&B)
	B = a.
template<MatrixStructure S, simdifiable VB>
void	batmat::linalg::fill (simdified_value_t< VB > a, Structured< VB, S > B)
	B = a.

Single-batch elementwise operations
template<simdifiable Vx, simdifiable Vz, std::convertible_to< simdified_simd_t< Vx > > T>
void	batmat::linalg::scale (T alpha, Vx &&x, Vz &&z)
	Multiply a vector by a scalar z = αx.
template<simdifiable Vx, std::convertible_to< simdified_simd_t< Vx > > T>
void	batmat::linalg::scale (T alpha, Vx &&x)
	Multiply a vector by a scalar x = αx.
template<simdifiable Vx, simdifiable Vy, simdifiable Vz>
void	batmat::linalg::hadamard (Vx &&x, Vy &&y, Vz &&z)
	Compute the Hadamard (elementwise) product of two vectors z = x ⊙ y.
template<simdifiable Vx, simdifiable Vy>
void	batmat::linalg::hadamard (Vx &&x, Vy &&y)
	Compute the Hadamard (elementwise) product of two vectors x = x ⊙ y.
template<simdifiable Vx, simdifiable Vlo, simdifiable Vhi, simdifiable Vz>
void	batmat::linalg::clamp (Vx &&x, Vlo &&lo, Vhi &&hi, Vz &&z)
	Elementwise clamping z = max(lo, min(x, hi)).
template<simdifiable Vx, simdifiable Vlo, simdifiable Vhi, simdifiable Vz>
void	batmat::linalg::clamp_resid (Vx &&x, Vlo &&lo, Vhi &&hi, Vz &&z)
	Elementwise clamping residual z = x - max(lo, min(x, hi)).
template<simdifiable Vx, simdifiable Vz>
void	batmat::linalg::clamp (Vx &&x, simdified_simd_t< Vx > lo, simdified_simd_t< Vx > hi, Vz &&z)
	Elementwise clamping z = max(lo, min(x, hi)), with scalar lo and hi.
template<simdifiable Vx, simdifiable Vy, simdifiable Vz, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>
void	batmat::linalg::axpby (Ta alpha, Vx &&x, Tb beta, Vy &&y, Vz &&z)
	Add scaled vector z = αx + βy.
template<simdifiable Vx, simdifiable Vy, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>
void	batmat::linalg::axpby (Ta alpha, Vx &&x, Tb beta, Vy &&y)
	Add scaled vector y = αx + βy.
template<auto Beta = 1, simdifiable Vy, simdifiable... Vx>
void	batmat::linalg::axpy (Vy &&y, const std::array< simdified_simd_t< Vy >, sizeof...(Vx)> &alphas, Vx &&...x)
	Add scaled vector y = ∑ᵢ αᵢxᵢ + βy.
template<simdifiable Vx, simdifiable Vy, simdifiable Vz, std::convertible_to< simdified_simd_t< Vx > > Ta>
void	batmat::linalg::axpy (Ta alpha, Vx &&x, Vy &&y, Vz &&z)
	Add scaled vector z = αx + y.
template<auto Beta = 1, simdifiable Vx, simdifiable Vy, std::convertible_to< simdified_simd_t< Vx > > Ta>
void	batmat::linalg::axpy (Ta alpha, Vx &&x, Vy &&y)
	Add scaled vector y = αx + βy (where β is a compile-time constant).
template<simdifiable VA, simdifiable VB, int Rotate = 0>
void	batmat::linalg::negate (VA &&A, VB &&B, with_rotate_t< Rotate >={})
	Negate a matrix or vector B = -A.
template<simdifiable VA, int Rotate = 0>
void	batmat::linalg::negate (VA &&A, with_rotate_t< Rotate >={})
	Negate a matrix or vector A = -A.
template<simdifiable VA, simdifiable VB, simdifiable VC, int Rotate = 0>
void	batmat::linalg::sub (VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate >={})
	Subtract two matrices or vectors C = A - B. Rotate affects B.
template<simdifiable VA, simdifiable VB, int Rotate = 0>
void	batmat::linalg::sub (VA &&A, VB &&B, with_rotate_t< Rotate >={})
	Subtract two matrices or vectors A = A - B. Rotate affects B.
template<simdifiable VA, simdifiable VB, simdifiable VC, int Rotate = 0>
void	batmat::linalg::add (VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate >={})
	Add two matrices or vectors C = A + B. Rotate affects B.
template<simdifiable VA, simdifiable VB, int Rotate = 0>
void	batmat::linalg::add (VA &&A, VB &&B, with_rotate_t< Rotate >={})
	Add two matrices or vectors A = A + B. Rotate affects B.
template<class F, simdifiable VA, simdifiable... VAs>
void	batmat::linalg::for_each_elementwise (F &&fun, VA &&A, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors.
template<class F, simdifiable VA, simdifiable... VAs>
void	batmat::linalg::transform_elementwise (F &&fun, VA &&A, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors, storing the result in the first argument.
template<class F, simdifiable VA, simdifiable VB, simdifiable... VAs>
void	batmat::linalg::transform2_elementwise (F &&fun, VA &&A, VB &&B, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors, storing the results in the first two arguments.
template<class F, simdifiable... VAs, simdifiable... VBs>
void	batmat::linalg::transform_n_elementwise (F &&fun, std::tuple< VAs... > As, VBs &&...Bs)
	Apply a function to all elements of the given matrices or vectors, storing the results in the tuple of matrices given as the first argument.
template<class F, simdifiable... VAs, simdifiable... VBs>
void	batmat::linalg::transform_n_diag (F &&fun, std::tuple< VAs... > As, VBs &&...Bs)
	Apply a function to all elements of the given vectors and the diagonal elements of the given square matrices, storing the results in the tuple of vectors or matrices given as the first argument.
template<class F, simdifiable VA, simdifiable VB>
void	batmat::linalg::copy_diag (VA &&A, VB &&B)
	Copy the diagonal elements of a matrix.
template<simdifiable VA, simdifiable VB, simdifiable VC>
void	batmat::linalg::add_diag (VA &&A, VB &&b, VC &&C)
	C = A + diag(b).
template<simdifiable VA, simdifiable VB>
void	batmat::linalg::add_diag (VA &&A, VB &&b)
	A += diag(b).

Multi-batch elementwise operations
template<simdifiable_multi Vx, simdifiable_multi Vz, std::convertible_to< simdified_simd_t< Vx > > T>
void	batmat::linalg::multi::scale (T alpha, Vx &&x, Vz &&z)
	Multiply a vector by a scalar z = αx.
template<simdifiable_multi Vx, std::convertible_to< simdified_simd_t< Vx > > T>
void	batmat::linalg::multi::scale (T alpha, Vx &&x)
	Multiply a vector by a scalar x = αx.
template<simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz>
void	batmat::linalg::multi::hadamard (Vx &&x, Vy &&y, Vz &&z)
	Compute the Hadamard (elementwise) product of two vectors z = x ⊙ y.
template<simdifiable_multi Vx, simdifiable_multi Vy>
void	batmat::linalg::multi::hadamard (Vx &&x, Vy &&y)
	Compute the Hadamard (elementwise) product of two vectors x = x ⊙ y.
template<simdifiable_multi Vx, simdifiable_multi Vlo, simdifiable_multi Vhi, simdifiable_multi Vz>
void	batmat::linalg::multi::clamp (Vx &&x, Vlo &&lo, Vhi &&hi, Vz &&z)
	Elementwise clamping z = max(lo, min(x, hi)).
template<simdifiable_multi Vx, simdifiable_multi Vlo, simdifiable_multi Vhi, simdifiable_multi Vz>
void	batmat::linalg::multi::clamp_resid (Vx &&x, Vlo &&lo, Vhi &&hi, Vz &&z)
	Elementwise clamping residual z = x - max(lo, min(x, hi)).
template<simdifiable_multi Vx, simdifiable_multi Vz>
void	batmat::linalg::multi::clamp (Vx &&x, simdified_simd_t< Vx > lo, simdified_simd_t< Vx > hi, Vz &&z)
	Elementwise clamping z = max(lo, min(x, hi)), with scalar lo and hi.
template<simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>
void	batmat::linalg::multi::axpby (Ta alpha, Vx &&x, Tb beta, Vy &&y, Vz &&z)
	Add scaled vector z = αx + βy.
template<simdifiable_multi Vx, simdifiable_multi Vy, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>
void	batmat::linalg::multi::axpby (Ta alpha, Vx &&x, Tb beta, Vy &&y)
	Add scaled vector y = αx + βy.
template<auto Beta = 1, simdifiable_multi Vy, simdifiable_multi... Vx>
void	batmat::linalg::multi::axpy (Vy &&y, const std::array< simdified_simd_t< Vy >, sizeof...(Vx)> &alphas, Vx &&...x)
	Add scaled vector y = ∑ᵢ αᵢxᵢ + βy.
template<simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz, std::convertible_to< simdified_simd_t< Vx > > Ta>
void	batmat::linalg::multi::axpy (Ta alpha, Vx &&x, Vy &&y, Vz &&z)
	Add scaled vector z = αx + y.
template<auto Beta = 1, simdifiable_multi Vx, simdifiable_multi Vy, std::convertible_to< simdified_simd_t< Vx > > Ta>
void	batmat::linalg::multi::axpy (Ta alpha, Vx &&x, Vy &&y)
	Add scaled vector y = αx + βy (where β is a compile-time constant).
template<simdifiable_multi VA, simdifiable_multi VB, int Rotate = 0>
void	batmat::linalg::multi::negate (VA &&A, VB &&B, with_rotate_t< Rotate > rot={})
	Negate a matrix or vector B = -A.
template<simdifiable_multi VA, int Rotate = 0>
void	batmat::linalg::multi::negate (VA &&A, with_rotate_t< Rotate > rot={})
	Negate a matrix or vector A = -A.
template<simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi VC, int Rotate = 0>
void	batmat::linalg::multi::sub (VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate > rot={})
	Subtract two matrices or vectors C = A - B. Rotate affects B.
template<simdifiable_multi VA, simdifiable_multi VB, int Rotate = 0>
void	batmat::linalg::multi::sub (VA &&A, VB &&B, with_rotate_t< Rotate > rot={})
	Subtract two matrices or vectors A = A - B. Rotate affects B.
template<simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi VC, int Rotate = 0>
void	batmat::linalg::multi::add (VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate > rot={})
	Add two matrices or vectors C = A + B. Rotate affects B.
template<simdifiable_multi VA, simdifiable_multi VB, int Rotate = 0>
void	batmat::linalg::multi::add (VA &&A, VB &&B, with_rotate_t< Rotate > rot={})
	Add two matrices or vectors A = A + B. Rotate affects B.
template<class F, simdifiable_multi VA, simdifiable_multi... VAs>
void	batmat::linalg::multi::for_each_elementwise (F &&fun, VA &&A, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors.
template<class F, simdifiable_multi VA, simdifiable_multi... VAs>
void	batmat::linalg::multi::transform_elementwise (F &&fun, VA &&A, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors, storing the result in the first argument.
template<class F, simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi... VAs>
void	batmat::linalg::multi::transform2_elementwise (F &&fun, VA &&A, VB &&B, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors, storing the results in the first two arguments.
template<class F, simdifiable_multi... VAs, simdifiable_multi... VBs>
void	batmat::linalg::multi::transform_n_elementwise (F &&fun, std::tuple< VAs... > As, VBs &&...Bs)
	Apply a function to all elements of the given matrices or vectors, storing the results in the tuple of matrices given as the first argument.

Multiplication of batches of matrices with diagonal scaling
template<simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	batmat::linalg::gemm_diag (VA &&A, VB &&B, VD &&D, Vd &&d, Opts... opts)
	D = A diag(d) B.
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	batmat::linalg::gemm_diag_add (VA &&A, VB &&B, VC &&C, VD &&D, Vd &&d, Opts... opts)
	D = C + A diag(d) B.
template<simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	batmat::linalg::gemm_diag_add (VA &&A, VB &&B, VD &&D, Vd &&d, Opts... opts)
	D += A diag(d) B.
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	batmat::linalg::gemm_diag_sub (VA &&A, VB &&B, VC &&C, VD &&D, Vd &&d, Opts... opts)
	D = C - A diag(d) B.
template<simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	batmat::linalg::gemm_diag_sub (VA &&A, VB &&B, VD &&D, Vd &&d, Opts... opts)
	D -= A diag(d) B.
template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	batmat::linalg::syrk_diag (VA &&A, Structured< VD, SC > D, Vd &&d, Opts... opts)
	D = A diag(d) Aᵀ with D symmetric.
template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	batmat::linalg::syrk_diag_add (VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, Vd &&d, Opts... opts)
	D = C + A diag(d) Aᵀ with C, D symmetric.
template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	batmat::linalg::syrk_diag_add (VA &&A, Structured< VD, SC > D, Vd &&d, Opts... opts)
	D += A diag(d) Aᵀ with D symmetric.
template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	batmat::linalg::syrk_diag_sub (VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, Vd &&d, Opts... opts)
	D = C - A diag(d) Aᵀ with C, D symmetric.
template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	batmat::linalg::syrk_diag_sub (VA &&A, Structured< VD, SC > D, Vd &&d, Opts... opts)
	D -= A diag(d) Aᵀ with D symmetric.

Multiplication of batches of general matrices
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemm (VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)
	D = A B.
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemm_neg (VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)
	D = -A B.
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemm_add (VA &&A, VB &&B, VC &&C, VD &&D, TilingOptions packing={}, Opts... opts)
	D = C + A B.
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemm_add (VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)
	D += A B.
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemm_sub (VA &&A, VB &&B, VC &&C, VD &&D, TilingOptions packing={}, Opts... opts)
	D = C - A B.
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemm_sub (VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)
	D -= A B.

Multiplication of batches of matrices with symmetric results
template<MatrixStructure SA, MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::syrk (Structured< VA, SA > A, Structured< VD, SD > D, Opts... opts)
	D = A Aᵀ with D symmetric.
template<MatrixStructure SD, class TA, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::syrk (TA &&A, Structured< VD, SD > D, Opts... opts)
	D = A Aᵀ with D symmetric.
template<MatrixStructure SD, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::syrk (Structured< VD, SD > D, Opts... opts)
	D = D Dᵀ with D triangular on input and symmetric on output.
template<MatrixStructure SA, MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::syrk_neg (Structured< VA, SA > A, Structured< VD, SD > D, Opts... opts)
	D = -A Aᵀ with D symmetric.
template<MatrixStructure SD, class TA, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::syrk_neg (TA &&A, Structured< VD, SD > D, Opts... opts)
	D = -A Aᵀ with D symmetric.
template<MatrixStructure SD, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::syrk_neg (Structured< VD, SD > D, Opts... opts)
	D = -D Dᵀ with D triangular on input and symmetric on output.
template<MatrixStructure SD, simdifiable VA, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::syrk_add (VA &&A, Structured< VC, SD > C, Structured< VD, SD > D, Opts... opts)
	D = C + A Aᵀ with C, D symmetric.
template<MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::syrk_add (VA &&A, Structured< VD, SD > D, Opts... opts)
	D += A Aᵀ with D symmetric.
template<MatrixStructure SD, simdifiable VA, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::syrk_sub (VA &&A, Structured< VC, SD > C, Structured< VD, SD > D, Opts... opts)
	D = C - A Aᵀ with C, D symmetric.
template<MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::syrk_sub (VA &&A, Structured< VD, SD > D, Opts... opts)
	D -= A Aᵀ with D symmetric.

Multiplication of batches of triangular matrices
template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::trmm (Structured< VA, SA > A, Structured< VB, SB > B, Structured< VD, SD > D, Opts... opts)
	D = A B with A and/or B triangular.
template<class TA, class TB, class TD, shift_opt... Opts>
void	batmat::linalg::trmm (TA &&A, TB &&B, TD &&D, Opts... opts)
	D = A B with A and/or B triangular.
template<MatrixStructure SA, simdifiable VA, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::trmm (Structured< VA, SA > A, VD &&D, Opts... opts)
	D = A D with A triangular.
template<MatrixStructure SB, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::trmm (VD &&D, Structured< VB, SB > B, Opts... opts)
	D = D B with B triangular.
template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::trmm_neg (Structured< VA, SA > A, Structured< VB, SB > B, Structured< VD, SD > D, Opts... opts)
	D = -A B with A and/or B triangular.
template<class TA, class TB, class TD, shift_opt... Opts>
void	batmat::linalg::trmm_neg (TA &&A, TB &&B, TD &&D, Opts... opts)
	D = -A B with A and/or B triangular.
template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::trmm_add (Structured< VA, SA > A, Structured< VB, SB > B, Structured< VC, SD > C, Structured< VD, SD > D, Opts... opts)
	D = C + A B with A and/or B triangular.
template<class TA, class TB, class TC, class TD, shift_opt... Opts>
void	batmat::linalg::trmm_add (TA &&A, TB &&B, TC &&C, TD &&D, Opts... opts)
	D = C + A B with A and/or B triangular.
template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::trmm_sub (Structured< VA, SA > A, Structured< VB, SB > B, Structured< VC, SD > C, Structured< VD, SD > D, Opts... opts)
	D = C - A B with A and/or B triangular.
template<class TA, class TB, class TC, class TD, shift_opt... Opts>
void	batmat::linalg::trmm_sub (TA &&A, TB &&B, TC &&C, TD &&D, Opts... opts)
	D = C - A B with A and/or B triangular.

Matrix-vector multiplication of batches of matrices
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemv (VA &&A, VB &&B, VD &&D, Opts... opts)
	d = A b
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemv_neg (VA &&A, VB &&B, VD &&D, Opts... opts)
	d = -A b
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemv_add (VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts)
	d = c + A b
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemv_add (VA &&A, VB &&B, VD &&D, Opts... opts)
	d = d + A b
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemv_sub (VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts)
	d = c - A b
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	batmat::linalg::gemv_sub (VA &&A, VB &&B, VD &&D, Opts... opts)
	d = d - A b

Cholesky factorization updates
template<MatrixStructure SL, simdifiable VL, simdifiable VA, simdifiable Vd>
void	batmat::linalg::hyhound_diag (Structured< VL, SL > L, VA &&A, Vd &&d)
	Update Cholesky factor L using low-rank term A diag(d) Aᵀ.
template<MatrixStructure SL, simdifiable VL, simdifiable VA, simdifiable Vd, simdifiable VW>
void	batmat::linalg::hyhound_diag (Structured< VL, SL > L, VA &&A, Vd &&d, VW &&W)
	Update Cholesky factor L using low-rank term A diag(d) Aᵀ, with full Householder representation.
template<MatrixStructure SL, simdifiable VL>
auto	batmat::linalg::hyhound_size_W (Structured< VL, SL > L)
	Get the size of the storage for the matrix W returned by hyhound_diag(Structured<VL,SL>, VA&&, Vd&&, VW&&).
template<simdifiable VL, simdifiable VA, simdifiable VD, simdifiable VB, simdifiable Vd, simdifiable VW>
void	batmat::linalg::hyhound_diag_apply (VL &&L, VA &&A, VD &&D, VB &&B, Vd &&d, VW &&W, index_t kA_in_offset=0)
	Apply Householder transformation generated by hyhound_diag, computing (L̃, D) = (L, A) Q̆.
template<simdifiable VL, simdifiable VA, simdifiable VB, simdifiable Vd, simdifiable VW>
void	batmat::linalg::hyhound_diag_apply (VL &&L, VA &&A, VB &&B, Vd &&d, VW &&W)
	Apply Householder transformation generated by hyhound_diag, computing (L̃, Ã) = (L, A) Q̆.
template<MatrixStructure SL, simdifiable VL, simdifiable VA, simdifiable Vd>
void	batmat::linalg::hyhound_sign (Structured< VL, SL > L, VA &&A, Vd &&d)
	Update Cholesky factor L using low-rank term A diag(copysign(1, d)) Aᵀ, where d contains only ±0 values.
template<MatrixStructure SL, simdifiable VL1, simdifiable VA1, simdifiable VL2, simdifiable VA2, simdifiable Vd>
void	batmat::linalg::hyhound_diag_2 (Structured< VL1, SL > L1, VA1 &&A1, VL2 &&L2, VA2 &&A2, Vd &&d)
	Update Cholesky factor L using low-rank term A diag(d) Aᵀ, where L and A are stored as two separate block rows.
template<MatrixStructure SL, simdifiable VL11, simdifiable VA1, simdifiable VL21, simdifiable VA2, simdifiable VA2o, simdifiable VU, simdifiable VA3, simdifiable VA3o, simdifiable Vd>
void	batmat::linalg::hyhound_diag_cyclic (Structured< VL11, SL > L11, VA1 &&A1, VL21 &&L21, VA2 &&A22, VA2o &&A2_out, VU &&L31, VA3 &&A31, VA3o &&A3_out, Vd &&d)
	Update structured Cholesky factor L using structured low-rank term A diag(d) Aᵀ.
template<MatrixStructure SL, simdifiable VL11, simdifiable VA1, simdifiable VL21, simdifiable VA2, simdifiable VA2o, simdifiable VLu1, simdifiable VAuo, simdifiable Vd>
void	batmat::linalg::hyhound_diag_riccati (Structured< VL11, SL > L11, VA1 &&A1, VL21 &&L21, VA2 &&A2, VA2o &&A2_out, VLu1 &&Lu1, VAuo &&Au_out, Vd &&d, bool shift_A_out=false)
	Update structured Cholesky factor L using structured low-rank term A diag(d) Aᵀ.

Cholesky factorization of batches of matrices
template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD>
void	batmat::linalg::syrk_add_potrf (VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VA > regularization=0)
	D = chol(C + AAᵀ) with C symmetric, D triangular.
template<MatrixStructure SC, simdifiable VA, simdifiable VD>
void	batmat::linalg::syrk_add_potrf (VA &&A, Structured< VD, SC > D)
	D = chol(D + AAᵀ) with D symmetric/triangular.
template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD>
void	batmat::linalg::syrk_sub_potrf (VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VA > regularization=0)
	D = chol(C - AAᵀ) with C symmetric, D triangular.
template<MatrixStructure SC, simdifiable VA, simdifiable VD>
void	batmat::linalg::syrk_sub_potrf (VA &&A, Structured< VD, SC > D, simdified_value_t< VA > regularization=0)
	D = chol(D - AAᵀ) with D symmetric/triangular.
template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd>
void	batmat::linalg::syrk_diag_add_potrf (VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, Vd &&d, simdified_value_t< VA > regularization=0)
	D = chol(C + A diag(d) Aᵀ) with C symmetric, D triangular.
template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd>
void	batmat::linalg::syrk_diag_add_potrf (VA &&A, Structured< VD, SC > D, Vd &&d)
	D = chol(D + A diag(d) Aᵀ) with D symmetric/triangular.
template<MatrixStructure SC, simdifiable VC, simdifiable VD>
void	batmat::linalg::potrf (Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VC > regularization=0)
	D = chol(C) with C symmetric, D triangular.
template<MatrixStructure SD, simdifiable VD>
void	batmat::linalg::potrf (Structured< VD, SD > D, simdified_value_t< VD > regularization=0)
	D = chol(D) with D symmetric/triangular.

Single-batch reduction operations
template<simdifiable Vx>
norms< simdified_value_t< Vx >, simdified_simd_t< Vx > >::result_simd	batmat::linalg::vnorms_all (Vx &&x)
	Compute the lane-wise norms (max, 1-norm, and 2-norm) of a batch of vectors.
template<simdifiable Vx>
norms< simdified_value_t< Vx > >::result	batmat::linalg::norms_all (Vx &&x)
	Compute the norms (max, 1-norm, and 2-norm) of a vector.
template<simdifiable Vx>
simdified_simd_t< Vx >	batmat::linalg::vnorm_inf (Vx &&x)
	Compute the lane-wise infinity norms of a batch of vectors.
template<simdifiable Vx>
simdified_value_t< Vx >	batmat::linalg::norm_inf (Vx &&x)
	Compute the infinity norm of a vector.
template<simdifiable Vx>
simdified_simd_t< Vx >	batmat::linalg::vnorm_1 (Vx &&x)
	Compute the lane-wise 1-norms of a batch of vectors.
template<simdifiable Vx>
simdified_value_t< Vx >	batmat::linalg::norm_1 (Vx &&x)
	Compute the 1-norm of a vector.
template<simdifiable Vx>
simdified_simd_t< Vx >	batmat::linalg::vnorm_2_squared (Vx &&x)
	Compute the lane-wise squared 2-norms of a batch of vectors.
template<simdifiable Vx>
simdified_value_t< Vx >	batmat::linalg::norm_2_squared (Vx &&x)
	Compute the squared 2-norm of a vector.
template<simdifiable Vx>
simdified_simd_t< Vx >	batmat::linalg::vnorm_2 (Vx &&x)
	Compute the lane-wise 2-norms of a batch of vectors.
template<simdifiable Vx>
simdified_value_t< Vx >	batmat::linalg::norm_2 (Vx &&x)
	Compute the 2-norm of a vector.
template<simdifiable Vx, simdifiable Vy>
simdified_simd_t< Vx >	batmat::linalg::vdot (Vx &&x, Vy &&y)
	Compute the lane-wise dot products of two batches of vectors.
template<simdifiable Vx, simdifiable Vy>
simdified_value_t< Vx >	batmat::linalg::dot (Vx &&x, Vy &&y)
	Compute the dot product of two vectors.
template<simdifiable Vw, simdifiable Va>
simdified_simd_t< Vw >	batmat::linalg::weighted_vnorm_sq (Vw &&w, Va &&a)
	∑ wᵢ aᵢ² (lane-wise).
template<simdifiable Vw, simdifiable Va>
simdified_value_t< Vw >	batmat::linalg::weighted_norm_sq (Vw &&w, Va &&a)
	∑ wᵢ aᵢ².
template<simdifiable Vw, simdifiable Va, simdifiable Vb>
simdified_simd_t< Vw >	batmat::linalg::weighted_vnorm_sq_diff (Vw &&w, Va &&a, Vb &&b)
	∑ wᵢ(aᵢ - bᵢ)² (lane-wise).
template<simdifiable Vw, simdifiable Va, simdifiable Vb>
simdified_value_t< Vw >	batmat::linalg::weighted_norm_sq_diff (Vw &&w, Va &&a, Vb &&b)
	∑ wᵢ(aᵢ - bᵢ)².

Multi-batch reduction operations
template<simdifiable_multi Vx>
norms< simdified_value_t< Vx > >::result	batmat::linalg::multi::norms_all (Vx &&x)
	Compute the norms (max, 1-norm, and 2-norm) of a vector.
template<simdifiable_multi Vx>
simdified_value_t< Vx >	batmat::linalg::multi::norm_inf (Vx &&x)
	Compute the infinity norm of a vector.
template<simdifiable_multi Vx>
simdified_value_t< Vx >	batmat::linalg::multi::norm_1 (Vx &&x)
	Compute the 1-norm of a vector.
template<simdifiable_multi Vx>
simdified_value_t< Vx >	batmat::linalg::multi::norm_2_squared (Vx &&x)
	Compute the squared 2-norm of a vector.
template<simdifiable_multi Vx>
simdified_value_t< Vx >	batmat::linalg::multi::norm_2 (Vx &&x)
	Compute the 2-norm of a vector.
template<simdifiable_multi Vx, simdifiable_multi Vy>
simdified_value_t< Vx >	batmat::linalg::multi::dot (Vx &&x, Vy &&y)
	Compute the dot product of two vectors.
template<simdifiable_multi Vw, simdifiable_multi Vx>
simdified_value_t< Vw >	batmat::linalg::multi::weighted_norm_sq (Vw &&w, Vx &&x)
	∑ wᵢ xᵢ².
template<simdifiable_multi Vw, simdifiable_multi Vx, simdifiable_multi Vy>
simdified_value_t< Vw >	batmat::linalg::multi::weighted_norm_sq_difference (Vw &&w, Vx &&x, Vy &&y)
	∑ wᵢ(xᵢ - yᵢ)².
template<simdifiable_multi Vx>
norms< simdified_value_t< Vx >, simdified_simd_t< Vx > >::result_simd	batmat::linalg::multi::vnorms_all (Vx &&x)
	Compute the lane-wise norms (max, 1-norm, and 2-norm) of a batch of vectors.
template<simdifiable_multi Vx>
simdified_simd_t< Vx >	batmat::linalg::multi::vnorm_inf (Vx &&x)
	Compute the lane-wise infinity norms of a batch of vectors.
template<simdifiable_multi Vx>
simdified_simd_t< Vx >	batmat::linalg::multi::vnorm_1 (Vx &&x)
	Compute the lane-wise 1-norms of a batch of vectors.
template<simdifiable_multi Vx>
simdified_simd_t< Vx >	batmat::linalg::multi::vnorm_2_squared (Vx &&x)
	Compute the lane-wise squared 2-norms of a batch of vectors.
template<simdifiable_multi Vx>
simdified_simd_t< Vx >	batmat::linalg::multi::vnorm_2 (Vx &&x)
	Compute the lane-wise 2-norms of a batch of vectors.
template<simdifiable_multi Vx, simdifiable_multi Vy>
simdified_simd_t< Vx >	batmat::linalg::multi::vdot (Vx &&x, Vy &&y)
	Compute the lane-wise dot products of two batches of vectors.
template<simdifiable_multi Vw, simdifiable_multi Vx>
simdified_simd_t< Vw >	batmat::linalg::multi::weighted_vnorm_sq (Vw &&w, Vx &&x)
	∑ wᵢ xᵢ² (lane-wise).
template<simdifiable_multi Vw, simdifiable_multi Vx, simdifiable_multi Vy>
simdified_simd_t< Vw >	batmat::linalg::multi::weighted_vnorm_sq_diff (Vw &&w, Vx &&x, Vy &&y)
	∑ wᵢ(xᵢ - yᵢ)² (lane-wise).

Symmetric multiplication of batches of matrices
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>
void	batmat::linalg::symm_add (Structured< VA, SA > A, VB &&B, VC &&C, VD &&D)
	D = C + A B with A symmetric.
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	batmat::linalg::symm_add (Structured< VA, SA > A, VB &&B, VD &&D)
	D = D + A B with A symmetric.

Symmetric matrix-vector multiplication of batches of matrices
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	batmat::linalg::symv (Structured< VA, SA > A, VB &&B, VD &&D)
	d = A b where A is symmetric
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	batmat::linalg::symv_neg (Structured< VA, SA > A, VB &&B, VD &&D)
	d = -A b where A is symmetric
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>
void	batmat::linalg::symv_add (Structured< VA, SA > A, VB &&B, VC &&C, VD &&D)
	d = c + A b where A is symmetric
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	batmat::linalg::symv_add (Structured< VA, SA > A, VB &&B, VD &&D)
	d = d + A b where A is symmetric
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>
void	batmat::linalg::symv_sub (Structured< VA, SA > A, VB &&B, VC &&C, VD &&D)
	d = c - A b where A is symmetric
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	batmat::linalg::symv_sub (Structured< VA, SA > A, VB &&B, VD &&D)
	d = d - A b where A is symmetric

Symmetric matrix-vector multiplication of a block tridiagonal matrix
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	batmat::linalg::syomv (Structured< VA, SA > A, VB &&B, VD &&D)
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	batmat::linalg::syomv_neg (Structured< VA, SA > A, VB &&B, VD &&D)

Triangular views of batches of matrices
template<class M>
constexpr auto	batmat::linalg::tril (M &&m)
	Lower-triangular view.
template<class M>
constexpr auto	batmat::linalg::triu (M &&m)
	Upper-triangular view.
template<MatrixStructure S, class M>
constexpr auto	batmat::linalg::make_structured (M &&m)
	View with the given structure.

Triangular solve of batches of matrices
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD, int RotB = 0>
void	batmat::linalg::trsm (Structured< VA, SA > A, VB &&B, VD &&D, with_rotate_B_t< RotB >={})
	D = A⁻¹ B with A triangular.
template<MatrixStructure SA, simdifiable VA, simdifiable VD, int RotB = 0>
void	batmat::linalg::trsm (Structured< VA, SA > A, VD &&D, with_rotate_B_t< RotB > shift={})
	D = A⁻¹ D with A triangular.
template<MatrixStructure SB, simdifiable VA, simdifiable VB, simdifiable VD, int RotA = 0>
void	batmat::linalg::trsm (VA &&A, Structured< VB, SB > B, VD &&D, with_rotate_A_t< RotA >={})
	D = A B⁻¹ with B triangular.
template<MatrixStructure SB, simdifiable VB, simdifiable VD, int RotA = 0>
void	batmat::linalg::trsm (VD &&D, Structured< VB, SB > B, with_rotate_A_t< RotA > shift={})
	D = D B⁻¹ with B triangular.

Triangular inversion of batches of matrices
template<simdifiable VA, simdifiable VD>
void	batmat::linalg::trtri (Structured< VA, MatrixStructure::LowerTriangular > A, Structured< VD, MatrixStructure::LowerTriangular > D)
	D = A⁻¹ with A, D lower triangular.
template<simdifiable VA, simdifiable VD>
void	batmat::linalg::trtri (Structured< VA, MatrixStructure::UpperTriangular > A, Structured< VD, MatrixStructure::UpperTriangular > D)
	D = A⁻¹ with A, D upper triangular.
template<simdifiable VD>
void	batmat::linalg::trtri (Structured< VD, MatrixStructure::LowerTriangular > D)
	D = D⁻¹ with D lower triangular.
template<simdifiable VD>
void	batmat::linalg::trtri (Structured< VD, MatrixStructure::UpperTriangular > D)
	D = D⁻¹ with D upper triangular.

Classes
struct	batmat::linalg::TilingOptions
	Packing and tiling options for matrix-matrix multiplication. More...
struct	batmat::linalg::Structured< M, S >
	Light-weight wrapper class used for overload resolution of triangular and symmetric matrices. More...

Enumerations
enum class	batmat::linalg::PackingSelector : int8_t { batmat::linalg::PackingSelector::Never , batmat::linalg::PackingSelector::Always , batmat::linalg::PackingSelector::Transpose }
	Decides which matrices to pack during large matrix-matrix multiplication. More...
enum class	batmat::linalg::MatrixStructure : int8_t { batmat::linalg::MatrixStructure::General , batmat::linalg::MatrixStructure::LowerTriangular , batmat::linalg::MatrixStructure::UpperTriangular }

Functions
constexpr MatrixStructure	batmat::linalg::transpose (MatrixStructure s)
template<class M>
	batmat::linalg::Structured (M &&) -> Structured< M >

Class Documentation

◆ batmat::linalg::TilingOptions

struct batmat::linalg::TilingOptions

Class Members
bool	no_tiling = false	Don't use cache tiling.
PackingSelector	pack_A = PackingSelector::Transpose	When to pack matrix A.
PackingSelector	pack_B = PackingSelector::Always	When to pack matrix B.
index_t	n_c = 0	Cache block size in the N dimension (columns of B, C and D).
index_t	k_c = 0	Cache block size in the K dimension (columns of A, rows of B).
index_t	m_c = 0	Cache block size in the M dimension (rows of A, C and D).

Enumeration Type Documentation

◆ PackingSelector

enum class batmat::linalg::PackingSelector : int8_t

strong

#include <batmat/linalg/gemm.hpp>

Decides which matrices to pack during large matrix-matrix multiplication.

Enumerator
Never	Access the original matrices directly in the micro-kernels.
Always	Always pack the blocks of the matrix in a contiguous workspace.
Transpose	Pack the blocks of the matrix only if it is not in the optimal storage order.

Definition at line 20 of file gemm.hpp.

◆ MatrixStructure

enum class batmat::linalg::MatrixStructure : int8_t

strong

#include <batmat/linalg/structure.hpp>

Enumerator
General
LowerTriangular
UpperTriangular

Definition at line 8 of file structure.hpp.

Function Documentation

◆ compress_masks()

template<index_t N = 8, simdifiable VA, simdifiable VS, simdifiable VAo, simdifiable VSo>

index_t batmat::linalg::compress_masks	(	VA &&	Ain,
		VS &&	Sin,
		VAo &&	Aout,
		VSo &&	Sout )

#include <batmat/linalg/compress.hpp>

Definition at line 237 of file compress.hpp.

◆ compress_masks_count()

template<index_t N = 8, simdifiable VS>

index_t batmat::linalg::compress_masks_count ( VS && Sin )

#include <batmat/linalg/compress.hpp>

Definition at line 243 of file compress.hpp.

◆ compress_masks_sqrt() [1/2]

template<index_t N = 8, simdifiable VA, simdifiable VS, simdifiable VAo>

index_t batmat::linalg::compress_masks_sqrt	(	VA &&	Ain,
		VS &&	Sin,
		VAo &&	Aout )

#include <batmat/linalg/compress.hpp>

Definition at line 249 of file compress.hpp.

◆ compress_masks_sqrt() [2/2]

template<index_t N = 8, simdifiable VA, simdifiable VS, simdifiable VAo, simdifiable VSo>

index_t batmat::linalg::compress_masks_sqrt	(	VA &&	Ain,
		VS &&	Sin,
		VAo &&	Aout,
		VSo &&	Sout )

#include <batmat/linalg/compress.hpp>

Definition at line 255 of file compress.hpp.

◆ copy() [1/2]

template<simdifiable VA, simdifiable VB, rotate_opt... Opts>

void batmat::linalg::copy	(	VA &&	A,
		VB &&	B,
		Opts...	opts )

#include <batmat/linalg/copy.hpp>

B = A.

Definition at line 187 of file copy.hpp.

◆ copy() [2/2]

template<MatrixStructure S, simdifiable VA, simdifiable VB, rotate_opt... Opts>

void batmat::linalg::copy	(	Structured< VA, S >	A,
		Structured< VB, S >	B,
		Opts...	opts )

#include <batmat/linalg/copy.hpp>

B = A.

Definition at line 196 of file copy.hpp.

◆ fill() [1/2]

template<simdifiable VB>

void batmat::linalg::fill	(	simdified_value_t< VB >	a,
		VB &&	B )

#include <batmat/linalg/copy.hpp>

B = a.

Examples: example.cpp.

Definition at line 204 of file copy.hpp.

◆ fill() [2/2]

template<MatrixStructure S, simdifiable VB>

void batmat::linalg::fill	(	simdified_value_t< VB >	a,
		Structured< VB, S >	B )

#include <batmat/linalg/copy.hpp>

B = a.

Definition at line 210 of file copy.hpp.

◆ scale() [1/4]

template<simdifiable Vx, simdifiable Vz, std::convertible_to< simdified_simd_t< Vx > > T>

void batmat::linalg::scale	(	T	alpha,
		Vx &&	x,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Multiply a vector by a scalar z = αx.

Definition at line 281 of file elementwise.hpp.

◆ scale() [2/4]

template<simdifiable Vx, std::convertible_to< simdified_simd_t< Vx > > T>

void batmat::linalg::scale	(	T	alpha,
		Vx &&	x )

#include <batmat/linalg/elementwise.hpp>

Multiply a vector by a scalar x = αx.

Definition at line 289 of file elementwise.hpp.

◆ hadamard() [1/4]

template<simdifiable Vx, simdifiable Vy, simdifiable Vz>

void batmat::linalg::hadamard	(	Vx &&	x,
		Vy &&	y,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Compute the Hadamard (elementwise) product of two vectors z = x ⊙ y.

Definition at line 298 of file elementwise.hpp.

◆ hadamard() [2/4]

template<simdifiable Vx, simdifiable Vy>

void batmat::linalg::hadamard	(	Vx &&	x,
		Vy &&	y )

#include <batmat/linalg/elementwise.hpp>

Compute the Hadamard (elementwise) product of two vectors x = x ⊙ y.

Definition at line 307 of file elementwise.hpp.

◆ clamp() [1/4]

template<simdifiable Vx, simdifiable Vlo, simdifiable Vhi, simdifiable Vz>

void batmat::linalg::clamp	(	Vx &&	x,
		Vlo &&	lo,
		Vhi &&	hi,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Elementwise clamping z = max(lo, min(x, hi)).

Definition at line 316 of file elementwise.hpp.

◆ clamp_resid() [1/2]

template<simdifiable Vx, simdifiable Vlo, simdifiable Vhi, simdifiable Vz>

void batmat::linalg::clamp_resid	(	Vx &&	x,
		Vlo &&	lo,
		Vhi &&	hi,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Elementwise clamping residual z = x - max(lo, min(x, hi)).

Definition at line 325 of file elementwise.hpp.

◆ clamp() [2/4]

template<simdifiable Vx, simdifiable Vz>

void batmat::linalg::clamp	(	Vx &&	x,
		simdified_simd_t< Vx >	lo,
		simdified_simd_t< Vx >	hi,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Elementwise clamping z = max(lo, min(x, hi)), with scalar lo and hi.

Definition at line 334 of file elementwise.hpp.

◆ axpby() [1/4]

template<simdifiable Vx, simdifiable Vy, simdifiable Vz, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>

void batmat::linalg::axpby	(	Ta	alpha,
		Vx &&	x,
		Tb	beta,
		Vy &&	y,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Add scaled vector z = αx + βy.

Definition at line 345 of file elementwise.hpp.

◆ axpby() [2/4]

template<simdifiable Vx, simdifiable Vy, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>

void batmat::linalg::axpby	(	Ta	alpha,
		Vx &&	x,
		Tb	beta,
		Vy &&	y )

#include <batmat/linalg/elementwise.hpp>

Add scaled vector y = αx + βy.

Definition at line 356 of file elementwise.hpp.

◆ axpy() [1/6]

template<auto Beta = 1, simdifiable Vy, simdifiable... Vx>

void batmat::linalg::axpy	(	Vy &&	y,
		const std::array< simdified_simd_t< Vy >, sizeof...(Vx)> &	alphas,
		Vx &&...	x )

#include <batmat/linalg/elementwise.hpp>

Add scaled vector y = ∑ᵢ αᵢxᵢ + βy.

Definition at line 365 of file elementwise.hpp.

◆ axpy() [2/6]

template<simdifiable Vx, simdifiable Vy, simdifiable Vz, std::convertible_to< simdified_simd_t< Vx > > Ta>

void batmat::linalg::axpy	(	Ta	alpha,
		Vx &&	x,
		Vy &&	y,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Add scaled vector z = αx + y.

Definition at line 377 of file elementwise.hpp.

◆ axpy() [3/6]

template<auto Beta = 1, simdifiable Vx, simdifiable Vy, std::convertible_to< simdified_simd_t< Vx > > Ta>

void batmat::linalg::axpy	(	Ta	alpha,
		Vx &&	x,
		Vy &&	y )

#include <batmat/linalg/elementwise.hpp>

Add scaled vector y = αx + βy (where β is a compile-time constant).

Definition at line 385 of file elementwise.hpp.

◆ negate() [1/4]

template<simdifiable VA, simdifiable VB, int Rotate = 0>

void batmat::linalg::negate	(	VA &&	A,
		VB &&	B,
		with_rotate_t< Rotate >	= {} )

#include <batmat/linalg/elementwise.hpp>

Negate a matrix or vector B = -A.

Definition at line 396 of file elementwise.hpp.

◆ negate() [2/4]

template<simdifiable VA, int Rotate = 0>

void batmat::linalg::negate	(	VA &&	A,
		with_rotate_t< Rotate >	= {} )

#include <batmat/linalg/elementwise.hpp>

Negate a matrix or vector A = -A.

Definition at line 404 of file elementwise.hpp.

◆ sub() [1/4]

template<simdifiable VA, simdifiable VB, simdifiable VC, int Rotate = 0>

void batmat::linalg::sub	(	VA &&	A,
		VB &&	B,
		VC &&	C,
		with_rotate_t< Rotate >	= {} )

#include <batmat/linalg/elementwise.hpp>

Subtract two matrices or vectors C = A - B. Rotate affects B.

Definition at line 413 of file elementwise.hpp.

◆ sub() [2/4]

template<simdifiable VA, simdifiable VB, int Rotate = 0>

void batmat::linalg::sub	(	VA &&	A,
		VB &&	B,
		with_rotate_t< Rotate >	= {} )

#include <batmat/linalg/elementwise.hpp>

Subtract two matrices or vectors A = A - B. Rotate affects B.

Definition at line 422 of file elementwise.hpp.

◆ add() [1/4]

template<simdifiable VA, simdifiable VB, simdifiable VC, int Rotate = 0>

void batmat::linalg::add	(	VA &&	A,
		VB &&	B,
		VC &&	C,
		with_rotate_t< Rotate >	= {} )

#include <batmat/linalg/elementwise.hpp>

Add two matrices or vectors C = A + B. Rotate affects B.

Definition at line 431 of file elementwise.hpp.

◆ add() [2/4]

template<simdifiable VA, simdifiable VB, int Rotate = 0>

void batmat::linalg::add	(	VA &&	A,
		VB &&	B,
		with_rotate_t< Rotate >	= {} )

#include <batmat/linalg/elementwise.hpp>

Add two matrices or vectors A = A + B. Rotate affects B.

Definition at line 440 of file elementwise.hpp.

◆ for_each_elementwise() [1/2]

template<class F, simdifiable VA, simdifiable... VAs>

void batmat::linalg::for_each_elementwise	(	F &&	fun,
		VA &&	A,
		VAs &&...	As )

#include <batmat/linalg/elementwise.hpp>

Apply a function to all elements of the given matrices or vectors.

Definition at line 449 of file elementwise.hpp.

◆ transform_elementwise() [1/2]

template<class F, simdifiable VA, simdifiable... VAs>

void batmat::linalg::transform_elementwise	(	F &&	fun,
		VA &&	A,
		VAs &&...	As )

#include <batmat/linalg/elementwise.hpp>

Apply a function to all elements of the given matrices or vectors, storing the result in the first argument.

Definition at line 459 of file elementwise.hpp.

◆ transform2_elementwise() [1/2]

template<class F, simdifiable VA, simdifiable VB, simdifiable... VAs>

void batmat::linalg::transform2_elementwise	(	F &&	fun,
		VA &&	A,
		VB &&	B,
		VAs &&...	As )

#include <batmat/linalg/elementwise.hpp>

Apply a function to all elements of the given matrices or vectors, storing the results in the first two arguments.

Definition at line 469 of file elementwise.hpp.

◆ transform_n_elementwise() [1/2]

template<class F, simdifiable... VAs, simdifiable... VBs>

void batmat::linalg::transform_n_elementwise	(	F &&	fun,
		std::tuple< VAs... >	As,
		VBs &&...	Bs )

#include <batmat/linalg/elementwise.hpp>

Apply a function to all elements of the given matrices or vectors, storing the results in the tuple of matrices given as the first argument.

Definition at line 479 of file elementwise.hpp.

◆ transform_n_diag()

template<class F, simdifiable... VAs, simdifiable... VBs>

void batmat::linalg::transform_n_diag	(	F &&	fun,
		std::tuple< VAs... >	As,
		VBs &&...	Bs )

#include <batmat/linalg/elementwise.hpp>

Apply a function to all elements of the given vectors and the diagonal elements of the given square matrices, storing the results in the tuple of vectors or matrices given as the first argument.

Most efficient if only the first argument contains matrices, and all other arguments are column vectors.

Definition at line 494 of file elementwise.hpp.

◆ copy_diag()

template<class F, simdifiable VA, simdifiable VB>

void batmat::linalg::copy_diag	(	VA &&	A,
		VB &&	B )

#include <batmat/linalg/elementwise.hpp>

Copy the diagonal elements of a matrix.

The arguments A and B must either be square matrices or vectors. This function supports setting the diagonal of a matrix to the values of a vector, copying the diagonal of one matrix to the diagonal of another, or copying the diagonal elements of a matrix to a vector.

Definition at line 524 of file elementwise.hpp.

◆ add_diag() [1/2]

template<simdifiable VA, simdifiable VB, simdifiable VC>

void batmat::linalg::add_diag	(	VA &&	A,
		VB &&	b,
		VC &&	C )

#include <batmat/linalg/elementwise.hpp>

C = A + diag(b).

Definition at line 550 of file elementwise.hpp.

◆ add_diag() [2/2]

template<simdifiable VA, simdifiable VB>

void batmat::linalg::add_diag	(	VA &&	A,
		VB &&	b )

#include <batmat/linalg/elementwise.hpp>

A += diag(b).

Definition at line 570 of file elementwise.hpp.

◆ scale() [3/4]

template<simdifiable_multi Vx, simdifiable_multi Vz, std::convertible_to< simdified_simd_t< Vx > > T>

void batmat::linalg::multi::scale	(	T	alpha,
		Vx &&	x,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Multiply a vector by a scalar z = αx.

Definition at line 591 of file elementwise.hpp.

◆ scale() [4/4]

template<simdifiable_multi Vx, std::convertible_to< simdified_simd_t< Vx > > T>

void batmat::linalg::multi::scale	(	T	alpha,
		Vx &&	x )

#include <batmat/linalg/elementwise.hpp>

Multiply a vector by a scalar x = αx.

Definition at line 599 of file elementwise.hpp.

◆ hadamard() [3/4]

template<simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz>

void batmat::linalg::multi::hadamard	(	Vx &&	x,
		Vy &&	y,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Compute the Hadamard (elementwise) product of two vectors z = x ⊙ y.

Definition at line 607 of file elementwise.hpp.

◆ hadamard() [4/4]

template<simdifiable_multi Vx, simdifiable_multi Vy>

void batmat::linalg::multi::hadamard	(	Vx &&	x,
		Vy &&	y )

#include <batmat/linalg/elementwise.hpp>

Compute the Hadamard (elementwise) product of two vectors x = x ⊙ y.

Definition at line 617 of file elementwise.hpp.

◆ clamp() [3/4]

template<simdifiable_multi Vx, simdifiable_multi Vlo, simdifiable_multi Vhi, simdifiable_multi Vz>

void batmat::linalg::multi::clamp	(	Vx &&	x,
		Vlo &&	lo,
		Vhi &&	hi,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Elementwise clamping z = max(lo, min(x, hi)).

Definition at line 626 of file elementwise.hpp.

◆ clamp_resid() [2/2]

template<simdifiable_multi Vx, simdifiable_multi Vlo, simdifiable_multi Vhi, simdifiable_multi Vz>

void batmat::linalg::multi::clamp_resid	(	Vx &&	x,
		Vlo &&	lo,
		Vhi &&	hi,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Elementwise clamping residual z = x - max(lo, min(x, hi)).

Definition at line 637 of file elementwise.hpp.

◆ clamp() [4/4]

template<simdifiable_multi Vx, simdifiable_multi Vz>

void batmat::linalg::multi::clamp	(	Vx &&	x,
		simdified_simd_t< Vx >	lo,
		simdified_simd_t< Vx >	hi,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Elementwise clamping z = max(lo, min(x, hi)), with scalar lo and hi.

Definition at line 648 of file elementwise.hpp.

◆ axpby() [3/4]

template<simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>

void batmat::linalg::multi::axpby	(	Ta	alpha,
		Vx &&	x,
		Tb	beta,
		Vy &&	y,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Add scaled vector z = αx + βy.

Definition at line 659 of file elementwise.hpp.

◆ axpby() [4/4]

template<simdifiable_multi Vx, simdifiable_multi Vy, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>

void batmat::linalg::multi::axpby	(	Ta	alpha,
		Vx &&	x,
		Tb	beta,
		Vy &&	y )

#include <batmat/linalg/elementwise.hpp>

Add scaled vector y = αx + βy.

Definition at line 671 of file elementwise.hpp.

◆ axpy() [4/6]

template<auto Beta = 1, simdifiable_multi Vy, simdifiable_multi... Vx>

void batmat::linalg::multi::axpy	(	Vy &&	y,
		const std::array< simdified_simd_t< Vy >, sizeof...(Vx)> &	alphas,
		Vx &&...	x )

#include <batmat/linalg/elementwise.hpp>

Add scaled vector y = ∑ᵢ αᵢxᵢ + βy.

Definition at line 680 of file elementwise.hpp.

◆ axpy() [5/6]

template<simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz, std::convertible_to< simdified_simd_t< Vx > > Ta>

void batmat::linalg::multi::axpy	(	Ta	alpha,
		Vx &&	x,
		Vy &&	y,
		Vz &&	z )

#include <batmat/linalg/elementwise.hpp>

Add scaled vector z = αx + y.

Definition at line 690 of file elementwise.hpp.

◆ axpy() [6/6]

template<auto Beta = 1, simdifiable_multi Vx, simdifiable_multi Vy, std::convertible_to< simdified_simd_t< Vx > > Ta>

void batmat::linalg::multi::axpy	(	Ta	alpha,
		Vx &&	x,
		Vy &&	y )

#include <batmat/linalg/elementwise.hpp>

Add scaled vector y = αx + βy (where β is a compile-time constant).

Definition at line 698 of file elementwise.hpp.

◆ negate() [3/4]

template<simdifiable_multi VA, simdifiable_multi VB, int Rotate = 0>

void batmat::linalg::multi::negate	(	VA &&	A,
		VB &&	B,
		with_rotate_t< Rotate >	rot = {} )

#include <batmat/linalg/elementwise.hpp>

Negate a matrix or vector B = -A.

Definition at line 707 of file elementwise.hpp.

◆ negate() [4/4]

template<simdifiable_multi VA, int Rotate = 0>

void batmat::linalg::multi::negate	(	VA &&	A,
		with_rotate_t< Rotate >	rot = {} )

#include <batmat/linalg/elementwise.hpp>

Negate a matrix or vector A = -A.

Definition at line 715 of file elementwise.hpp.

◆ sub() [3/4]

template<simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi VC, int Rotate = 0>

void batmat::linalg::multi::sub	(	VA &&	A,
		VB &&	B,
		VC &&	C,
		with_rotate_t< Rotate >	rot = {} )

#include <batmat/linalg/elementwise.hpp>

Subtract two matrices or vectors C = A - B. Rotate affects B.

Definition at line 723 of file elementwise.hpp.

◆ sub() [4/4]

template<simdifiable_multi VA, simdifiable_multi VB, int Rotate = 0>

void batmat::linalg::multi::sub	(	VA &&	A,
		VB &&	B,
		with_rotate_t< Rotate >	rot = {} )

#include <batmat/linalg/elementwise.hpp>

Subtract two matrices or vectors A = A - B. Rotate affects B.

Definition at line 733 of file elementwise.hpp.

◆ add() [3/4]

template<simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi VC, int Rotate = 0>

void batmat::linalg::multi::add	(	VA &&	A,
		VB &&	B,
		VC &&	C,
		with_rotate_t< Rotate >	rot = {} )

#include <batmat/linalg/elementwise.hpp>

Add two matrices or vectors C = A + B. Rotate affects B.

Definition at line 742 of file elementwise.hpp.

◆ add() [4/4]

template<simdifiable_multi VA, simdifiable_multi VB, int Rotate = 0>

void batmat::linalg::multi::add	(	VA &&	A,
		VB &&	B,
		with_rotate_t< Rotate >	rot = {} )

#include <batmat/linalg/elementwise.hpp>

Add two matrices or vectors A = A + B. Rotate affects B.

Definition at line 752 of file elementwise.hpp.

◆ for_each_elementwise() [2/2]

template<class F, simdifiable_multi VA, simdifiable_multi... VAs>

void batmat::linalg::multi::for_each_elementwise	(	F &&	fun,
		VA &&	A,
		VAs &&...	As )

#include <batmat/linalg/elementwise.hpp>

Apply a function to all elements of the given matrices or vectors.

Definition at line 761 of file elementwise.hpp.

◆ transform_elementwise() [2/2]

template<class F, simdifiable_multi VA, simdifiable_multi... VAs>

void batmat::linalg::multi::transform_elementwise	(	F &&	fun,
		VA &&	A,
		VAs &&...	As )

#include <batmat/linalg/elementwise.hpp>

Apply a function to all elements of the given matrices or vectors, storing the result in the first argument.

Definition at line 771 of file elementwise.hpp.

◆ transform2_elementwise() [2/2]

template<class F, simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi... VAs>

void batmat::linalg::multi::transform2_elementwise	(	F &&	fun,
		VA &&	A,
		VB &&	B,
		VAs &&...	As )

#include <batmat/linalg/elementwise.hpp>

Apply a function to all elements of the given matrices or vectors, storing the results in the first two arguments.

Definition at line 781 of file elementwise.hpp.

◆ transform_n_elementwise() [2/2]

template<class F, simdifiable_multi... VAs, simdifiable_multi... VBs>

void batmat::linalg::multi::transform_n_elementwise	(	F &&	fun,
		std::tuple< VAs... >	As,
		VBs &&...	Bs )

#include <batmat/linalg/elementwise.hpp>

Apply a function to all elements of the given matrices or vectors, storing the results in the tuple of matrices given as the first argument.

Definition at line 792 of file elementwise.hpp.

◆ gemm_diag()

template<simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>

void batmat::linalg::gemm_diag	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		Vd &&	d,
		Opts...	opts )

#include <batmat/linalg/gemm-diag.hpp>

D = A diag(d) B.

Definition at line 93 of file gemm-diag.hpp.

◆ gemm_diag_add() [1/2]

template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>

void batmat::linalg::gemm_diag_add	(	VA &&	A,
		VB &&	B,
		VC &&	C,
		VD &&	D,
		Vd &&	d,
		Opts...	opts )

#include <batmat/linalg/gemm-diag.hpp>

D = C + A diag(d) B.

Definition at line 104 of file gemm-diag.hpp.

◆ gemm_diag_add() [2/2]

template<simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>

void batmat::linalg::gemm_diag_add	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		Vd &&	d,
		Opts...	opts )

#include <batmat/linalg/gemm-diag.hpp>

D += A diag(d) B.

Definition at line 114 of file gemm-diag.hpp.

◆ gemm_diag_sub() [1/2]

template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>

void batmat::linalg::gemm_diag_sub	(	VA &&	A,
		VB &&	B,
		VC &&	C,
		VD &&	D,
		Vd &&	d,
		Opts...	opts )

#include <batmat/linalg/gemm-diag.hpp>

D = C - A diag(d) B.

Definition at line 122 of file gemm-diag.hpp.

◆ gemm_diag_sub() [2/2]

template<simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>

void batmat::linalg::gemm_diag_sub	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		Vd &&	d,
		Opts...	opts )

#include <batmat/linalg/gemm-diag.hpp>

D -= A diag(d) B.

Definition at line 132 of file gemm-diag.hpp.

◆ syrk_diag()

template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>

void batmat::linalg::syrk_diag	(	VA &&	A,
		Structured< VD, SC >	D,
		Vd &&	d,
		Opts...	opts )

#include <batmat/linalg/gemm-diag.hpp>

D = A diag(d) Aᵀ with D symmetric.

Definition at line 140 of file gemm-diag.hpp.

◆ syrk_diag_add() [1/2]

template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>

void batmat::linalg::syrk_diag_add	(	VA &&	A,
		Structured< VC, SC >	C,
		Structured< VD, SC >	D,
		Vd &&	d,
		Opts...	opts )

#include <batmat/linalg/gemm-diag.hpp>

D = C + A diag(d) Aᵀ with C, D symmetric.

Definition at line 154 of file gemm-diag.hpp.

◆ syrk_diag_add() [2/2]

template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>

void batmat::linalg::syrk_diag_add	(	VA &&	A,
		Structured< VD, SC >	D,
		Vd &&	d,
		Opts...	opts )

#include <batmat/linalg/gemm-diag.hpp>

D += A diag(d) Aᵀ with D symmetric.

Definition at line 166 of file gemm-diag.hpp.

◆ syrk_diag_sub() [1/2]

template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>

void batmat::linalg::syrk_diag_sub	(	VA &&	A,
		Structured< VC, SC >	C,
		Structured< VD, SC >	D,
		Vd &&	d,
		Opts...	opts )

#include <batmat/linalg/gemm-diag.hpp>

D = C - A diag(d) Aᵀ with C, D symmetric.

Definition at line 174 of file gemm-diag.hpp.

◆ syrk_diag_sub() [2/2]

template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>

void batmat::linalg::syrk_diag_sub	(	VA &&	A,
		Structured< VD, SC >	D,
		Vd &&	d,
		Opts...	opts )

#include <batmat/linalg/gemm-diag.hpp>

D -= A diag(d) Aᵀ with D symmetric.

Definition at line 186 of file gemm-diag.hpp.

◆ gemm()

template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemm	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		TilingOptions	packing = {},
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = A B.

Definition at line 255 of file gemm.hpp.

◆ gemm_neg()

template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemm_neg	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		TilingOptions	packing = {},
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = -A B.

Definition at line 265 of file gemm.hpp.

◆ gemm_add() [1/2]

template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemm_add	(	VA &&	A,
		VB &&	B,
		VC &&	C,
		VD &&	D,
		TilingOptions	packing = {},
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = C + A B.

Definition at line 275 of file gemm.hpp.

◆ gemm_add() [2/2]

template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemm_add	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		TilingOptions	packing = {},
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D += A B.

Definition at line 283 of file gemm.hpp.

◆ gemm_sub() [1/2]

template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemm_sub	(	VA &&	A,
		VB &&	B,
		VC &&	C,
		VD &&	D,
		TilingOptions	packing = {},
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = C - A B.

Definition at line 290 of file gemm.hpp.

◆ gemm_sub() [2/2]

template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemm_sub	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		TilingOptions	packing = {},
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D -= A B.

Definition at line 298 of file gemm.hpp.

◆ syrk() [1/3]

template<MatrixStructure SA, MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>

void batmat::linalg::syrk	(	Structured< VA, SA >	A,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = A Aᵀ with D symmetric.

Examples: example.cpp.

Definition at line 310 of file gemm.hpp.

◆ syrk() [2/3]

template<MatrixStructure SD, class TA, simdifiable VD, shift_opt... Opts>

void batmat::linalg::syrk	(	TA &&	A,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = A Aᵀ with D symmetric.

Definition at line 323 of file gemm.hpp.

◆ syrk() [3/3]

template<MatrixStructure SD, simdifiable VD, shift_opt... Opts>

void batmat::linalg::syrk	(	Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = D Dᵀ with D triangular on input and symmetric on output.

Definition at line 329 of file gemm.hpp.

◆ syrk_neg() [1/3]

template<MatrixStructure SA, MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>

void batmat::linalg::syrk_neg	(	Structured< VA, SA >	A,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = -A Aᵀ with D symmetric.

Definition at line 343 of file gemm.hpp.

◆ syrk_neg() [2/3]

template<MatrixStructure SD, class TA, simdifiable VD, shift_opt... Opts>

void batmat::linalg::syrk_neg	(	TA &&	A,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = -A Aᵀ with D symmetric.

Definition at line 356 of file gemm.hpp.

◆ syrk_neg() [3/3]

template<MatrixStructure SD, simdifiable VD, shift_opt... Opts>

void batmat::linalg::syrk_neg	(	Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = -D Dᵀ with D triangular on input and symmetric on output.

Definition at line 362 of file gemm.hpp.

◆ syrk_add() [1/2]

template<MatrixStructure SD, simdifiable VA, simdifiable VC, simdifiable VD, shift_opt... Opts>

void batmat::linalg::syrk_add	(	VA &&	A,
		Structured< VC, SD >	C,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = C + A Aᵀ with C, D symmetric.

Definition at line 376 of file gemm.hpp.

◆ syrk_add() [2/2]

template<MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>

void batmat::linalg::syrk_add	(	VA &&	A,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D += A Aᵀ with D symmetric.

Definition at line 386 of file gemm.hpp.

◆ syrk_sub() [1/2]

template<MatrixStructure SD, simdifiable VA, simdifiable VC, simdifiable VD, shift_opt... Opts>

void batmat::linalg::syrk_sub	(	VA &&	A,
		Structured< VC, SD >	C,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = C - A Aᵀ with C, D symmetric.

Definition at line 393 of file gemm.hpp.

◆ syrk_sub() [2/2]

template<MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>

void batmat::linalg::syrk_sub	(	VA &&	A,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D -= A Aᵀ with D symmetric.

Definition at line 403 of file gemm.hpp.

◆ trmm() [1/4]

template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::trmm	(	Structured< VA, SA >	A,
		Structured< VB, SB >	B,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = A B with A and/or B triangular.

Definition at line 416 of file gemm.hpp.

◆ trmm() [2/4]

template<class TA, class TB, class TD, shift_opt... Opts>

void batmat::linalg::trmm	(	TA &&	A,
		TB &&	B,
		TD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = A B with A and/or B triangular.

Definition at line 425 of file gemm.hpp.

◆ trmm() [3/4]

template<MatrixStructure SA, simdifiable VA, simdifiable VD, shift_opt... Opts>

void batmat::linalg::trmm	(	Structured< VA, SA >	A,
		VD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = A D with A triangular.

Definition at line 431 of file gemm.hpp.

◆ trmm() [4/4]

template<MatrixStructure SB, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::trmm	(	VD &&	D,
		Structured< VB, SB >	B,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = D B with B triangular.

Definition at line 436 of file gemm.hpp.

◆ trmm_neg() [1/2]

template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::trmm_neg	(	Structured< VA, SA >	A,
		Structured< VB, SB >	B,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = -A B with A and/or B triangular.

Definition at line 444 of file gemm.hpp.

◆ trmm_neg() [2/2]

template<class TA, class TB, class TD, shift_opt... Opts>

void batmat::linalg::trmm_neg	(	TA &&	A,
		TB &&	B,
		TD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = -A B with A and/or B triangular.

Definition at line 453 of file gemm.hpp.

◆ trmm_add() [1/2]

template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>

void batmat::linalg::trmm_add	(	Structured< VA, SA >	A,
		Structured< VB, SB >	B,
		Structured< VC, SD >	C,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = C + A B with A and/or B triangular.

Definition at line 462 of file gemm.hpp.

◆ trmm_add() [2/2]

template<class TA, class TB, class TC, class TD, shift_opt... Opts>

void batmat::linalg::trmm_add	(	TA &&	A,
		TB &&	B,
		TC &&	C,
		TD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = C + A B with A and/or B triangular.

Definition at line 472 of file gemm.hpp.

◆ trmm_sub() [1/2]

template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>

void batmat::linalg::trmm_sub	(	Structured< VA, SA >	A,
		Structured< VB, SB >	B,
		Structured< VC, SD >	C,
		Structured< VD, SD >	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = C - A B with A and/or B triangular.

Definition at line 481 of file gemm.hpp.

◆ trmm_sub() [2/2]

template<class TA, class TB, class TC, class TD, shift_opt... Opts>

void batmat::linalg::trmm_sub	(	TA &&	A,
		TB &&	B,
		TC &&	C,
		TD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemm.hpp>

D = C - A B with A and/or B triangular.

Definition at line 491 of file gemm.hpp.

◆ gemv()

template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemv	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemv.hpp>

d = A b

Definition at line 73 of file gemv.hpp.

◆ gemv_neg()

template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemv_neg	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemv.hpp>

d = -A b

Definition at line 83 of file gemv.hpp.

◆ gemv_add() [1/2]

template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemv_add	(	VA &&	A,
		VB &&	B,
		VC &&	C,
		VD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemv.hpp>

d = c + A b

Definition at line 93 of file gemv.hpp.

◆ gemv_add() [2/2]

template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemv_add	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemv.hpp>

d = d + A b

Definition at line 101 of file gemv.hpp.

◆ gemv_sub() [1/2]

template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemv_sub	(	VA &&	A,
		VB &&	B,
		VC &&	C,
		VD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemv.hpp>

d = c - A b

Definition at line 108 of file gemv.hpp.

◆ gemv_sub() [2/2]

template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>

void batmat::linalg::gemv_sub	(	VA &&	A,
		VB &&	B,
		VD &&	D,
		Opts...	opts )

#include <batmat/linalg/gemv.hpp>

d = d - A b

Definition at line 116 of file gemv.hpp.

◆ hyhound_diag() [1/2]

template<MatrixStructure SL, simdifiable VL, simdifiable VA, simdifiable Vd>

void batmat::linalg::hyhound_diag	(	Structured< VL, SL >	L,
		VA &&	A,
		Vd &&	d )

#include <batmat/linalg/hyhound.hpp>

Update Cholesky factor L using low-rank term A diag(d) Aᵀ.

Definition at line 152 of file hyhound.hpp.

◆ hyhound_diag() [2/2]

template<MatrixStructure SL, simdifiable VL, simdifiable VA, simdifiable Vd, simdifiable VW>

void batmat::linalg::hyhound_diag	(	Structured< VL, SL >	L,
		VA &&	A,
		Vd &&	d,
		VW &&	W )

#include <batmat/linalg/hyhound.hpp>

Update Cholesky factor L using low-rank term A diag(d) Aᵀ, with full Householder representation.

Definition at line 161 of file hyhound.hpp.

◆ hyhound_size_W()

template<MatrixStructure SL, simdifiable VL>

auto batmat::linalg::hyhound_size_W ( Structured< VL, SL > L )

#include <batmat/linalg/hyhound.hpp>

Get the size of the storage for the matrix W returned by hyhound_diag(Structured<VL,SL>, VA&&, Vd&&, VW&&).

Definition at line 170 of file hyhound.hpp.

◆ hyhound_diag_apply() [1/2]

template<simdifiable VL, simdifiable VA, simdifiable VD, simdifiable VB, simdifiable Vd, simdifiable VW>

void batmat::linalg::hyhound_diag_apply	(	VL &&	L,
		VA &&	A,
		VD &&	D,
		VB &&	B,
		Vd &&	d,
		VW &&	W,
		index_t	kA_in_offset = 0 )

#include <batmat/linalg/hyhound.hpp>

Apply Householder transformation generated by hyhound_diag, computing (L̃, D) = (L, A) Q̆.

Parameters

[in,out]	L	Part of the Cholesky factor to be updated (rectangular). Overwritten by L̃.
[in]	A	Update matrix (rectangular).
[out]	D	Updated update matrix (rectangular).
[in]	B	Householder reflector vectors returned by hyhound_diag.
[in]	d	Diagonal update matrix.
[in]	W	Householder representation returned by hyhound_diag.
[in]	kA_in_offset	If A is smaller than D, A is implicitly padded with zero columns: the offset of the nonzero part of A in the padded matrix can be specified using `kA_in_offset`.

Definition at line 188 of file hyhound.hpp.

◆ hyhound_diag_apply() [2/2]

template<simdifiable VL, simdifiable VA, simdifiable VB, simdifiable Vd, simdifiable VW>

void batmat::linalg::hyhound_diag_apply	(	VL &&	L,
		VA &&	A,
		VB &&	B,
		Vd &&	d,
		VW &&	W )

#include <batmat/linalg/hyhound.hpp>

Apply Householder transformation generated by hyhound_diag, computing (L̃, Ã) = (L, A) Q̆.

Parameters

[in,out]	L	Part of the Cholesky factor to be updated (rectangular). Overwritten by L̃.
[in,out]	A	Update matrix (rectangular). Overwritten by Ã.
[in]	B	Householder reflector vectors returned by hyhound_diag.
[in]	d	Diagonal update matrix.
[in]	W	Householder representation returned by hyhound_diag.

Definition at line 202 of file hyhound.hpp.

◆ hyhound_sign()

template<MatrixStructure SL, simdifiable VL, simdifiable VA, simdifiable Vd>

void batmat::linalg::hyhound_sign	(	Structured< VL, SL >	L,
		VA &&	A,
		Vd &&	d )

#include <batmat/linalg/hyhound.hpp>

Update Cholesky factor L using low-rank term A diag(copysign(1, d)) Aᵀ, where d contains only ±0 values.

Definition at line 212 of file hyhound.hpp.

◆ hyhound_diag_2()

template<MatrixStructure SL, simdifiable VL1, simdifiable VA1, simdifiable VL2, simdifiable VA2, simdifiable Vd>

void batmat::linalg::hyhound_diag_2	(	Structured< VL1, SL >	L1,
		VA1 &&	A1,
		VL2 &&	L2,
		VA2 &&	A2,
		Vd &&	d )

#include <batmat/linalg/hyhound.hpp>

Update Cholesky factor L using low-rank term A diag(d) Aᵀ, where L and A are stored as two separate block rows.

\[ L = \begin{pmatrix} L_{11} \\ L_{21} \end{pmatrix}, \quad A = \begin{pmatrix} A_{1} \\ A_{2} \end{pmatrix}. \]

Definition at line 226 of file hyhound.hpp.

◆ hyhound_diag_cyclic()

template<MatrixStructure SL, simdifiable VL11, simdifiable VA1, simdifiable VL21, simdifiable VA2, simdifiable VA2o, simdifiable VU, simdifiable VA3, simdifiable VA3o, simdifiable Vd>

void batmat::linalg::hyhound_diag_cyclic	(	Structured< VL11, SL >	L11,
		VA1 &&	A1,
		VL21 &&	L21,
		VA2 &&	A22,
		VA2o &&	A2_out,
		VU &&	L31,
		VA3 &&	A31,
		VA3o &&	A3_out,
		Vd &&	d )

#include <batmat/linalg/hyhound.hpp>

Update structured Cholesky factor L using structured low-rank term A diag(d) Aᵀ, where L, A and the output Ã have the following block structure:

\[ L = \begin{pmatrix} L_{11} \\ L_{21} \\ L_{31} \end{pmatrix}, \quad A = \begin{pmatrix} A_{11} & A_{12} \\ 0 & A_{22} \\ A_{31} & 0 \end{pmatrix}, \quad \tilde A = \begin{pmatrix} 0 \\ \tilde A_{2} \\ \tilde A_{3} \end{pmatrix}. \]

Definition at line 240 of file hyhound.hpp.

◆ hyhound_diag_riccati()

template<MatrixStructure SL, simdifiable VL11, simdifiable VA1, simdifiable VL21, simdifiable VA2, simdifiable VA2o, simdifiable VLu1, simdifiable VAuo, simdifiable Vd>

void batmat::linalg::hyhound_diag_riccati	(	Structured< VL11, SL >	L11,
		VA1 &&	A1,
		VL21 &&	L21,
		VA2 &&	A2,
		VA2o &&	A2_out,
		VLu1 &&	Lu1,
		VAuo &&	Au_out,
		Vd &&	d,
		bool	shift_A_out = false )

#include <batmat/linalg/hyhound.hpp>

Update structured Cholesky factor L using structured low-rank term A diag(d) Aᵀ, where L, A and the output Ã have the following block structure:

\[ L = \begin{pmatrix} L_{11} \\ L_{21} \\ L_{u} \end{pmatrix}, \quad A = \begin{pmatrix} A_{1} \\ A_{2} \\ 0 \end{pmatrix}, \quad \tilde A = \begin{pmatrix} 0 \\ \tilde A_{2} \\ \tilde A_{u} \end{pmatrix}. \]

The shift_A_out parameter indicates whether the output matrix A2_out should be shifted along the batch dimension. This is used in the Cyqlone solver.

Definition at line 258 of file hyhound.hpp.

◆ syrk_add_potrf() [1/2]

template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD>

void batmat::linalg::syrk_add_potrf	(	VA &&	A,
		Structured< VC, SC >	C,
		Structured< VD, SC >	D,
		simdified_value_t< VA >	regularization = 0 )

#include <batmat/linalg/potrf.hpp>

D = chol(C + AAᵀ) with C symmetric, D triangular.

Definition at line 49 of file potrf.hpp.

◆ syrk_add_potrf() [2/2]

template<MatrixStructure SC, simdifiable VA, simdifiable VD>

void batmat::linalg::syrk_add_potrf	(	VA &&	A,
		Structured< VD, SC >	D )

#include <batmat/linalg/potrf.hpp>

D = chol(D + AAᵀ) with D symmetric/triangular.

Definition at line 57 of file potrf.hpp.

◆ syrk_sub_potrf() [1/2]

template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD>

void batmat::linalg::syrk_sub_potrf	(	VA &&	A,
		Structured< VC, SC >	C,
		Structured< VD, SC >	D,
		simdified_value_t< VA >	regularization = 0 )

#include <batmat/linalg/potrf.hpp>

D = chol(C - AAᵀ) with C symmetric, D triangular.

Definition at line 64 of file potrf.hpp.

◆ syrk_sub_potrf() [2/2]

template<MatrixStructure SC, simdifiable VA, simdifiable VD>

void batmat::linalg::syrk_sub_potrf	(	VA &&	A,
		Structured< VD, SC >	D,
		simdified_value_t< VA >	regularization = 0 )

#include <batmat/linalg/potrf.hpp>

D = chol(D - AAᵀ) with D symmetric/triangular.

Definition at line 72 of file potrf.hpp.

◆ syrk_diag_add_potrf() [1/2]

template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd>

void batmat::linalg::syrk_diag_add_potrf	(	VA &&	A,
		Structured< VC, SC >	C,
		Structured< VD, SC >	D,
		Vd &&	d,
		simdified_value_t< VA >	regularization = 0 )

#include <batmat/linalg/potrf.hpp>

D = chol(C + A diag(d) Aᵀ) with C symmetric, D triangular.

Definition at line 79 of file potrf.hpp.

◆ syrk_diag_add_potrf() [2/2]

template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd>

void batmat::linalg::syrk_diag_add_potrf	(	VA &&	A,
		Structured< VD, SC >	D,
		Vd &&	d )

#include <batmat/linalg/potrf.hpp>

D = chol(D + A diag(d) Aᵀ) with D symmetric/triangular.

Definition at line 90 of file potrf.hpp.

◆ potrf() [1/2]

template<MatrixStructure SC, simdifiable VC, simdifiable VD>

void batmat::linalg::potrf	(	Structured< VC, SC >	C,
		Structured< VD, SC >	D,
		simdified_value_t< VC >	regularization = 0 )

#include <batmat/linalg/potrf.hpp>

D = chol(C) with C symmetric, D triangular.

Examples: example.cpp.

Definition at line 97 of file potrf.hpp.

◆ potrf() [2/2]

template<MatrixStructure SD, simdifiable VD>

void batmat::linalg::potrf	(	Structured< VD, SD >	D,
		simdified_value_t< VD >	regularization = 0 )

#include <batmat/linalg/potrf.hpp>

D = chol(D) with D symmetric/triangular.

Definition at line 105 of file potrf.hpp.

◆ vnorms_all() [1/2]

template<simdifiable Vx>

norms< simdified_value_t< Vx >, simdified_simd_t< Vx > >::result_simd batmat::linalg::vnorms_all ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise norms (max, 1-norm, and 2-norm) of a batch of vectors.

Definition at line 114 of file reduce.hpp.

◆ norms_all() [1/2]

template<simdifiable Vx>

norms< simdified_value_t< Vx > >::result batmat::linalg::norms_all ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the norms (max, 1-norm, and 2-norm) of a vector.

Definition at line 121 of file reduce.hpp.

◆ vnorm_inf() [1/2]

template<simdifiable Vx>

simdified_simd_t< Vx > batmat::linalg::vnorm_inf ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise infinity norms of a batch of vectors.

Definition at line 128 of file reduce.hpp.

◆ norm_inf() [1/2]

template<simdifiable Vx>

simdified_value_t< Vx > batmat::linalg::norm_inf ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the infinity norm of a vector.

Definition at line 134 of file reduce.hpp.

◆ vnorm_1() [1/2]

template<simdifiable Vx>

simdified_simd_t< Vx > batmat::linalg::vnorm_1 ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise 1-norms of a batch of vectors.

Definition at line 140 of file reduce.hpp.

◆ norm_1() [1/2]

template<simdifiable Vx>

simdified_value_t< Vx > batmat::linalg::norm_1 ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the 1-norm of a vector.

Definition at line 146 of file reduce.hpp.

◆ vnorm_2_squared() [1/2]

template<simdifiable Vx>

simdified_simd_t< Vx > batmat::linalg::vnorm_2_squared ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise squared 2-norms of a batch of vectors.

Definition at line 152 of file reduce.hpp.

◆ norm_2_squared() [1/2]

template<simdifiable Vx>

simdified_value_t< Vx > batmat::linalg::norm_2_squared ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the squared 2-norm of a vector.

Definition at line 160 of file reduce.hpp.

◆ vnorm_2() [1/2]

template<simdifiable Vx>

simdified_simd_t< Vx > batmat::linalg::vnorm_2 ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise 2-norms of a batch of vectors.

Definition at line 167 of file reduce.hpp.

◆ norm_2() [1/2]

template<simdifiable Vx>

simdified_value_t< Vx > batmat::linalg::norm_2 ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the 2-norm of a vector.

Definition at line 174 of file reduce.hpp.

◆ vdot() [1/2]

template<simdifiable Vx, simdifiable Vy>

simdified_simd_t< Vx > batmat::linalg::vdot	(	Vx &&	x,
		Vy &&	y )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise dot products of two batches of vectors.

Definition at line 182 of file reduce.hpp.

◆ dot() [1/2]

template<simdifiable Vx, simdifiable Vy>

simdified_value_t< Vx > batmat::linalg::dot	(	Vx &&	x,
		Vy &&	y )

#include <batmat/linalg/reduce.hpp>

Compute the dot product of two vectors.

Definition at line 191 of file reduce.hpp.

◆ weighted_vnorm_sq() [1/2]

template<simdifiable Vw, simdifiable Va>

simdified_simd_t< Vw > batmat::linalg::weighted_vnorm_sq	(	Vw &&	w,
		Va &&	a )

#include <batmat/linalg/reduce.hpp>

∑ wᵢ aᵢ² (lane-wise).

Definition at line 200 of file reduce.hpp.

◆ weighted_norm_sq() [1/2]

template<simdifiable Vw, simdifiable Va>

simdified_value_t< Vw > batmat::linalg::weighted_norm_sq	(	Vw &&	w,
		Va &&	a )

#include <batmat/linalg/reduce.hpp>

∑ wᵢ aᵢ².

Definition at line 209 of file reduce.hpp.

◆ weighted_vnorm_sq_diff() [1/2]

template<simdifiable Vw, simdifiable Va, simdifiable Vb>

simdified_simd_t< Vw > batmat::linalg::weighted_vnorm_sq_diff	(	Vw &&	w,
		Va &&	a,
		Vb &&	b )

#include <batmat/linalg/reduce.hpp>

∑ wᵢ(aᵢ - bᵢ)² (lane-wise).

Definition at line 218 of file reduce.hpp.

◆ weighted_norm_sq_diff()

template<simdifiable Vw, simdifiable Va, simdifiable Vb>

simdified_value_t< Vw > batmat::linalg::weighted_norm_sq_diff	(	Vw &&	w,
		Va &&	a,
		Vb &&	b )

#include <batmat/linalg/reduce.hpp>

∑ wᵢ(aᵢ - bᵢ)².

Definition at line 227 of file reduce.hpp.

◆ norms_all() [2/2]

template<simdifiable_multi Vx>

norms< simdified_value_t< Vx > >::result batmat::linalg::multi::norms_all ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the norms (max, 1-norm, and 2-norm) of a vector.

Definition at line 249 of file reduce.hpp.

◆ norm_inf() [2/2]

template<simdifiable_multi Vx>

simdified_value_t< Vx > batmat::linalg::multi::norm_inf ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the infinity norm of a vector.

Definition at line 259 of file reduce.hpp.

◆ norm_1() [2/2]

template<simdifiable_multi Vx>

simdified_value_t< Vx > batmat::linalg::multi::norm_1 ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the 1-norm of a vector.

Definition at line 265 of file reduce.hpp.

◆ norm_2_squared() [2/2]

template<simdifiable_multi Vx>

simdified_value_t< Vx > batmat::linalg::multi::norm_2_squared ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the squared 2-norm of a vector.

Definition at line 271 of file reduce.hpp.

◆ norm_2() [2/2]

template<simdifiable_multi Vx>

simdified_value_t< Vx > batmat::linalg::multi::norm_2 ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the 2-norm of a vector.

Definition at line 280 of file reduce.hpp.

◆ dot() [2/2]

template<simdifiable_multi Vx, simdifiable_multi Vy>

simdified_value_t< Vx > batmat::linalg::multi::dot	(	Vx &&	x,
		Vy &&	y )

#include <batmat/linalg/reduce.hpp>

Compute the dot product of two vectors.

Definition at line 288 of file reduce.hpp.

◆ weighted_norm_sq() [2/2]

template<simdifiable_multi Vw, simdifiable_multi Vx>

simdified_value_t< Vw > batmat::linalg::multi::weighted_norm_sq	(	Vw &&	w,
		Vx &&	x )

#include <batmat/linalg/reduce.hpp>

∑ wᵢ xᵢ².

Definition at line 299 of file reduce.hpp.

◆ weighted_norm_sq_difference()

template<simdifiable_multi Vw, simdifiable_multi Vx, simdifiable_multi Vy>

simdified_value_t< Vw > batmat::linalg::multi::weighted_norm_sq_difference	(	Vw &&	w,
		Vx &&	x,
		Vy &&	y )

#include <batmat/linalg/reduce.hpp>

∑ wᵢ(xᵢ - yᵢ)².

Definition at line 310 of file reduce.hpp.

◆ vnorms_all() [2/2]

template<simdifiable_multi Vx>

norms< simdified_value_t< Vx >, simdified_simd_t< Vx > >::result_simd batmat::linalg::multi::vnorms_all ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise norms (max, 1-norm, and 2-norm) of a batch of vectors.

Definition at line 321 of file reduce.hpp.

◆ vnorm_inf() [2/2]

template<simdifiable_multi Vx>

simdified_simd_t< Vx > batmat::linalg::multi::vnorm_inf ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise infinity norms of a batch of vectors.

Definition at line 332 of file reduce.hpp.

◆ vnorm_1() [2/2]

template<simdifiable_multi Vx>

simdified_simd_t< Vx > batmat::linalg::multi::vnorm_1 ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise 1-norms of a batch of vectors.

Definition at line 338 of file reduce.hpp.

◆ vnorm_2_squared() [2/2]

template<simdifiable_multi Vx>

simdified_simd_t< Vx > batmat::linalg::multi::vnorm_2_squared ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise squared 2-norms of a batch of vectors.

Definition at line 344 of file reduce.hpp.

◆ vnorm_2() [2/2]

template<simdifiable_multi Vx>

simdified_simd_t< Vx > batmat::linalg::multi::vnorm_2 ( Vx && x )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise 2-norms of a batch of vectors.

Definition at line 353 of file reduce.hpp.

◆ vdot() [2/2]

template<simdifiable_multi Vx, simdifiable_multi Vy>

simdified_simd_t< Vx > batmat::linalg::multi::vdot	(	Vx &&	x,
		Vy &&	y )

#include <batmat/linalg/reduce.hpp>

Compute the lane-wise dot products of two batches of vectors.

Definition at line 361 of file reduce.hpp.

◆ weighted_vnorm_sq() [2/2]

template<simdifiable_multi Vw, simdifiable_multi Vx>

simdified_simd_t< Vw > batmat::linalg::multi::weighted_vnorm_sq	(	Vw &&	w,
		Vx &&	x )

#include <batmat/linalg/reduce.hpp>

∑ wᵢ xᵢ² (lane-wise).

Definition at line 372 of file reduce.hpp.

◆ weighted_vnorm_sq_diff() [2/2]

template<simdifiable_multi Vw, simdifiable_multi Vx, simdifiable_multi Vy>

simdified_simd_t< Vw > batmat::linalg::multi::weighted_vnorm_sq_diff	(	Vw &&	w,
		Vx &&	x,
		Vy &&	y )

#include <batmat/linalg/reduce.hpp>

∑ wᵢ(xᵢ - yᵢ)² (lane-wise).

Definition at line 383 of file reduce.hpp.

◆ transpose()

MatrixStructure batmat::linalg::transpose ( MatrixStructure s )

constexpr

#include <batmat/linalg/structure.hpp>

Definition at line 11 of file structure.hpp.

◆ symm_add() [1/2]

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>

void batmat::linalg::symm_add	(	Structured< VA, SA >	A,
		VB &&	B,
		VC &&	C,
		VD &&	D )

#include <batmat/linalg/symm.hpp>

D = C + A B with A symmetric.

Definition at line 76 of file symm.hpp.

◆ symm_add() [2/2]

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>

void batmat::linalg::symm_add	(	Structured< VA, SA >	A,
		VB &&	B,
		VD &&	D )

#include <batmat/linalg/symm.hpp>

D = D + A B with A symmetric.

Definition at line 83 of file symm.hpp.

◆ symv()

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>

void batmat::linalg::symv	(	Structured< VA, SA >	A,
		VB &&	B,
		VD &&	D )

#include <batmat/linalg/symv.hpp>

d = A b where A is symmetric

Definition at line 47 of file symv.hpp.

◆ symv_neg()

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>

void batmat::linalg::symv_neg	(	Structured< VA, SA >	A,
		VB &&	B,
		VD &&	D )

#include <batmat/linalg/symv.hpp>

d = -A b where A is symmetric

Definition at line 57 of file symv.hpp.

◆ symv_add() [1/2]

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>

void batmat::linalg::symv_add	(	Structured< VA, SA >	A,
		VB &&	B,
		VC &&	C,
		VD &&	D )

#include <batmat/linalg/symv.hpp>

d = c + A b where A is symmetric

Definition at line 67 of file symv.hpp.

◆ symv_add() [2/2]

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>

void batmat::linalg::symv_add	(	Structured< VA, SA >	A,
		VB &&	B,
		VD &&	D )

#include <batmat/linalg/symv.hpp>

d = d + A b where A is symmetric

Definition at line 75 of file symv.hpp.

◆ symv_sub() [1/2]

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>

void batmat::linalg::symv_sub	(	Structured< VA, SA >	A,
		VB &&	B,
		VC &&	C,
		VD &&	D )

#include <batmat/linalg/symv.hpp>

d = c - A b where A is symmetric

Definition at line 82 of file symv.hpp.

◆ symv_sub() [2/2]

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>

void batmat::linalg::symv_sub	(	Structured< VA, SA >	A,
		VB &&	B,
		VD &&	D )

#include <batmat/linalg/symv.hpp>

d = d - A b where A is symmetric

Definition at line 90 of file symv.hpp.

◆ syomv()

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>

void batmat::linalg::syomv	(	Structured< VA, SA >	A,
		VB &&	B,
		VD &&	D )

#include <batmat/linalg/syomv.hpp>

Definition at line 46 of file syomv.hpp.

◆ syomv_neg()

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>

void batmat::linalg::syomv_neg	(	Structured< VA, SA >	A,
		VB &&	B,
		VD &&	D )

#include <batmat/linalg/syomv.hpp>

Definition at line 55 of file syomv.hpp.

◆ Structured()

template<class M>

batmat::linalg::Structured ( M && ) -> Structured< M >

#include <batmat/linalg/triangular.hpp>

◆ tril()

template<class M>

auto batmat::linalg::tril ( M && m )

nodiscard constexpr

#include <batmat/linalg/triangular.hpp>

Lower-triangular view.

Examples: example.cpp.

Definition at line 41 of file triangular.hpp.

◆ triu()

template<class M>

auto batmat::linalg::triu ( M && m )

nodiscard constexpr

#include <batmat/linalg/triangular.hpp>

Upper-triangular view.

Examples: example.cpp.

Definition at line 47 of file triangular.hpp.

◆ make_structured()

template<MatrixStructure S, class M>

auto batmat::linalg::make_structured ( M && m )

nodiscard constexpr

#include <batmat/linalg/triangular.hpp>

View with the given structure.

Definition at line 53 of file triangular.hpp.

◆ trsm() [1/4]

template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD, int RotB = 0>

void batmat::linalg::trsm	(	Structured< VA, SA >	A,
		VB &&	B,
		VD &&	D,
		with_rotate_B_t< RotB >	= {} )

#include <batmat/linalg/trsm.hpp>

D = A⁻¹ B with A triangular.

Definition at line 51 of file trsm.hpp.

◆ trsm() [2/4]

template<MatrixStructure SA, simdifiable VA, simdifiable VD, int RotB = 0>

void batmat::linalg::trsm	(	Structured< VA, SA >	A,
		VD &&	D,
		with_rotate_B_t< RotB >	shift = {} )

#include <batmat/linalg/trsm.hpp>

D = A⁻¹ D with A triangular.

Definition at line 58 of file trsm.hpp.

◆ trsm() [3/4]

template<MatrixStructure SB, simdifiable VA, simdifiable VB, simdifiable VD, int RotA = 0>

void batmat::linalg::trsm	(	VA &&	A,
		Structured< VB, SB >	B,
		VD &&	D,
		with_rotate_A_t< RotA >	= {} )

#include <batmat/linalg/trsm.hpp>

D = A B⁻¹ with B triangular.

Definition at line 65 of file trsm.hpp.

◆ trsm() [4/4]

template<MatrixStructure SB, simdifiable VB, simdifiable VD, int RotA = 0>

void batmat::linalg::trsm	(	VD &&	D,
		Structured< VB, SB >	B,
		with_rotate_A_t< RotA >	shift = {} )

#include <batmat/linalg/trsm.hpp>

D = D B⁻¹ with B triangular.

Definition at line 75 of file trsm.hpp.

◆ trtri() [1/4]

template<simdifiable VA, simdifiable VD>

void batmat::linalg::trtri	(	Structured< VA, MatrixStructure::LowerTriangular >	A,
		Structured< VD, MatrixStructure::LowerTriangular >	D )

#include <batmat/linalg/trtri.hpp>

D = A⁻¹ with A, D lower triangular.

Definition at line 41 of file trtri.hpp.

◆ trtri() [2/4]

template<simdifiable VA, simdifiable VD>

void batmat::linalg::trtri	(	Structured< VA, MatrixStructure::UpperTriangular >	A,
		Structured< VD, MatrixStructure::UpperTriangular >	D )

#include <batmat/linalg/trtri.hpp>

D = A⁻¹ with A, D upper triangular.

Definition at line 51 of file trtri.hpp.

◆ trtri() [3/4]

template<simdifiable VD>

void batmat::linalg::trtri ( Structured< VD, MatrixStructure::LowerTriangular > D )

#include <batmat/linalg/trtri.hpp>

D = D⁻¹ with D lower triangular.

Definition at line 58 of file trtri.hpp.

◆ trtri() [4/4]

template<simdifiable VD>

void batmat::linalg::trtri ( Structured< VD, MatrixStructure::UpperTriangular > D )

#include <batmat/linalg/trtri.hpp>

D = D⁻¹ with D upper triangular.

Definition at line 66 of file trtri.hpp.

Detailed Description

Topics

Compression of masks containing zeros

Copying and filling batches of matrices

Single-batch elementwise operations

Multi-batch elementwise operations

Multiplication of batches of matrices with diagonal scaling

Multiplication of batches of general matrices

Multiplication of batches of matrices with symmetric results

Multiplication of batches of triangular matrices

Matrix-vector multiplication of batches of matrices

Cholesky factorization updates

Cholesky factorization of batches of matrices

Single-batch reduction operations

Multi-batch reduction operations

Symmetric multiplication of batches of matrices

Symmetric matrix-vector multiplication of batches of matrices

Symmetric matrix-vector multiplication of a block tridiagonal matrix

Triangular views of batches of matrices

Triangular solve of batches of matrices

Triangular inversion of batches of matrices

Classes

Enumerations

Functions

Class Documentation

◆ batmat::linalg::TilingOptions

Enumeration Type Documentation

◆ PackingSelector

◆ MatrixStructure

Function Documentation

◆ compress_masks()

◆ compress_masks_count()

◆ compress_masks_sqrt() [1/2]

◆ compress_masks_sqrt() [2/2]

◆ copy() [1/2]

◆ copy() [2/2]

◆ fill() [1/2]

◆ fill() [2/2]

◆ scale() [1/4]

◆ scale() [2/4]

◆ hadamard() [1/4]

◆ hadamard() [2/4]

◆ clamp() [1/4]

◆ clamp_resid() [1/2]

◆ clamp() [2/4]

◆ axpby() [1/4]

◆ axpby() [2/4]

◆ axpy() [1/6]

◆ axpy() [2/6]

◆ axpy() [3/6]

◆ negate() [1/4]

◆ negate() [2/4]

◆ sub() [1/4]

◆ sub() [2/4]

◆ add() [1/4]

◆ add() [2/4]

◆ for_each_elementwise() [1/2]

◆ transform_elementwise() [1/2]

◆ transform2_elementwise() [1/2]

◆ transform_n_elementwise() [1/2]

◆ transform_n_diag()

◆ copy_diag()

◆ add_diag() [1/2]

◆ add_diag() [2/2]

◆ scale() [3/4]

◆ scale() [4/4]

◆ hadamard() [3/4]

◆ hadamard() [4/4]

◆ clamp() [3/4]

◆ clamp_resid() [2/2]

◆ clamp() [4/4]

◆ axpby() [3/4]

◆ axpby() [4/4]

◆ axpy() [4/6]

◆ axpy() [5/6]

◆ axpy() [6/6]

◆ negate() [3/4]

◆ negate() [4/4]

◆ sub() [3/4]

◆ sub() [4/4]