Namespaces
namespace	detail
namespace	multi
namespace	flops
namespace	micro_kernels

Compression of masks containing zeros
template<index_t N = 8, simdifiable VA, simdifiable VS, simdifiable VAo, simdifiable VSo>
index_t	compress_masks (VA &&Ain, VS &&Sin, VAo &&Aout, VSo &&Sout)
template<index_t N = 8, simdifiable VS>
index_t	compress_masks_count (VS &&Sin)
template<index_t N = 8, simdifiable VA, simdifiable VS, simdifiable VAo>
index_t	compress_masks_sqrt (VA &&Ain, VS &&Sin, VAo &&Aout)
template<index_t N = 8, simdifiable VA, simdifiable VS, simdifiable VAo, simdifiable VSo>
index_t	compress_masks_sqrt (VA &&Ain, VS &&Sin, VAo &&Aout, VSo &&Sout)

Copying and filling batches of matrices
template<simdifiable VA, simdifiable VB, rotate_opt... Opts>
void	copy (VA &&A, VB &&B, Opts... opts)
	B = A.
template<MatrixStructure S, simdifiable VA, simdifiable VB, rotate_opt... Opts>
void	copy (Structured< VA, S > A, Structured< VB, S > B, Opts... opts)
	B = A.
template<simdifiable VB>
void	fill (simdified_value_t< VB > a, VB &&B)
	B = a.
template<MatrixStructure S, simdifiable VB>
void	fill (simdified_value_t< VB > a, Structured< VB, S > B)
	B = a.

Copying and filling multiple batches of matrices
template<simdifiable_multi VA, simdifiable_multi VB, rotate_opt... Opts>
void	copy (VA &&A, VB &&B, Opts... opts)
	B = A.
template<MatrixStructure S, simdifiable_multi VA, simdifiable_multi VB, rotate_opt... Opts>
void	copy (Structured< VA, S > A, Structured< VB, S > B, Opts... opts)
	B = A.
template<simdifiable_multi VB>
void	fill (simdified_value_t< VB > a, VB &&B)
	B = a.
template<MatrixStructure S, simdifiable_multi VB>
void	fill (simdified_value_t< VB > a, Structured< VB, S > B)
	B = a.

Single-batch elementwise operations
template<simdifiable Vx, simdifiable Vz, std::convertible_to< simdified_simd_t< Vx > > T>
void	scale (T alpha, Vx &&x, Vz &&z)
	Multiply a vector by a scalar z = αx.
template<simdifiable Vx, std::convertible_to< simdified_simd_t< Vx > > T>
void	scale (T alpha, Vx &&x)
	Multiply a vector by a scalar x = αx.
template<simdifiable Vx, simdifiable Vy, simdifiable Vz>
void	hadamard (Vx &&x, Vy &&y, Vz &&z)
	Compute the Hadamard (elementwise) product of two vectors z = x ⊙ y.
template<simdifiable Vx, simdifiable Vy>
void	hadamard (Vx &&x, Vy &&y)
	Compute the Hadamard (elementwise) product of two vectors x = x ⊙ y.
template<simdifiable Vx, simdifiable Vlo, simdifiable Vhi, simdifiable Vz>
void	clamp (Vx &&x, Vlo &&lo, Vhi &&hi, Vz &&z)
	Elementwise clamping z = max(lo, min(x, hi)).
template<simdifiable Vx, simdifiable Vlo, simdifiable Vhi, simdifiable Vz>
void	clamp_resid (Vx &&x, Vlo &&lo, Vhi &&hi, Vz &&z)
	Elementwise clamping residual z = x - max(lo, min(x, hi)).
template<simdifiable Vx, simdifiable Vz>
void	clamp (Vx &&x, simdified_simd_t< Vx > lo, simdified_simd_t< Vx > hi, Vz &&z)
	Elementwise clamping z = max(lo, min(x, hi)), with scalar lo and hi.
template<simdifiable Vx, simdifiable Vy, simdifiable Vz, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>
void	axpby (Ta alpha, Vx &&x, Tb beta, Vy &&y, Vz &&z)
	Add scaled vector z = αx + βy.
template<simdifiable Vx, simdifiable Vy, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>
void	axpby (Ta alpha, Vx &&x, Tb beta, Vy &&y)
	Add scaled vector y = αx + βy.
template<auto Beta = 1, simdifiable Vy, simdifiable... Vx>
void	axpy (Vy &&y, const std::array< simdified_simd_t< Vy >, sizeof...(Vx)> &alphas, Vx &&...x)
	Add scaled vector y = ∑ᵢ αᵢxᵢ + βy.
template<simdifiable Vx, simdifiable Vy, simdifiable Vz, std::convertible_to< simdified_simd_t< Vx > > Ta>
void	axpy (Ta alpha, Vx &&x, Vy &&y, Vz &&z)
	Add scaled vector z = αx + y.
template<auto Beta = 1, simdifiable Vx, simdifiable Vy, std::convertible_to< simdified_simd_t< Vx > > Ta>
void	axpy (Ta alpha, Vx &&x, Vy &&y)
	Add scaled vector y = αx + βy (where β is a compile-time constant).
template<simdifiable VA, simdifiable VB, int Rotate = 0>
void	negate (VA &&A, VB &&B, with_rotate_t< Rotate >={})
	Negate a matrix or vector B = -A.
template<simdifiable VA, int Rotate = 0>
void	negate (VA &&A, with_rotate_t< Rotate >={})
	Negate a matrix or vector A = -A.
template<simdifiable VA, simdifiable VB, simdifiable VC, int Rotate = 0>
void	sub (VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate >={})
	Subtract two matrices or vectors C = A - B. Rotate affects B.
template<simdifiable VA, simdifiable VB, int Rotate = 0>
void	sub (VA &&A, VB &&B, with_rotate_t< Rotate >={})
	Subtract two matrices or vectors A = A - B. Rotate affects B.
template<simdifiable VA, simdifiable VB, simdifiable VC, int Rotate = 0>
void	add (VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate >={})
	Add two matrices or vectors C = A + B. Rotate affects B.
template<simdifiable VA, simdifiable VB, int Rotate = 0>
void	add (VA &&A, VB &&B, with_rotate_t< Rotate >={})
	Add two matrices or vectors A = A + B. Rotate affects B.
template<class F, simdifiable VA, simdifiable... VAs>
void	for_each_elementwise (F &&fun, VA &&A, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors.
template<class F, simdifiable VA, simdifiable... VAs>
void	transform_elementwise (F &&fun, VA &&A, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors, storing the result in the first argument.
template<class F, simdifiable VA, simdifiable VB, simdifiable... VAs>
void	transform2_elementwise (F &&fun, VA &&A, VB &&B, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors, storing the results in the first two arguments.
template<class F, simdifiable... VAs, simdifiable... VBs>
void	transform_n_elementwise (F &&fun, std::tuple< VAs... > As, VBs &&...Bs)
	Apply a function to all elements of the given matrices or vectors, storing the results in the tuple of matrices given as the first argument.
template<class F, simdifiable... VAs, simdifiable... VBs>
void	transform_n_diag (F &&fun, std::tuple< VAs... > As, VBs &&...Bs)
	Apply a function to all elements of the given vectors and the diagonal elements of the given square matrices, storing the results in the tuple of vectors or matrices given as the first argument.
template<class F, simdifiable VA, simdifiable VB>
void	copy_diag (VA &&A, VB &&B)
	Copy the diagonal elements of a matrix.
template<simdifiable VA, simdifiable VB, simdifiable VC>
void	add_diag (VA &&A, VB &&b, VC &&C)
	C = A + diag(b).
template<simdifiable VA, simdifiable VB>
void	add_diag (VA &&A, VB &&b)
	A += diag(b).

Multi-batch elementwise operations
template<simdifiable_multi Vx, simdifiable_multi Vz, std::convertible_to< simdified_simd_t< Vx > > T>
void	scale (T alpha, Vx &&x, Vz &&z)
	Multiply a vector by a scalar z = αx.
template<simdifiable_multi Vx, std::convertible_to< simdified_simd_t< Vx > > T>
void	scale (T alpha, Vx &&x)
	Multiply a vector by a scalar x = αx.
template<simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz>
void	hadamard (Vx &&x, Vy &&y, Vz &&z)
	Compute the Hadamard (elementwise) product of two vectors z = x ⊙ y.
template<simdifiable_multi Vx, simdifiable_multi Vy>
void	hadamard (Vx &&x, Vy &&y)
	Compute the Hadamard (elementwise) product of two vectors x = x ⊙ y.
template<simdifiable_multi Vx, simdifiable_multi Vlo, simdifiable_multi Vhi, simdifiable_multi Vz>
void	clamp (Vx &&x, Vlo &&lo, Vhi &&hi, Vz &&z)
	Elementwise clamping z = max(lo, min(x, hi)).
template<simdifiable_multi Vx, simdifiable_multi Vlo, simdifiable_multi Vhi, simdifiable_multi Vz>
void	clamp_resid (Vx &&x, Vlo &&lo, Vhi &&hi, Vz &&z)
	Elementwise clamping residual z = x - max(lo, min(x, hi)).
template<simdifiable_multi Vx, simdifiable_multi Vz>
void	clamp (Vx &&x, simdified_simd_t< Vx > lo, simdified_simd_t< Vx > hi, Vz &&z)
	Elementwise clamping z = max(lo, min(x, hi)), with scalar lo and hi.
template<simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>
void	axpby (Ta alpha, Vx &&x, Tb beta, Vy &&y, Vz &&z)
	Add scaled vector z = αx + βy.
template<simdifiable_multi Vx, simdifiable_multi Vy, std::convertible_to< simdified_simd_t< Vx > > Ta, std::convertible_to< simdified_simd_t< Vx > > Tb>
void	axpby (Ta alpha, Vx &&x, Tb beta, Vy &&y)
	Add scaled vector y = αx + βy.
template<auto Beta = 1, simdifiable_multi Vy, simdifiable_multi... Vx>
void	axpy (Vy &&y, const std::array< simdified_simd_t< Vy >, sizeof...(Vx)> &alphas, Vx &&...x)
	Add scaled vector y = ∑ᵢ αᵢxᵢ + βy.
template<simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz, std::convertible_to< simdified_simd_t< Vx > > Ta>
void	axpy (Ta alpha, Vx &&x, Vy &&y, Vz &&z)
	Add scaled vector z = αx + y.
template<auto Beta = 1, simdifiable_multi Vx, simdifiable_multi Vy, std::convertible_to< simdified_simd_t< Vx > > Ta>
void	axpy (Ta alpha, Vx &&x, Vy &&y)
	Add scaled vector y = αx + βy (where β is a compile-time constant).
template<simdifiable_multi VA, simdifiable_multi VB, int Rotate = 0>
void	negate (VA &&A, VB &&B, with_rotate_t< Rotate > rot={})
	Negate a matrix or vector B = -A.
template<simdifiable_multi VA, int Rotate = 0>
void	negate (VA &&A, with_rotate_t< Rotate > rot={})
	Negate a matrix or vector A = -A.
template<simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi VC, int Rotate = 0>
void	sub (VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate > rot={})
	Subtract two matrices or vectors C = A - B. Rotate affects B.
template<simdifiable_multi VA, simdifiable_multi VB, int Rotate = 0>
void	sub (VA &&A, VB &&B, with_rotate_t< Rotate > rot={})
	Subtract two matrices or vectors A = A - B. Rotate affects B.
template<simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi VC, int Rotate = 0>
void	add (VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate > rot={})
	Add two matrices or vectors C = A + B. Rotate affects B.
template<simdifiable_multi VA, simdifiable_multi VB, int Rotate = 0>
void	add (VA &&A, VB &&B, with_rotate_t< Rotate > rot={})
	Add two matrices or vectors A = A + B. Rotate affects B.
template<class F, simdifiable_multi VA, simdifiable_multi... VAs>
void	for_each_elementwise (F &&fun, VA &&A, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors.
template<class F, simdifiable_multi VA, simdifiable_multi... VAs>
void	transform_elementwise (F &&fun, VA &&A, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors, storing the result in the first argument.
template<class F, simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi... VAs>
void	transform2_elementwise (F &&fun, VA &&A, VB &&B, VAs &&...As)
	Apply a function to all elements of the given matrices or vectors, storing the results in the first two arguments.
template<class F, simdifiable_multi... VAs, simdifiable_multi... VBs>
void	transform_n_elementwise (F &&fun, std::tuple< VAs... > As, VBs &&...Bs)
	Apply a function to all elements of the given matrices or vectors, storing the results in the tuple of matrices given as the first argument.

Multiplication of batches of matrices with diagonal scaling
template<simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	gemm_diag (VA &&A, VB &&B, VD &&D, Vd &&d, Opts... opts)
	D = A diag(d) B.
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	gemm_diag_add (VA &&A, VB &&B, VC &&C, VD &&D, Vd &&d, Opts... opts)
	D = C + A diag(d) B.
template<simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	gemm_diag_add (VA &&A, VB &&B, VD &&D, Vd &&d, Opts... opts)
	D += A diag(d) B.
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	gemm_diag_sub (VA &&A, VB &&B, VC &&C, VD &&D, Vd &&d, Opts... opts)
	D = C - A diag(d) B.
template<simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	gemm_diag_sub (VA &&A, VB &&B, VD &&D, Vd &&d, Opts... opts)
	D -= A diag(d) B.
template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	syrk_diag (VA &&A, Structured< VD, SC > D, Vd &&d, Opts... opts)
	D = A diag(d) Aᵀ with D symmetric.
template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	syrk_diag_add (VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, Vd &&d, Opts... opts)
	D = C + A diag(d) Aᵀ with C, D symmetric.
template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	syrk_diag_add (VA &&A, Structured< VD, SC > D, Vd &&d, Opts... opts)
	D += A diag(d) Aᵀ with D symmetric.
template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	syrk_diag_sub (VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, Vd &&d, Opts... opts)
	D = C - A diag(d) Aᵀ with C, D symmetric.
template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd, detail::gemm_diag::track_zeros_opt... Opts>
void	syrk_diag_sub (VA &&A, Structured< VD, SC > D, Vd &&d, Opts... opts)
	D -= A diag(d) Aᵀ with D symmetric.

Multiplication of batches of general matrices
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	gemm (VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)
	D = A B.
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	gemm_neg (VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)
	D = -A B.
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	gemm_add (VA &&A, VB &&B, VC &&C, VD &&D, TilingOptions packing={}, Opts... opts)
	D = C + A B.
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	gemm_add (VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)
	D += A B.
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	gemm_sub (VA &&A, VB &&B, VC &&C, VD &&D, TilingOptions packing={}, Opts... opts)
	D = C - A B.
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	gemm_sub (VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)
	D -= A B.

Multiplication of batches of matrices with symmetric results
template<MatrixStructure SA, MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>
void	syrk (Structured< VA, SA > A, Structured< VD, SD > D, Opts... opts)
	D = A Aᵀ with D symmetric.
template<MatrixStructure SD, class TA, simdifiable VD, shift_opt... Opts>
void	syrk (TA &&A, Structured< VD, SD > D, Opts... opts)
	D = A Aᵀ with D symmetric.
template<MatrixStructure SD, simdifiable VD, shift_opt... Opts>
void	syrk (Structured< VD, SD > D, Opts... opts)
	D = D Dᵀ with D triangular on input and symmetric on output.
template<MatrixStructure SA, MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>
void	syrk_neg (Structured< VA, SA > A, Structured< VD, SD > D, Opts... opts)
	D = -A Aᵀ with D symmetric.
template<MatrixStructure SD, class TA, simdifiable VD, shift_opt... Opts>
void	syrk_neg (TA &&A, Structured< VD, SD > D, Opts... opts)
	D = -A Aᵀ with D symmetric.
template<MatrixStructure SD, simdifiable VD, shift_opt... Opts>
void	syrk_neg (Structured< VD, SD > D, Opts... opts)
	D = -D Dᵀ with D triangular on input and symmetric on output.
template<MatrixStructure SD, simdifiable VA, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	syrk_add (VA &&A, Structured< VC, SD > C, Structured< VD, SD > D, Opts... opts)
	D = C + A Aᵀ with C, D symmetric.
template<MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>
void	syrk_add (VA &&A, Structured< VD, SD > D, Opts... opts)
	D += A Aᵀ with D symmetric.
template<MatrixStructure SD, simdifiable VA, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	syrk_sub (VA &&A, Structured< VC, SD > C, Structured< VD, SD > D, Opts... opts)
	D = C - A Aᵀ with C, D symmetric.
template<MatrixStructure SD, simdifiable VA, simdifiable VD, shift_opt... Opts>
void	syrk_sub (VA &&A, Structured< VD, SD > D, Opts... opts)
	D -= A Aᵀ with D symmetric.

Multiplication of batches of triangular matrices
template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	trmm (Structured< VA, SA > A, Structured< VB, SB > B, Structured< VD, SD > D, Opts... opts)
	D = A B with A and/or B triangular.
template<class TA, class TB, class TD, shift_opt... Opts>
void	trmm (TA &&A, TB &&B, TD &&D, Opts... opts)
	D = A B with A and/or B triangular.
template<MatrixStructure SA, simdifiable VA, simdifiable VD, shift_opt... Opts>
void	trmm (Structured< VA, SA > A, VD &&D, Opts... opts)
	D = A D with A triangular.
template<MatrixStructure SB, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	trmm (VD &&D, Structured< VB, SB > B, Opts... opts)
	D = D B with B triangular.
template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	trmm_neg (Structured< VA, SA > A, Structured< VB, SB > B, Structured< VD, SD > D, Opts... opts)
	D = -A B with A and/or B triangular.
template<class TA, class TB, class TD, shift_opt... Opts>
void	trmm_neg (TA &&A, TB &&B, TD &&D, Opts... opts)
	D = -A B with A and/or B triangular.
template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	trmm_add (Structured< VA, SA > A, Structured< VB, SB > B, Structured< VC, SD > C, Structured< VD, SD > D, Opts... opts)
	D = C + A B with A and/or B triangular.
template<class TA, class TB, class TC, class TD, shift_opt... Opts>
void	trmm_add (TA &&A, TB &&B, TC &&C, TD &&D, Opts... opts)
	D = C + A B with A and/or B triangular.
template<MatrixStructure SA, MatrixStructure SB, MatrixStructure SD, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	trmm_sub (Structured< VA, SA > A, Structured< VB, SB > B, Structured< VC, SD > C, Structured< VD, SD > D, Opts... opts)
	D = C - A B with A and/or B triangular.
template<class TA, class TB, class TC, class TD, shift_opt... Opts>
void	trmm_sub (TA &&A, TB &&B, TC &&C, TD &&D, Opts... opts)
	D = C - A B with A and/or B triangular.

Matrix-vector multiplication of batches of matrices
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	gemv (VA &&A, VB &&B, VD &&D, Opts... opts)
	d = A b
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	gemv_neg (VA &&A, VB &&B, VD &&D, Opts... opts)
	d = -A b
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	gemv_add (VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts)
	d = c + A b
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	gemv_add (VA &&A, VB &&B, VD &&D, Opts... opts)
	d = d + A b
template<simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
void	gemv_sub (VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts)
	d = c - A b
template<simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
void	gemv_sub (VA &&A, VB &&B, VD &&D, Opts... opts)
	d = d - A b

Cholesky factorization updates
template<MatrixStructure SL, simdifiable VL, simdifiable VA, simdifiable Vd>
void	hyhound_diag (Structured< VL, SL > L, VA &&A, Vd &&d)
	Update Cholesky factor L using low-rank term A diag(d) Aᵀ.
template<MatrixStructure SL, simdifiable VL, simdifiable VA, simdifiable Vd, simdifiable VW>
void	hyhound_diag (Structured< VL, SL > L, VA &&A, Vd &&d, VW &&W)
	Update Cholesky factor L using low-rank term A diag(d) Aᵀ, with full Householder representation.
template<MatrixStructure SL, simdifiable VL>
auto	hyhound_size_W (Structured< VL, SL > L)
	Get the size of the storage for the matrix W returned by hyhound_diag(Structured<VL,SL>, VA&&, Vd&&, VW&&).
template<simdifiable VL, simdifiable VA, simdifiable VD, simdifiable VB, simdifiable Vd, simdifiable VW>
void	hyhound_diag_apply (VL &&L, VA &&A, VD &&D, VB &&B, Vd &&d, VW &&W, index_t kA_in_offset=0)
	Apply Householder transformation generated by hyhound_diag, computing (L̃, D) = (L, A) Q̆.
template<simdifiable VL, simdifiable VA, simdifiable VB, simdifiable Vd, simdifiable VW>
void	hyhound_diag_apply (VL &&L, VA &&A, VB &&B, Vd &&d, VW &&W)
	Apply Householder transformation generated by hyhound_diag, computing (L̃, Ã) = (L, A) Q̆.
template<MatrixStructure SL, simdifiable VL, simdifiable VA, simdifiable Vd>
void	hyhound_sign (Structured< VL, SL > L, VA &&A, Vd &&d)
	Update Cholesky factor L using low-rank term A diag(copysign(1, d)) Aᵀ, where d contains only ±0 values.
template<MatrixStructure SL, simdifiable VL1, simdifiable VA1, simdifiable VL2, simdifiable VA2, simdifiable Vd>
void	hyhound_diag_2 (Structured< VL1, SL > L1, VA1 &&A1, VL2 &&L2, VA2 &&A2, Vd &&d)
	Update Cholesky factor L using low-rank term A diag(d) Aᵀ, where L and A are stored as two separate block rows.
template<MatrixStructure SL, simdifiable VL11, simdifiable VA1, simdifiable VL21, simdifiable VA2, simdifiable VA2o, simdifiable VU, simdifiable VA3, simdifiable VA3o, simdifiable Vd>
void	hyhound_diag_cyclic (Structured< VL11, SL > L11, VA1 &&A1, VL21 &&L21, VA2 &&A22, VA2o &&A2_out, VU &&L31, VA3 &&A31, VA3o &&A3_out, Vd &&d)
	Update structured Cholesky factor L using structured low-rank term A diag(d) Aᵀ.
template<MatrixStructure SL, simdifiable VL11, simdifiable VA1, simdifiable VL21, simdifiable VA2, simdifiable VA2o, simdifiable VLu1, simdifiable VAuo, simdifiable Vd>
void	hyhound_diag_riccati (Structured< VL11, SL > L11, VA1 &&A1, VL21 &&L21, VA2 &&A2, VA2o &&A2_out, VLu1 &&Lu1, VAuo &&Au_out, Vd &&d, bool shift_A_out=false)
	Update structured Cholesky factor L using structured low-rank term A diag(d) Aᵀ.

Cholesky factorization of batches of matrices
template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD>
void	syrk_add_potrf (VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VA > regularization=0)
	D = chol(C + AAᵀ) with C symmetric, D triangular.
template<MatrixStructure SC, simdifiable VA, simdifiable VD>
void	syrk_add_potrf (VA &&A, Structured< VD, SC > D)
	D = chol(D + AAᵀ) with D symmetric/triangular.
template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD>
void	syrk_sub_potrf (VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VA > regularization=0)
	D = chol(C - AAᵀ) with C symmetric, D triangular.
template<MatrixStructure SC, simdifiable VA, simdifiable VD>
void	syrk_sub_potrf (VA &&A, Structured< VD, SC > D, simdified_value_t< VA > regularization=0)
	D = chol(D - AAᵀ) with D symmetric/triangular.
template<MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd>
void	syrk_diag_add_potrf (VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, Vd &&d, simdified_value_t< VA > regularization=0)
	D = chol(C + A diag(d) Aᵀ) with C symmetric, D triangular.
template<MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd>
void	syrk_diag_add_potrf (VA &&A, Structured< VD, SC > D, Vd &&d)
	D = chol(D + A diag(d) Aᵀ) with D symmetric/triangular.
template<MatrixStructure SC, simdifiable VC, simdifiable VD>
void	potrf (Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VC > regularization=0)
	D = chol(C) with C symmetric, D triangular.
template<MatrixStructure SD, simdifiable VD>
void	potrf (Structured< VD, SD > D, simdified_value_t< VD > regularization=0)
	D = chol(D) with D symmetric/triangular.

Single-batch reduction operations
template<simdifiable Vx>
norms< simdified_value_t< Vx >, simdified_simd_t< Vx > >::result_simd	vnorms_all (Vx &&x)
	Compute the lane-wise norms (max, 1-norm, and 2-norm) of a batch of vectors.
template<simdifiable Vx>
norms< simdified_value_t< Vx > >::result	norms_all (Vx &&x)
	Compute the norms (max, 1-norm, and 2-norm) of a vector.
template<simdifiable Vx>
simdified_simd_t< Vx >	vnorm_inf (Vx &&x)
	Compute the lane-wise infinity norms of a batch of vectors.
template<simdifiable Vx>
simdified_value_t< Vx >	norm_inf (Vx &&x)
	Compute the infinity norm of a vector.
template<simdifiable Vx>
simdified_simd_t< Vx >	vnorm_1 (Vx &&x)
	Compute the lane-wise 1-norms of a batch of vectors.
template<simdifiable Vx>
simdified_value_t< Vx >	norm_1 (Vx &&x)
	Compute the 1-norm of a vector.
template<simdifiable Vx>
simdified_simd_t< Vx >	vnorm_2_squared (Vx &&x)
	Compute the lane-wise squared 2-norms of a batch of vectors.
template<simdifiable Vx>
simdified_value_t< Vx >	norm_2_squared (Vx &&x)
	Compute the squared 2-norm of a vector.
template<simdifiable Vx>
simdified_simd_t< Vx >	vnorm_2 (Vx &&x)
	Compute the lane-wise 2-norms of a batch of vectors.
template<simdifiable Vx>
simdified_value_t< Vx >	norm_2 (Vx &&x)
	Compute the 2-norm of a vector.
template<simdifiable Vx, simdifiable Vy>
simdified_simd_t< Vx >	vdot (Vx &&x, Vy &&y)
	Compute the lane-wise dot products of two batches of vectors.
template<simdifiable Vx, simdifiable Vy>
simdified_value_t< Vx >	dot (Vx &&x, Vy &&y)
	Compute the dot product of two vectors.
template<simdifiable Vw, simdifiable Va>
simdified_simd_t< Vw >	weighted_vnorm_sq (Vw &&w, Va &&a)
	∑ wᵢ aᵢ² (lane-wise).
template<simdifiable Vw, simdifiable Va>
simdified_value_t< Vw >	weighted_norm_sq (Vw &&w, Va &&a)
	∑ wᵢ aᵢ².
template<simdifiable Vw, simdifiable Va, simdifiable Vb>
simdified_simd_t< Vw >	weighted_vnorm_sq_diff (Vw &&w, Va &&a, Vb &&b)
	∑ wᵢ(aᵢ - bᵢ)² (lane-wise).
template<simdifiable Vw, simdifiable Va, simdifiable Vb>
simdified_value_t< Vw >	weighted_norm_sq_diff (Vw &&w, Va &&a, Vb &&b)
	∑ wᵢ(aᵢ - bᵢ)².

Multi-batch reduction operations
template<simdifiable_multi Vx>
norms< simdified_value_t< Vx > >::result	norms_all (Vx &&x)
	Compute the norms (max, 1-norm, and 2-norm) of a vector.
template<simdifiable_multi Vx>
simdified_value_t< Vx >	norm_inf (Vx &&x)
	Compute the infinity norm of a vector.
template<simdifiable_multi Vx>
simdified_value_t< Vx >	norm_1 (Vx &&x)
	Compute the 1-norm of a vector.
template<simdifiable_multi Vx>
simdified_value_t< Vx >	norm_2_squared (Vx &&x)
	Compute the squared 2-norm of a vector.
template<simdifiable_multi Vx>
simdified_value_t< Vx >	norm_2 (Vx &&x)
	Compute the 2-norm of a vector.
template<simdifiable_multi Vx, simdifiable_multi Vy>
simdified_value_t< Vx >	dot (Vx &&x, Vy &&y)
	Compute the dot product of two vectors.
template<simdifiable_multi Vw, simdifiable_multi Vx>
simdified_value_t< Vw >	weighted_norm_sq (Vw &&w, Vx &&x)
	∑ wᵢ xᵢ².
template<simdifiable_multi Vw, simdifiable_multi Vx, simdifiable_multi Vy>
simdified_value_t< Vw >	weighted_norm_sq_difference (Vw &&w, Vx &&x, Vy &&y)
	∑ wᵢ(xᵢ - yᵢ)².
template<simdifiable_multi Vx>
norms< simdified_value_t< Vx >, simdified_simd_t< Vx > >::result_simd	vnorms_all (Vx &&x)
	Compute the lane-wise norms (max, 1-norm, and 2-norm) of a batch of vectors.
template<simdifiable_multi Vx>
simdified_simd_t< Vx >	vnorm_inf (Vx &&x)
	Compute the lane-wise infinity norms of a batch of vectors.
template<simdifiable_multi Vx>
simdified_simd_t< Vx >	vnorm_1 (Vx &&x)
	Compute the lane-wise 1-norms of a batch of vectors.
template<simdifiable_multi Vx>
simdified_simd_t< Vx >	vnorm_2_squared (Vx &&x)
	Compute the lane-wise squared 2-norms of a batch of vectors.
template<simdifiable_multi Vx>
simdified_simd_t< Vx >	vnorm_2 (Vx &&x)
	Compute the lane-wise 2-norms of a batch of vectors.
template<simdifiable_multi Vx, simdifiable_multi Vy>
simdified_simd_t< Vx >	vdot (Vx &&x, Vy &&y)
	Compute the lane-wise dot products of two batches of vectors.
template<simdifiable_multi Vw, simdifiable_multi Vx>
simdified_simd_t< Vw >	weighted_vnorm_sq (Vw &&w, Vx &&x)
	∑ wᵢ xᵢ² (lane-wise).
template<simdifiable_multi Vw, simdifiable_multi Vx, simdifiable_multi Vy>
simdified_simd_t< Vw >	weighted_vnorm_sq_diff (Vw &&w, Vx &&x, Vy &&y)
	∑ wᵢ(xᵢ - yᵢ)² (lane-wise).

Symmetric multiplication of batches of matrices
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>
void	symm_add (Structured< VA, SA > A, VB &&B, VC &&C, VD &&D)
	D = C + A B with A symmetric.
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	symm_add (Structured< VA, SA > A, VB &&B, VD &&D)
	D = D + A B with A symmetric.

Symmetric matrix-vector multiplication of batches of matrices
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	symv (Structured< VA, SA > A, VB &&B, VD &&D)
	d = A b where A is symmetric
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	symv_neg (Structured< VA, SA > A, VB &&B, VD &&D)
	d = -A b where A is symmetric
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>
void	symv_add (Structured< VA, SA > A, VB &&B, VC &&C, VD &&D)
	d = c + A b where A is symmetric
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	symv_add (Structured< VA, SA > A, VB &&B, VD &&D)
	d = d + A b where A is symmetric
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>
void	symv_sub (Structured< VA, SA > A, VB &&B, VC &&C, VD &&D)
	d = c - A b where A is symmetric
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	symv_sub (Structured< VA, SA > A, VB &&B, VD &&D)
	d = d - A b where A is symmetric

Symmetric matrix-vector multiplication of a block tridiagonal matrix
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	syomv (Structured< VA, SA > A, VB &&B, VD &&D)
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void	syomv_neg (Structured< VA, SA > A, VB &&B, VD &&D)

Triangular views of batches of matrices
template<class M>
constexpr auto	tril (M &&m)
	Lower-triangular view.
template<class M>
constexpr auto	triu (M &&m)
	Upper-triangular view.
template<MatrixStructure S, class M>
constexpr auto	make_structured (M &&m)
	View with the given structure.

Triangular solve of batches of matrices
template<MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD, int RotB = 0>
void	trsm (Structured< VA, SA > A, VB &&B, VD &&D, with_rotate_B_t< RotB >={})
	D = A⁻¹ B with A triangular.
template<MatrixStructure SA, simdifiable VA, simdifiable VD, int RotB = 0>
void	trsm (Structured< VA, SA > A, VD &&D, with_rotate_B_t< RotB > shift={})
	D = A⁻¹ D with A triangular.
template<MatrixStructure SB, simdifiable VA, simdifiable VB, simdifiable VD, int RotA = 0>
void	trsm (VA &&A, Structured< VB, SB > B, VD &&D, with_rotate_A_t< RotA >={})
	D = A B⁻¹ with B triangular.
template<MatrixStructure SB, simdifiable VB, simdifiable VD, int RotA = 0>
void	trsm (VD &&D, Structured< VB, SB > B, with_rotate_A_t< RotA > shift={})
	D = D B⁻¹ with B triangular.

Triangular inversion of batches of matrices
template<simdifiable VA, simdifiable VD>
void	trtri (Structured< VA, MatrixStructure::LowerTriangular > A, Structured< VD, MatrixStructure::LowerTriangular > D)
	D = A⁻¹ with A, D lower triangular.
template<simdifiable VA, simdifiable VD>
void	trtri (Structured< VA, MatrixStructure::UpperTriangular > A, Structured< VD, MatrixStructure::UpperTriangular > D)
	D = A⁻¹ with A, D upper triangular.
template<simdifiable VD>
void	trtri (Structured< VD, MatrixStructure::LowerTriangular > D)
	D = D⁻¹ with D lower triangular.
template<simdifiable VD>
void	trtri (Structured< VD, MatrixStructure::UpperTriangular > D)
	D = D⁻¹ with D upper triangular.

Classes
struct	track_zeros_t
struct	TilingOptions
	Packing and tiling options for matrix-matrix multiplication. More...
struct	norms
	Utilities for computing vector norms. More...
struct	norms< T, void >
struct	with_shift_A_t
struct	with_rotate_B_t
struct	with_rotate_C_t
struct	with_rotate_D_t
struct	with_mask_D_t
struct	with_rotate_t
struct	with_mask_t
struct	Structured
	Light-weight wrapper class used for overload resolution of triangular and symmetric matrices. More...
struct	with_rotate_A_t
struct	simd_view_types
struct	uview
struct	uview_vec
struct	cached_uview

Concepts
concept	shift_opt
concept	rotate_opt
concept	simdifiable
concept	simdifiable_multi

Typedefs
template<class V>
using	simdified_view_type = detail::simdified_view_type<std::remove_reference_t<V>>
	Convert the given view or matrix type `V` (batmat::matrix::View or batmat::matrix::Matrix) to a batched view type using a deduced SIMD type.
template<simdifiable V>
using	simdified_view_t = typename simdified_view_type<V>::type
template<class V>
using	simdified_value_t = typename detail::simdified_value<V>::type
template<class V>
using	simdified_abi_t = typename detail::simdified_abi<V>::type
template<class V>
using	simdified_simd_t = typename detail::simdified_simd<V>::type
template<class V>
using	simdified_multi_view_type = detail::simdified_multi_view_type<std::remove_reference_t<V>>
template<simdifiable_multi V>
using	simdified_multi_view_t = typename simdified_multi_view_type<V>::type
template<class T, class Abi, StorageOrder Order = StorageOrder::ColMajor>
using	view = simd_view_types<std::remove_const_t<T>, Abi>::template view<T, Order>
template<class T, class Abi, StorageOrder Order = StorageOrder::ColMajor>
using	matrix = simd_view_types<std::remove_const_t<T>, Abi>::template matrix<T, Order>
template<class Abi, StorageOrder Order = StorageOrder::ColMajor>
using	real_view = simd_view_types<real_t, Abi>::template view<const real_t, Order>
template<class Abi, StorageOrder Order = StorageOrder::ColMajor>
using	mut_real_view = simd_view_types<real_t, Abi>::template view<real_t, Order>

Enumerations
enum class	PackingSelector : int8_t { PackingSelector::Never , PackingSelector::Always , PackingSelector::Transpose }
	Decides which matrices to pack during large matrix-matrix multiplication. More...
enum class	MatrixStructure : int8_t { MatrixStructure::General , MatrixStructure::LowerTriangular , MatrixStructure::UpperTriangular }
enum class	StorageOrder

Functions
constexpr auto	simdify (simdifiable auto &&a) -> simdified_view_t< decltype(a)>
constexpr auto	simdify (simdifiable_multi auto &&a) -> simdified_multi_view_t< decltype(a)>
constexpr MatrixStructure	transpose (MatrixStructure s)
template<class M>
	Structured (M &&) -> Structured< M >
template<class M, MatrixStructure S>
void	simdify (const Structured< M, S > &)=delete
template<index_t Rows, index_t Cols, class T, class Abi, StorageOrder Order>
cached_uview< Order==StorageOrder::ColMajor ? Cols : Rows, T, Abi, Order >	with_cached_access (const uview< T, Abi, Order > &o) noexcept
template<index_t Rows, index_t Cols, class T, class Abi>
cached_uview< Cols, T, Abi, StorageOrder::ColMajor >	with_cached_access (const uview< T, Abi, StorageOrder::ColMajor > &o) noexcept
template<index_t Rows, index_t Cols, class T, class Abi>
uview< T, Abi, StorageOrder::RowMajor >	with_cached_access (const uview< T, Abi, StorageOrder::RowMajor > &o) noexcept
template<index_t Rows, index_t Cols, class T, class Abi>
cached_uview< Rows, T, Abi, StorageOrder::RowMajor >	with_cached_access (const uview< T, Abi, StorageOrder::RowMajor > &o) noexcept
template<index_t Rows, index_t Cols, class T, class Abi>
uview< T, Abi, StorageOrder::ColMajor >	with_cached_access (const uview< T, Abi, StorageOrder::ColMajor > &o) noexcept

Variables
template<bool Z = true>
constexpr track_zeros_t< Z >	track_zeros
template<int I>
constexpr with_shift_A_t< I >	with_shift_A
template<int I>
constexpr with_rotate_B_t< I >	with_rotate_B
template<int I>
constexpr with_rotate_C_t< I >	with_rotate_C
template<int I>
constexpr with_rotate_D_t< I >	with_rotate_D
template<int I>
constexpr with_mask_D_t< I >	with_mask_D
template<class...>
constexpr std::optional< int >	shift_A = std::nullopt
template<class T, class... Ts>
constexpr std::optional< int >	shift_A< T, Ts... > = shift_A<Ts...>
template<int I, class... Ts>
constexpr std::optional< int >	shift_A< with_shift_A_t< I >, Ts... > = I
template<class...>
constexpr std::optional< int >	rotate_B = std::nullopt
template<class T, class... Ts>
constexpr std::optional< int >	rotate_B< T, Ts... > = rotate_B<Ts...>
template<int I, class... Ts>
constexpr std::optional< int >	rotate_B< with_rotate_B_t< I >, Ts... > = I
template<class...>
constexpr std::optional< int >	rotate_C = std::nullopt
template<class T, class... Ts>
constexpr std::optional< int >	rotate_C< T, Ts... > = rotate_C<Ts...>
template<int I, class... Ts>
constexpr std::optional< int >	rotate_C< with_rotate_C_t< I >, Ts... > = I
template<class...>
constexpr std::optional< int >	rotate_D = std::nullopt
template<class T, class... Ts>
constexpr std::optional< int >	rotate_D< T, Ts... > = rotate_D<Ts...>
template<int I, class... Ts>
constexpr std::optional< int >	rotate_D< with_rotate_D_t< I >, Ts... > = I
template<class...>
constexpr std::optional< int >	mask_D = std::nullopt
template<class T, class... Ts>
constexpr std::optional< int >	mask_D< T, Ts... > = mask_D<Ts...>
template<int I, class... Ts>
constexpr std::optional< int >	mask_D< with_mask_D_t< I >, Ts... > = I
template<class>
constexpr bool	is_shift_opt = false
template<int I>
constexpr bool	is_shift_opt< with_shift_A_t< I > > = true
template<int I>
constexpr bool	is_shift_opt< with_rotate_B_t< I > > = true
template<int I>
constexpr bool	is_shift_opt< with_rotate_C_t< I > > = true
template<int I>
constexpr bool	is_shift_opt< with_rotate_D_t< I > > = true
template<int I>
constexpr bool	is_shift_opt< with_mask_D_t< I > > = true
template<int I>
constexpr with_rotate_t< I >	with_rotate
template<int I>
constexpr with_mask_t< I >	with_mask
template<class...>
constexpr std::optional< int >	get_rotate = std::nullopt
template<class T, class... Ts>
constexpr std::optional< int >	get_rotate< T, Ts... > = get_rotate<Ts...>
template<int I, class... Ts>
constexpr std::optional< int >	get_rotate< with_rotate_t< I >, Ts... > = I
template<class...>
constexpr std::optional< int >	get_mask = std::nullopt
template<class T, class... Ts>
constexpr std::optional< int >	get_mask< T, Ts... > = get_mask<Ts...>
template<int I, class... Ts>
constexpr std::optional< int >	get_mask< with_mask_t< I >, Ts... > = I
template<class>
constexpr bool	is_rotate_opt = false
template<int I>
constexpr bool	is_rotate_opt< with_rotate_t< I > > = true
template<int I>
constexpr bool	is_rotate_opt< with_mask_t< I > > = true
template<class...>
constexpr bool	simdify_compatible = false
template<simdifiable V, simdifiable... Vs>
constexpr bool	simdify_compatible< V, Vs... >
template<int I>
constexpr with_rotate_A_t< I >	with_rotate_A

Typedef Documentation

◆ simdified_view_type

template<class V>

using batmat::linalg::simdified_view_type = detail::simdified_view_type<std::remove_reference_t<V>>

Convert the given view or matrix type V (batmat::matrix::View or batmat::matrix::Matrix) to a batched view type using a deduced SIMD type.

This conversion takes place in the wrapper around the optimized implementations (which require views with a proper SIMD-compatible stride).

Definition at line 177 of file simdify.hpp.

◆ simdified_view_t

template<simdifiable V>

using batmat::linalg::simdified_view_t = typename simdified_view_type<V>::type

Definition at line 183 of file simdify.hpp.

◆ simdified_value_t

template<class V>

using batmat::linalg::simdified_value_t = typename detail::simdified_value<V>::type

Definition at line 214 of file simdify.hpp.

◆ simdified_abi_t

template<class V>

using batmat::linalg::simdified_abi_t = typename detail::simdified_abi<V>::type

Definition at line 216 of file simdify.hpp.

◆ simdified_simd_t

template<class V>

using batmat::linalg::simdified_simd_t = typename detail::simdified_simd<V>::type

Definition at line 218 of file simdify.hpp.

◆ simdified_multi_view_type

template<class V>

using batmat::linalg::simdified_multi_view_type = detail::simdified_multi_view_type<std::remove_reference_t<V>>

Definition at line 238 of file simdify.hpp.

◆ simdified_multi_view_t

template<simdifiable_multi V>

using batmat::linalg::simdified_multi_view_t = typename simdified_multi_view_type<V>::type

Definition at line 245 of file simdify.hpp.

◆ view

template<class T, class Abi, StorageOrder Order = StorageOrder::ColMajor>

using batmat::linalg::view = simd_view_types<std::remove_const_t<T>, Abi>::template view<T, Order>

Definition at line 70 of file uview.hpp.

◆ matrix

template<class T, class Abi, StorageOrder Order = StorageOrder::ColMajor>

using batmat::linalg::matrix = simd_view_types<std::remove_const_t<T>, Abi>::template matrix<T, Order>

Definition at line 72 of file uview.hpp.

◆ real_view

template<class Abi, StorageOrder Order = StorageOrder::ColMajor>

using batmat::linalg::real_view = simd_view_types<real_t, Abi>::template view<const real_t, Order>

Definition at line 75 of file uview.hpp.

◆ mut_real_view

template<class Abi, StorageOrder Order = StorageOrder::ColMajor>

using batmat::linalg::mut_real_view = simd_view_types<real_t, Abi>::template view<real_t, Order>

Definition at line 77 of file uview.hpp.

Enumeration Type Documentation

◆ StorageOrder

enum class guanaqo::StorageOrder

strong

Function Documentation

◆ copy() [1/2]

template<simdifiable_multi VA, simdifiable_multi VB, rotate_opt... Opts>

void batmat::linalg::copy	(	VA &&	A,
		VB &&	B,
		Opts...	opts )

B = A.

Definition at line 223 of file copy.hpp.

◆ copy() [2/2]

template<MatrixStructure S, simdifiable_multi VA, simdifiable_multi VB, rotate_opt... Opts>

void batmat::linalg::copy	(	Structured< VA, S >	A,
		Structured< VB, S >	B,
		Opts...	opts )

B = A.

Definition at line 232 of file copy.hpp.

◆ fill() [1/2]

template<simdifiable_multi VB>

void batmat::linalg::fill	(	simdified_value_t< VB >	a,
		VB &&	B )

B = a.

Definition at line 240 of file copy.hpp.

◆ fill() [2/2]

template<MatrixStructure S, simdifiable_multi VB>

void batmat::linalg::fill	(	simdified_value_t< VB >	a,
		Structured< VB, S >	B )

B = a.

Definition at line 247 of file copy.hpp.

◆ simdify() [1/3]

auto batmat::linalg::simdify ( simdifiable auto && a ) -> simdified_view_t< decltype(a)>

constexpr

Definition at line 228 of file simdify.hpp.

◆ simdify() [2/3]

auto batmat::linalg::simdify ( simdifiable_multi auto && a ) -> simdified_multi_view_t< decltype(a)>

constexpr

Definition at line 271 of file simdify.hpp.

◆ simdify() [3/3]

template<class M, MatrixStructure S>

void batmat::linalg::simdify ( const Structured< M, S > & )

delete

◆ with_cached_access() [1/5]

template<index_t Rows, index_t Cols, class T, class Abi, StorageOrder Order>

cached_uview< Order==StorageOrder::ColMajor ? Cols : Rows, T, Abi, Order > batmat::linalg::with_cached_access ( const uview< T, Abi, Order > & o )

inline noexcept

Definition at line 228 of file uview.hpp.

◆ with_cached_access() [2/5]

template<index_t Rows, index_t Cols, class T, class Abi>

cached_uview< Cols, T, Abi, StorageOrder::ColMajor > batmat::linalg::with_cached_access ( const uview< T, Abi, StorageOrder::ColMajor > & o )

inline noexcept

Definition at line 235 of file uview.hpp.

◆ with_cached_access() [3/5]

template<index_t Rows, index_t Cols, class T, class Abi>

uview< T, Abi, StorageOrder::RowMajor > batmat::linalg::with_cached_access ( const uview< T, Abi, StorageOrder::RowMajor > & o )

inline noexcept

Definition at line 242 of file uview.hpp.

◆ with_cached_access() [4/5]

template<index_t Rows, index_t Cols, class T, class Abi>

cached_uview< Rows, T, Abi, StorageOrder::RowMajor > batmat::linalg::with_cached_access ( const uview< T, Abi, StorageOrder::RowMajor > & o )

inline noexcept

Definition at line 249 of file uview.hpp.

◆ with_cached_access() [5/5]

template<index_t Rows, index_t Cols, class T, class Abi>

uview< T, Abi, StorageOrder::ColMajor > batmat::linalg::with_cached_access ( const uview< T, Abi, StorageOrder::ColMajor > & o )

inline noexcept

Definition at line 256 of file uview.hpp.

Variable Documentation

◆ track_zeros

template<bool Z = true>

track_zeros_t<Z> batmat::linalg::track_zeros

inline constexpr

Definition at line 56 of file gemm-diag.hpp.

◆ with_shift_A

template<int I>

with_shift_A_t<I> batmat::linalg::with_shift_A

inline constexpr

Definition at line 20 of file shift.hpp.

◆ with_rotate_B

template<int I>

with_rotate_B_t<I> batmat::linalg::with_rotate_B

inline constexpr

Definition at line 22 of file shift.hpp.

◆ with_rotate_C

template<int I>

with_rotate_C_t<I> batmat::linalg::with_rotate_C

inline constexpr

Definition at line 24 of file shift.hpp.

◆ with_rotate_D

template<int I>

with_rotate_D_t<I> batmat::linalg::with_rotate_D

inline constexpr

Definition at line 26 of file shift.hpp.

◆ with_mask_D

template<int I>

with_mask_D_t<I> batmat::linalg::with_mask_D

inline constexpr

Definition at line 28 of file shift.hpp.

◆ shift_A

template<class...>

std::optional<int> batmat::linalg::shift_A = std::nullopt

inline constexpr

Definition at line 31 of file shift.hpp.

◆ shift_A< T, Ts... >

template<class T, class... Ts>

std::optional<int> batmat::linalg::shift_A< T, Ts... > = shift_A<Ts...>

inline constexpr

Definition at line 33 of file shift.hpp.

◆ shift_A< with_shift_A_t< I >, Ts... >

template<int I, class... Ts>

std::optional<int> batmat::linalg::shift_A< with_shift_A_t< I >, Ts... > = I

inline constexpr

Definition at line 35 of file shift.hpp.

◆ rotate_B

template<class...>

std::optional<int> batmat::linalg::rotate_B = std::nullopt

inline constexpr

Definition at line 38 of file shift.hpp.

◆ rotate_B< T, Ts... >

template<class T, class... Ts>

std::optional<int> batmat::linalg::rotate_B< T, Ts... > = rotate_B<Ts...>

inline constexpr

Definition at line 40 of file shift.hpp.

◆ rotate_B< with_rotate_B_t< I >, Ts... >

template<int I, class... Ts>

std::optional<int> batmat::linalg::rotate_B< with_rotate_B_t< I >, Ts... > = I

inline constexpr

Definition at line 42 of file shift.hpp.

◆ rotate_C

template<class...>

std::optional<int> batmat::linalg::rotate_C = std::nullopt

inline constexpr

Definition at line 45 of file shift.hpp.

◆ rotate_C< T, Ts... >

template<class T, class... Ts>

std::optional<int> batmat::linalg::rotate_C< T, Ts... > = rotate_C<Ts...>

inline constexpr

Definition at line 47 of file shift.hpp.

◆ rotate_C< with_rotate_C_t< I >, Ts... >

template<int I, class... Ts>

std::optional<int> batmat::linalg::rotate_C< with_rotate_C_t< I >, Ts... > = I

inline constexpr

Definition at line 49 of file shift.hpp.

◆ rotate_D

template<class...>

std::optional<int> batmat::linalg::rotate_D = std::nullopt

inline constexpr

Definition at line 52 of file shift.hpp.

◆ rotate_D< T, Ts... >

template<class T, class... Ts>

std::optional<int> batmat::linalg::rotate_D< T, Ts... > = rotate_D<Ts...>

inline constexpr

Definition at line 54 of file shift.hpp.

◆ rotate_D< with_rotate_D_t< I >, Ts... >

template<int I, class... Ts>

std::optional<int> batmat::linalg::rotate_D< with_rotate_D_t< I >, Ts... > = I

inline constexpr

Definition at line 56 of file shift.hpp.

◆ mask_D

template<class...>

std::optional<int> batmat::linalg::mask_D = std::nullopt

inline constexpr

Definition at line 59 of file shift.hpp.

◆ mask_D< T, Ts... >

template<class T, class... Ts>

std::optional<int> batmat::linalg::mask_D< T, Ts... > = mask_D<Ts...>

inline constexpr

Definition at line 61 of file shift.hpp.

◆ mask_D< with_mask_D_t< I >, Ts... >

template<int I, class... Ts>

std::optional<int> batmat::linalg::mask_D< with_mask_D_t< I >, Ts... > = I

inline constexpr

Definition at line 63 of file shift.hpp.

◆ is_shift_opt

template<class>

bool batmat::linalg::is_shift_opt = false

inline constexpr

Definition at line 66 of file shift.hpp.

◆ is_shift_opt< with_shift_A_t< I > >

template<int I>

bool batmat::linalg::is_shift_opt< with_shift_A_t< I > > = true

inline constexpr

Definition at line 68 of file shift.hpp.

◆ is_shift_opt< with_rotate_B_t< I > >

template<int I>

bool batmat::linalg::is_shift_opt< with_rotate_B_t< I > > = true

inline constexpr

Definition at line 70 of file shift.hpp.

◆ is_shift_opt< with_rotate_C_t< I > >

template<int I>

bool batmat::linalg::is_shift_opt< with_rotate_C_t< I > > = true

inline constexpr

Definition at line 72 of file shift.hpp.

◆ is_shift_opt< with_rotate_D_t< I > >

template<int I>

bool batmat::linalg::is_shift_opt< with_rotate_D_t< I > > = true

inline constexpr

Definition at line 74 of file shift.hpp.

◆ is_shift_opt< with_mask_D_t< I > >

template<int I>

bool batmat::linalg::is_shift_opt< with_mask_D_t< I > > = true

inline constexpr

Definition at line 76 of file shift.hpp.

◆ with_rotate

template<int I>

with_rotate_t<I> batmat::linalg::with_rotate

inline constexpr

Definition at line 87 of file shift.hpp.

◆ with_mask

template<int I>

with_mask_t<I> batmat::linalg::with_mask

inline constexpr

Definition at line 89 of file shift.hpp.

◆ get_rotate

template<class...>

std::optional<int> batmat::linalg::get_rotate = std::nullopt

inline constexpr

Definition at line 92 of file shift.hpp.

◆ get_rotate< T, Ts... >

template<class T, class... Ts>

std::optional<int> batmat::linalg::get_rotate< T, Ts... > = get_rotate<Ts...>

inline constexpr

Definition at line 94 of file shift.hpp.

◆ get_rotate< with_rotate_t< I >, Ts... >

template<int I, class... Ts>

std::optional<int> batmat::linalg::get_rotate< with_rotate_t< I >, Ts... > = I

inline constexpr

Definition at line 96 of file shift.hpp.

◆ get_mask

template<class...>

std::optional<int> batmat::linalg::get_mask = std::nullopt

inline constexpr

Definition at line 99 of file shift.hpp.

◆ get_mask< T, Ts... >

template<class T, class... Ts>

std::optional<int> batmat::linalg::get_mask< T, Ts... > = get_mask<Ts...>

inline constexpr

Definition at line 101 of file shift.hpp.

◆ get_mask< with_mask_t< I >, Ts... >

template<int I, class... Ts>

std::optional<int> batmat::linalg::get_mask< with_mask_t< I >, Ts... > = I

inline constexpr

Definition at line 103 of file shift.hpp.

◆ is_rotate_opt

template<class>

bool batmat::linalg::is_rotate_opt = false

inline constexpr

Definition at line 106 of file shift.hpp.

◆ is_rotate_opt< with_rotate_t< I > >

template<int I>

bool batmat::linalg::is_rotate_opt< with_rotate_t< I > > = true

inline constexpr

Definition at line 108 of file shift.hpp.

◆ is_rotate_opt< with_mask_t< I > >

template<int I>

bool batmat::linalg::is_rotate_opt< with_mask_t< I > > = true

inline constexpr

Definition at line 110 of file shift.hpp.

◆ simdify_compatible

template<class...>

bool batmat::linalg::simdify_compatible = false

inline constexpr

Definition at line 221 of file simdify.hpp.

◆ simdify_compatible< V, Vs... >

template<simdifiable V, simdifiable... Vs>

bool batmat::linalg::simdify_compatible< V, Vs... >

inline constexpr

Initial value:

= (std::is_same_v<simdified_value_t<V>, simdified_value_t<Vs>> && ...) &&
  (std::is_same_v<simdified_abi_t<V>, simdified_abi_t<Vs>> && ...)

Definition at line 224 of file simdify.hpp.

◆ with_rotate_A

template<int I>

with_rotate_A_t<I> batmat::linalg::with_rotate_A

inline constexpr

Definition at line 20 of file trsm.hpp.

Namespaces

Compression of masks containing zeros

Copying and filling batches of matrices

Copying and filling multiple batches of matrices

Single-batch elementwise operations

Multi-batch elementwise operations

Multiplication of batches of matrices with diagonal scaling

Multiplication of batches of general matrices

Multiplication of batches of matrices with symmetric results

Multiplication of batches of triangular matrices

Matrix-vector multiplication of batches of matrices

Cholesky factorization updates

Cholesky factorization of batches of matrices

Single-batch reduction operations

Multi-batch reduction operations

Symmetric multiplication of batches of matrices

Symmetric matrix-vector multiplication of batches of matrices

Symmetric matrix-vector multiplication of a block tridiagonal matrix

Triangular views of batches of matrices

Triangular solve of batches of matrices

Triangular inversion of batches of matrices

Classes

Concepts

Typedefs

Enumerations

Functions

Variables

Typedef Documentation

◆ simdified_view_type

◆ simdified_view_t

◆ simdified_value_t

◆ simdified_abi_t

◆ simdified_simd_t

◆ simdified_multi_view_type

◆ simdified_multi_view_t

◆ view

◆ matrix

◆ real_view

◆ mut_real_view

Enumeration Type Documentation

◆ StorageOrder

Function Documentation

◆ copy() [1/2]

◆ copy() [2/2]

◆ fill() [1/2]

◆ fill() [2/2]

◆ simdify() [1/3]

◆ simdify() [2/3]

◆ simdify() [3/3]

◆ with_cached_access() [1/5]

◆ with_cached_access() [2/5]

◆ with_cached_access() [3/5]

◆ with_cached_access() [4/5]

◆ with_cached_access() [5/5]

Variable Documentation

◆ track_zeros

◆ with_shift_A

◆ with_rotate_B

◆ with_rotate_C

◆ with_rotate_D

◆ with_mask_D

◆ shift_A

◆ shift_A< T, Ts... >

◆ shift_A< with_shift_A_t< I >, Ts... >

◆ rotate_B

◆ rotate_B< T, Ts... >

◆ rotate_B< with_rotate_B_t< I >, Ts... >

◆ rotate_C

◆ rotate_C< T, Ts... >

◆ rotate_C< with_rotate_C_t< I >, Ts... >

◆ rotate_D

◆ rotate_D< T, Ts... >

◆ rotate_D< with_rotate_D_t< I >, Ts... >

◆ mask_D

◆ mask_D< T, Ts... >

◆ mask_D< with_mask_D_t< I >, Ts... >

◆ is_shift_opt

◆ is_shift_opt< with_shift_A_t< I > >

◆ is_shift_opt< with_rotate_B_t< I > >

◆ is_shift_opt< with_rotate_C_t< I > >