12 #include <guanaqo/trace.hpp>
17 template <class T, class Abi, micro_kernels::potrf::KernelConfig Conf, StorageOrder OA,
27 if constexpr (Conf.with_diag()) {
31 const index_t M = D.rows(), N = D.cols();
34 if (M == 0 || N == 0) [[unlikely]]
47 template <MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD>
55 template <MatrixStructure SC, simdifiable VA, simdifiable VD>
62 template <MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD>
70 template <MatrixStructure SC, simdifiable VA, simdifiable VD>
77 template <MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd>
83 {.negate_A = false, .diag_A = KernelConfig::diag, .struc_C = SC}>(
88 template <MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd>
95 template <MatrixStructure SC, simdifiable VC, simdifiable VD>
98 decltype(simdify(C.value).as_const()) null{{.data = nullptr, .rows = 0, .cols = 0}};
104 template <MatrixStructure SD, simdifiable VD>
106 decltype(simdify(D.value).as_const()) null{{.data = nullptr, .rows = 0, .cols = 0}};
constexpr FlopCount syrk_potrf(index_t m, index_t n, index_t k)
Fused symmetric rank-k update and Cholesky factorization of an m×n matrix with m≥n.
void syrk_add_potrf(VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VA > regularization=0)
D = chol(C + AAᵀ) with C symmetric, D triangular.
void syrk_sub_potrf(VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VA > regularization=0)
D = chol(C - AAᵀ) with C symmetric, D triangular.
void potrf(Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VC > regularization=0)
D = chol(C) with C symmetric, D triangular.
void syrk_diag_add_potrf(VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, Vd &&d, simdified_value_t< VA > regularization=0)
D = chol(C + A diag(d) Aᵀ) with C symmetric, D triangular.
#define GUANAQO_TRACE_LINALG(name, gflops)
void potrf(view< const T, Abi, OA > A, view< const T, Abi, OCD > C, view< T, Abi, OCD > D, T regularization, micro_kernels::potrf::diag_view_type< const T, Abi, Conf > d={})
void potrf_copy_register(view< const T, Abi, OA > A, view< const T, Abi, OCD > C, view< T, Abi, OCD > D, T regularization, diag_view_type< const T, Abi, Conf > diag) noexcept
std::conditional_t< Conf.with_diag(), view< T, Abi >, std::false_type > diag_view_type
typename detail::simdified_value< V >::type simdified_value_t
typename detail::simdified_abi< V >::type simdified_abi_t
constexpr bool simdify_compatible
constexpr auto simdify(simdifiable auto &&a) -> simdified_view_t< decltype(a)>
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Aligned allocation for matrix storage.
Light-weight wrapper class used for overload resolution of triangular and symmetric matrices.