13#include <guanaqo/trace.hpp>
31 const index_t M = D.rows(), N = D.cols(), K = A.cols();
36 if (M == 0 || N == 0) [[unlikely]]
38 if (K == 0) [[unlikely]] {
55template <
bool Z = true>
61template <
class T,
class... Ts>
63template <
bool Z,
class... Ts>
74template <
class... Opts>
93void gemm_diag(VA &&A, VB &&B, VD &&D, Vd &&d, Opts... opts) {
94 std::optional<
decltype(
simdify(D).as_const())> null;
101template <simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, simdifiable Vd,
102 detail::gemm_diag::track_zeros_opt... Opts>
111template <simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd,
112 detail::gemm_diag::track_zeros_opt... Opts>
119template <simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, simdifiable Vd,
120 detail::gemm_diag::track_zeros_opt... Opts>
129template <simdifiable VA, simdifiable VB, simdifiable VD, simdifiable Vd,
130 detail::gemm_diag::track_zeros_opt... Opts>
137template <
MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd,
138 detail::gemm_diag::track_zeros_opt... Opts>
142 std::optional<
decltype(
simdify(D.
value).as_const())> null;
143 constexpr auto conf =
151template <
MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd,
152 detail::gemm_diag::track_zeros_opt... Opts>
156 constexpr auto conf =
163template <
MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd,
164 detail::gemm_diag::track_zeros_opt... Opts>
171template <
MatrixStructure SC, simdifiable VA, simdifiable VC, simdifiable VD, simdifiable Vd,
172 detail::gemm_diag::track_zeros_opt... Opts>
176 constexpr auto conf =
183template <
MatrixStructure SC, simdifiable VA, simdifiable VD, simdifiable Vd,
184 detail::gemm_diag::track_zeros_opt... Opts>
constexpr FlopCount gemmt_diag(index_t m, index_t n, index_t k, MatrixStructure sC)
Matrix-matrix multiplication of m×k and k×n matrices with a diagonal k×k matrix in the middle,...
void gemm_diag_add(VA &&A, VB &&B, VC &&C, VD &&D, Vd &&d, Opts... opts)
D = C + A diag(d) B.
void syrk_diag(VA &&A, Structured< VD, SC > D, Vd &&d, Opts... opts)
D = A diag(d) Aᵀ with D symmetric.
void syrk_diag_sub(VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, Vd &&d, Opts... opts)
D = C - A diag(d) Aᵀ with C, D symmetric.
void syrk_diag_add(VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, Vd &&d, Opts... opts)
D = C + A diag(d) Aᵀ with C, D symmetric.
void gemm_diag_sub(VA &&A, VB &&B, VC &&C, VD &&D, Vd &&d, Opts... opts)
D = C - A diag(d) B.
void gemm_diag(VA &&A, VB &&B, VD &&D, Vd &&d, Opts... opts)
D = A diag(d) B.
#define GUANAQO_TRACE_LINALG(name, gflops)
void fill(T a, view< T, Abi, OB > B)
void copy(view< const T, Abi, OA > A, view< T, Abi, OB > B)
constexpr bool is_track_zeros_opt
void gemm_diag(view< const T, Abi, OA > A, view< const T, Abi, OB > B, std::optional< view< const T, Abi, OC > > C, view< T, Abi, OD > D, view< const T, Abi > d)
constexpr micro_kernels::gemm_diag::KernelConfig apply_options(micro_kernels::gemm_diag::KernelConfig conf, Opts...)
constexpr std::optional< bool > get_track_zeros
void gemm_diag_copy_register(view< const T, Abi, OA > A, view< const T, Abi, OB > B, std::optional< view< const T, Abi, OC > > C, view< T, Abi, OD > D, view< const T, Abi > diag) noexcept
Generalized matrix multiplication D = C ± A⁽ᵀ⁾ diag(d) B⁽ᵀ⁾, using register blocking.
typename detail::simdified_abi< V >::type simdified_abi_t
constexpr track_zeros_t< Z > track_zeros
constexpr bool simdify_compatible
constexpr auto simdify(simdifiable auto &&a) -> simdified_view_t< decltype(a)>
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Aligned allocation for matrix storage.
Light-weight wrapper class used for overload resolution of triangular and symmetric matrices.