8#include <guanaqo/trace.hpp>
13template <
class T,
class Abi, MatrixStructure SA, StorageOrder OA>
25 const index_t M = D.rows(), N = D.cols(), K = A.cols();
28 if (M == 0 || N == 0) [[unlikely]]
36 for (index_t j = 0; j < N; ++j)
37 for (index_t l = 0; l < K; ++l) {
38 auto Blj = B_.
load(l, j);
39 auto All = A_.
load(l, l);
40 auto Dlj = All * Blj + C_.
load(l, j);
42 auto Ail = A_.
load(i, l);
43 auto Bil = B_.
load(i, j);
44 D_.
store(Ail * Blj + C_.
load(i, j), i, j);
58template <MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
61 std::optional<
decltype(
simdify(D).as_const())> null;
74template <MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>
81template <MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
void symm_add(Structured< VA, SA > A, VB &&B, VC &&C, VD &&D)
D = C + A B with A symmetric.
Structured(M &&) -> Structured< M >
#define GUANAQO_TRACE_LINALG(name, gflops)
void symm(view< const T, Abi, OA > A, view< const T, Abi > B, std::optional< view< const T, Abi > > C, view< T, Abi > D)
typename detail::simdified_abi< V >::type simdified_abi_t
constexpr bool simdify_compatible
constexpr auto simdify(simdifiable auto &&a) -> simdified_view_t< decltype(a)>
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Aligned allocation for matrix storage.
Light-weight wrapper class used for overload resolution of triangular and symmetric matrices.
void store(simd x, index_t r, index_t c) const noexcept
simd load(index_t r, index_t c) const noexcept
#define BATMAT_UNROLLED_IVDEP_FOR(N,...)