batmat 0.0.15
Batched linear algebra routines
Loading...
Searching...
No Matches
symm.hpp
Go to the documentation of this file.
1#pragma once
2
6#include <batmat/loop.hpp>
8#include <guanaqo/trace.hpp>
9
10namespace batmat::linalg {
11
12namespace detail {
// Batched kernel behind symm_add: intended to produce D = C + A B with A
// symmetric. In the visible loop nest only A(l, l) and A(i, l) with i > l are
// loaded, i.e. elements on or below the diagonal of A.
// NOTE(review): part of the signature (between the template header and the
// last parameter) and listing lines 32-35 (presumably where A_, B_, C_ and D_
// are set up) are missing from this listing; confirm against the real header.
13template <class T, class Abi, MatrixStructure SA, StorageOrder OA>
16 view<T, Abi> D) {
// Register the call with the tracing macro, together with a work estimate
// built from the matrix dimensions and the batch depth.
17 GUANAQO_TRACE_LINALG("symm", A.rows() * A.cols() * B.cols() * A.depth());
18 // Check dimensions
// C is optional; when present it must have the same shape as D.
19 BATMAT_ASSERT(!C || C->rows() == D.rows());
20 BATMAT_ASSERT(!C || C->cols() == D.cols());
// A must be square and conformable with B and D.
21 BATMAT_ASSERT(A.rows() == A.cols());
22 BATMAT_ASSERT(A.rows() == D.rows());
23 BATMAT_ASSERT(A.cols() == B.rows());
24 BATMAT_ASSERT(B.cols() == D.cols());
// Given the assertions above, K equals M.
25 const index_t M = D.rows(), N = D.cols(), K = A.cols();
26
27 // Degenerate case
// Nothing to write when D has no rows or no columns.
28 if (M == 0 || N == 0) [[unlikely]]
29 return;
30
31 if (C) {
// For each column j of the result, sweep over the columns l of A.
36 for (index_t j = 0; j < N; ++j)
37 for (index_t l = 0; l < K; ++l) {
38 auto Blj = B_.load(l, j);
39 auto All = A_.load(l, l);
// Row l of the result starts from the diagonal term plus C(l, j).
40 auto Dlj = All * Blj + C_.load(l, j);
// Each sub-diagonal element A(i, l) is used twice: once to update row i
// and once (standing in for A(l, i)) to accumulate into row l.
// NOTE(review): D(i, j) is formed from C(i, j), not read back from D, so
// carrying contributions from one l to the next appears to rely on C
// aliasing D (as in the three-argument symm_add overload, which passes D
// for both). Confirm the behavior for distinct C and D.
41 BATMAT_UNROLLED_IVDEP_FOR (4, index_t i = l + 1; i < M; ++i) {
42 auto Ail = A_.load(i, l);
43 auto Bil = B_.load(i, j);
44 D_.store(Ail * Blj + C_.load(i, j), i, j);
45 Dlj += Ail * Bil;
46 }
// Write the finished accumulator for row l.
47 D_.store(Dlj, l, j);
48 }
49 } else {
// No addend supplied: this path has no implementation yet and only trips
// the assertion.
50 BATMAT_ASSERT(!"Not implemented"); // TODO
51 }
52}
53
54} // namespace detail
55
// Compiled out: this overload passes an empty optional as the addend, which
// would select the branch of detail::symm that is not implemented yet.
56#if 0 // Not implemented
57/// D = A B with A symmetric
58template <MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
60void symm(Structured<VA, SA> A, VB &&B, VD &&D) {
// Empty optional with the same type as a read-only view of D, standing in for
// the absent C.
61 std::optional<decltype(simdify(D).as_const())> null;
// NOTE(review): the line naming the callee is missing from this listing;
// presumably detail::symm. Confirm against the real header.
63 simdify(A.value).as_const(), simdify(B).as_const(), null, simdify(D));
64}
65#endif
66
67/// @addtogroup topic-linalg
68/// @{
69
70/// @name Symmetric multiplication of batches of matrices
71/// @{
72
73/// D = C + A B with A symmetric
///
/// A is passed through the Structured wrapper; its underlying matrix (A.value)
/// and the operands B and C are converted with simdify() and taken as
/// read-only views, while D is converted with simdify() and left writable.
74template <MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD>
76void symm_add(Structured<VA, SA> A, VB &&B, VC &&C, VD &&D) {
// NOTE(review): the line naming the callee is missing from this listing;
// presumably detail::symm. Confirm against the real header.
78 simdify(A.value).as_const(), simdify(B).as_const(), simdify(C).as_const(), simdify(D));
79}
80/// D = D + A B with A symmetric
///
/// In-place variant: forwards to the four-argument overload with D supplied
/// both as the addend and as the output.
81template <MatrixStructure SA, simdifiable VA, simdifiable VB, simdifiable VD>
83void symm_add(Structured<VA, SA> A, VB &&B, VD &&D) {
// D appears twice on purpose: once as C (input) and once as D (output).
84 symm_add(A.ref(), B, D, D);
85}
86
87/// @}
88
89/// @}
90
91} // namespace batmat::linalg
#define BATMAT_ASSERT(x)
Definition assume.hpp:14
void symm_add(Structured< VA, SA > A, VB &&B, VC &&C, VD &&D)
D = C + A B with A symmetric.
Definition symm.hpp:76
Structured(M &&) -> Structured< M >
#define GUANAQO_TRACE_LINALG(name, gflops)
void symm(view< const T, Abi, OA > A, view< const T, Abi > B, std::optional< view< const T, Abi > > C, view< T, Abi > D)
Definition symm.hpp:15
typename detail::simdified_abi< V >::type simdified_abi_t
Definition simdify.hpp:204
constexpr bool simdify_compatible
Definition simdify.hpp:207
constexpr auto simdify(simdifiable auto &&a) -> simdified_view_t< decltype(a)>
Definition simdify.hpp:214
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Definition uview.hpp:70
Aligned allocation for matrix storage.
Light-weight wrapper class used for overload resolution of triangular and symmetric matrices.
void store(simd x, index_t r, index_t c) const noexcept
Definition uview.hpp:104
simd load(index_t r, index_t c) const noexcept
Definition uview.hpp:100
#define BATMAT_UNROLLED_IVDEP_FOR(N,...)
Definition unroll.h:29