batmat 0.0.13
Batched linear algebra routines
Loading...
Searching...
No Matches
potrf.hpp
Go to the documentation of this file.
1#pragma once
2
5#include <batmat/lut.hpp>
7
9
14
15template <class T, class Abi, KernelConfig Conf, index_t RowsReg, StorageOrder O1, StorageOrder O2>
16void potrf_copy_microkernel(uview<const T, Abi, O1> A1, uview<const T, Abi, O2> A2,
17 uview<const T, Abi, O2> C, uview<T, Abi, O2> D, T *invD, index_t k1,
18 index_t k2, T regularization) noexcept;
19
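20template <class T, class Abi, KernelConfig Conf, index_t RowsReg, index_t ColsReg, StorageOrder O1,
21 StorageOrder O2>
22void trsm_copy_microkernel(uview<const T, Abi, O1> A1, uview<const T, Abi, O1> B1,
23 uview<const T, Abi, O2> A2, uview<const T, Abi, O2> B2,
24 uview<const T, Abi, O2> L, const T *invL, uview<const T, Abi, O2> C,
25 uview<T, Abi, O2> D, index_t k1, index_t k2) noexcept;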
26
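27template <class T, class Abi, KernelConfig Conf, StorageOrder OA, StorageOrder OCD>
28void potrf_copy_register(view<const T, Abi, OA> A, view<const T, Abi, OCD> C, view<T, Abi, OCD> D,
29 T regularization) noexcept;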
30
31// Square block sizes greatly simplify handling of triangular matrices.
32using gemm::RowsReg;
33template <class T, class Abi>
34constexpr index_t ColsReg = RowsReg<T, Abi>;
35
36template <class T, class Abi, KernelConfig Conf, StorageOrder OA, StorageOrder OC>
37inline const constinit auto potrf_copy_lut =
40 });
41
42template <class T, class Abi, KernelConfig Conf, StorageOrder O1, StorageOrder O2>
44 []<index_t Row, index_t Col>(index_constant<Row>, index_constant<Col>) {
46 });
47
48} // namespace batmat::linalg::micro_kernels::potrf
consteval auto make_1d_lut(F f)
Returns an array of the form:
Definition lut.hpp:39
consteval auto make_2d_lut(F f)
Returns a 2D array of the form:
Definition lut.hpp:25
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
Definition avx-512.hpp:13
void potrf_copy_microkernel(uview< const T, Abi, O1 > A1, uview< const T, Abi, O2 > A2, uview< const T, Abi, O2 > C, uview< T, Abi, O2 > D, T *invD, index_t k1, index_t k2, T regularization) noexcept
Definition potrf.tpp:24
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
Definition avx-512.hpp:13
const constinit auto potrf_copy_lut
Definition potrf.hpp:37
void potrf_copy_register(view< const T, Abi, OA > A, view< const T, Abi, OCD > C, view< T, Abi, OCD > D, T regularization) noexcept
Definition potrf.tpp:185
void trsm_copy_microkernel(uview< const T, Abi, O1 > A1, uview< const T, Abi, O1 > B1, uview< const T, Abi, O2 > A2, uview< const T, Abi, O2 > B2, uview< const T, Abi, O2 > L, const T *invL, uview< const T, Abi, O2 > C, uview< T, Abi, O2 > D, index_t k1, index_t k2) noexcept
Definition potrf.tpp:126
const constinit auto trsm_copy_lut
Definition potrf.hpp:43
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Definition uview.hpp:70
std::integral_constant< index_t, I > index_constant
Definition lut.hpp:10