batmat 0.0.16
Batched linear algebra routines
Loading...
Searching...
No Matches
trtri.hpp
Go to the documentation of this file.
1#pragma once
2
5#include <batmat/lut.hpp>
7
9
13
14template <class T, class Abi, KernelConfig Conf, index_t RowsReg, StorageOrder OA, StorageOrder OD>
15void trtri_copy_microkernel(uview<const T, Abi, OA> A, uview<T, Abi, OD> D, index_t k) noexcept;
16
17template <class T, class Abi, KernelConfig Conf, index_t RowsReg, index_t ColsReg, StorageOrder OD>
18void trmm_microkernel(uview<const T, Abi, OD> Dr, uview<T, Abi, OD> D, index_t k) noexcept;
19
20template <class T, class Abi, KernelConfig Conf, StorageOrder OA, StorageOrder OD>
21void trtri_copy_register(view<const T, Abi, OA> A, view<T, Abi, OD> D) noexcept;
22
23// Square block sizes greatly simplify handling of triangular matrices.
24using gemm::RowsReg;
25template <class T, class Abi>
26constexpr index_t ColsReg = RowsReg<T, Abi>;
27
28template <class T, class Abi, KernelConfig Conf, StorageOrder OA, StorageOrder OD>
29inline const constinit auto trtri_copy_lut =
32 });
33
34template <class T, class Abi, KernelConfig Conf, StorageOrder OD>
36 []<index_t Row, index_t Col>(index_constant<Row>, index_constant<Col>) {
38 });
39
40} // namespace batmat::linalg::micro_kernels::trtri
consteval auto make_1d_lut(F f)
Returns an array of the form:
Definition lut.hpp:39
consteval auto make_2d_lut(F f)
Returns a 2D array of the form:
Definition lut.hpp:25
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
Definition avx-512.hpp:13
const constinit auto trmm_lut
Definition trtri.hpp:35
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
Definition avx-512.hpp:13
const constinit auto trtri_copy_lut
Definition trtri.hpp:29
void trmm_microkernel(uview< const T, Abi, OD > Dr, uview< T, Abi, OD > D, index_t k) noexcept
Definition trtri.tpp:98
void trtri_copy_register(view< const T, Abi, OA > A, view< T, Abi, OD > D) noexcept
Definition trtri.tpp:132
void trtri_copy_microkernel(uview< const T, Abi, OA > A, uview< T, Abi, OD > D, index_t k) noexcept
Definition trtri.tpp:19
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Definition uview.hpp:70
std::integral_constant< index_t, I > index_constant
Definition lut.hpp:10