#include <batmat/micro-kernels/geqrf/export.h>
template <class T, class Abi>
template <class T, class Abi>
template <class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OA, StorageOrder OD>
template <class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OA, StorageOrder OD>
template <class T, class Abi, StorageOrder OA>
    return {W_t::num_elem_per_layer(), (A.cols() + R - 1) / R};
template <class T, class Abi, KernelConfig Conf = {},
          StorageOrder OA = StorageOrder::ColMajor,
template <class T, class Abi, KernelConfig Conf, StorageOrder OA, StorageOrder OD, StorageOrder OB>
    bool transposed, bool reversed) noexcept;
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
void geqrf_copy_register(view< const T, Abi, OA > A, view< T, Abi, OD > D, view< T, Abi > W) noexcept
Block hyperbolic Householder factorization update using register blocking.
constexpr std::pair< index_t, index_t > geqrf_W_size(view< T, Abi, OA > A)
void geqrf_diag_microkernel(index_t k, triangular_accessor< T, Abi, SizeR< T, Abi > > W, uview< const T, Abi, OA > A, uview< T, Abi, OD > D) noexcept
void geqrf_tail_microkernel(index_t k, bool transposed, triangular_accessor< const T, Abi, SizeR< T, Abi > > W, uview< const T, Abi, OA > A, uview< T, Abi, OD > D, uview< const T, Abi, OB > B) noexcept
void geqrf_full_microkernel(index_t k, uview< const T, Abi, OA > A, uview< T, Abi, OD > D) noexcept
A (k×R) D (k×R).
void geqrf_apply_register(view< const T, Abi, OA > A, view< T, Abi, OD > D, view< const T, Abi, OB > B, view< const T, Abi > W, bool transposed, bool reversed) noexcept
Apply a block Householder transformation.
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
std::integral_constant< index_t, I > index_constant