15template <
class T,
class Abi, index_t R>
25 static constexpr size_t size() {
36 [[gnu::always_inline]]
simd load(index_t r, index_t c)
const noexcept {
39 [[gnu::always_inline]]
void store(
simd x, index_t r, index_t c)
const noexcept
40 requires(!std::is_const_v<T>)
49template <
class T,
class Abi>
51template <
class T,
class Abi>
54template <
class T,
class Abi, KernelConfig Conf, index_t R, StorageOrder OL, StorageOrder OA>
59template <
class T,
class Abi, KernelConfig Conf, index_t R, StorageOrder OL, StorageOrder OA>
69template <
class T,
class Abi, KernelConfig Conf, index_t R, index_t S,
StorageOrder OL,
76 Structure struc_L,
int rotate_A)
noexcept;
78template <
class T,
class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA>
84template <
class T,
class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA>
90template <
class T,
class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA, StorageOrder OB>
96template <
class T,
class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA, StorageOrder OB>
104template <
class T,
class Abi, StorageOrder OL>
108 return {W_t::num_elem_per_layer(), (L.cols() + R - 1) / R};
112template <
class T,
class Abi, KernelConfig Conf = {},
StorageOrder OL = StorageOrder::ColMajor,
116template <
class T,
class Abi, KernelConfig Conf = {},
StorageOrder OL = StorageOrder::ColMajor,
121template <
class T,
class Abi, KernelConfig Conf = {},
StorageOrder OL = StorageOrder::ColMajor,
126 index_t kA_in_offset = 0) noexcept;
134template <
class T,
class Abi,
StorageOrder OL = StorageOrder::ColMajor,
142template <
class T,
class Abi,
StorageOrder OL = StorageOrder::ColMajor,
consteval auto make_1d_lut(F f)
Returns an array of the form:
consteval auto make_2d_lut(F f)
Returns a 2D array of the form:
void aligned_store(V v, typename V::value_type *p)
stdx::memory_alignment< simd< Tp, Abi > > simd_align
stdx::simd_size< Tp, Abi > simd_size
V aligned_load(const typename V::value_type *p)
stdx::simd< Tp, Abi > simd
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
void hyhound_diag_full_microkernel(index_t kA, uview< T, Abi, OL > L, uview< T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > diag) noexcept
const constinit auto microkernel_full_lut
void hyhound_diag_cyclic_register(view< T, Abi, OL > L11, view< T, Abi, OW > A1, view< T, Abi, OY > L21, view< const T, Abi, OW > A22, view< T, Abi, OW > A2_out, view< T, Abi, OU > L31, view< const T, Abi, OW > A31, view< T, Abi, OW > A3_out, view< const T, Abi > D) noexcept
Performs a factorization update of the following matrix:
void hyhound_diag_register(view< T, Abi, OL > L, view< T, Abi, OA > A, view< const T, Abi > D) noexcept
Block hyperbolic Householder factorization update using register blocking.
void hyhound_diag_riccati_register(view< T, Abi, OL > L11, view< T, Abi, OA > A1, view< T, Abi, OL > L21, view< const T, Abi, OA > A2, view< T, Abi, OA > A2_out, view< T, Abi, OLu > Lu1, view< T, Abi, OAu > Au_out, view< const T, Abi > D, bool shift_A_out) noexcept
Performs a factorization update of the following matrix:
void hyhound_diag_tail_microkernel(index_t kA_in_offset, index_t kA_in, index_t k, triangular_accessor< const T, Abi, SizeR< T, Abi > > W, uview< T, Abi, OL > L, uview< const T, Abi, OA > A_in, uview< T, Abi, OA > A_out, uview< const T, Abi, OB > B, uview< const T, Abi, StorageOrder::ColMajor > diag, Structure struc_L, int rotate_A) noexcept
constexpr std::pair< index_t, index_t > hyhound_W_size(view< T, Abi, OL > L)
void hyhound_diag_diag_microkernel(index_t kA, triangular_accessor< T, Abi, SizeR< T, Abi > > W, uview< T, Abi, OL > L, uview< T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > diag) noexcept
void hyhound_diag_apply_register(view< T, Abi, OL > L, view< const T, Abi, OA > Ain, view< T, Abi, OA > Aout, view< const T, Abi, OA > B, view< const T, Abi > D, view< const T, Abi > W, index_t kA_in_offset=0) noexcept
Apply a block hyperbolic Householder transformation.
const constinit auto microkernel_tail_lut_2
const constinit auto microkernel_tail_lut
const constinit auto microkernel_diag_lut
void hyhound_diag_2_register(view< T, Abi, OL1 > L11, view< T, Abi, OA1 > A1, view< T, Abi, OL2 > L21, view< T, Abi, OA2 > A2, view< const T, Abi > D) noexcept
Same as hyhound_diag_register but for two block rows at once.
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
std::integral_constant< index_t, I > index_constant
simd load(index_t r, index_t c) const noexcept
datapar::simd< std::remove_const_t< T >, Abi > simd
static constexpr index_t num_elem_per_layer()
triangular_accessor(value_type *data) noexcept
void store(simd x, index_t r, index_t c) const noexcept
static constexpr size_t alignment()
static constexpr size_t size()
value_type & operator()(index_t r, index_t c) const noexcept
static constexpr ptrdiff_t inner_stride