batmat main
Batched linear algebra routines
Loading...
Searching...
No Matches
hyhound.hpp
Go to the documentation of this file.
1#pragma once
2
5#include <batmat/lut.hpp>
6#include <batmat/micro-kernels/hyhound/export.h>
8#include <batmat/simd.hpp>
9
11
// Compile-time options for the hyhound kernels. Values of this type are used as the
// non-type template parameter `Conf` of the routines declared in this header (the
// *_register routines default it to `{}`).
12struct BATMAT_LINALG_HYHOUND_EXPORT KernelConfig {
// Off by default. NOTE(review): what the kernels do differently when this flag is set is
// not visible in this header — check the implementation (hyhound.tpp) before relying on it.
13 bool sign_only = false;
14};
15
16template <class T, class Abi, index_t R>
18 using value_type = T;
20
22 static constexpr ptrdiff_t inner_stride =
24
// Number of stored entries of an R-by-R triangle including its diagonal: R(R+1)/2.
25 static constexpr index_t num_elem_per_layer() { return R * (R + 1) / 2; }
// Total number of scalars addressed by the accessor: simd::size() lanes for each of the
// num_elem_per_layer() packed entries. The index_t count is cast to size_t before multiplying.
26 static constexpr size_t size() {
27 return simd::size() * static_cast<size_t>(num_elem_per_layer());
28 }
29 static constexpr size_t alignment() {
31 }
32
// Reference to the first scalar of packed entry (r, c).
// Entries are laid out column by column: column c is preceded by the c(c+1)/2 entries of
// columns 0..c-1, and the flat entry index is scaled by inner_stride.
// Only r <= c is valid (checked by the assert, i.e. only when assertions are enabled).
// The function is const but yields a mutable reference when T is non-const: constness of
// the accessor does not propagate to the storage it points to.
33 [[gnu::always_inline]] value_type &operator()(index_t r, index_t c) const noexcept {
34 assert(r <= c);
35 return data[(r + c * (c + 1) / 2) * inner_stride];
36 }
// Reads the SIMD vector stored at packed entry (r, c) using datapar::aligned_load.
// The same r <= c precondition as operator() applies.
// NOTE(review): being an aligned load, this presumably requires the underlying buffer to
// satisfy alignment() — confirm against datapar::aligned_load in simd.hpp.
37 [[gnu::always_inline]] simd load(index_t r, index_t c) const noexcept {
38 return datapar::aligned_load<simd>(&operator()(r, c));
39 }
// Writes the SIMD vector x to packed entry (r, c) using datapar::aligned_store.
// Only available when T is non-const (requires-clause); the same r <= c precondition as
// operator() applies.
40 [[gnu::always_inline]] void store(simd x, index_t r, index_t c) const noexcept
41 requires(!std::is_const_v<T>)
42 {
43 datapar::aligned_store(x, &operator()(r, c));
44 }
45
// Wraps the storage starting at `data`; only the pointer is stored, nothing is copied.
// The constructor is intentionally left non-explicit here: the conversion operator's
// `return {data};` is copy-list-initialization and would not compile with an explicit one.
46 [[gnu::always_inline]] triangular_accessor(value_type *data) noexcept : data{data} {}
// Implicit conversion to the read-only accessor over the same storage (same Abi and R).
// When T is already const, the target type is this very type, so the operator is never
// selected by the language in that case.
47 operator triangular_accessor<const T, Abi, R>() const noexcept { return {data}; }
48};
49
// Block size R of the hyhound kernels: it is the size argument of the triangular_accessor
// taken by hyhound_diag_tail_microkernel and the column-block width in hyhound_W_size.
// Currently equal to gemm::RowsReg, the register block size of the matrix-matrix
// multiplication micro-kernels. The trailing TODO is the original author's; its scope is
// not stated.
50template <class T, class Abi>
51inline constexpr index_t SizeR = gemm::RowsReg<T, Abi>; // TODO
// Second block size of the hyhound kernels, currently also equal to gemm::RowsReg.
// NOTE(review): presumably this supplies the `S` template parameter of the tail
// micro-kernel — no use is visible in this listing; confirm in hyhound.tpp.
52template <class T, class Abi>
53inline constexpr index_t SizeS = gemm::RowsReg<T, Abi>; // TODO
54
55template <class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OL, StorageOrder OA>
59
60template <class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OL, StorageOrder OA>
63
// Tag type of the `struc_L` parameter of hyhound_diag_tail_microkernel.
// NOTE(review): gutter line 65 is absent from this listing, so an enumerator with value 0
// may exist that is not shown here. The meaning of the enumerators is not documented in
// the visible code; presumably they describe the structure of the L block handled by the
// tail kernel — confirm in hyhound.tpp.
64enum class Structure {
66 Zero = 1,
67 Upper = 2,
68};
69
// Declaration of hyhound_diag_tail_microkernel.
// NOTE(review): this listing omits several lines of the declaration (gutter numbers 71 and
// 74-76). According to the symbol index at the end of this page, the complete parameter
// list is: kA_in_offset, kA_in, k, W, L, A_in, A_out, B, diag, struc_L, rotate_A.
// W is always a read-only triangular_accessor of size SizeR<T, Abi>.
70template <class T, class Abi, KernelConfig Conf, index_t R, index_t S, StorageOrder OL,
72void hyhound_diag_tail_microkernel(index_t kA_in_offset, index_t kA_in, index_t k,
73 triangular_accessor<const T, Abi, SizeR<T, Abi>> W,
77 Structure struc_L, int rotate_A) noexcept;
78
79// Helper function to compute size of the storage for the matrix W (part of the hyperbolic
80// Householder representation).
// Returns the pair {W_t::num_elem_per_layer(), ceil(L.cols() / R)}, where R is SizeR for
// the non-const element type. Only the column count of L is read.
// NOTE(review): the line declaring W_t (gutter 84) is missing from this listing; it is
// presumably triangular_accessor<T, Abi, R>, which would make the first value R(R+1)/2 —
// confirm against the header.
81template <class T, class Abi, StorageOrder OL>
82constexpr std::pair<index_t, index_t> hyhound_W_size(view<T, Abi, OL> L) {
83 static constexpr index_constant<SizeR<std::remove_const_t<T>, Abi>> R;
// Round up so that a trailing partial block of fewer than R columns is still counted.
85 return {W_t::num_elem_per_layer(), (L.cols() + R - 1) / R};
86}
87
88// Low-level register-blocked routines
// Block hyperbolic Householder factorization update using register blocking.
// L and A are mutable views whose storage orders (OL, OA) default to column-major; D is a
// read-only view. Conf defaults to a value-initialized KernelConfig.
89template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor,
90 StorageOrder OA = StorageOrder::ColMajor>
91BATMAT_LINALG_HYHOUND_EXPORT void hyhound_diag_register(view<T, Abi, OL> L, view<T, Abi, OA> A,
92 view<const T, Abi> D) noexcept;
93
// Overload of hyhound_diag_register whose parameter list ends with a mutable view W.
// NOTE(review): gutter line 97 is missing from this listing, so the parameter(s) between A
// and W are not visible (presumably the read-only D of the overload above). W is presumably
// the storage whose dimensions hyhound_W_size computes — confirm.
94template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor,
95 StorageOrder OA = StorageOrder::ColMajor>
96BATMAT_LINALG_HYHOUND_EXPORT void hyhound_diag_register(view<T, Abi, OL> L, view<T, Abi, OA> A,
98 view<T, Abi> W) noexcept;
99
// Declaration of hyhound_diag_apply_register: apply a block hyperbolic Householder
// transformation.
// NOTE(review): the lines carrying the function name and the leading parameters (gutter
// 103-104) are missing from this listing. Per the symbol index at the end of this page the
// parameters are L, Ain, Aout, B, D, W and kA_in_offset, the last one defaulting to 0.
100template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor,
101 StorageOrder OA = StorageOrder::ColMajor>
102BATMAT_LINALG_HYHOUND_EXPORT void
105 index_t kA_in_offset = 0) noexcept;
106
// Declaration of hyhound_diag_2_register: same as hyhound_diag_register but for two block
// rows at once, each with its own storage orders (OL1/OA1 and OL2/OA2).
// NOTE(review): the lines carrying the function name and parameters (gutter 111-112) are
// missing from this listing. Per the symbol index at the end of this page the parameters
// are L11, A1, L21, A2 and the read-only D.
107template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL1 = StorageOrder::ColMajor,
108 StorageOrder OA1 = StorageOrder::ColMajor, StorageOrder OL2 = StorageOrder::ColMajor,
109 StorageOrder OA2 = StorageOrder::ColMajor>
110BATMAT_LINALG_HYHOUND_EXPORT void
113
// Declaration of hyhound_diag_cyclic_register.
// NOTE(review): the lines carrying the function name and the leading parameters (gutter
// 118-120) are missing from this listing. Per the symbol index at the end of this page the
// parameters are L11, A1, L21, A22, A2_out, L31, A31, A3_out and the read-only D; the
// *_out views are mutable, A22 and A31 are read-only. The matrix the routine updates is
// not reproduced in this listing.
114template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor,
115 StorageOrder OW = StorageOrder::ColMajor, StorageOrder OY = StorageOrder::ColMajor,
116 StorageOrder OU = StorageOrder::ColMajor>
117BATMAT_LINALG_HYHOUND_EXPORT void
121 view<T, Abi, OW> A3_out, view<const T, Abi> D) noexcept;
122
// Declaration of hyhound_diag_riccati_register.
// NOTE(review): the lines carrying the function name and the leading parameters (gutter
// 127-129) are missing from this listing. Per the symbol index at the end of this page the
// parameters are L11, A1, L21, A2 (read-only), A2_out, Lu1, Au_out, the read-only D and the
// flag shift_A_out. The effect of shift_A_out and the matrix the routine updates are not
// visible in this listing.
123template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor,
124 StorageOrder OA = StorageOrder::ColMajor, StorageOrder OLu = StorageOrder::ColMajor,
125 StorageOrder OAu = StorageOrder::ColMajor>
126BATMAT_LINALG_HYHOUND_EXPORT void
130 bool shift_A_out) noexcept;
131
132} // namespace batmat::linalg::micro_kernels::hyhound
void aligned_store(V v, typename V::value_type *p)
Definition simd.hpp:121
stdx::memory_alignment< simd< Tp, Abi > > simd_align
Definition simd.hpp:139
stdx::simd_size< Tp, Abi > simd_size
Definition simd.hpp:137
V aligned_load(const typename V::value_type *p)
Definition simd.hpp:111
stdx::simd< Tp, Abi > simd
Definition simd.hpp:99
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
Definition avx-512.hpp:13
void hyhound_diag_full_microkernel(index_t kA, uview< T, Abi, OL > L, uview< T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > diag) noexcept
Definition hyhound.tpp:96
void hyhound_diag_cyclic_register(view< T, Abi, OL > L11, view< T, Abi, OW > A1, view< T, Abi, OY > L21, view< const T, Abi, OW > A22, view< T, Abi, OW > A2_out, view< T, Abi, OU > L31, view< const T, Abi, OW > A31, view< T, Abi, OW > A3_out, view< const T, Abi > D) noexcept
Performs a factorization update of the following matrix:
Definition hyhound.tpp:514
void hyhound_diag_register(view< T, Abi, OL > L, view< T, Abi, OA > A, view< const T, Abi > D) noexcept
Block hyperbolic Householder factorization update using register blocking.
Definition hyhound.tpp:288
void hyhound_diag_riccati_register(view< T, Abi, OL > L11, view< T, Abi, OA > A1, view< T, Abi, OL > L21, view< const T, Abi, OA > A2, view< T, Abi, OA > A2_out, view< T, Abi, OLu > Lu1, view< T, Abi, OAu > Au_out, view< const T, Abi > D, bool shift_A_out) noexcept
Performs a factorization update of the following matrix:
Definition hyhound.tpp:608
void hyhound_diag_tail_microkernel(index_t kA_in_offset, index_t kA_in, index_t k, triangular_accessor< const T, Abi, SizeR< T, Abi > > W, uview< T, Abi, OL > L, uview< const T, Abi, OA > A_in, uview< T, Abi, OA > A_out, uview< const T, Abi, OB > B, uview< const T, Abi, StorageOrder::ColMajor > diag, Structure struc_L, int rotate_A) noexcept
Definition hyhound.tpp:160
constexpr std::pair< index_t, index_t > hyhound_W_size(view< T, Abi, OL > L)
Definition hyhound.hpp:82
void hyhound_diag_diag_microkernel(index_t kA, triangular_accessor< T, Abi, SizeR< T, Abi > > W, uview< T, Abi, OL > L, uview< T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > diag) noexcept
Definition hyhound.tpp:43
void hyhound_diag_apply_register(view< T, Abi, OL > L, view< const T, Abi, OA > Ain, view< T, Abi, OA > Aout, view< const T, Abi, OA > B, view< const T, Abi > D, view< const T, Abi > W, index_t kA_in_offset=0) noexcept
Apply a block hyperbolic Householder transformation.
Definition hyhound.tpp:401
void hyhound_diag_2_register(view< T, Abi, OL1 > L11, view< T, Abi, OA1 > A1, view< T, Abi, OL2 > L21, view< T, Abi, OA2 > A2, view< const T, Abi > D) noexcept
Same as hyhound_diag_register but for two block rows at once.
Definition hyhound.tpp:452
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Definition uview.hpp:70
std::integral_constant< index_t, I > index_constant
Definition lut.hpp:10
simd load(index_t r, index_t c) const noexcept
Definition hyhound.hpp:37
datapar::simd< std::remove_const_t< T >, Abi > simd
Definition hyhound.hpp:21
void store(simd x, index_t r, index_t c) const noexcept
Definition hyhound.hpp:40
value_type & operator()(index_t r, index_t c) const noexcept
Definition hyhound.hpp:33