batmat develop
Batched linear algebra routines
Loading...
Searching...
No Matches
hyhound.hpp
Go to the documentation of this file.
1#pragma once
2
5#include <batmat/lut.hpp>
6#include <batmat/micro-kernels/hyhound/export.h>
8#include <batmat/simd.hpp>
9
11
// Compile-time options for the hyhound micro-kernels. Values of this type are passed as the
// non-type template parameter `Conf` of the kernels and register-blocked routines in this header.
12struct BATMAT_LINALG_HYHOUND_EXPORT KernelConfig {
// Defaults to false. NOTE(review): its effect is not visible in this header — confirm the exact
// meaning against the kernel implementation (hyhound.tpp).
13 bool sign_only = false;
14};
15
// Block size R for element type T and SIMD ABI Abi. In this header it is the size argument of the
// triangular_accessor for W and the block width used by hyhound_W_size.
// Currently set to gemm::RowsReg<T, Abi>, the register block size of the GEMM micro-kernels.
// NOTE(review): the trailing TODO presumably marks this choice as provisional — confirm.
16template <class T, class Abi>
17inline constexpr index_t SizeR = gemm::RowsReg<T, Abi>; // TODO
// Block size S for element type T and SIMD ABI Abi; currently defined identically to SizeR
// (gemm::RowsReg<T, Abi>).
// NOTE(review): presumably supplies the `index_t S` template argument of the tail micro-kernel —
// no use of SizeS is visible in this view, confirm against hyhound.tpp.
18template <class T, class Abi>
19inline constexpr index_t SizeS = gemm::RowsReg<T, Abi>; // TODO
20
21template <class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OL, StorageOrder OA>
25
26template <class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OL, StorageOrder OA>
29
// Structure tag passed to hyhound_diag_tail_microkernel as `struc_L`.
// NOTE(review): presumably describes the structure of the L operand — confirm.
30enum class Structure {
// NOTE(review): listing line 31 is absent from this view; a further enumerator may precede Zero —
// confirm against the original header before relying on this list being complete.
32 Zero = 1,
33 Upper = 2,
34};
35
36template <class T, class Abi, KernelConfig Conf, index_t R, index_t S, StorageOrder OL,
38void hyhound_diag_tail_microkernel(index_t kA_in_offset, index_t kA_in, index_t k,
39 triangular_accessor<const T, Abi, SizeR<T, Abi>> W,
43 Structure struc_L, int rotate_A) noexcept;
44
45// Helper function to compute size of the storage for the matrix W (part of the hyperbolic
46// Householder representation).
//
// Returns the pair {W_t::num_elem_per_layer(), ceil(L.cols() / R)}, where R is
// SizeR<std::remove_const_t<T>, Abi>. The second entry is the number of R-wide column blocks
// needed to cover the columns of L.
// NOTE(review): the declaration of W_t (listing line 50) is not present in this view; it is
// presumably a triangular_accessor type — confirm against the original header.
47template <class T, class Abi, StorageOrder OL>
48constexpr std::pair<index_t, index_t> hyhound_W_size(view<T, Abi, OL> L) {
// const is stripped from T so that const and non-const views yield the same block size.
49 static constexpr index_constant<SizeR<std::remove_const_t<T>, Abi>> R;
// (cols + R - 1) / R rounds the block count up.
51 return {W_t::num_elem_per_layer(), (L.cols() + R - 1) / R};
52}
53
54// Low-level register-blocked routines
// Block hyperbolic Householder factorization update using register blocking.
// Takes mutable views L and A (storage orders OL and OA, both column-major by default) and a
// read-only view D; Conf selects the kernel options at compile time.
// Declared here only; the generated index lists the definition at hyhound.tpp:288.
55template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor,
56 StorageOrder OA = StorageOrder::ColMajor>
57BATMAT_LINALG_HYHOUND_EXPORT void hyhound_diag_register(view<T, Abi, OL> L, view<T, Abi, OA> A,
58 view<const T, Abi> D) noexcept;
59
// Overload of hyhound_diag_register whose final parameter is a mutable view W.
// NOTE(review): listing line 63 of the parameter list is absent from this view, so the full
// signature cannot be confirmed here. W presumably receives the Householder representation whose
// storage size is given by hyhound_W_size — confirm against the original header.
60template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor,
61 StorageOrder OA = StorageOrder::ColMajor>
62BATMAT_LINALG_HYHOUND_EXPORT void hyhound_diag_register(view<T, Abi, OL> L, view<T, Abi, OA> A,
64 view<T, Abi> W) noexcept;
65
66template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor,
67 StorageOrder OA = StorageOrder::ColMajor>
68BATMAT_LINALG_HYHOUND_EXPORT void
71 index_t kA_in_offset = 0) noexcept;
72
73template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL1 = StorageOrder::ColMajor,
74 StorageOrder OA1 = StorageOrder::ColMajor, StorageOrder OL2 = StorageOrder::ColMajor,
75 StorageOrder OA2 = StorageOrder::ColMajor>
76BATMAT_LINALG_HYHOUND_EXPORT void
79
80template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor,
81 StorageOrder OW = StorageOrder::ColMajor, StorageOrder OY = StorageOrder::ColMajor,
82 StorageOrder OU = StorageOrder::ColMajor>
83BATMAT_LINALG_HYHOUND_EXPORT void
87 view<T, Abi, OW> A3_out, view<const T, Abi> D) noexcept;
88
89template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor,
90 StorageOrder OA = StorageOrder::ColMajor, StorageOrder OLu = StorageOrder::ColMajor,
91 StorageOrder OAu = StorageOrder::ColMajor>
92BATMAT_LINALG_HYHOUND_EXPORT void
96 bool shift_A_out) noexcept;
97
98} // namespace batmat::linalg::micro_kernels::hyhound
int index_t
Definition config.hpp:13
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
Definition avx-512.hpp:13
void hyhound_diag_full_microkernel(index_t kA, uview< T, Abi, OL > L, uview< T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > diag) noexcept
Definition hyhound.tpp:96
void hyhound_diag_cyclic_register(view< T, Abi, OL > L11, view< T, Abi, OW > A1, view< T, Abi, OY > L21, view< const T, Abi, OW > A22, view< T, Abi, OW > A2_out, view< T, Abi, OU > L31, view< const T, Abi, OW > A31, view< T, Abi, OW > A3_out, view< const T, Abi > D) noexcept
Performs a factorization update of the following matrix:
Definition hyhound.tpp:514
void hyhound_diag_register(view< T, Abi, OL > L, view< T, Abi, OA > A, view< const T, Abi > D) noexcept
Block hyperbolic Householder factorization update using register blocking.
Definition hyhound.tpp:288
void hyhound_diag_riccati_register(view< T, Abi, OL > L11, view< T, Abi, OA > A1, view< T, Abi, OL > L21, view< const T, Abi, OA > A2, view< T, Abi, OA > A2_out, view< T, Abi, OLu > Lu1, view< T, Abi, OAu > Au_out, view< const T, Abi > D, bool shift_A_out) noexcept
Performs a factorization update of the following matrix:
Definition hyhound.tpp:608
void hyhound_diag_tail_microkernel(index_t kA_in_offset, index_t kA_in, index_t k, triangular_accessor< const T, Abi, SizeR< T, Abi > > W, uview< T, Abi, OL > L, uview< const T, Abi, OA > A_in, uview< T, Abi, OA > A_out, uview< const T, Abi, OB > B, uview< const T, Abi, StorageOrder::ColMajor > diag, Structure struc_L, int rotate_A) noexcept
Definition hyhound.tpp:160
constexpr std::pair< index_t, index_t > hyhound_W_size(view< T, Abi, OL > L)
Definition hyhound.hpp:48
void hyhound_diag_diag_microkernel(index_t kA, triangular_accessor< T, Abi, SizeR< T, Abi > > W, uview< T, Abi, OL > L, uview< T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > diag) noexcept
Definition hyhound.tpp:43
void hyhound_diag_apply_register(view< T, Abi, OL > L, view< const T, Abi, OA > Ain, view< T, Abi, OA > Aout, view< const T, Abi, OA > B, view< const T, Abi > D, view< const T, Abi > W, index_t kA_in_offset=0) noexcept
Apply a block hyperbolic Householder transformation.
Definition hyhound.tpp:401
void hyhound_diag_2_register(view< T, Abi, OL1 > L11, view< T, Abi, OA1 > A1, view< T, Abi, OL2 > L21, view< T, Abi, OA2 > A2, view< const T, Abi > D) noexcept
Same as hyhound_diag_register but for two block rows at once.
Definition hyhound.tpp:452
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Definition uview.hpp:70
std::integral_constant< index_t, I > index_constant
Definition lut.hpp:10
int index_t
Definition config.hpp:13