batmat 0.0.19
Batched linear algebra routines
Loading...
Searching...
No Matches
batmat::linalg::micro_kernels::hyhound Namespace Reference

Namespaces

namespace  detail

Classes

struct  KernelConfig
struct  triangular_accessor

Enumerations

enum class  Structure { General = 0 , Zero = 1 , Upper = 2 }

Functions

template<class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OL, StorageOrder OA>
void hyhound_diag_diag_microkernel (index_t kA, triangular_accessor< T, Abi, SizeR< T, Abi > > W, uview< T, Abi, OL > L, uview< T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > diag) noexcept
template<class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OL, StorageOrder OA>
void hyhound_diag_full_microkernel (index_t kA, uview< T, Abi, OL > L, uview< T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > diag) noexcept
template<class T, class Abi, KernelConfig Conf, index_t R, index_t S, StorageOrder OL, StorageOrder OA, StorageOrder OB>
void hyhound_diag_tail_microkernel (index_t kA_in_offset, index_t kA_in, index_t k, triangular_accessor< const T, Abi, SizeR< T, Abi > > W, uview< T, Abi, OL > L, uview< const T, Abi, OA > A_in, uview< T, Abi, OA > A_out, uview< const T, Abi, OB > B, uview< const T, Abi, StorageOrder::ColMajor > diag, Structure struc_L, int rotate_A) noexcept
template<class T, class Abi, StorageOrder OL>
constexpr std::pair< index_t, index_t > hyhound_W_size (view< T, Abi, OL > L)
template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor, StorageOrder OA = StorageOrder::ColMajor>
void hyhound_diag_register (const view< T, Abi, OL > L, const view< T, Abi, OA > A, const view< const T, Abi > D) noexcept
 Block hyperbolic Householder factorization update using register blocking.
template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor, StorageOrder OA = StorageOrder::ColMajor>
void hyhound_diag_register (const view< T, Abi, OL > L, const view< T, Abi, OA > A, const view< const T, Abi > D, const view< T, Abi > W) noexcept
 Block hyperbolic Householder factorization update using register blocking.
template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor, StorageOrder OA = StorageOrder::ColMajor>
void hyhound_diag_apply_register (const view< T, Abi, OL > L, const view< const T, Abi, OA > Ain, const view< T, Abi, OA > Aout, const view< const T, Abi, OA > B, const view< const T, Abi > D, const view< const T, Abi > W, index_t kA_in_offset) noexcept
 Apply a block hyperbolic Householder transformation.
template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL1 = StorageOrder::ColMajor, StorageOrder OA1 = StorageOrder::ColMajor, StorageOrder OL2 = StorageOrder::ColMajor, StorageOrder OA2 = StorageOrder::ColMajor>
void hyhound_diag_2_register (const view< T, Abi, OL1 > L11, const view< T, Abi, OA1 > A1, const view< T, Abi, OL2 > L21, const view< T, Abi, OA2 > A2, const view< const T, Abi > D) noexcept
 Same as hyhound_diag_register but for two block rows at once.
template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor, StorageOrder OW = StorageOrder::ColMajor, StorageOrder OY = StorageOrder::ColMajor, StorageOrder OU = StorageOrder::ColMajor>
void hyhound_diag_cyclic_register (const view< T, Abi, OL > L11, const view< T, Abi, OW > A1, const view< T, Abi, OY > L21, const view< const T, Abi, OW > A22, const view< T, Abi, OW > A2_out, const view< T, Abi, OU > L31, const view< const T, Abi, OW > A31, const view< T, Abi, OW > A3_out, const view< const T, Abi > D) noexcept
 Performs a factorization update of the 3×3 block matrix given in the detailed description of hyhound_diag_cyclic_register.
template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor, StorageOrder OA = StorageOrder::ColMajor, StorageOrder OLu = StorageOrder::ColMajor, StorageOrder OAu = StorageOrder::ColMajor>
void hyhound_diag_riccati_register (const view< T, Abi, OL > L11, const view< T, Abi, OA > A1, const view< T, Abi, OL > L21, const view< const T, Abi, OA > A2, const view< T, Abi, OA > A2_out, const view< T, Abi, OLu > Lu1, const view< T, Abi, OAu > Au_out, const view< const T, Abi > D, bool shift_A_out) noexcept
 Performs a factorization update of the block matrix given in the detailed description of hyhound_diag_riccati_register (with Lu1 upper triangular).

Variables

template<class T, class Abi>
constexpr index_t SizeR = gemm::RowsReg<T, Abi>
template<class T, class Abi>
constexpr index_t SizeS = gemm::RowsReg<T, Abi>
template<class T, class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA>
const constinit auto microkernel_diag_lut
template<class T, class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA>
const constinit auto microkernel_full_lut
template<class T, class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA, StorageOrder OB>
const constinit auto microkernel_tail_lut
template<class T, class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA, StorageOrder OB>
const constinit auto microkernel_tail_lut_2

Class Documentation

◆ batmat::linalg::micro_kernels::hyhound::KernelConfig

struct batmat::linalg::micro_kernels::hyhound::KernelConfig
Class Members
bool sign_only = false

Enumeration Type Documentation

◆ Structure

Enumerator
General 
Zero 
Upper 

Definition at line 64 of file hyhound.hpp.

Function Documentation

◆ hyhound_diag_diag_microkernel()

template<class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OL, StorageOrder OA>
void batmat::linalg::micro_kernels::hyhound::hyhound_diag_diag_microkernel ( index_t kA,
triangular_accessor< T, Abi, SizeR< T, Abi > > W,
uview< T, Abi, OL > L,
uview< T, Abi, OA > A,
uview< const T, Abi, StorageOrder::ColMajor > diag )
noexcept

Definition at line 43 of file hyhound.tpp.

◆ hyhound_diag_full_microkernel()

template<class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OL, StorageOrder OA>
void batmat::linalg::micro_kernels::hyhound::hyhound_diag_full_microkernel ( index_t kA,
uview< T, Abi, OL > L,
uview< T, Abi, OA > A,
uview< const T, Abi, StorageOrder::ColMajor > diag )
noexcept

Definition at line 96 of file hyhound.tpp.

◆ hyhound_diag_tail_microkernel()

template<class T, class Abi, KernelConfig Conf, index_t R, index_t S, StorageOrder OL, StorageOrder OA, StorageOrder OB>
void batmat::linalg::micro_kernels::hyhound::hyhound_diag_tail_microkernel ( index_t kA_in_offset,
index_t kA_in,
index_t k,
triangular_accessor< const T, Abi, SizeR< T, Abi > > W,
uview< T, Abi, OL > L,
uview< const T, Abi, OA > A_in,
uview< T, Abi, OA > A_out,
uview< const T, Abi, OB > B,
uview< const T, Abi, StorageOrder::ColMajor > diag,
Structure struc_L,
int rotate_A )
noexcept

Definition at line 160 of file hyhound.tpp.

◆ hyhound_W_size()

template<class T, class Abi, StorageOrder OL>
std::pair< index_t, index_t > batmat::linalg::micro_kernels::hyhound::hyhound_W_size ( view< T, Abi, OL > L)
constexpr

Definition at line 82 of file hyhound.hpp.

◆ hyhound_diag_register() [1/2]

template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor, StorageOrder OA = StorageOrder::ColMajor>
void batmat::linalg::micro_kernels::hyhound::hyhound_diag_register ( const view< T, Abi, OL > L,
const view< T, Abi, OA > A,
const view< const T, Abi > D )
noexcept

Block hyperbolic Householder factorization update using register blocking.

This variant does not store the Householder representation W.

Definition at line 288 of file hyhound.tpp.

◆ hyhound_diag_register() [2/2]

template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor, StorageOrder OA = StorageOrder::ColMajor>
void batmat::linalg::micro_kernels::hyhound::hyhound_diag_register ( const view< T, Abi, OL > L,
const view< T, Abi, OA > A,
const view< const T, Abi > D,
const view< T, Abi > W )
noexcept

Block hyperbolic Householder factorization update using register blocking.

This variant stores the Householder representation W.

Definition at line 358 of file hyhound.tpp.

◆ hyhound_diag_apply_register()

template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor, StorageOrder OA = StorageOrder::ColMajor>
void batmat::linalg::micro_kernels::hyhound::hyhound_diag_apply_register ( view< T, Abi, OL > L,
view< const T, Abi, OA > Ain,
view< T, Abi, OA > Aout,
view< const T, Abi, OA > B,
view< const T, Abi > D,
view< const T, Abi > W,
index_t kA_in_offset = 0 )
noexcept

Apply a block hyperbolic Householder transformation.

Definition at line 401 of file hyhound.tpp.

◆ hyhound_diag_2_register()

template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL1 = StorageOrder::ColMajor, StorageOrder OA1 = StorageOrder::ColMajor, StorageOrder OL2 = StorageOrder::ColMajor, StorageOrder OA2 = StorageOrder::ColMajor>
void batmat::linalg::micro_kernels::hyhound::hyhound_diag_2_register ( view< T, Abi, OL1 > L11,
view< T, Abi, OA1 > A1,
view< T, Abi, OL2 > L21,
view< T, Abi, OA2 > A2,
view< const T, Abi > D )
noexcept

Same as hyhound_diag_register but for two block rows at once.

Definition at line 452 of file hyhound.tpp.

◆ hyhound_diag_cyclic_register()

template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor, StorageOrder OW = StorageOrder::ColMajor, StorageOrder OY = StorageOrder::ColMajor, StorageOrder OU = StorageOrder::ColMajor>
void batmat::linalg::micro_kernels::hyhound::hyhound_diag_cyclic_register ( const view< T, Abi, OL > L11,
const view< T, Abi, OW > A1,
const view< T, Abi, OY > L21,
const view< const T, Abi, OW > A22,
const view< T, Abi, OW > A2_out,
const view< T, Abi, OU > L31,
const view< const T, Abi, OW > A31,
const view< T, Abi, OW > A3_out,
const view< const T, Abi > D )
noexcept

Performs a factorization update of the following matrix:

[ A11 A12 | L11 ]     [  0   0  | L̃11 ]
[  0  A22 | L21 ] Q = [ Ã21 Ã22 | L̃21 ]
[ A31  0  | L31 ]     [ Ã31 Ã32 | L̃31 ]

Definition at line 514 of file hyhound.tpp.

◆ hyhound_diag_riccati_register()

template<class T, class Abi, KernelConfig Conf = {}, StorageOrder OL = StorageOrder::ColMajor, StorageOrder OA = StorageOrder::ColMajor, StorageOrder OLu = StorageOrder::ColMajor, StorageOrder OAu = StorageOrder::ColMajor>
void batmat::linalg::micro_kernels::hyhound::hyhound_diag_riccati_register ( const view< T, Abi, OL > L11,
const view< T, Abi, OA > A1,
const view< T, Abi, OL > L21,
const view< const T, Abi, OA > A2,
const view< T, Abi, OA > A2_out,
const view< T, Abi, OLu > Lu1,
const view< T, Abi, OAu > Au_out,
const view< const T, Abi > D,
bool shift_A_out )
noexcept

Performs a factorization update of the following matrix:

[ A1 | L11 ]     [  0 | L̃11 ]
[ A2 | L21 ] Q = [ Ã2 | L̃21 ]
[  0 | Lu1 ]     [ Ãu | L̃u1 ]

where Lu1 and L̃u1 are upper triangular.

Definition at line 608 of file hyhound.tpp.

Variable Documentation

◆ SizeR

template<class T, class Abi>
index_t batmat::linalg::micro_kernels::hyhound::SizeR = gemm::RowsReg<T, Abi>
inline constexpr

Definition at line 51 of file hyhound.hpp.

◆ SizeS

template<class T, class Abi>
index_t batmat::linalg::micro_kernels::hyhound::SizeS = gemm::RowsReg<T, Abi>
inline constexpr

Definition at line 53 of file hyhound.hpp.

◆ microkernel_diag_lut

template<class T, class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA>
const constinit auto batmat::linalg::micro_kernels::hyhound::microkernel_diag_lut
inline constinit
Initial value:
=
})
consteval auto make_1d_lut(F f)
Returns an array of the form:
Definition lut.hpp:39
void hyhound_diag_diag_microkernel(index_t kA, triangular_accessor< T, Abi, SizeR< T, Abi > > W, uview< T, Abi, OL > L, uview< T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > diag) noexcept
Definition hyhound.tpp:43
std::integral_constant< index_t, I > index_constant
Definition lut.hpp:10

Definition at line 18 of file hyhound.tpp.

◆ microkernel_full_lut

template<class T, class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA>
const constinit auto batmat::linalg::micro_kernels::hyhound::microkernel_full_lut
inline constinit
Initial value:
=
})
void hyhound_diag_full_microkernel(index_t kA, uview< T, Abi, OL > L, uview< T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > diag) noexcept
Definition hyhound.tpp:96

Definition at line 24 of file hyhound.tpp.

◆ microkernel_tail_lut

template<class T, class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA, StorageOrder OB>
const constinit auto batmat::linalg::micro_kernels::hyhound::microkernel_tail_lut
inline constinit
Initial value:
=
})
void hyhound_diag_tail_microkernel(index_t kA_in_offset, index_t kA_in, index_t k, triangular_accessor< const T, Abi, SizeR< T, Abi > > W, uview< T, Abi, OL > L, uview< const T, Abi, OA > A_in, uview< T, Abi, OA > A_out, uview< const T, Abi, OB > B, uview< const T, Abi, StorageOrder::ColMajor > diag, Structure struc_L, int rotate_A) noexcept
Definition hyhound.tpp:160

Definition at line 30 of file hyhound.tpp.

◆ microkernel_tail_lut_2

template<class T, class Abi, KernelConfig Conf, StorageOrder OL, StorageOrder OA, StorageOrder OB>
const constinit auto batmat::linalg::micro_kernels::hyhound::microkernel_tail_lut_2
inline constinit
Initial value:
[]<index_t NR, index_t NS>(index_constant<NR>, index_constant<NS>) {
})
consteval auto make_2d_lut(F f)
Returns a 2D array of the form:
Definition lut.hpp:25

Definition at line 36 of file hyhound.tpp.