batmat develop
Batched linear algebra routines
Loading...
Searching...
No Matches
geqrf.hpp
Go to the documentation of this file.
1#pragma once
2
5#include <batmat/lut.hpp>
6#include <batmat/micro-kernels/geqrf/export.h>
8#include <batmat/simd.hpp>
9
11
// Compile-time configuration for the geqrf kernels. The struct is currently empty (no
// options); it is passed as a non-type template parameter (`KernelConfig Conf`) to the
// kernel templates declared in this file.
12struct BATMAT_LINALG_GEQRF_EXPORT KernelConfig {};
13
// Block size R for element type T and SIMD ABI Abi. Currently set equal to
// gemm::RowsReg<T, Abi> (the gemm micro-kernels' register block size); the trailing TODO
// marks this choice as provisional.
// NOTE(review): presumably the number of columns handled per register block — confirm
// against the micro-kernel definitions.
14template <class T, class Abi>
15inline constexpr index_t SizeR = gemm::RowsReg<T, Abi>; // TODO
// Block size S for element type T and SIMD ABI Abi. Currently set equal to
// gemm::RowsReg<T, Abi>, the same value as SizeR; the trailing TODO marks this choice as
// provisional.
// NOTE(review): presumably the second block dimension (`index_t S`) of the tail
// micro-kernel — confirm against the micro-kernel definitions.
16template <class T, class Abi>
17inline constexpr index_t SizeS = gemm::RowsReg<T, Abi>; // TODO
18
19template <class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OA, StorageOrder OD>
22
23template <class T, class Abi, KernelConfig Conf, index_t R, StorageOrder OA, StorageOrder OD>
25
26template <class T, class Abi, KernelConfig Conf, index_t R, index_t S, StorageOrder OA,
28void geqrf_tail_microkernel(index_t k, bool transposed,
29 triangular_accessor<const T, Abi, SizeR<T, Abi>> W,
31 uview<const T, Abi, OB> B) noexcept;
32
33// Helper function that computes the storage size for the matrix W (part of the block
34// Householder representation): {elements per layer, number of blocks of R columns in A}.
35template <class T, class Abi, StorageOrder OA>
36constexpr std::pair<index_t, index_t> geqrf_W_size(view<T, Abi, OA> A) {
37 static constexpr index_constant<SizeR<std::remove_const_t<T>, Abi>> R;
39 return {W_t::num_elem_per_layer(), (A.cols() + R - 1) / R};
40}
41
42// Low-level register-blocked routines
// Declaration only; the definition lives in a separate implementation file.
// Template parameters:
//   T, Abi - element type and SIMD ABI of the batched views.
//   Conf   - kernel configuration (defaults to a value-initialized KernelConfig).
//   OA, OD - storage orders of A and D (both default to column-major).
// Arguments:
//   A - read-only input view.
//   D - writable output view.
//   W - writable view; presumably receives the W part of the block Householder
//       representation, sized according to geqrf_W_size — TODO confirm.
// NOTE(review): presumably factorizes A out-of-place into D (hence "copy") — confirm
// against the definition.
43template <class T, class Abi, KernelConfig Conf = {}, StorageOrder OA = StorageOrder::ColMajor,
44 StorageOrder OD = StorageOrder::ColMajor>
45BATMAT_LINALG_GEQRF_EXPORT void geqrf_copy_register(view<const T, Abi, OA> A, view<T, Abi, OD> D,
46 view<T, Abi> W) noexcept;
47
48template <class T, class Abi, KernelConfig Conf, StorageOrder OA, StorageOrder OD, StorageOrder OB>
49BATMAT_LINALG_GEQRF_EXPORT void geqrf_apply_register(view<const T, Abi, OA> A, view<T, Abi, OD> D,
51 bool transposed, bool reversed) noexcept;
52
53} // namespace batmat::linalg::micro_kernels::geqrf
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
Definition avx-512.hpp:13
void geqrf_copy_register(view< const T, Abi, OA > A, view< T, Abi, OD > D, view< T, Abi > W) noexcept
Block hyperbolic Householder factorization update using register blocking.
Definition geqrf.tpp:222
constexpr std::pair< index_t, index_t > geqrf_W_size(view< T, Abi, OA > A)
Definition geqrf.hpp:36
void geqrf_diag_microkernel(index_t k, triangular_accessor< T, Abi, SizeR< T, Abi > > W, uview< const T, Abi, OA > A, uview< T, Abi, OD > D) noexcept
Definition geqrf.tpp:42
void geqrf_tail_microkernel(index_t k, bool transposed, triangular_accessor< const T, Abi, SizeR< T, Abi > > W, uview< const T, Abi, OA > A, uview< T, Abi, OD > D, uview< const T, Abi, OB > B) noexcept
Definition geqrf.tpp:153
void geqrf_full_microkernel(index_t k, uview< const T, Abi, OA > A, uview< T, Abi, OD > D) noexcept
Dimensions: A is k×R and D is k×R.
Definition geqrf.tpp:94
void geqrf_apply_register(view< const T, Abi, OA > A, view< T, Abi, OD > D, view< const T, Abi, OB > B, view< const T, Abi > W, bool transposed, bool reversed) noexcept
Apply a block Householder transformation.
Definition geqrf.tpp:323
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Definition uview.hpp:70
std::integral_constant< index_t, I > index_constant
Definition lut.hpp:10
int index_t
Definition config.hpp:13