batmat develop
Batched linear algebra routines
Loading...
Searching...
No Matches
geqrf.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <batmat/assume.hpp>
10#include <guanaqo/trace.hpp>
11
12namespace batmat::linalg {
13
14namespace detail {
15template <class T, class Abi, micro_kernels::geqrf::KernelConfig Conf, StorageOrder OA,
16 StorageOrder OD>
18 // Check dimensions
19 BATMAT_ASSERT(A.rows() >= A.cols());
20 BATMAT_ASSERT(A.rows() == D.rows());
21 BATMAT_ASSERT(A.cols() == D.cols());
22 BATMAT_ASSERT(W.rows() == 0 || (W.cols() == 1 && W.rows() == A.cols()) ||
23 std::make_pair(W.rows(), W.cols()) ==
25 const index_t M = D.rows(), N = D.cols();
26 [[maybe_unused]] const auto fc = flops::geqrf(M, N);
27 GUANAQO_TRACE_LINALG("geqrf", total(fc) * D.depth());
28 // Degenerate case
29 if (M == 0 || N == 0) [[unlikely]]
30 return;
32}
33
34template <class T, class Abi, micro_kernels::geqrf::KernelConfig Conf, StorageOrder OA,
37 view<const T, Abi> W, bool transposed, bool reversed) {
38 // Check dimensions
39 BATMAT_ASSERT(A.rows() == D.rows());
40 BATMAT_ASSERT(A.cols() == D.cols());
41 BATMAT_ASSERT(B.rows() == A.rows());
42 BATMAT_ASSERT(std::make_pair(W.rows(), W.cols()) ==
44 const index_t M = D.rows(), N = D.cols(), K = B.cols();
45 [[maybe_unused]] const auto fc = flops::geqrf_apply(M, N, K);
46 GUANAQO_TRACE_LINALG("geqrf_apply", total(fc) * D.depth());
47 // Degenerate case
48 if (K == 0 && D.data() != A.data()) [[unlikely]]
49 linalg::copy(A, D);
50 if (M == 0 || N == 0 || K == 0) [[unlikely]]
51 return;
52
54 reversed);
55}
56} // namespace detail
57
58/// @addtogroup topic-linalg
59/// @{
60
61/// @name QR factorization of batches of matrices
62/// @{
63
64/// QR factorization. The upper triangular part of D contains the R factor. The Householder vectors
65/// are stored in the strict lower triangular part of D.
66/// The Householder coefficients are stored in W, which should either be a vector of `A.cols()`
67/// elements, or a matrix of size `geqrf_size_W(A)`. If W has zero rows, the coefficients are
68/// discarded.
69template <simdifiable VA, simdifiable VD, simdifiable VW>
71void geqrf(VA &&A, VD &&D, VW &&W) {
73 simdify(W));
74}
75
76/// QR factorization. The upper triangular part of D contains the R factor. The Householder vectors
77/// are stored in the strict lower triangular part of D.
78/// The Householder coefficients are stored in W, which should either be a vector of `D.cols()`
79/// elements, or a matrix of size `geqrf_size_W(D)`. If W has zero rows, the coefficients are
80/// discarded.
81template <simdifiable VD, simdifiable VW>
83void geqrf(VD &&D, VW &&W) {
85 simdify(W));
86}
87
88/// Apply the Q factor from @ref geqrf (represented by @p B and @p W) to a matrix @p A, storing
89/// either QA or QᵀA in @p D (depending on @p transposed).
90template <simdifiable VA, simdifiable VD, simdifiable VB, simdifiable VW>
92void geqrf_apply(VA &&A, VD &&D, VB &&B, VW &&W, bool transposed = false) {
94 simdify(A).as_const(), simdify(D), simdify(B).as_const(), simdify(W).as_const(), transposed,
95 false);
96}
97
98/// Apply the Q factor from @ref geqrf (represented by @p B and @p W) to a matrix @p D, overwriting
99/// it in place with either QD or QᵀD (depending on @p transposed).
100template <simdifiable VD, simdifiable VB, simdifiable VW>
102void geqrf_apply(VD &&D, VB &&B, VW &&W, bool transposed = false) {
104 simdify(D).as_const(), simdify(D), simdify(B).as_const(), simdify(W).as_const(), transposed,
105 false);
106}
107
108/// Get the size of the storage for the matrix W returned by
109/// @ref geqrf(VA &&A, VD &&D, VW &&W).
110template <simdifiable VA>
115
116/// @}
117
118/// @}
119
120} // namespace batmat::linalg
#define BATMAT_ASSERT(x)
Definition assume.hpp:14
constexpr FlopCount geqrf_apply(index_t m, index_t n, index_t k)
Apply the Q factor of a QR factorization of an m×n matrix with m≥n to an m×k matrix.
Definition flops.hpp:220
constexpr FlopCount geqrf(index_t m, index_t n)
QR factorization of an m×n matrix with m≥n.
Definition flops.hpp:207
auto geqrf_size_W(VA &&A)
Get the size of the storage for the matrix W returned by geqrf(VA &&A, VD &&D, VW &&W).
Definition geqrf.hpp:111
void geqrf_apply(VA &&A, VD &&D, VB &&B, VW &&W, bool transposed=false)
Apply the Q factor from geqrf (represented by B and W) to a matrix A, storing either QA or QᵀA in D (...
Definition geqrf.hpp:92
void copy(VA &&A, VB &&B, Opts... opts)
B = A.
Definition copy.hpp:187
void geqrf(VA &&A, VD &&D, VW &&W)
QR factorization.
Definition geqrf.hpp:71
#define GUANAQO_TRACE_LINALG(name, gflops)
void geqrf_apply(view< const T, Abi, OA > A, view< T, Abi, OD > D, view< const T, Abi, OB > B, view< const T, Abi > W, bool transposed, bool reversed)
Definition geqrf.hpp:36
void geqrf(view< const T, Abi, OA > A, view< T, Abi, OD > D, view< T, Abi > W)
Definition geqrf.hpp:17
void geqrf_copy_register(view< const T, Abi, OA > A, view< T, Abi, OD > D, view< T, Abi > W) noexcept
Block hyperbolic Householder factorization update using register blocking.
Definition geqrf.tpp:222
constexpr std::pair< index_t, index_t > geqrf_W_size(view< T, Abi, OA > A)
Definition geqrf.hpp:36
void geqrf_apply_register(view< const T, Abi, OA > A, view< T, Abi, OD > D, view< const T, Abi, OB > B, view< const T, Abi > W, bool transposed, bool reversed) noexcept
Apply a block Householder transformation.
Definition geqrf.tpp:323
typename detail::simdified_abi< V >::type simdified_abi_t
Definition simdify.hpp:216
constexpr bool simdify_compatible
Definition simdify.hpp:221
constexpr auto simdify(simdifiable auto &&a) -> simdified_view_t< decltype(a)>
Definition simdify.hpp:228
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Definition uview.hpp:70
int index_t
Definition config.hpp:13