11#include <guanaqo/trace.hpp>
16template <
class T,
class Abi, micro_kernels::gemv::KernelConfig Conf = {},
StorageOrder OA>
27 const index_t M = D.rows(), K = A.cols();
30 if (M == 0) [[unlikely]]
32 if (K == 0) [[unlikely]] {
71template <simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
73void gemv(VA &&A, VB &&B, VD &&D, Opts... opts) {
75 std::optional<
decltype(
simdify(D).as_const())> null;
81template <simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
83void gemv_neg(VA &&A, VB &&B, VD &&D, Opts... opts) {
85 std::optional<
decltype(
simdify(D).as_const())> null;
91template <simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
93void gemv_add(VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts) {
99template <simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
101void gemv_add(VA &&A, VB &&B, VD &&D, Opts... opts) {
106template <simdifiable VA, simdifiable VB, simdifiable VC, simdifiable VD, shift_opt... Opts>
108void gemv_sub(VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts) {
114template <simdifiable VA, simdifiable VB, simdifiable VD, shift_opt... Opts>
116void gemv_sub(VA &&A, VB &&B, VD &&D, Opts... opts) {
void gemv_add(VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts)
d = c + A b
void gemv(VA &&A, VB &&B, VD &&D, Opts... opts)
d = A b
void gemv_neg(VA &&A, VB &&B, VD &&D, Opts... opts)
d = -A b
void gemv_sub(VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts)
d = c - A b
#define GUANAQO_TRACE_LINALG(name, gflops)
void fill(T a, view< T, Abi, OB > B)
void copy(view< const T, Abi, OA > A, view< T, Abi, OB > B)
constexpr micro_kernels::gemv::KernelConfig apply_gemv_options(micro_kernels::gemv::KernelConfig conf, Opts...)
void gemv(view< const T, Abi, OA > A, view< const T, Abi > B, std::optional< view< const T, Abi > > C, view< T, Abi > D)
void gemv_copy_register(view< const T, Abi, OA > A, view< const T, Abi > B, std::optional< view< const T, Abi > > C, view< T, Abi > D) noexcept
Generalized matrix-vector multiplication d = c ± A⁽ᵀ⁾ b, using register blocking.
typename detail::simdified_abi< V >::type simdified_abi_t
constexpr std::optional< int > rotate_C
constexpr bool simdify_compatible
constexpr std::optional< int > mask_D
constexpr auto simdify(simdifiable auto &&a) -> simdified_view_t< decltype(a)>
constexpr std::optional< int > shift_B
constexpr std::optional< int > rotate_D
constexpr std::optional< int > shift_A
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Aligned allocation for matrix storage.