batmat 0.0.16
Batched linear algebra routines
Loading...
Searching...
No Matches
gemv.hpp
Go to the documentation of this file.
1#pragma once
2
5#include <batmat/lut.hpp>
7#include <optional>
8
10
12 bool negate = false;
13 int shift_A = 0;
14 int rotate_B = 0;
15 int rotate_C = 0;
18};
19
20template <class T, class Abi, KernelConfig Conf, index_t RowsReg, StorageOrder OA>
23 uview<T, Abi, StorageOrder::ColMajor> D, index_t k) noexcept;
24
25template <class T, class Abi, KernelConfig Conf, StorageOrder OA>
27 std::optional<view<const T, Abi>> C, view<T, Abi> D) noexcept;
28
29template <class T, class Abi>
30constexpr index_t RowsReg = 2 * gemm::RowsReg<T, Abi>;
31
32template <class T, class Abi, KernelConfig Conf, StorageOrder OA>
33inline const constinit auto gemv_copy_lut =
36 });
37
38} // namespace batmat::linalg::micro_kernels::gemv
consteval auto make_1d_lut(F f)
Returns an array of the form {f(0), f(1), …, f(N − 1)}.
Definition lut.hpp:39
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
Definition avx-512.hpp:13
const constinit auto gemv_copy_lut
Definition gemv.hpp:33
void gemv_copy_register(view< const T, Abi, OA > A, view< const T, Abi > B, std::optional< view< const T, Abi > > C, view< T, Abi > D) noexcept
Generalized matrix-vector multiplication d = c ± A⁽ᵀ⁾ b. Using register blocking.
Definition gemv.tpp:79
void gemv_copy_microkernel(uview< const T, Abi, OA > A, uview< const T, Abi, StorageOrder::ColMajor > B, std::optional< uview< const T, Abi, StorageOrder::ColMajor > > C, uview< T, Abi, StorageOrder::ColMajor > D, index_t k) noexcept
Generalized matrix-vector multiplication d = c ± A⁽ᵀ⁾ b. Single register block.
Definition gemv.tpp:16
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Definition uview.hpp:70
std::integral_constant< index_t, I > index_constant
Definition lut.hpp:10