batmat develop
Batched linear algebra routines
Loading...
Searching...
No Matches
avx-512.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <batmat/config.hpp>
4#include <batmat/simd.hpp>
5
6namespace batmat {
8
9/// Register block size of the matrix-matrix multiplication micro-kernels.
10/// AVX-512 has 32 vector registers, we use 25 registers for a 5×5 accumulator
11/// block of matrix C (leaving some registers for loading A and B):
12template <class T, class Abi>
13inline constexpr index_t RowsReg = 5;
14// Vectors greater than the physical vector length use more registers, so decrease the block size.
15template <class T, class Abi>
16 requires(datapar::simd_size<T, Abi>::value * sizeof(T) > 64)
17inline constexpr index_t RowsReg<T, Abi> = 3;
18
19} // namespace linalg::micro_kernels::gemm
20namespace ops {
21
22template <class T>
23inline constexpr index_t RowsRegTranspose = 8;
24template <class T>
25inline constexpr index_t ColsRegTranspose = 8;
26
27// TODO: we're using the AVX2 implementation for now.
28template <>
29inline constexpr index_t RowsRegTranspose<double> = 4;
30template <>
31inline constexpr index_t ColsRegTranspose<double> = 4;
32
33} // namespace ops
34} // namespace batmat
stdx::simd_size< Tp, Abi > simd_size
Definition simd.hpp:137
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
Definition avx-512.hpp:13
constexpr index_t RowsRegTranspose
Definition avx-512.hpp:23
constexpr index_t ColsRegTranspose
Definition avx-512.hpp:25