0.0.19/Doxygen/micro-kernels_2gemm_8hpp_source.html

#pragma once


#include <batmat/linalg/structure.hpp>

#include <batmat/linalg/uview.hpp>

#include <batmat/lut.hpp>

#include <batmat/micro-kernels/gemm/export.h>

#include <batmat/platform/platform.hpp>

#include <optional>


namespace batmat::linalg::micro_kernels::gemm {


// Compile-time options for the GEMM micro-kernels. Values of this type are passed as the `Conf`
// non-type template argument of the kernels declared in this header.
// NOTE(review): the per-field meanings below are inferred from the field names only; confirm
// them against the kernel implementation before relying on them.
struct BATMAT_LINALG_GEMM_EXPORT KernelConfig {
    // Presumably selects the minus sign in D = C ± A B — TODO confirm.
    bool negate{false};
    // Shift applied to A, and rotations applied to B and C (direction/units not visible here).
    int shift_A{0};
    int rotate_B{0};
    int rotate_C{0};
    // Defaults to whatever rotate_C ends up being, unless set explicitly.
    int rotate_D{rotate_C};
    // Defaults to whatever rotate_D ends up being, unless set explicitly.
    int mask_D{rotate_D};
    // Structure of each operand; every operand is a general matrix by default.
    MatrixStructure struc_A{MatrixStructure::General};
    MatrixStructure struc_B{MatrixStructure::General};
    MatrixStructure struc_C{MatrixStructure::General};
};


// Generalized matrix multiplication D = C ± A⁽ᵀ⁾ B⁽ᵀ⁾ on a single register block of
// RowsReg × ColsReg elements. Declared without the export macro, so it is not part of the
// shared library's exported interface.
//
// A, B  Read-only input operands.
// C     Optional read-only operand (presumably the addend; an empty optional presumably means
//       "no C term" — TODO confirm in the implementation).
// D     Writable output operand.
// k     NOTE(review): presumably the inner (contraction) dimension — confirm.
template <typename T, typename Abi, KernelConfig Conf, index_t RowsReg, index_t ColsReg,
          StorageOrder OA, StorageOrder OB, StorageOrder OC, StorageOrder OD>
void gemm_copy_microkernel(uview<const T, Abi, OA> A,
                           uview<const T, Abi, OB> B,
                           std::optional<uview<const T, Abi, OC>> C,
                           uview<T, Abi, OD> D,
                           index_t k) noexcept;


// Generalized matrix multiplication D = C ± A⁽ᵀ⁾ B⁽ᵀ⁾ using register blocking. Unlike the
// single-block micro-kernel, this entry point is exported from the shared library.
//
// A, B  Read-only input operands.
// C     Optional read-only operand (presumably the addend; an empty optional presumably means
//       "no C term" — TODO confirm in the implementation).
// D     Writable output operand.
template <typename T, typename Abi, KernelConfig Conf, StorageOrder OA, StorageOrder OB,
          StorageOrder OC, StorageOrder OD>
BATMAT_LINALG_GEMM_EXPORT void gemm_copy_register(view<const T, Abi, OA> A,
                                                  view<const T, Abi, OB> B,
                                                  std::optional<view<const T, Abi, OC>> C,
                                                  view<T, Abi, OD> D) noexcept;


// Column count of a register block. It is deliberately tied to the row count (RowsReg):
// square blocks greatly simplify the handling of triangular matrices.
template <typename T, typename Abi>
constexpr index_t ColsReg{RowsReg<T, Abi>};


namespace detail {

// Initializer for the micro-kernel lookup table, not the table itself. The real table has to be
// exported from the shared library, so it is only declared in this header and defined elsewhere:
// the micro-kernels are not exported, and the compiler must not be able to call them directly
// through a table visible here. This value could live in the .tpp file instead, but its type
// would have to be spelled out in this header anyway.
//
// Entry (RowIdx, ColIdx) refers to the micro-kernel instantiated for a register block of
// (RowIdx + 1) × (ColIdx + 1) — presumably the indices are zero-based, so the table covers
// block sizes 1..RowsReg by 1..ColsReg (confirm against make_2d_lut).
template <typename T, typename Abi, KernelConfig Conf, StorageOrder OA, StorageOrder OB,
          StorageOrder OC, StorageOrder OD>
constexpr auto gemm_copy_lut = make_2d_lut<RowsReg<T, Abi>, ColsReg<T, Abi>>(
    []<index_t RowIdx, index_t ColIdx>(index_constant<RowIdx>, index_constant<ColIdx>) {
        return gemm_copy_microkernel<T, Abi, Conf, RowIdx + 1, ColIdx + 1, OA, OB, OC, OD>;
    });

} // namespace detail


// Declaration (not definition) of the exported micro-kernel lookup table. Its type is taken from
// detail::gemm_copy_lut via decltype, so the two always agree. `extern` means no storage is
// emitted here; `constinit` states that the table is constant-initialized.
// Defining BATMAT_LINALG_GEMM_NO_DECLARE_LUT before including this header suppresses the
// declaration (presumably for the translation unit that provides the definition — confirm).
#ifndef BATMAT_LINALG_GEMM_NO_DECLARE_LUT

template <class T, class Abi, KernelConfig Conf, StorageOrder OA, StorageOrder OB, StorageOrder OC,

          StorageOrder OD>

BATMAT_LINALG_GEMM_EXPORT extern const constinit decltype(detail::gemm_copy_lut<T, Abi, Conf, OA,

                                                                                OB, OC, OD>)

    gemm_copy_lut;

#endif


} // namespace batmat::linalg::micro_kernels::gemm


guanaqo::StorageOrder
StorageOrder

batmat::linalg::MatrixStructure
MatrixStructure
Definition structure.hpp:8

batmat::linalg::MatrixStructure::General
@ General
Definition structure.hpp:8

batmat::make_2d_lut
consteval auto make_2d_lut(F f)
Returns a 2D array of the form:
Definition lut.hpp:25

lut.hpp

batmat::linalg::micro_kernels::gemm::detail
Definition gemm.hpp:40

batmat::linalg::micro_kernels::gemm::detail::gemm_copy_lut
constexpr auto gemm_copy_lut
Definition gemm.hpp:47

batmat::linalg::micro_kernels::gemm
Definition gemm.hpp:10

batmat::linalg::micro_kernels::gemm::KernelConfig::mask_D
int mask_D
Definition gemm.hpp:18

batmat::linalg::micro_kernels::gemm::KernelConfig::rotate_C
int rotate_C
Definition gemm.hpp:16

batmat::linalg::micro_kernels::gemm::gemm_copy_lut
const constinit decltype(detail::gemm_copy_lut< T, Abi, Conf, OA, OB, OC, OD >) gemm_copy_lut
Definition gemm.tpp:20

batmat::linalg::micro_kernels::gemm::RowsReg
constexpr index_t RowsReg
Register block size of the matrix-matrix multiplication micro-kernels.
Definition avx-512.hpp:13

batmat::linalg::micro_kernels::gemm::KernelConfig::struc_A
MatrixStructure struc_A
Definition gemm.hpp:19

batmat::linalg::micro_kernels::gemm::KernelConfig::struc_B
MatrixStructure struc_B
Definition gemm.hpp:20

batmat::linalg::micro_kernels::gemm::KernelConfig::negate
bool negate
Definition gemm.hpp:13

batmat::linalg::micro_kernels::gemm::KernelConfig::rotate_D
int rotate_D
Definition gemm.hpp:17

batmat::linalg::micro_kernels::gemm::gemm_copy_register
void gemm_copy_register(view< const T, Abi, OA > A, view< const T, Abi, OB > B, std::optional< view< const T, Abi, OC > > C, view< T, Abi, OD > D) noexcept
Generalized matrix multiplication D = C ± A⁽ᵀ⁾ B⁽ᵀ⁾. Using register blocking.
Definition gemm.tpp:174

batmat::linalg::micro_kernels::gemm::KernelConfig::rotate_B
int rotate_B
Definition gemm.hpp:15

batmat::linalg::micro_kernels::gemm::KernelConfig::shift_A
int shift_A
Definition gemm.hpp:14

batmat::linalg::micro_kernels::gemm::gemm_copy_microkernel
void gemm_copy_microkernel(uview< const T, Abi, OA > A, uview< const T, Abi, OB > B, std::optional< uview< const T, Abi, OC > > C, uview< T, Abi, OD > D, index_t k) noexcept
Generalized matrix multiplication D = C ± A⁽ᵀ⁾ B⁽ᵀ⁾. Single register block.
Definition gemm.tpp:36

batmat::linalg::micro_kernels::gemm::KernelConfig::struc_C
MatrixStructure struc_C
Definition gemm.hpp:21

batmat::linalg::micro_kernels::gemm::ColsReg
constexpr index_t ColsReg
Definition gemm.hpp:38

batmat::linalg::micro_kernels::gemm::KernelConfig
Definition gemm.hpp:12

batmat::linalg::view
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
Definition uview.hpp:70

batmat::index_constant
std::integral_constant< index_t, I > index_constant
Definition lut.hpp:10

platform.hpp

batmat::linalg::uview
Definition uview.hpp:80

structure.hpp

uview.hpp