4#include <batmat/config.hpp>
// Generic transpose: copies the matrix at pa (leading dimension lda) into pb
// (leading dimension ldb) with its row and column indices exchanged.
// NOTE(review): this listing is incomplete — the declaration of the temporary
// `r`, the loop headers over i and j, and the closing braces are not visible
// here, so the loop bounds and the exact role of R, C and d cannot be confirmed
// from this extract.
//
// pa  : source elements, read as pa[i * lda + j].
// lda : stride between consecutive values of i in the source.
// pb  : destination elements, written as pb[i * ldb + j].
// ldb : stride between consecutive values of i in the destination.
// d   : defaults to R; presumably limits how much of the result is written
//       (the d-dependent code is not visible here) — TODO confirm.
18template <index_t R, index_t C,
class T>
20inline void transpose_dyn(
const T *pa, index_t lda, T *pb, index_t ldb, index_t d = R) {
// Gather step: source element (i, j) is copied into the temporary at r[i][j].
25 r[i][j] = pa[i * lda + j];
// Scatter step: the temporary is read with swapped indices (r[j][i]), which is
// what performs the transposition.
28 pb[i * ldb + j] = r[j][i];
// Specialization of transpose_dyn for a 4 × 4 block of doubles, implemented with
// AVX 256-bit shuffles instead of scalar copies.
// NOTE(review): this listing is incomplete — the `template <>` header, the loads
// that fill `cols` from pa, and the stores that write `cols` to pb are not visible
// here. Presumably each recomputed cols[k] is stored to pb before the following
// `d` check; confirm against the full file.
//
// pa, lda : source block and its leading dimension.
// pb, ldb : destination block and its leading dimension.
// d       : number of result vectors to produce; the function returns early once
//           d of them have been computed (see the checks below).
34inline void transpose_dyn<4, 4>(
const double *pa, index_t lda,
double *pb, index_t ldb, index_t d) {
// cols holds the four input vectors (and later the four transposed vectors);
// shuf holds the intermediate, pairwise-interleaved vectors.
36 simd cols[4], shuf[4];
// Stage 1: interleave pairs of vectors within each 128-bit lane.
// Mask 0b0000 picks the even elements: (a0, b0, a2, b2).
40 shuf[0] = simd{_mm256_shuffle_pd((__m256d)cols[0], (__m256d)cols[1], 0b0000)};
// Mask 0b1111 picks the odd elements: (a1, b1, a3, b3).
41 shuf[1] = simd{_mm256_shuffle_pd((__m256d)cols[0], (__m256d)cols[1], 0b1111)};
// Same even/odd interleave for the second pair of input vectors.
42 shuf[2] =
simd{_mm256_shuffle_pd((__m256d)cols[2], (__m256d)cols[3], 0b0000)};
43 shuf[3] =
simd{_mm256_shuffle_pd((__m256d)cols[2], (__m256d)cols[3], 0b1111)};
// Stage 2: combine 128-bit halves across the two pairs.
// Selector 0b00100000 (0x20) joins the low halves of both operands, giving
// element 0 of every input vector.
44 cols[0] =
simd{_mm256_permute2f128_pd((__m256d)shuf[0], (__m256d)shuf[2], 0b00100000)};
// Only one result vector requested: stop here.
46 if (d < 2) [[unlikely]]
return;
// Low halves of the odd-element interleaves: element 1 of every input vector.
47 cols[1] =
simd{_mm256_permute2f128_pd((__m256d)shuf[1], (__m256d)shuf[3], 0b00100000)};
// Only two result vectors requested: stop here.
49 if (d < 3) [[unlikely]]
return;
// Selector 0b00110001 (0x31) joins the high halves of both operands, giving
// element 2 of every input vector.
50 cols[2] =
simd{_mm256_permute2f128_pd((__m256d)shuf[0], (__m256d)shuf[2], 0b00110001)};
// Only three result vectors requested: stop here.
52 if (d < 4) [[unlikely]]
return;
// High halves of the odd-element interleaves: element 3 of every input vector.
53 cols[3] =
simd{_mm256_permute2f128_pd((__m256d)shuf[1], (__m256d)shuf[3], 0b00110001)};
61template <index_t R, index_t C,
class T>
63inline void transpose(
const T *pa, index_t lda, T *pb, index_t ldb) {
#define BATMAT_ASSUME(x)
Invokes undefined behavior if the expression x does not evaluate to true.
void transpose(const T *pa, index_t lda, T *pb, index_t ldb)
Transposes the R × C matrix at pa with leading dimension lda, writing the result to pb with leading dimension ldb.
void transpose_dyn(const T *pa, index_t lda, T *pb, index_t ldb, index_t d=R)
Transposes the R × C matrix at pa with leading dimension lda, writing the result to pb with leading dimension ldb.
V unaligned_load(const typename V::value_type *p)
void unaligned_store(V v, typename V::value_type *p)
simd< Tp, deduced_abi< Tp, Np > > deduced_simd
stdx::simd< Tp, Abi > simd
constexpr auto cols(const MatrixView< T, I, S, O > &v)
#define BATMAT_FULLY_UNROLLED_FOR(...)