6#if BATMAT_WITH_GSI_HPC_SIMD
13template <
class Tp,
class Abi>
14using simd = std::datapar::basic_simd<Tp, Abi>;
15template <
class Tp, std::
size_t Np>
17template <
class Tp, std::
size_t Np>
26 std::span<
typename V::value_type, V::size()> sp{
const_cast<V::value_type *
>(p), V::size()};
27 return std::datapar::unchecked_load<V>(sp);
32 std::span<
typename V::value_type, V::size()> sp{
const_cast<V::value_type *
>(p), V::size()};
33 return std::datapar::unchecked_load<V>(sp, std::datapar::flag_aligned);
38 std::datapar::unchecked_store(v, p, V::size());
43 std::span<
typename V::value_type, V::size()> sp{p, V::size()};
44 std::datapar::unchecked_store(v, sp, std::datapar::flag_aligned);
49 std::span<
const typename V::value_type, V::size()> sp{p, V::size()};
50 return std::datapar::unchecked_load<V>(sp, m, std::datapar::flag_aligned);
55 std::span<
const typename V::value_type, V::size()> sp{p, V::size()};
56 return std::datapar::unchecked_load<V>(sp, m);
59template <
class V,
int N>
61 std::span<const typename V::value_type, N> sp{p, N};
62 return std::datapar::partial_load<V>(sp);
67 std::span<
typename V::value_type, V::size()> sp{p, V::size()};
68 if constexpr (V::size() == 1) {
70 std::datapar::unchecked_store(v, sp, std::datapar::flag_aligned);
72 std::datapar::unchecked_store(v, sp, m, std::datapar::flag_aligned);
78 std::span<
typename V::value_type, V::size()> sp{p, V::size()};
79 if constexpr (V::size() == 1) {
81 std::datapar::unchecked_store(v, sp);
83 std::datapar::unchecked_store(v, sp, m);
87template <
class V,
int I,
bool Value = true>
89 return typename V::mask_type{[](
int i) ->
bool {
return (i != I) ^ Value; }};
92template <
class V,
bool Value = true>
94 return typename V::mask_type{[i](
int j) ->
bool {
return (j != i) ^ Value; }};
97template <
class V,
int N,
bool Value = true>
99 return typename V::mask_type{[](
int i) ->
bool {
return (i >= N) ^ Value; }};
102#if defined(__x86_64__) || defined(_M_X64)
105 return std::__detail::__to_x86_intrin(v);
107#define BATMAT_HAVE_SIMD_TO_INTRIN 1
110template <
class Tp,
class Abi>
112template <
class Tp,
class Abi>
113using simd_align = std::datapar::alignment<simd<Tp, Abi>>;
114template <
class T,
class V>
119 using value_type = V::value_type;
122 m = std::max(v[i], m);
127 using value_type = V::value_type;
130 m = std::min(v[i], m);
134using std::datapar::reduce_count;
135using std::datapar::select;
141#include <experimental/simd>
145namespace stdx = std::experimental;
147template <
class Tp,
class Abi>
148using simd = stdx::simd<Tp, Abi>;
149template <
class Tp, std::
size_t Np>
151template <
class Tp, std::
size_t Np>
156 return V{p, stdx::element_aligned};
161 return V{p, stdx::vector_aligned};
166 v.copy_to(p, stdx::element_aligned);
171 v.copy_to(p, stdx::vector_aligned);
177 where(m, v).copy_from(p, stdx::vector_aligned);
184 where(m, v).copy_from(p, stdx::element_aligned);
190 where(m, v).copy_to(p, stdx::vector_aligned);
195 where(m, v).copy_to(p, stdx::element_aligned);
198template <
class V,
size_t I,
bool Value = true>
200 typename V::mask_type m{!Value};
205template <
class V,
bool Value = true>
207 typename V::mask_type m{!Value};
212template <
class V,
size_t N,
bool Value = true>
214 typename V::mask_type m{Value};
220template <
class V,
size_t N>
228 return static_cast<stdx::__intrinsic_type_t<V, v.size()
>>(v);
230#define BATMAT_HAVE_SIMD_TO_INTRIN 1
232template <
class Tp,
class Abi>
234template <
class Tp,
class Abi>
236template <
class T,
class V>
259 const typename V::value_type data[]{values...};
void masked_unaligned_store(V v, typename V::mask_type m, typename V::value_type *p)
V unaligned_load(const typename V::value_type *p)
V partial_load(const typename V::value_type *p)
void aligned_store(V v, typename V::value_type *p)
stdx::memory_alignment< simd< Tp, Abi > > simd_align
stdx::simd_size< Tp, Abi > simd_size
deduced_abi< Tp, 1 > scalar_abi
auto reduce_count(auto v)
stdx::rebind_simd_t< T, V > rebind_simd_t
V masked_unaligned_load(const typename V::value_type *p, typename V::mask_type m)
V aligned_load(const typename V::value_type *p)
stdx::simd_abi::deduce_t< Tp, Np > deduced_abi
void unaligned_store(V v, typename V::value_type *p)
simd< Tp, deduced_abi< Tp, Np > > deduced_simd
void masked_aligned_store(V v, typename V::mask_type m, typename V::value_type *p)
auto select(auto cond, auto t, auto f)
constexpr V from_values(auto... values)
V masked_aligned_load(const typename V::value_type *p, typename V::mask_type m)
auto generate_mask_until()
stdx::simd< Tp, Abi > simd
#define BATMAT_FULLY_UNROLLED_FOR(...)