batmat develop
Batched linear algebra routines
Loading...
Searching...
No Matches
simd.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <batmat/config.hpp>
4#include <batmat/unroll.h>
5
6#if BATMAT_WITH_GSI_HPC_SIMD
7
8#include <cstddef>
9#include <simd>
10
11namespace batmat::datapar {
12
/// SIMD vector of `ValueT` elements with an explicitly chosen ABI tag
/// (alias for `std::datapar::basic_simd`).
template <class ValueT, class AbiTag>
using simd = std::datapar::basic_simd<ValueT, AbiTag>;
/// SIMD vector of `Width` elements of type `ValueT`, with the ABI left to the library
/// (alias for `std::datapar::simd`).
template <class ValueT, std::size_t Width>
using deduced_simd = std::datapar::simd<ValueT, Width>;
17template <class Tp, std::size_t Np>
19template <class Tp>
21
/// Load a full vector of type @p V from the `V::size()` consecutive elements starting at @p p.
/// No alignment flag is passed to the library.
template <class V>
V unaligned_load(const typename V::value_type *p) {
    using element_t = typename V::value_type;
    using window_t  = std::span<element_t, V::size()>;
    // The span is deliberately built over non-const elements: unchecked_load seems to lack a
    // std::remove_const_t somewhere in its metaprogramming, hence the const_cast.
    window_t source{const_cast<element_t *>(p), V::size()};
    return std::datapar::unchecked_load<V>(source);
}
29
/// Load a full vector of type @p V from the `V::size()` consecutive elements starting at @p p,
/// passing `std::datapar::flag_aligned` to the library.
template <class V>
V aligned_load(const typename V::value_type *p) {
    using element_t = typename V::value_type;
    using window_t  = std::span<element_t, V::size()>;
    // Same const_cast as in unaligned_load: the span is built over non-const elements.
    window_t source{const_cast<element_t *>(p), V::size()};
    return std::datapar::unchecked_load<V>(source, std::datapar::flag_aligned);
}
35
/// Store the `V::size()` elements of @p v to the memory starting at @p p.
/// No alignment flag is passed to the library.
template <class V>
void unaligned_store(V v, typename V::value_type *p) {
    // Pointer + element count overload of unchecked_store.
    const auto lane_count = V::size();
    std::datapar::unchecked_store(v, p, lane_count);
}
40
/// Store the `V::size()` elements of @p v to the memory starting at @p p,
/// passing `std::datapar::flag_aligned` to the library.
template <class V>
void aligned_store(V v, typename V::value_type *p) {
    using window_t = std::span<typename V::value_type, V::size()>;
    window_t destination{p, V::size()};
    std::datapar::unchecked_store(v, destination, std::datapar::flag_aligned);
}
46
/// Masked load of a vector of type @p V from the `V::size()` elements starting at @p p.
/// Forwards the mask @p m and `std::datapar::flag_aligned` to `unchecked_load`.
template <class V>
V masked_aligned_load(const typename V::value_type *p, typename V::mask_type m) {
    using window_t = std::span<const typename V::value_type, V::size()>;
    window_t source{p, V::size()};
    return std::datapar::unchecked_load<V>(source, m, std::datapar::flag_aligned);
}
52
/// Masked load of a vector of type @p V from the `V::size()` elements starting at @p p.
/// Forwards the mask @p m to `unchecked_load`; no alignment flag is passed.
template <class V>
V masked_unaligned_load(const typename V::value_type *p, typename V::mask_type m) {
    using window_t = std::span<const typename V::value_type, V::size()>;
    window_t source{p, V::size()};
    return std::datapar::unchecked_load<V>(source, m);
}
58
/// Load a vector of type @p V from a range of exactly @p N elements starting at @p p,
/// by handing a fixed-extent span of length @p N to `std::datapar::partial_load`.
template <class V, int N>
V partial_load(const typename V::value_type *p) {
    using window_t = std::span<const typename V::value_type, N>;
    window_t source{p, N};
    return std::datapar::partial_load<V>(source);
}
64
/// Masked store of @p v to the `V::size()` elements starting at @p p, passing
/// `std::datapar::flag_aligned` to the library.
///
/// Single-lane vectors are handled separately: the only mask lane is tested directly and, if it
/// is set, the unmasked store is used.
template <class V>
void masked_aligned_store(V v, typename V::mask_type m, typename V::value_type *p) {
    using window_t = std::span<typename V::value_type, V::size()>;
    window_t destination{p, V::size()};
    if constexpr (V::size() != 1) {
        std::datapar::unchecked_store(v, destination, m, std::datapar::flag_aligned);
    } else if (m[0]) {
        std::datapar::unchecked_store(v, destination, std::datapar::flag_aligned);
    }
}
75
/// Masked store of @p v to the `V::size()` elements starting at @p p; no alignment flag is
/// passed to the library.
///
/// Single-lane vectors are handled separately: the only mask lane is tested directly and, if it
/// is set, the unmasked store is used.
template <class V>
void masked_unaligned_store(V v, typename V::mask_type m, typename V::value_type *p) {
    using window_t = std::span<typename V::value_type, V::size()>;
    window_t destination{p, V::size()};
    if constexpr (V::size() != 1) {
        std::datapar::unchecked_store(v, destination, m);
    } else if (m[0]) {
        std::datapar::unchecked_store(v, destination);
    }
}
86
/// Build a mask for @p V in which lane @p I holds @p Value and every other lane holds `!Value`.
/// The lane index is fixed at compile time.
template <class V, int I, bool Value = true>
auto generate_mask() {
    using mask_t = typename V::mask_type;
    // Generator invoked with each lane index.
    return mask_t{[](int lane) -> bool { return lane == I ? Value : !Value; }};
}
91
/// Build a mask for @p V in which lane @p i holds @p Value and every other lane holds `!Value`.
/// The lane index is chosen at run time.
template <class V, bool Value = true>
auto generate_mask(int i) {
    using mask_t = typename V::mask_type;
    // Generator invoked with each lane index; `i` is captured by value.
    return mask_t{[i](int lane) -> bool { return lane == i ? Value : !Value; }};
}
96
/// Build a mask for @p V whose first @p N lanes hold @p Value and whose remaining lanes
/// (index `>= N`) hold `!Value`.
template <class V, int N, bool Value = true>
auto generate_mask_until() {
    // (i >= N) ^ Value: lanes below N evaluate to Value, lanes from N onwards to !Value.
    return typename V::mask_type{[](int i) -> bool { return (i >= N) ^ Value; }};
}
101
102#if defined(__x86_64__) || defined(_M_X64)
/// Convert the vector @p v to the corresponding x86 intrinsic type.
/// Relies on `std::__detail::__to_x86_intrin`, an implementation detail of the SIMD library.
template <class V>
auto to_intrin(V v) {
    auto native = std::__detail::__to_x86_intrin(v);
    return native;
}
107#define BATMAT_HAVE_SIMD_TO_INTRIN 1
108#endif
109
/// Type of the static `size` member of `simd<ValueT, AbiTag>`, with cv-qualifiers and
/// references removed.
template <class ValueT, class AbiTag>
using simd_size = std::remove_cvref_t<decltype(simd<ValueT, AbiTag>::size)>;
/// Alignment trait of `simd<ValueT, AbiTag>` (alias for `std::datapar::alignment`).
template <class ValueT, class AbiTag>
using simd_align = std::datapar::alignment<simd<ValueT, AbiTag>>;
/// Vector type with element type `NewT` and the same number of lanes as `Vec`.
template <class NewT, class Vec>
using rebind_simd_t = deduced_simd<NewT, Vec::size()>;
116
/// Horizontal maximum: the largest lane of @p v, found by a scalar `std::max` sweep over
/// lanes `1 .. v.size() - 1` starting from lane 0.
template <class V>
auto hmax(V v) { // TODO
    using value_type = V::value_type;
    value_type largest = v[0];
    BATMAT_FULLY_UNROLLED_FOR (int lane = 1; lane < v.size(); ++lane) {
        largest = std::max(v[lane], largest);
    }
    return largest;
}
/// Horizontal minimum: the smallest lane of @p v, found by a scalar `std::min` sweep over
/// lanes `1 .. v.size() - 1` starting from lane 0.
template <class V>
auto hmin(V v) { // TODO
    using value_type = V::value_type;
    value_type smallest = v[0];
    BATMAT_FULLY_UNROLLED_FOR (int lane = 1; lane < v.size(); ++lane) {
        smallest = std::min(v[lane], smallest);
    }
    return smallest;
}
133
134using std::datapar::reduce_count;
135using std::datapar::select;
136
137} // namespace batmat::datapar
138
139#else
140
141#include <experimental/simd>
142#include <cstddef>
143
145namespace stdx = std::experimental;
146
147template <class Tp, class Abi>
148using simd = stdx::simd<Tp, Abi>;
149template <class Tp, std::size_t Np>
150using deduced_abi = stdx::simd_abi::deduce_t<Tp, Np>;
151template <class Tp, std::size_t Np>
153
154template <class V>
155V unaligned_load(const typename V::value_type *p) {
156 return V{p, stdx::element_aligned};
157}
158
159template <class V>
160V aligned_load(const typename V::value_type *p) {
161 return V{p, stdx::vector_aligned};
162}
163
164template <class V>
165void unaligned_store(V v, typename V::value_type *p) {
166 v.copy_to(p, stdx::element_aligned);
167}
168
169template <class V>
170void aligned_store(V v, typename V::value_type *p) {
171 v.copy_to(p, stdx::vector_aligned);
172}
173
174template <class V>
175V masked_aligned_load(const typename V::value_type *p, typename V::mask_type m) {
176 V v{};
177 where(m, v).copy_from(p, stdx::vector_aligned);
178 return v;
179}
180
181template <class V>
182V masked_unaligned_load(const typename V::value_type *p, typename V::mask_type m) {
183 V v{};
184 where(m, v).copy_from(p, stdx::element_aligned);
185 return v;
186}
187
188template <class V>
189void masked_aligned_store(V v, typename V::mask_type m, typename V::value_type *p) {
190 where(m, v).copy_to(p, stdx::vector_aligned);
191}
192
193template <class V>
194void masked_unaligned_store(V v, typename V::mask_type m, typename V::value_type *p) {
195 where(m, v).copy_to(p, stdx::element_aligned);
196}
197
/// Build a mask for @p V in which lane @p I holds @p Value and every other lane holds `!Value`.
/// The lane index is fixed at compile time.
template <class V, size_t I, bool Value = true>
auto generate_mask() {
    typename V::mask_type m{!Value}; // broadcast the "other lanes" value first
    m[I] = Value;
    return m;
}
204
/// Build a mask for @p V in which lane @p i holds @p Value and every other lane holds `!Value`.
/// The lane index is chosen at run time.
template <class V, bool Value = true>
auto generate_mask(size_t i) {
    using mask_t = typename V::mask_type;
    mask_t mask{!Value}; // broadcast the "other lanes" value first
    mask[i] = Value;
    return mask;
}
211
212template <class V, size_t N, bool Value = true>
214 typename V::mask_type m{Value};
215 BATMAT_FULLY_UNROLLED_FOR (size_t i = N; i < V::size(); ++i)
216 m[i] = !Value;
217 return m;
218}
219
220template <class V, size_t N>
221V partial_load(const typename V::value_type *p) {
222 const auto mask = generate_mask_until<V, N>();
223 return masked_unaligned_load<V>(p, mask);
224}
225
226template <class V>
227auto to_intrin(V v) {
228 return static_cast<stdx::__intrinsic_type_t<V, v.size()>>(v);
229}
230#define BATMAT_HAVE_SIMD_TO_INTRIN 1
231
232template <class Tp, class Abi>
233using simd_size = stdx::simd_size<Tp, Abi>;
234template <class Tp, class Abi>
235using simd_align = stdx::memory_alignment<simd<Tp, Abi>>;
236template <class T, class V>
237using rebind_simd_t = stdx::rebind_simd_t<T, V>;
238template <class Tp>
240
241using stdx::hmax;
242using stdx::hmin;
243
/// Return `popcount(v)`; `popcount` is called unqualified on the argument.
auto reduce_count(auto v) {
    const auto lanes_set = popcount(v);
    return lanes_set;
}
/// Lane-wise selection: returns a copy of @p f in which the lanes selected by @p cond have been
/// overwritten with @p t.
auto select(auto cond, auto t, auto f) {
    // `f` is a by-value parameter, so the masked assignment only touches the local copy.
    where(cond, f) = t;
    return f;
}
249
250} // namespace batmat::datapar
251
252#endif
253
254namespace batmat::datapar {
255
256template <class V>
257constexpr V from_values(auto... values) {
259 const typename V::value_type data[]{values...};
260 return aligned_load<V>(data);
261}
262
263} // namespace batmat::datapar
void masked_unaligned_store(V v, typename V::mask_type m, typename V::value_type *p)
Definition simd.hpp:194
V unaligned_load(const typename V::value_type *p)
Definition simd.hpp:155
V partial_load(const typename V::value_type *p)
Definition simd.hpp:221
void aligned_store(V v, typename V::value_type *p)
Definition simd.hpp:170
stdx::memory_alignment< simd< Tp, Abi > > simd_align
Definition simd.hpp:235
stdx::simd_size< Tp, Abi > simd_size
Definition simd.hpp:233
auto to_intrin(V v)
Definition simd.hpp:227
deduced_abi< Tp, 1 > scalar_abi
Definition simd.hpp:239
auto reduce_count(auto v)
Definition simd.hpp:244
stdx::rebind_simd_t< T, V > rebind_simd_t
Definition simd.hpp:237
V masked_unaligned_load(const typename V::value_type *p, typename V::mask_type m)
Definition simd.hpp:182
auto generate_mask()
Definition simd.hpp:199
V aligned_load(const typename V::value_type *p)
Definition simd.hpp:160
stdx::simd_abi::deduce_t< Tp, Np > deduced_abi
Definition simd.hpp:150
void unaligned_store(V v, typename V::value_type *p)
Definition simd.hpp:165
simd< Tp, deduced_abi< Tp, Np > > deduced_simd
Definition simd.hpp:152
void masked_aligned_store(V v, typename V::mask_type m, typename V::value_type *p)
Definition simd.hpp:189
auto select(auto cond, auto t, auto f)
Definition simd.hpp:245
constexpr V from_values(auto... values)
Definition simd.hpp:257
V masked_aligned_load(const typename V::value_type *p, typename V::mask_type m)
Definition simd.hpp:175
auto generate_mask_until()
Definition simd.hpp:213
stdx::simd< Tp, Abi > simd
Definition simd.hpp:148
#define BATMAT_FULLY_UNROLLED_FOR(...)
Definition unroll.h:27