14template <std::
floating_po
int T>
24template <
class T,
class Abi>
38 return decltype(x){_mm512_rsqrt14_pd(
static_cast<__m512d
>(x))};
41 return decltype(x){_mm512_rsqrt14_ps(
static_cast<__m512
>(x))};
44 return decltype(x){_mm256_rsqrt14_pd(
static_cast<__m256d
>(x))};
47 return decltype(x){_mm256_rsqrt14_ps(
static_cast<__m256
>(x))};
50 return decltype(x){_mm_rsqrt14_pd(
static_cast<__m128d
>(x))};
53 return decltype(x){_mm_rsqrt14_ps(
static_cast<__m128
>(x))};
58 return decltype(x){_mm256_rsqrt_ps(
static_cast<__m256
>(x))};
63template <
class T,
class Abi>
67 return y * (three_halves - (half * x * y * y));
71template <
class T,
class Abi>
75 return y * (three_halves - (half * x * y * y));
111#if BATMAT_SCALAR_APPROX_INV_SQRT
114inline float rsqrt_0(
float x) {
115 __m128 input = _mm_set_ss(x);
116 __m128 result = _mm_rsqrt14_ss(input, input);
117 return _mm_cvtss_f32(result);
120inline double rsqrt_0(
double x) {
121 __m128d input = _mm_set_sd(x);
122 __m128d result = _mm_rsqrt14_sd(input, input);
123 return _mm_cvtsd_f64(result);
127template <std::
floating_po
int T>
130 return y * (T(1.5) - (T(0.5) * x * y * y));
134template <std::
floating_po
int T>
137 return y * (T(1.5) - (T(0.5) * x * y * y));
T rsqrt(T x)
Inverse square root.
simd< Tp, deduced_abi< Tp, Np > > deduced_simd
stdx::simd< Tp, Abi > simd
datapar::simd< T, Abi > rsqrt_1(datapar::simd< T, Abi > x)
rsqrt_0 with a single Newton iteration of refinement.
datapar::simd< T, Abi > rsqrt_2(datapar::simd< T, Abi > x)
rsqrt_0 with two Newton iterations of refinement.