17 return user_tol > 0 ? user_tol : std::numeric_limits<
decltype(user_tol)>::epsilon();
20template <
class T,
class Abi>
24 static constexpr T safe_min = std::numeric_limits<T>::min();
25 static constexpr T safe_max = std::numeric_limits<T>::max();
26 static constexpr T ε = std::numeric_limits<T>::epsilon();
28 static const T small = sqrt(safe_min) / ε;
29 static const T large = sqrt(safe_max) * ε;
38template <
class T,
class Abi>
41 return all_of(x == T{0});
44template <
class T,
class Abi>
49 return all_of(fabs(e0_sq) <= ε_sq * fabs(d0 * d1));
53template <
class T,
class Abi>
61 const auto half_trace = (a + c) * half;
62 const auto half_diff = (a - c) * half;
63 const auto radius = hypot(half_diff, b);
64 auto lambda1 = half_trace - radius;
65 auto lambda2 = half_trace + radius;
66 return {lambda1, lambda2};
69template <
class T,
class Abi>
74 const auto b = sqrt(fabs(e.load(l, 0)));
75 const auto a = d.load(l, 0), c = d.load(l + 1, 0);
77 d.store(λ1, l, 0), d.store(λ2, l + 1, 0), e.store(T{0}, l, 0);
80template <
class T,
class Abi>
83 for (
index_t i = l; i <= m; ++i)
84 d.store(d.load(i, 0) * factor, i, 0);
87template <
class T,
class Abi>
92 const auto factor_sq = factor * factor;
94 e.store(e.load(i, 0) * factor_sq, i, 0);
97template <
class T,
class Abi>
106 const simd zero{T{0}}, one{T{1}}, two{T{2}};
108 const auto p0 = d.load(l, 0);
109 const auto e0 = sqrt(fabs(e.load(l, 0)));
110 auto σ = (d.load(l + 1, 0) - p0) / (two * e0);
111 const auto rshift = hypot(σ, one);
112 σ = p0 - e0 / (σ + copysign(rshift, σ));
117 auto γ = d.load(m, 0) - σ;
120 for (
index_t i = m; i-- > l;) {
121 const auto bb = e.load(i, 0);
122 const auto r = p + bb;
124 e.store(s * r, i + 1, 0);
125 const auto old_c = c;
128 const auto old_γ = γ;
129 const auto α = d.load(i, 0);
130 γ = c * (α - σ) - s * old_γ;
131 d.store(old_γ + (α - γ), i + 1, 0);
134 e.store(s * p, l, 0);
135 d.store(σ + γ, l, 0);
138template <
class T,
class Abi>
147 const simd zero{T{0}}, one{T{1}}, two{T{2}};
149 const auto p0 = d.load(m, 0);
150 const auto e0 = sqrt(fabs(e.load(m - 1, 0)));
151 auto σ = (d.load(m - 1, 0) - p0) / (two * e0);
152 const auto rshift = hypot(σ, one);
153 σ = p0 - e0 / (σ + copysign(rshift, σ));
158 auto γ = d.load(l, 0) - σ;
161 for (
index_t i = l; i < m; ++i) {
162 const auto bb = e.load(i, 0);
163 const auto r = p + bb;
165 e.store(s * r, i - 1, 0);
166 const auto old_c = c;
169 const auto old_γ = γ;
170 const auto α = d.load(i + 1, 0);
171 γ = c * (α - σ) - s * old_γ;
172 d.store(old_γ + (α - γ), i, 0);
175 e.store(s * p, m - 1, 0);
176 d.store(σ + γ, m, 0);
179template <
class T,
class Abi>
192template <
class T,
class Abi>
202 for (
index_t i = l; i <= m; ++i) {
203 const auto di = d.load(i, 0);
204 anorm_sq = max(anorm_sq, di * di);
206 for (
index_t i = l; i < m; ++i) {
207 const auto ei_sq = fabs(e_sq.load(i, 0));
208 anorm_sq = max(anorm_sq, ei_sq);
213template <
class T,
class Abi>
226template <
class T,
class Abi>
230 static_assert(!std::is_const_v<T>);
241 const T ε_sq = ε * ε;
242 const simd zero{T{0}}, one{T{1}};
243 const index_t max_total_iterations = options.max_iterations_per_eigenvalue * n;
253 for (
index_t i = 0; i + 1 < n; ++i) {
254 const auto ei = e.
load(i, 0);
255 const auto ei_sq = ei * ei;
256 const auto di = d.
load(i, 0), di_next = d.
load(i + 1, 0);
258 e.
store(split ? zero : ei_sq, i, 0);
261 bool found_unreduced_block;
263 found_unreduced_block =
false;
274 for (m = l; m + 1 < n; ++m) {
275 const auto em = e.
load(m, 0);
285 found_unreduced_block =
true;
288 const bool scaled = any_of(factor != one);
294 else if (++total_iterations < max_total_iterations)
299 if (total_iterations >= max_total_iterations)
300 return std::unexpected(total_iterations);
305 }
while (found_unreduced_block);
306 return total_iterations;
#define BATMAT_ASSUME(x)
Invokes undefined behavior if the expression x does not evaluate to true.
stdx::simd_size< Tp, Abi > simd_size
auto reduce_count(auto v)
auto select(auto cond, auto t, auto f)
stdx::simd< Tp, Abi > simd
std::expected< index_t, index_t > sterf(view< T, Abi, StorageOrder::ColMajor > diag, view< T, Abi, StorageOrder::ColMajor > subdiag, SterfOptions options) noexcept
Eigenvalues of a symmetric tridiagonal matrix given by diag and subdiag, computed in-place using the ...
void sterf_ql_sweep_squared_e_inplace(uview< T, Abi, StorageOrder::ColMajor > d, uview< T, Abi, StorageOrder::ColMajor > e, index_t l, index_t m) noexcept
datapar::simd< T, Abi > squared_block_norm_estimate_from_squared_e(uview< T, Abi, StorageOrder::ColMajor > d, uview< T, Abi, StorageOrder::ColMajor > e_sq, index_t l, index_t m) noexcept
void sterf_dynamic_step_squared_e_inplace(uview< T, Abi, StorageOrder::ColMajor > d, uview< T, Abi, StorageOrder::ColMajor > e, index_t l, index_t m) noexcept
datapar::simd< T, Abi > safe_scaling_factor(datapar::simd< T, Abi > anorm) noexcept
constexpr auto default_tolerance(auto user_tol) noexcept
datapar::simd< T, Abi > block_norm_estimate_from_squared_e(uview< T, Abi, StorageOrder::ColMajor > d, uview< T, Abi, StorageOrder::ColMajor > e_sq, index_t l, index_t m) noexcept
void sterf_qr_sweep_squared_e_inplace(uview< T, Abi, StorageOrder::ColMajor > d, uview< T, Abi, StorageOrder::ColMajor > e, index_t l, index_t m) noexcept
void solve_2x2_squared_e_inplace(uview< T, Abi, StorageOrder::ColMajor > d, uview< T, Abi, StorageOrder::ColMajor > e, index_t l) noexcept
std::pair< datapar::simd< T, Abi >, datapar::simd< T, Abi > > stable_2x2_eigenvalues(datapar::simd< T, Abi > a, datapar::simd< T, Abi > b, datapar::simd< T, Abi > c) noexcept
Eigenvalues of [a b; b c].
void scale_squared_e(uview< T, Abi, StorageOrder::ColMajor > d, uview< T, Abi, StorageOrder::ColMajor > e, index_t l, index_t m, datapar::simd< T, Abi > factor) noexcept
bool all_zero(datapar::simd< T, Abi > x) noexcept
bool negligible_squared_e(datapar::simd< T, Abi > e0_sq, datapar::simd< T, Abi > d0, datapar::simd< T, Abi > d1, T ε_sq) noexcept
void scale_diag_only(uview< T, Abi, StorageOrder::ColMajor > d, index_t l, index_t m, datapar::simd< T, Abi > factor) noexcept
simd_view_types< std::remove_const_t< T >, Abi >::template view< T, Order > view
void store(simd x, index_t r, index_t c) const noexcept
simd load(index_t r, index_t c) const noexcept