batmat 0.0.15
Batched linear algebra routines
Loading...
Searching...
No Matches
loop.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <batmat/config.hpp>
4#include <batmat/openmp.h>
5#if !BATMAT_WITH_OPENMP
7#endif
8
9namespace batmat {
10
11/// @ingroup topic-utils
/// Traversal direction selector consumed by the chunked loop helpers in this header.
/// NOTE(review): the enumerator list is not visible in this listing (it jumps from the opening
/// brace straight to the closing one). `LoopDir::Forward` is the only enumerator referenced by
/// the visible code -- confirm the remaining enumerators against the original header.
12enum class LoopDir {
15};
16
17/// Iterate over the range `[i_begin, i_end)` in chunks of size @p chunk_size, calling @p func_chunk
18/// for each full chunk and @p func_rem for the remaining elements (if any).
19/// @ingroup topic-utils
20[[gnu::always_inline]] inline void foreach_chunked(index_t i_begin, index_t i_end, auto chunk_size,
21 auto func_chunk, auto func_rem,
23 if (dir == LoopDir::Forward) {
24 index_t i;
25 for (i = i_begin; i + chunk_size <= i_end; i += chunk_size)
26 func_chunk(i);
27 index_t rem_i = i_end - i;
28 if (rem_i > 0)
29 func_rem(i, rem_i);
30 } else {
31 index_t rem_i = (i_end - i_begin) % chunk_size;
32 index_t i = i_end - rem_i;
33 if (rem_i > 0)
34 func_rem(i, rem_i);
35 for (i -= chunk_size; i >= i_begin; i -= chunk_size)
36 func_chunk(i);
37 }
38}
39
40/// Iterate over the range `[i_begin, i_end)` in chunks of size @p chunk_size, calling @p func_chunk
41/// for each chunk (including the last chunk, which may be smaller than @p chunk_size).
42/// @ingroup topic-utils
43[[gnu::always_inline]] inline void foreach_chunked_merged(index_t i_begin, index_t i_end,
44 auto chunk_size, auto func_chunk,
46 if (dir == LoopDir::Forward) {
47 index_t i;
48 for (i = i_begin; i + chunk_size <= i_end; i += chunk_size)
49 func_chunk(i, chunk_size);
50 index_t rem_i = i_end - i;
51 if (rem_i > 0)
52 func_chunk(i, rem_i);
53 } else {
54 index_t rem_i = (i_end - i_begin) % chunk_size;
55 index_t i = i_end - rem_i;
56 if (rem_i > 0)
57 func_chunk(i, rem_i);
58 for (i -= chunk_size; i >= i_begin; i -= chunk_size)
59 func_chunk(i, chunk_size);
60 }
61}
62
63/// @deprecated
64[[deprecated, gnu::always_inline]] inline void
65foreach_chunked_merged_parallel(index_t i_begin, index_t i_end, auto chunk_size, auto func_chunk,
67 const index_t rem_i = (i_end - i_begin) % chunk_size;
68 if (dir == LoopDir::Forward) {
69 BATMAT_OMP(parallel) {
70 BATMAT_OMP(for nowait)
71 for (index_t i = i_begin; i <= i_end - chunk_size; i += chunk_size)
72 func_chunk(i, chunk_size);
73 BATMAT_OMP(single) {
74 if (rem_i > 0)
75 func_chunk(i_end - rem_i, rem_i);
76 }
77 }
78 } else {
79 const index_t i_last = i_end - rem_i;
80 BATMAT_OMP(parallel) {
81 BATMAT_OMP(single nowait) {
82 if (rem_i > 0)
83 func_chunk(i_last, rem_i);
84 }
85 BATMAT_OMP(for)
86 for (index_t i = i_last - chunk_size; i >= i_begin; i -= chunk_size)
87 func_chunk(i, chunk_size);
88 }
89 }
90}
91
92/// @deprecated
93[[deprecated, gnu::always_inline]] inline void foreach_thread(auto &&func) {
94#if BATMAT_WITH_OPENMP
95 if (omp_get_max_threads() == 1) {
96 func(index_t{0}, index_t{1});
97 } else {
98 BATMAT_OMP(parallel) {
99 auto ni = static_cast<index_t>(omp_get_num_threads());
100 BATMAT_OMP(for schedule(static))
101 for (index_t i = 0; i < ni; ++i)
102 func(i, ni);
103 }
104 }
105#else
107#endif
108}
109
110/// @deprecated
111[[deprecated, gnu::always_inline]] inline void foreach_thread(index_t num_threads, auto &&func) {
112#if BATMAT_WITH_OPENMP
113 if (num_threads == 1) {
114 func(index_t{0}, index_t{1});
115 } else {
116 BATMAT_OMP(parallel num_threads(num_threads)) {
117 auto ni = static_cast<index_t>(omp_get_num_threads());
118 BATMAT_OMP(for schedule(static))
119 for (index_t i = 0; i < ni; ++i)
120 func(i, ni);
121 }
122 }
123#else
124 pool_sync_run_n<index_t>(num_threads, func);
125#endif
126}
127
128} // namespace batmat
void pool_sync_run_all(F &&f)
Run a function on all threads in the global thread pool, synchronously waiting for all threads.
void foreach_chunked(index_t i_begin, index_t i_end, auto chunk_size, auto func_chunk, auto func_rem, LoopDir dir=LoopDir::Forward)
Iterate over the range [i_begin, i_end) in chunks of size chunk_size, calling func_chunk for each ful...
Definition loop.hpp:20
void pool_sync_run_n(I n, F &&f)
Run a function on the first n threads in the global thread pool, synchronously waiting for those thre...
LoopDir
Definition loop.hpp:12
void foreach_chunked_merged(index_t i_begin, index_t i_end, auto chunk_size, auto func_chunk, LoopDir dir=LoopDir::Forward)
Iterate over the range [i_begin, i_end) in chunks of size chunk_size, calling func_chunk for each chu...
Definition loop.hpp:43
Definition kib.hpp:5
void foreach_thread(auto &&func)
Definition loop.hpp:93
void foreach_chunked_merged_parallel(index_t i_begin, index_t i_end, auto chunk_size, auto func_chunk, LoopDir dir=LoopDir::Forward)
Definition loop.hpp:65
#define BATMAT_OMP(X)
Definition openmp.h:8