]>
Commit | Line | Data |
---|---|---|
748086b7 | 1 | /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc. |
953ff289 DN |
2 | Contributed by Richard Henderson <rth@redhat.com>. |
3 | ||
4 | This file is part of the GNU OpenMP Library (libgomp). | |
5 | ||
6 | Libgomp is free software; you can redistribute it and/or modify it | |
748086b7 JJ |
7 | under the terms of the GNU General Public License as published by |
8 | the Free Software Foundation; either version 3, or (at your option) | |
9 | any later version. | |
953ff289 DN |
10 | |
11 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
748086b7 | 13 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
953ff289 DN |
14 | more details. |
15 | ||
748086b7 JJ |
16 | Under Section 7 of GPL version 3, you are granted additional |
17 | permissions described in the GCC Runtime Library Exception, version | |
18 | 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | You should have received a copy of the GNU General Public License and | |
21 | a copy of the GCC Runtime Library Exception along with this program; | |
22 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | <http://www.gnu.org/licenses/>. */ | |
953ff289 DN |
24 | |
25 | /* This file contains routines for managing work-share iteration, both | |
26 | for loops and sections. */ | |
27 | ||
28 | #include "libgomp.h" | |
29 | #include <stdlib.h> | |
30 | ||
31 | ||
32 | /* This function implements the STATIC scheduling method. The caller should | |
33 | iterate *pstart <= x < *pend. Return zero if there are more iterations | |
34 | to perform; nonzero if not. Return less than 0 if this thread had | |
35 | received the absolutely last iteration. */ | |
36 | ||
37 | int | |
38 | gomp_iter_static_next (long *pstart, long *pend) | |
39 | { | |
40 | struct gomp_thread *thr = gomp_thread (); | |
41 | struct gomp_team *team = thr->ts.team; | |
42 | struct gomp_work_share *ws = thr->ts.work_share; | |
43 | unsigned long nthreads = team ? team->nthreads : 1; | |
44 | ||
45 | if (thr->ts.static_trip == -1) | |
46 | return -1; | |
47 | ||
48 | /* Quick test for degenerate teams and orphaned constructs. */ | |
49 | if (nthreads == 1) | |
50 | { | |
51 | *pstart = ws->next; | |
52 | *pend = ws->end; | |
53 | thr->ts.static_trip = -1; | |
54 | return ws->next == ws->end; | |
55 | } | |
56 | ||
57 | /* We interpret chunk_size zero as "unspecified", which means that we | |
58 | should break up the iterations such that each thread makes only one | |
59 | trip through the outer loop. */ | |
60 | if (ws->chunk_size == 0) | |
61 | { | |
62 | unsigned long n, q, i; | |
63 | unsigned long s0, e0; | |
64 | long s, e; | |
65 | ||
66 | if (thr->ts.static_trip > 0) | |
67 | return 1; | |
68 | ||
69 | /* Compute the total number of iterations. */ | |
70 | s = ws->incr + (ws->incr > 0 ? -1 : 1); | |
71 | n = (ws->end - ws->next + s) / ws->incr; | |
72 | i = thr->ts.team_id; | |
73 | ||
74 | /* Compute the "zero-based" start and end points. That is, as | |
75 | if the loop began at zero and incremented by one. */ | |
76 | q = n / nthreads; | |
77 | q += (q * nthreads != n); | |
78 | s0 = q * i; | |
79 | e0 = s0 + q; | |
80 | if (e0 > n) | |
81 | e0 = n; | |
82 | ||
83 | /* Notice when no iterations allocated for this thread. */ | |
84 | if (s0 >= e0) | |
85 | { | |
86 | thr->ts.static_trip = 1; | |
87 | return 1; | |
88 | } | |
89 | ||
90 | /* Transform these to the actual start and end numbers. */ | |
91 | s = (long)s0 * ws->incr + ws->next; | |
92 | e = (long)e0 * ws->incr + ws->next; | |
93 | ||
94 | *pstart = s; | |
95 | *pend = e; | |
96 | thr->ts.static_trip = (e0 == n ? -1 : 1); | |
97 | return 0; | |
98 | } | |
99 | else | |
100 | { | |
101 | unsigned long n, s0, e0, i, c; | |
102 | long s, e; | |
103 | ||
104 | /* Otherwise, each thread gets exactly chunk_size iterations | |
105 | (if available) each time through the loop. */ | |
106 | ||
107 | s = ws->incr + (ws->incr > 0 ? -1 : 1); | |
108 | n = (ws->end - ws->next + s) / ws->incr; | |
109 | i = thr->ts.team_id; | |
110 | c = ws->chunk_size; | |
111 | ||
112 | /* Initial guess is a C sized chunk positioned nthreads iterations | |
113 | in, offset by our thread number. */ | |
114 | s0 = (thr->ts.static_trip * nthreads + i) * c; | |
115 | e0 = s0 + c; | |
116 | ||
117 | /* Detect overflow. */ | |
118 | if (s0 >= n) | |
119 | return 1; | |
120 | if (e0 > n) | |
121 | e0 = n; | |
122 | ||
123 | /* Transform these to the actual start and end numbers. */ | |
124 | s = (long)s0 * ws->incr + ws->next; | |
125 | e = (long)e0 * ws->incr + ws->next; | |
126 | ||
127 | *pstart = s; | |
128 | *pend = e; | |
129 | ||
130 | if (e0 == n) | |
131 | thr->ts.static_trip = -1; | |
132 | else | |
133 | thr->ts.static_trip++; | |
134 | return 0; | |
135 | } | |
136 | } | |
137 | ||
138 | ||
139 | /* This function implements the DYNAMIC scheduling method. Arguments are | |
140 | as for gomp_iter_static_next. This function must be called with ws->lock | |
141 | held. */ | |
142 | ||
143 | bool | |
144 | gomp_iter_dynamic_next_locked (long *pstart, long *pend) | |
145 | { | |
146 | struct gomp_thread *thr = gomp_thread (); | |
147 | struct gomp_work_share *ws = thr->ts.work_share; | |
148 | long start, end, chunk, left; | |
149 | ||
150 | start = ws->next; | |
151 | if (start == ws->end) | |
152 | return false; | |
153 | ||
a68ab351 | 154 | chunk = ws->chunk_size; |
953ff289 DN |
155 | left = ws->end - start; |
156 | if (ws->incr < 0) | |
157 | { | |
158 | if (chunk < left) | |
159 | chunk = left; | |
160 | } | |
161 | else | |
162 | { | |
163 | if (chunk > left) | |
164 | chunk = left; | |
165 | } | |
166 | end = start + chunk; | |
167 | ||
168 | ws->next = end; | |
169 | *pstart = start; | |
170 | *pend = end; | |
171 | return true; | |
172 | } | |
173 | ||
174 | ||
175 | #ifdef HAVE_SYNC_BUILTINS | |
176 | /* Similar, but doesn't require the lock held, and uses compare-and-swap | |
177 | instead. Note that the only memory value that changes is ws->next. */ | |
178 | ||
179 | bool | |
180 | gomp_iter_dynamic_next (long *pstart, long *pend) | |
181 | { | |
182 | struct gomp_thread *thr = gomp_thread (); | |
183 | struct gomp_work_share *ws = thr->ts.work_share; | |
184 | long start, end, nend, chunk, incr; | |
185 | ||
953ff289 DN |
186 | end = ws->end; |
187 | incr = ws->incr; | |
a68ab351 JJ |
188 | chunk = ws->chunk_size; |
189 | ||
190 | if (__builtin_expect (ws->mode, 1)) | |
191 | { | |
192 | long tmp = __sync_fetch_and_add (&ws->next, chunk); | |
193 | if (incr > 0) | |
194 | { | |
195 | if (tmp >= end) | |
196 | return false; | |
197 | nend = tmp + chunk; | |
198 | if (nend > end) | |
199 | nend = end; | |
200 | *pstart = tmp; | |
201 | *pend = nend; | |
202 | return true; | |
203 | } | |
204 | else | |
205 | { | |
206 | if (tmp <= end) | |
207 | return false; | |
208 | nend = tmp + chunk; | |
209 | if (nend < end) | |
210 | nend = end; | |
211 | *pstart = tmp; | |
212 | *pend = nend; | |
213 | return true; | |
214 | } | |
215 | } | |
953ff289 | 216 | |
a68ab351 | 217 | start = ws->next; |
953ff289 DN |
218 | while (1) |
219 | { | |
220 | long left = end - start; | |
221 | long tmp; | |
222 | ||
223 | if (start == end) | |
224 | return false; | |
225 | ||
226 | if (incr < 0) | |
227 | { | |
228 | if (chunk < left) | |
229 | chunk = left; | |
230 | } | |
231 | else | |
232 | { | |
233 | if (chunk > left) | |
234 | chunk = left; | |
235 | } | |
236 | nend = start + chunk; | |
237 | ||
238 | tmp = __sync_val_compare_and_swap (&ws->next, start, nend); | |
239 | if (__builtin_expect (tmp == start, 1)) | |
240 | break; | |
241 | ||
242 | start = tmp; | |
243 | } | |
244 | ||
245 | *pstart = start; | |
246 | *pend = nend; | |
247 | return true; | |
248 | } | |
249 | #endif /* HAVE_SYNC_BUILTINS */ | |
250 | ||
251 | ||
252 | /* This function implements the GUIDED scheduling method. Arguments are | |
253 | as for gomp_iter_static_next. This function must be called with the | |
254 | work share lock held. */ | |
255 | ||
256 | bool | |
257 | gomp_iter_guided_next_locked (long *pstart, long *pend) | |
258 | { | |
259 | struct gomp_thread *thr = gomp_thread (); | |
260 | struct gomp_work_share *ws = thr->ts.work_share; | |
261 | struct gomp_team *team = thr->ts.team; | |
262 | unsigned long nthreads = team ? team->nthreads : 1; | |
263 | unsigned long n, q; | |
264 | long start, end; | |
265 | ||
266 | if (ws->next == ws->end) | |
267 | return false; | |
268 | ||
9e775963 JJ |
269 | start = ws->next; |
270 | n = (ws->end - start) / ws->incr; | |
953ff289 DN |
271 | q = (n + nthreads - 1) / nthreads; |
272 | ||
273 | if (q < ws->chunk_size) | |
274 | q = ws->chunk_size; | |
9e775963 JJ |
275 | if (q <= n) |
276 | end = start + q * ws->incr; | |
277 | else | |
278 | end = ws->end; | |
953ff289 DN |
279 | |
280 | ws->next = end; | |
281 | *pstart = start; | |
282 | *pend = end; | |
283 | return true; | |
284 | } | |
285 | ||
286 | #ifdef HAVE_SYNC_BUILTINS | |
287 | /* Similar, but doesn't require the lock held, and uses compare-and-swap | |
288 | instead. Note that the only memory value that changes is ws->next. */ | |
289 | ||
290 | bool | |
291 | gomp_iter_guided_next (long *pstart, long *pend) | |
292 | { | |
293 | struct gomp_thread *thr = gomp_thread (); | |
294 | struct gomp_work_share *ws = thr->ts.work_share; | |
295 | struct gomp_team *team = thr->ts.team; | |
296 | unsigned long nthreads = team ? team->nthreads : 1; | |
297 | long start, end, nend, incr; | |
298 | unsigned long chunk_size; | |
299 | ||
300 | start = ws->next; | |
301 | end = ws->end; | |
302 | incr = ws->incr; | |
303 | chunk_size = ws->chunk_size; | |
304 | ||
305 | while (1) | |
306 | { | |
307 | unsigned long n, q; | |
308 | long tmp; | |
309 | ||
310 | if (start == end) | |
311 | return false; | |
312 | ||
9e775963 | 313 | n = (end - start) / incr; |
953ff289 DN |
314 | q = (n + nthreads - 1) / nthreads; |
315 | ||
316 | if (q < chunk_size) | |
317 | q = chunk_size; | |
9e775963 JJ |
318 | if (__builtin_expect (q <= n, 1)) |
319 | nend = start + q * incr; | |
320 | else | |
321 | nend = end; | |
953ff289 DN |
322 | |
323 | tmp = __sync_val_compare_and_swap (&ws->next, start, nend); | |
324 | if (__builtin_expect (tmp == start, 1)) | |
325 | break; | |
326 | ||
327 | start = tmp; | |
328 | } | |
329 | ||
330 | *pstart = start; | |
331 | *pend = nend; | |
332 | return true; | |
333 | } | |
334 | #endif /* HAVE_SYNC_BUILTINS */ |