/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper that are not
  // "distribute" schedules, the only useful ones among them are dynamic, so
  // they cannot be seen here: this code path is executed only for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
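
  // Worked example of the trip-count formula above (illustrative only, not
  // part of the runtime): for the loop "for (i = 0; i <= 10; i += 3)" the
  // bounds arrive as *plower = 0, *pupper = 10, incr = 3, so
  //   trip_count = (UT)(10 - 0) / 3 + 1 = 4,
  // matching the iterations {0, 3, 6, 9}. The subtraction is done in the
  // unsigned type UT because *pupper - *plower may not fit in the signed type.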

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }
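
  // Illustrative instance of the overflow detected above: for an unsigned
  // 32-bit loop with *plower = 0, *pupper = UINT_MAX and incr = 1,
  // trip_count = UINT_MAX - 0 + 1 wraps around to 0 even though the bounds
  // differ, so the iteration range is reported as too large.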

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
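
  // Worked example for the kmp_sch_static_balanced branch above (illustrative
  // numbers only): trip_count = 10, nth = 4 gives small_chunk = 2 and
  // extras = 2, so threads 0..3 receive 3, 3, 2, 2 iterations respectively;
  // the first "extras" threads absorb the remainder, and thread nth-1 owns
  // the last iteration.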
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
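
  // Worked example for the kmp_sch_static_chunked case above (illustrative):
  // with incr = 1, chunk = 2 and nth = 3, span = 2 and *pstride = 6, so
  // thread tid starts at *plower + 2 * tid and owns chunks {lb, lb+1},
  // {lb+6, lb+7}, ... in round-robin order. For trip_count = 10 the last
  // chunk has index (10 - 1) / 2 = 4 and 4 % 3 == 1, so tid 1 reports
  // *plastiter.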
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
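
  // Note on the chunk adjustment in the case above (illustrative numbers):
  // (span + chunk - 1) & ~(chunk - 1) rounds span up to a multiple of chunk;
  // the bit trick is exact only when chunk is a power of two, which
  // presumably holds for the simd-width chunks this schedule targets. E.g.
  // with trip_count = 100 and nth = 8, span = 13, and chunk = 4 is adjusted
  // up to 16.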
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are maintained by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0      - compile-time check
      //   for(i=10;i<0;--i) // incr < 0      - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr)  // where incr<0
      //   for(i=10;i>0;i-=incr)  // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only the masters of some teams get a single iteration; the other
    // threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
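
// End-to-end sketch of __kmp_dist_for_static_init above (illustrative
// numbers, assuming the balanced flavor, i.e. __kmp_static ==
// kmp_sch_static_balanced): trip_count = 100, nteams = 2, nth = 4, incr = 1,
// schedule = kmp_sch_static. Each team first receives 50 iterations (team 0
// gets *pupperDist bounding [0,49], team 1 gets [50,99]); the per-team trip
// count is then recomputed as 50 and split 13/13/12/12 across the four
// threads by the nested kmp_sch_static case.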

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks. The last-iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are maintained by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0      - compile-time check
      //   for(i=10;i<0;--i) // incr < 0      - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr)  // where incr<0
      //   for(i=10;i>0;i-=incr)  // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
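
// Worked example for __kmp_team_static_init above (illustrative numbers
// only): lower = 0, upper = 99, incr = 1, chunk = 10, nteams = 4 give
// span = 10 and *p_st = 40, so team_id receives the first chunk
// [10*team_id, 10*team_id+9] and reaches its later chunks by advancing the
// bounds by *p_st; team 0 owns [0,9], [40,49], [80,89]. The last chunk index
// is (100-1)/10 = 9 and 9 % 4 == 1, so team 1 executes the last iteration.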

//------------------------------------------------------------------------------
extern "C" {

void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}
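
/* Illustrative sketch of how a compiler typically lowers
   "#pragma omp for schedule(static)" onto the entry point above; the ident_t
   and the variable names are hypothetical, and real code generation differs
   by compiler:

     kmp_int32 last = 0, lb = 0, ub = 99, st = 1;
     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
                              &st, 1, 1);
     for (kmp_int32 i = lb; i <= ub; ++i)
       body(i); // each thread runs only its own sub-range [lb, ub]
     __kmpc_for_static_fini(&loc, gtid);

   Every thread passes the full [0,99] bounds in and reads back its private
   sub-range (and, for chunked schedules, the stride) from lb/ub/st. */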

void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}

void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}

void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

} // extern "C"