// RUN: %libomp-cxx-compile
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
//
// RUN: %libomp-cxx-compile -DUSE_HIDDEN_HELPERS=1
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run

// This test stresses the task team mechanism by running a simple
// increment task over and over with varying number of threads and nesting.
// The test covers nested serial teams and mixing serial teams with
// normal active teams.

#include <assert.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

// The number of times to run each test
#define NTIMES 5

// Regular single increment task.
// Creates one explicit task that atomically increments *a.
void task_inc_a(int *a) {
#pragma omp task
  {
#pragma omp atomic
    (*a)++;
  }
}

// Splitting increment task that binary splits the incrementing task.
// Each task covers the inclusive range [low, high]: a range of one element
// increments *a once, a larger range spawns two child tasks for its halves,
// and an empty range (low > high) does nothing. A call with (1, n) therefore
// adds n to *a once all generated tasks have run.
void task_inc_split_a(int *a, int low, int high) {
#pragma omp task firstprivate(low, high)
  {
    if (low == high) {
#pragma omp atomic
      (*a)++;
    } else if (low < high) {
      int mid = (high - low) / 2 + low;
      task_inc_split_a(a, low, mid);
      task_inc_split_a(a, mid + 1, high);
    }
  }
}

#ifdef USE_HIDDEN_HELPERS
// Hidden helper tasks force serial regions to create task teams.
// Increments *a atomically inside a `target ... nowait` region that maps
// a[0] tofrom.
void task_inc_a_hidden_helper(int *a) {
#pragma omp target map(tofrom : a[0]) nowait
  {
#pragma omp atomic
    (*a)++;
  }
}
#else
// Detached tasks force serial regions to create task teams.
// The task increments *a atomically and then fulfills its own detach event.
void task_inc_a_detached(int *a, omp_event_handle_t handle) {
#pragma omp task detach(handle)
  {
#pragma omp atomic
    (*a)++;
    omp_fulfill_event(handle);
  }
}
#endif

// Verifies that *a equals expected; otherwise prints a diagnostic to stderr
// and terminates the whole process with EXIT_FAILURE.
void check_a(int *a, int expected) {
  if (*a != expected) {
    fprintf(stderr,
            "FAIL: a = %d instead of expected = %d. Compile with "
            "-DVERBOSE for more verbose output.\n",
            *a, expected);
    exit(EXIT_FAILURE);
  }
}

// Every thread creates a single "increment" task.
// Intended to be called by every thread of a parallel region, with expected
// equal to the number of threads in that region (see test_base). The counter
// is checked after each of the three task phases, with barriers separating
// task creation from the checks.
void test_tasks(omp_event_handle_t *handles, int expected, int *a) {
  int tid = omp_get_thread_num();

  // Phase 1: one plain task per thread.
  task_inc_a(a);
#pragma omp barrier
  check_a(a, expected);
#pragma omp barrier
  check_a(a, expected);
#pragma omp barrier

  // Phase 2: one hidden-helper or detached task per thread.
#ifdef USE_HIDDEN_HELPERS
  task_inc_a_hidden_helper(a);
#else
  task_inc_a_detached(a, handles[tid]);
#endif

#pragma omp barrier
  check_a(a, 2 * expected);
#pragma omp barrier

  // Phase 3: one plain task per thread again.
  task_inc_a(a);
#pragma omp barrier
  check_a(a, 3 * expected);
}

// Testing single level of parallelism with increment tasks.
// Allocates one event-handle slot per thread (only read in the detached-task
// build), runs test_tasks in a parallel region of nthreads threads sharing a
// zero-initialized counter, then frees the slots.
void test_base(int nthreads) {
#ifdef VERBOSE
#pragma omp master
  printf(" test_base(%d)\n", nthreads);
#endif
  int a = 0;
  omp_event_handle_t *handles;
  handles = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads);
#pragma omp parallel num_threads(nthreads) shared(a)
  {
    test_tasks(handles, nthreads, &a);
  }
  free(handles);
}

// Testing nested parallel with increment tasks
// first = nthreads of outer parallel
// second = nthreads of nested parallel
void test_nest(int first, int second) {
#ifdef VERBOSE
#pragma omp master
  printf(" test_nest(%d, %d)\n", first, second);
#endif
#pragma omp parallel num_threads(first)
  {
    test_base(second);
  }
}

// Testing 2-level nested parallels with increment tasks
// first = nthreads of outer parallel
// second = nthreads of nested parallel
// third = nthreads of second nested parallel
void test_nest2(int first, int second, int third) {
#ifdef VERBOSE
#pragma omp master
  printf(" test_nest2(%d, %d, %d)\n", first, second, third);
#endif
#pragma omp parallel num_threads(first)
  {
    test_nest(second, third);
  }
}

// Testing 3-level nested parallels with increment tasks
// first = nthreads of outer parallel
// second = nthreads of nested parallel
// third = nthreads of second nested parallel
// fourth = nthreads of third nested parallel
void test_nest3(int first, int second, int third, int fourth) {
#ifdef VERBOSE
#pragma omp master
  printf(" test_nest3(%d, %d, %d, %d)\n", first, second, third, fourth);
#endif
#pragma omp parallel num_threads(first)
  {
    test_nest2(second, third, fourth);
  }
}

// Testing 4-level nested parallels with increment tasks
// first = nthreads of outer parallel
// second = nthreads of nested parallel
// third = nthreads of second nested parallel
// fourth = nthreads of third nested parallel
// fifth = nthreads of fourth nested parallel
void test_nest4(int first, int second, int third, int fourth, int fifth) {
#ifdef VERBOSE
#pragma omp master
  printf("test_nest4(%d, %d, %d, %d, %d)\n", first, second, third, fourth,
         fifth);
#endif
#pragma omp parallel num_threads(first)
  {
    test_nest3(second, third, fourth, fifth);
  }
}

// Single thread starts a binary splitting "increment" task
// Detached tasks are still single "increment" task
// Same three-phase structure as test_tasks, except that phases 1 and 3 have
// a single thread seed task_inc_split_a over [1, expected], which contributes
// `expected` increments in total.
void test_tasks_split(omp_event_handle_t *handles, int expected, int *a) {
  int tid = omp_get_thread_num();

#pragma omp single
  task_inc_split_a(a, 1, expected); // task team A
#pragma omp barrier
  check_a(a, expected);
#pragma omp barrier
  check_a(a, expected);
#pragma omp barrier

#ifdef USE_HIDDEN_HELPERS
  task_inc_a_hidden_helper(a);
#else
  task_inc_a_detached(a, handles[tid]);
#endif

#pragma omp barrier
  check_a(a, 2 * expected);
#pragma omp barrier
#pragma omp single
  task_inc_split_a(a, 1, expected); // task team B
#pragma omp barrier
  check_a(a, 3 * expected);
}

// Testing single level of parallelism with splitting incrementing tasks.
// Mirrors test_base but drives test_tasks_split instead of test_tasks.
void test_base_split(int nthreads) {
#ifdef VERBOSE
#pragma omp master
  printf(" test_base_split(%d)\n", nthreads);
#endif
  int a = 0;
  omp_event_handle_t *handles;
  handles = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads);
#pragma omp parallel num_threads(nthreads) shared(a)
  {
    test_tasks_split(handles, nthreads, &a);
  }
  free(handles);
}

// Testing nested parallels with splitting tasks
// first = nthreads of outer parallel
// second = nthreads of nested parallel
void test_nest_split(int first, int second) { #ifdef VERBOSE #pragma omp master printf(" test_nest_split(%d, %d)\n", first, second); #endif #pragma omp parallel num_threads(first) { test_base_split(second); } } // Testing doubly nested parallels with splitting tasks // first = nthreads of outer parallel // second = nthreads of nested parallel // third = nthreads of second nested parallel void test_nest2_split(int first, int second, int third) { #ifdef VERBOSE #pragma omp master printf("test_nest2_split(%d, %d, %d)\n", first, second, third); #endif #pragma omp parallel num_threads(first) { test_nest_split(second, third); } } template void run_ntimes(int n, void (*func)(Args...), Args... args) { for (int i = 0; i < n; ++i) { func(args...); } } int main() { omp_set_max_active_levels(5); run_ntimes(NTIMES, test_base, 4); run_ntimes(NTIMES, test_base, 1); run_ntimes(NTIMES, test_base, 8); run_ntimes(NTIMES, test_base, 2); run_ntimes(NTIMES, test_base, 6); run_ntimes(NTIMES, test_nest, 1, 1); run_ntimes(NTIMES, test_nest, 1, 5); run_ntimes(NTIMES, test_nest, 2, 6); run_ntimes(NTIMES, test_nest, 1, 1); run_ntimes(NTIMES, test_nest, 4, 3); run_ntimes(NTIMES, test_nest, 3, 2); run_ntimes(NTIMES, test_nest, 1, 1); run_ntimes(NTIMES, test_nest2, 1, 1, 2); run_ntimes(NTIMES, test_nest2, 1, 2, 1); run_ntimes(NTIMES, test_nest2, 2, 2, 1); run_ntimes(NTIMES, test_nest2, 2, 1, 1); run_ntimes(NTIMES, test_nest2, 4, 2, 1); run_ntimes(NTIMES, test_nest2, 4, 2, 2); run_ntimes(NTIMES, test_nest2, 1, 1, 1); run_ntimes(NTIMES, test_nest2, 4, 2, 2); run_ntimes(NTIMES, test_nest3, 1, 1, 1, 1); run_ntimes(NTIMES, test_nest3, 1, 2, 1, 1); run_ntimes(NTIMES, test_nest3, 1, 1, 2, 1); run_ntimes(NTIMES, test_nest3, 1, 1, 1, 2); run_ntimes(NTIMES, test_nest3, 2, 1, 1, 1); run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 1); run_ntimes(NTIMES, test_nest4, 2, 1, 1, 1, 1); run_ntimes(NTIMES, test_nest4, 1, 2, 1, 1, 1); run_ntimes(NTIMES, test_nest4, 1, 1, 2, 1, 1); run_ntimes(NTIMES, test_nest4, 1, 1, 1, 
2, 1); run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 2); run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 1); run_ntimes(NTIMES, test_nest4, 1, 2, 1, 2, 1); run_ntimes(NTIMES, test_base_split, 4); run_ntimes(NTIMES, test_base_split, 2); run_ntimes(NTIMES, test_base_split, 7); run_ntimes(NTIMES, test_base_split, 1); run_ntimes(NTIMES, test_nest_split, 4, 2); run_ntimes(NTIMES, test_nest_split, 2, 1); run_ntimes(NTIMES, test_nest_split, 7, 2); run_ntimes(NTIMES, test_nest_split, 1, 1); run_ntimes(NTIMES, test_nest_split, 1, 4); run_ntimes(NTIMES, test_nest2_split, 1, 1, 2); run_ntimes(NTIMES, test_nest2_split, 1, 2, 1); run_ntimes(NTIMES, test_nest2_split, 2, 2, 1); run_ntimes(NTIMES, test_nest2_split, 2, 1, 1); run_ntimes(NTIMES, test_nest2_split, 4, 2, 1); run_ntimes(NTIMES, test_nest2_split, 4, 2, 2); run_ntimes(NTIMES, test_nest2_split, 1, 1, 1); run_ntimes(NTIMES, test_nest2_split, 4, 2, 2); printf("PASS\n"); return EXIT_SUCCESS; }