diff options
author | Tobias Burnus <tobias@codesourcery.com> | 2022-10-24 15:19:00 +0200 |
---|---|---|
committer | Tobias Burnus <tobias@codesourcery.com> | 2022-10-24 15:19:00 +0200 |
commit | 178ac530fe67e4f2fc439cc4ce89bc19d571ca31 (patch) | |
tree | 3cc708fdbbbf527804a9b46c3f596a14e442d314 /gcc | |
parent | 97d1ed67fc6a5773c8c00875bfa3616a457cf5f9 (diff) | |
download | gcc-178ac530fe67e4f2fc439cc4ce89bc19d571ca31.zip gcc-178ac530fe67e4f2fc439cc4ce89bc19d571ca31.tar.gz gcc-178ac530fe67e4f2fc439cc4ce89bc19d571ca31.tar.bz2 |
OpenMP: Fix reverse offload GOMP_TARGET_REV IFN corner cases [PR107236]
For 'target parallel' and similarly nested directives, cgraph_node's
calls_declare_variant_alt was not set in the parent region node but in
cfun->decl. Hence, pass_omp_device_lower did not handle the
internal function GOMP_TARGET_REV. The solution is to set the flag on the
cgraph node of the DECL_CONTEXT, which is set in adjust_context_and_scope.
The cgraph_node::create_clone issue is exposed with -O2 for the existing
libgomp.fortran/reverse-offload-1.f90.
PR middle-end/107236
gcc/ChangeLog:
* omp-expand.cc (expand_omp_target): Set calls_declare_variant_alt
in DECL_CONTEXT and not in cfun->decl.
* cgraphclones.cc (cgraph_node::create_clone): Copy also the
node's calls_declare_variant_alt value.
gcc/testsuite/ChangeLog:
* gfortran.dg/gomp/target-device-ancestor-6.f90: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/cgraphclones.cc | 1 | ||||
-rw-r--r-- | gcc/omp-expand.cc | 13 | ||||
-rw-r--r-- | gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f90 | 17 |
3 files changed, 24 insertions, 7 deletions
diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc index eb0fa87..bb4b3c5 100644 --- a/gcc/cgraphclones.cc +++ b/gcc/cgraphclones.cc @@ -375,6 +375,7 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count, if (!new_inlined_to) prof_count = count.combine_with_ipa_count (prof_count); new_node->count = prof_count; + new_node->calls_declare_variant_alt = this->calls_declare_variant_alt; /* Update IPA profile. Local profiles need no updating in original. */ if (update_original) diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc index 5dc0bf1..c636a17 100644 --- a/gcc/omp-expand.cc +++ b/gcc/omp-expand.cc @@ -10054,13 +10054,8 @@ expand_omp_target (struct omp_region *region) /* Handle the case that an inner ancestor:1 target is called by an outer target region. */ - if (!is_ancestor) - cgraph_node::get (child_fn)->calls_declare_variant_alt - |= cgraph_node::get (cfun->decl)->calls_declare_variant_alt; - else /* Duplicate function to create empty nonhost variant. */ + if (is_ancestor) { - /* Enable pass_omp_device_lower pass. */ - cgraph_node::get (cfun->decl)->calls_declare_variant_alt = 1; cgraph_node *fn2_node; child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn), FUNCTION_DECL, @@ -10074,7 +10069,7 @@ expand_omp_target (struct omp_region *region) TREE_PUBLIC (child_fn2) = 0; DECL_UNINLINABLE (child_fn2) = 1; DECL_EXTERNAL (child_fn2) = 0; - DECL_CONTEXT (child_fn2) = NULL_TREE; + DECL_CONTEXT (child_fn2) = DECL_CONTEXT (child_fn); DECL_INITIAL (child_fn2) = make_node (BLOCK); BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2; DECL_ATTRIBUTES (child_fn) @@ -10098,6 +10093,10 @@ expand_omp_target (struct omp_region *region) fn2_node->force_output = 1; node->offloadable = 0; + /* Enable pass_omp_device_lower pass. 
*/ + fn2_node = cgraph_node::get (DECL_CONTEXT (child_fn)); + fn2_node->calls_declare_variant_alt = 1; + t = build_decl (DECL_SOURCE_LOCATION (child_fn), RESULT_DECL, NULL_TREE, void_type_node); DECL_ARTIFICIAL (t) = 1; diff --git a/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f90 b/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f90 new file mode 100644 index 0000000..821e785 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f90 @@ -0,0 +1,17 @@ +! PR middle-end/107236 + +! Did ICE before because IFN .GOMP_TARGET_REV was not +! processed in omp-offload.cc. +! Note: Test required ENABLE_OFFLOADING being true inside GCC. + +implicit none +!$omp requires reverse_offload +!$omp target parallel num_threads(4) + !$omp target device(ancestor:1) + call foo() + !$omp end target +!$omp end target parallel +contains + subroutine foo + end +end |