aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTobias Burnus <tobias@codesourcery.com>2022-10-24 15:19:00 +0200
committerTobias Burnus <tobias@codesourcery.com>2022-10-24 15:19:00 +0200
commit178ac530fe67e4f2fc439cc4ce89bc19d571ca31 (patch)
tree3cc708fdbbbf527804a9b46c3f596a14e442d314 /gcc
parent97d1ed67fc6a5773c8c00875bfa3616a457cf5f9 (diff)
downloadgcc-178ac530fe67e4f2fc439cc4ce89bc19d571ca31.zip
gcc-178ac530fe67e4f2fc439cc4ce89bc19d571ca31.tar.gz
gcc-178ac530fe67e4f2fc439cc4ce89bc19d571ca31.tar.bz2
OpenMP: Fix reverse offload GOMP_TARGET_REV IFN corner cases [PR107236]
For 'target parallel' and similarly nested directives, cgraph_node's calls_declare_variant_alt was not set in the parent region node but in cfun->decl. Hence, pass_omp_device_lower did not handle the internal function GOMP_TARGET_REV. The solution is to set it on the DECL_CONTEXT, which is set in adjust_context_and_scope. The cgraph_node::create_clone issue is exposed with -O2 for the existing libgomp.fortran/reverse-offload-1.f90. PR middle-end/107236 gcc/ChangeLog: * omp-expand.cc (expand_omp_target): Set calls_declare_variant_alt in DECL_CONTEXT and not to cfun->decl. * cgraphclones.cc (cgraph_node::create_clone): Copy also the node's calls_declare_variant_alt value. gcc/testsuite/ChangeLog: * gfortran.dg/gomp/target-device-ancestor-6.f90: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/cgraphclones.cc1
-rw-r--r--gcc/omp-expand.cc13
-rw-r--r--gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f9017
3 files changed, 24 insertions, 7 deletions
diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc
index eb0fa87..bb4b3c5 100644
--- a/gcc/cgraphclones.cc
+++ b/gcc/cgraphclones.cc
@@ -375,6 +375,7 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
if (!new_inlined_to)
prof_count = count.combine_with_ipa_count (prof_count);
new_node->count = prof_count;
+ new_node->calls_declare_variant_alt = this->calls_declare_variant_alt;
/* Update IPA profile. Local profiles need no updating in original. */
if (update_original)
diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc
index 5dc0bf1..c636a17 100644
--- a/gcc/omp-expand.cc
+++ b/gcc/omp-expand.cc
@@ -10054,13 +10054,8 @@ expand_omp_target (struct omp_region *region)
/* Handle the case that an inner ancestor:1 target is called by an outer
target region. */
- if (!is_ancestor)
- cgraph_node::get (child_fn)->calls_declare_variant_alt
- |= cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
- else /* Duplicate function to create empty nonhost variant. */
+ if (is_ancestor)
{
- /* Enable pass_omp_device_lower pass. */
- cgraph_node::get (cfun->decl)->calls_declare_variant_alt = 1;
cgraph_node *fn2_node;
child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn),
FUNCTION_DECL,
@@ -10074,7 +10069,7 @@ expand_omp_target (struct omp_region *region)
TREE_PUBLIC (child_fn2) = 0;
DECL_UNINLINABLE (child_fn2) = 1;
DECL_EXTERNAL (child_fn2) = 0;
- DECL_CONTEXT (child_fn2) = NULL_TREE;
+ DECL_CONTEXT (child_fn2) = DECL_CONTEXT (child_fn);
DECL_INITIAL (child_fn2) = make_node (BLOCK);
BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2;
DECL_ATTRIBUTES (child_fn)
@@ -10098,6 +10093,10 @@ expand_omp_target (struct omp_region *region)
fn2_node->force_output = 1;
node->offloadable = 0;
+ /* Enable pass_omp_device_lower pass. */
+ fn2_node = cgraph_node::get (DECL_CONTEXT (child_fn));
+ fn2_node->calls_declare_variant_alt = 1;
+
t = build_decl (DECL_SOURCE_LOCATION (child_fn),
RESULT_DECL, NULL_TREE, void_type_node);
DECL_ARTIFICIAL (t) = 1;
diff --git a/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f90 b/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f90
new file mode 100644
index 0000000..821e785
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f90
@@ -0,0 +1,17 @@
+! PR middle-end/107236
+
+! Did ICE before because IFN .GOMP_TARGET_REV was not
+! processed in omp-offload.cc.
+! Note: Test required ENABLE_OFFLOADING being true inside GCC.
+
+implicit none
+!$omp requires reverse_offload
+!$omp target parallel num_threads(4)
+ !$omp target device(ancestor:1)
+ call foo()
+ !$omp end target
+!$omp end target parallel
+contains
+ subroutine foo
+ end
+end