aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom de Vries <tdevries@suse.de>2018-12-19 14:20:54 +0000
committerTom de Vries <vries@gcc.gnu.org>2018-12-19 14:20:54 +0000
commita152954ea4fee516e83b4f75a17818fbc8d555bb (patch)
tree33b3a2c426ea95171664075290111d1d59c9d21a
parent49188cd1f2deb943e4047dbffba7a333875d6479 (diff)
downloadgcc-a152954ea4fee516e83b4f75a17818fbc8d555bb.zip
gcc-a152954ea4fee516e83b4f75a17818fbc8d555bb.tar.gz
gcc-a152954ea4fee516e83b4f75a17818fbc8d555bb.tar.bz2
[nvptx] Commit passing pr85381-*.c test-cases
Add pr85381*.c test-cases that are already passing without the fix for PR85381. Build and reg-tested on x86_64 with nvptx accelerator. 2018-12-19 Tom de Vries <tdevries@suse.de> * testsuite/libgomp.oacc-c-c++-common/pr85381-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr85381-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr85381-4.c: New test. From-SVN: r267268
-rw-r--r--libgomp/ChangeLog6
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c36
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c35
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c27
4 files changed, 104 insertions, 0 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index ad0abb8..ae8801e 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,5 +1,11 @@
2018-12-19 Tom de Vries <tdevries@suse.de>
+ * testsuite/libgomp.oacc-c-c++-common/pr85381-2.c: New test.
+ * testsuite/libgomp.oacc-c-c++-common/pr85381-3.c: New test.
+ * testsuite/libgomp.oacc-c-c++-common/pr85381-4.c: New test.
+
+2018-12-19 Tom de Vries <tdevries@suse.de>
+
* testsuite/lib/libgomp.exp: Add load_lib of scanoffloadrtl.exp.
* testsuite/libgomp.oacc-c-c++-common/nvptx-merged-loop.c: Move from
gcc/testsuite/gcc.dg/goacc.
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c
new file mode 100644
index 0000000..6570c64
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c
@@ -0,0 +1,36 @@
+/* { dg-additional-options "-save-temps" } */
+/* { dg-do run { target openacc_nvidia_accel_selected } }
+ { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+
+int
+main (void)
+{
+ int v1;
+
+ #pragma acc parallel
+ #pragma acc loop worker
+ for (v1 = 0; v1 < 20; v1 += 2)
+ ;
+
+ return 0;
+}
+
+/* Todo: Both bar.syncs can be removed.
+ Atm we generate this dead code in between forked and joining:
+
+ mov.u32 %r28, %ntid.y;
+ mov.u32 %r29, %tid.y;
+ add.u32 %r30, %r29, %r29;
+ setp.gt.s32 %r31, %r30, 19;
+ @%r31 bra $L2;
+ add.u32 %r25, %r28, %r28;
+ mov.u32 %r24, %r30;
+ $L3:
+ add.u32 %r24, %r24, %r25;
+ setp.le.s32 %r33, %r24, 19;
+ @%r33 bra $L3;
+ $L2:
+
+ so the loop is not recognized as an empty loop (which we detect by seeing if
+ joining immediately follows forked). */
+/* { dg-final { scan-assembler-times "bar.sync" 2 } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c
new file mode 100644
index 0000000..c5d1c5a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c
@@ -0,0 +1,35 @@
+/* { dg-additional-options "-save-temps -w" } */
+/* { dg-do run { target openacc_nvidia_accel_selected } }
+ { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+
+int a;
+#pragma acc declare create(a)
+
+#pragma acc routine vector
+void __attribute__((noinline, noclone))
+foo_v (void)
+{
+ a = 1;
+}
+
+#pragma acc routine worker
+void __attribute__((noinline, noclone))
+foo_w (void)
+{
+ a = 2;
+}
+
+int
+main (void)
+{
+
+ #pragma acc parallel
+ foo_v ();
+
+ #pragma acc parallel
+ foo_w ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "bar.sync" } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c
new file mode 100644
index 0000000..d955d79
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c
@@ -0,0 +1,27 @@
+/* { dg-additional-options "-save-temps -w" } */
+/* { dg-do run { target openacc_nvidia_accel_selected } }
+ { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+
+#define n 1024
+
+int
+main (void)
+{
+ #pragma acc parallel
+ {
+ #pragma acc loop worker
+ for (int i = 0; i < n; i++)
+ ;
+
+ #pragma acc loop worker
+ for (int i = 0; i < n; i++)
+ ;
+ }
+
+ return 0;
+}
+
+/* Atm, %ntid.y is broadcast from one loop to the next, so there are 2 bar.syncs
+ for that (the other two are there for the same reason as in pr85381-2.c).
+ Todo: Recompute %ntid.y instead of broadcasting it. */
+/* { dg-final { scan-assembler-times "bar.sync" 4 } } */