aboutsummaryrefslogtreecommitdiff
path: root/openmp/runtime
diff options
context:
space:
mode:
author Michael Kruse <llvm-project@meinersbur.de> 2024-05-13 16:10:58 +0200
committer GitHub <noreply@github.com> 2024-05-13 16:10:58 +0200
commit b0b6c16b470a7d5d9c63765058cca0ebe07ad57d (patch)
tree a74a3acc0ac7eba93e701162feed0377c3201b86 /openmp/runtime
parent c4e9e41199127bb288e84e9477da99f28941edb3 (diff)
download llvm-b0b6c16b470a7d5d9c63765058cca0ebe07ad57d.zip
llvm-b0b6c16b470a7d5d9c63765058cca0ebe07ad57d.tar.gz
llvm-b0b6c16b470a7d5d9c63765058cca0ebe07ad57d.tar.bz2
[Clang][OpenMP][Tile] Allow non-constant tile sizes. (#91345)
Allow non-constants in the `sizes` clause such as
```
#pragma omp tile sizes(a)
for (int i = 0; i < n; ++i)
```
This is permitted since tile was introduced in [OpenMP 5.1](https://www.openmp.org/spec-html/5.1/openmpsu53.html#x78-860002.11.9).

It is possible to sneak in negative numbers at runtime, as in
```
int a = -1;
#pragma omp tile sizes(a)
```
Even though this is not well-formed, it should still result in every loop iteration being executed exactly once, an invariant of the tile construct that we should ensure.

`ParseOpenMPExprListClause` is extracted so that it can be reused by the `permutation` clause of the `interchange` construct. Some care was put into ensuring correct behavior in template contexts.
Diffstat (limited to 'openmp/runtime')
-rw-r--r-- openmp/runtime/test/transform/tile/intfor.c 191
-rw-r--r-- openmp/runtime/test/transform/tile/negtile_intfor.c 44
-rw-r--r-- openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intfor.cpp 100
3 files changed, 335 insertions, 0 deletions
diff --git a/openmp/runtime/test/transform/tile/intfor.c b/openmp/runtime/test/transform/tile/intfor.c
new file mode 100644
index 0000000..4a930ea
--- /dev/null
+++ b/openmp/runtime/test/transform/tile/intfor.c
@@ -0,0 +1,191 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+// TODO: The OpenMP specification explicitly does not define when and how often
+// expressions in the clause are evaluated. Currently Clang re-evaluates them
+// whenever needed, but function calls in clauses are not common. A better
+// implementation would evaluate each one just once and reuse the result.
+static int tilesize(int i) {
+ printf("tilesize(%d)\n", i); // Trace each call; the CHECK lines count them.
+ return 3; // Same tile size for every argument.
+}
+
+int main() {
+ printf("do\n"); // Start marker matched by the first CHECK line.
+#pragma omp tile sizes(tilesize(1), tilesize(2))
+ for (int i = 7; i < 19; i += 3) // i = 7, 10, 13, 16
+ for (int j = 7; j < 20; j += 3) // j = 7, 10, 13, 16, 19
+ printf("i=%d j=%d\n", i, j);
+ printf("done\n"); // End marker matched by the last CHECK line.
+ return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=7 j=7
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=7 j=10
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=7 j=13
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=10 j=7
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=10 j=10
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=10 j=13
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=13 j=7
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=13 j=10
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=13 j=13
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=7 j=16
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=7 j=19
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=10 j=16
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=10 j=19
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=13 j=16
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=13 j=19
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=16 j=7
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=16 j=10
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=16 j=13
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=16 j=16
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: i=16 j=19
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(2)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: tilesize(1)
+// CHECK-NEXT: done
\ No newline at end of file
diff --git a/openmp/runtime/test/transform/tile/negtile_intfor.c b/openmp/runtime/test/transform/tile/negtile_intfor.c
new file mode 100644
index 0000000..8784d9e
--- /dev/null
+++ b/openmp/runtime/test/transform/tile/negtile_intfor.c
@@ -0,0 +1,44 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int tilesize = -2; // Deliberately negative; used as both tile sizes below.
+
+int main() {
+ printf("do\n"); // Start marker matched by the first CHECK line.
+#pragma omp tile sizes(tilesize, tilesize)
+ for (int i = 7; i < 19; i += 3) // i = 7, 10, 13, 16
+ for (int j = 7; j < 20; j += 3) // j = 7, 10, 13, 16, 19
+ printf("i=%d j=%d\n", i, j);
+ printf("done\n"); // End marker matched by the last CHECK line.
+ return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: i=7 j=7
+// CHECK-NEXT: i=7 j=10
+// CHECK-NEXT: i=7 j=13
+// CHECK-NEXT: i=7 j=16
+// CHECK-NEXT: i=7 j=19
+// CHECK-NEXT: i=10 j=7
+// CHECK-NEXT: i=10 j=10
+// CHECK-NEXT: i=10 j=13
+// CHECK-NEXT: i=10 j=16
+// CHECK-NEXT: i=10 j=19
+// CHECK-NEXT: i=13 j=7
+// CHECK-NEXT: i=13 j=10
+// CHECK-NEXT: i=13 j=13
+// CHECK-NEXT: i=13 j=16
+// CHECK-NEXT: i=13 j=19
+// CHECK-NEXT: i=16 j=7
+// CHECK-NEXT: i=16 j=10
+// CHECK-NEXT: i=16 j=13
+// CHECK-NEXT: i=16 j=16
+// CHECK-NEXT: i=16 j=19
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intfor.cpp b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intfor.cpp
new file mode 100644
index 0000000..f4c2af6
--- /dev/null
+++ b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intfor.cpp
@@ -0,0 +1,100 @@
+// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdio>
+
+int main() {
+ printf("do\n"); // Start marker matched by the first CHECK line.
+#pragma omp parallel for collapse(3) num_threads(1)
+ for (int i = 0; i < 3; ++i)
+#pragma omp tile sizes(3, 3)
+ for (int j = 0; j < 4; ++j) // 4 iterations: a full tile of 3, then 1 left
+ for (int k = 0; k < 5; ++k) // 5 iterations: a full tile of 3, then 2 left
+ printf("i=%d j=%d k=%d\n", i, j, k);
+ printf("done\n"); // End marker matched by the last CHECK line.
+ return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+
+// Full tile
+// CHECK-NEXT: i=0 j=0 k=0
+// CHECK-NEXT: i=0 j=0 k=1
+// CHECK-NEXT: i=0 j=0 k=2
+// CHECK-NEXT: i=0 j=1 k=0
+// CHECK-NEXT: i=0 j=1 k=1
+// CHECK-NEXT: i=0 j=1 k=2
+// CHECK-NEXT: i=0 j=2 k=0
+// CHECK-NEXT: i=0 j=2 k=1
+// CHECK-NEXT: i=0 j=2 k=2
+
+// Partial tile
+// CHECK-NEXT: i=0 j=0 k=3
+// CHECK-NEXT: i=0 j=0 k=4
+// CHECK-NEXT: i=0 j=1 k=3
+// CHECK-NEXT: i=0 j=1 k=4
+// CHECK-NEXT: i=0 j=2 k=3
+// CHECK-NEXT: i=0 j=2 k=4
+
+// Partial tile
+// CHECK-NEXT: i=0 j=3 k=0
+// CHECK-NEXT: i=0 j=3 k=1
+// CHECK-NEXT: i=0 j=3 k=2
+
+// Partial tile
+// CHECK-NEXT: i=0 j=3 k=3
+// CHECK-NEXT: i=0 j=3 k=4
+
+// Full tile
+// CHECK-NEXT: i=1 j=0 k=0
+// CHECK-NEXT: i=1 j=0 k=1
+// CHECK-NEXT: i=1 j=0 k=2
+// CHECK-NEXT: i=1 j=1 k=0
+// CHECK-NEXT: i=1 j=1 k=1
+// CHECK-NEXT: i=1 j=1 k=2
+// CHECK-NEXT: i=1 j=2 k=0
+// CHECK-NEXT: i=1 j=2 k=1
+// CHECK-NEXT: i=1 j=2 k=2
+
+// Partial tiles
+// CHECK-NEXT: i=1 j=0 k=3
+// CHECK-NEXT: i=1 j=0 k=4
+// CHECK-NEXT: i=1 j=1 k=3
+// CHECK-NEXT: i=1 j=1 k=4
+// CHECK-NEXT: i=1 j=2 k=3
+// CHECK-NEXT: i=1 j=2 k=4
+// CHECK-NEXT: i=1 j=3 k=0
+// CHECK-NEXT: i=1 j=3 k=1
+// CHECK-NEXT: i=1 j=3 k=2
+// CHECK-NEXT: i=1 j=3 k=3
+// CHECK-NEXT: i=1 j=3 k=4
+
+// Full tile
+// CHECK-NEXT: i=2 j=0 k=0
+// CHECK-NEXT: i=2 j=0 k=1
+// CHECK-NEXT: i=2 j=0 k=2
+// CHECK-NEXT: i=2 j=1 k=0
+// CHECK-NEXT: i=2 j=1 k=1
+// CHECK-NEXT: i=2 j=1 k=2
+// CHECK-NEXT: i=2 j=2 k=0
+// CHECK-NEXT: i=2 j=2 k=1
+// CHECK-NEXT: i=2 j=2 k=2
+
+// Partial tiles
+// CHECK-NEXT: i=2 j=0 k=3
+// CHECK-NEXT: i=2 j=0 k=4
+// CHECK-NEXT: i=2 j=1 k=3
+// CHECK-NEXT: i=2 j=1 k=4
+// CHECK-NEXT: i=2 j=2 k=3
+// CHECK-NEXT: i=2 j=2 k=4
+// CHECK-NEXT: i=2 j=3 k=0
+// CHECK-NEXT: i=2 j=3 k=1
+// CHECK-NEXT: i=2 j=3 k=2
+// CHECK-NEXT: i=2 j=3 k=3
+// CHECK-NEXT: i=2 j=3 k=4
+// CHECK-NEXT: done