aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorAndre Vieira <andre.simoesdiasvieira@arm.com>2023-11-03 19:09:07 +0000
committerAndre Vieira <andre.simoesdiasvieira@arm.com>2023-11-03 19:09:07 +0000
commitaed00696a01ac065e9ed327434ec29d1cf50179e (patch)
tree0a57b9a8842e77c3acd4667607f9c31f1721e694 /gcc
parentae8abcb81ed81456c0fe5ff8e0c060c9fb9c82d7 (diff)
downloadgcc-aed00696a01ac065e9ed327434ec29d1cf50179e.zip
gcc-aed00696a01ac065e9ed327434ec29d1cf50179e.tar.gz
gcc-aed00696a01ac065e9ed327434ec29d1cf50179e.tar.bz2
vect: allow using inbranch simdclones for masked loops
In a previous patch I did most of the work for this, but forgot to change the check for number of arguments matching between call and simdclone. This check should accept calls without a mask to be matched against simdclones with mask arguments. I also added tests to verify this feature actually works. gcc/ChangeLog: * tree-vect-stmts.cc (vectorizable_simd_clone_call): Allow unmasked calls to use masked simdclones. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-simd-clone-20.c: New file. * gfortran.dg/simd-builtins-1.h: Adapt. * gfortran.dg/simd-builtins-6.f90: Adapt.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c87
-rw-r--r--gcc/testsuite/gfortran.dg/simd-builtins-1.h1
-rw-r--r--gcc/testsuite/gfortran.dg/simd-builtins-6.f901
-rw-r--r--gcc/tree-vect-stmts.cc11
4 files changed, 97 insertions, 3 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c
new file mode 100644
index 0000000..9f51a68
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c
@@ -0,0 +1,87 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Test that simd inbranch clones work correctly. */
+
+#ifndef TYPE
+#define TYPE int
+#endif
+
+/* A simple function, declared simd "inbranch", that will be cloned; returns a + 1. */
+#pragma omp declare simd inbranch
+TYPE __attribute__((noinline))
+foo (TYPE a)
+{
+ return a + 1;
+}
+
+/* Check that "inbranch" clones are called correctly for a runtime trip count. */
+
+void __attribute__((noipa))
+masked (TYPE * __restrict a, TYPE * __restrict b, int size)
+{
+ #pragma omp simd
+ for (int i = 0; i < size; i++)
+ b[i] = foo(a[i]);
+}
+
+/* Check that "inbranch" works when there might be unrolling (constant trip count of 128). */
+
+void __attribute__((noipa))
+masked_fixed (TYPE * __restrict a, TYPE * __restrict b)
+{
+ #pragma omp simd
+ for (int i = 0; i < 128; i++)
+ b[i] = foo(a[i]);
+}
+
+/* Validate the outputs: exit with status 1 unless b[i] == i + 1 for all i < size. */
+
+void
+check_masked (TYPE *b, int size)
+{
+ for (int i = 0; i < size; i++)
+ if (b[i] != (TYPE)(i + 1))
+ {
+ __builtin_printf ("error at %d\n", i);
+ __builtin_exit (1);
+ }
+}
+
+int
+main ()
+{
+ TYPE a[1024];
+ TYPE b[1024];
+
+ for (int i = 0; i < 1024; i++)
+ a[i] = i; /* So that foo (a[i]) is expected to be i + 1. */
+
+ masked_fixed (a, b);
+ check_masked (b, 128);
+
+ /* Test power-of-two sizes from 8 to 1024 to cover machines with different
+ vectorization factors. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size);
+ check_masked (b, size);
+ }
+
+ /* Test sizes of the form 2^k - 4 that might exercise the partial vector code-path. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size-4);
+ check_masked (b, size-4);
+ }
+
+ return 0;
+}
+
+/* Ensure the in-branch simd clones are used on targets that support them. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { x86_64*-*-* } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gfortran.dg/simd-builtins-1.h b/gcc/testsuite/gfortran.dg/simd-builtins-1.h
index 88d555c..08b7351 100644
--- a/gcc/testsuite/gfortran.dg/simd-builtins-1.h
+++ b/gcc/testsuite/gfortran.dg/simd-builtins-1.h
@@ -1,4 +1,3 @@
-!GCC$ builtin (sin) attributes simd (inbranch)
!GCC$ builtin (sinf) attributes simd (notinbranch)
!GCC$ builtin (cosf) attributes simd
!GCC$ builtin (cosf) attributes simd (notinbranch)
diff --git a/gcc/testsuite/gfortran.dg/simd-builtins-6.f90 b/gcc/testsuite/gfortran.dg/simd-builtins-6.f90
index 60bcac7..2c68f9f 100644
--- a/gcc/testsuite/gfortran.dg/simd-builtins-6.f90
+++ b/gcc/testsuite/gfortran.dg/simd-builtins-6.f90
@@ -2,7 +2,6 @@
! { dg-additional-options "-nostdinc -Ofast -fdump-tree-optimized" }
! { dg-additional-options "-msse2 -mno-avx" { target i?86-*-linux* x86_64-*-linux* } }
-!GCC$ builtin (sin) attributes simd (inbranch)
!GCC$ builtin (sinf) attributes simd (notinbranch)
!GCC$ builtin (cosf) attributes simd
!GCC$ builtin (cosf) attributes simd (notinbranch)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index d374907..f895aaf 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4149,10 +4149,19 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
{
unsigned int this_badness = 0;
unsigned int num_calls;
+ /* The number of arguments in the call and the number of parameters in
+ the simdclone should match. However, when the simdclone is
+ 'inbranch', it could have one more parameter than nargs when using
+ an inbranch simdclone for a non-inbranch call, either in a
+ non-masked loop using an all-true constant mask, or inside a masked
+ loop using its mask. */
+ size_t simd_nargs = n->simdclone->nargs;
+ if (!masked_call_offset && n->simdclone->inbranch)
+ simd_nargs--;
if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
&num_calls)
|| (!n->simdclone->inbranch && (masked_call_offset > 0))
- || nargs != n->simdclone->nargs)
+ || (nargs != simd_nargs))
continue;
if (num_calls != 1)
this_badness += exact_log2 (num_calls) * 4096;