aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Stubbs <ams@codesourcery.com>2022-07-28 16:07:22 +0100
committerAndrew Stubbs <ams@codesourcery.com>2023-02-22 13:57:11 +0000
commit3da77f217c8b2089ecba3eb201e727c3fcdcd19d (patch)
tree2f3fb8460f25af2a5bf8a8bc9e8fe19f8b5b41f1
parentc878c6586dee353e685364910e02ad1a611d4634 (diff)
downloadgcc-3da77f217c8b2089ecba3eb201e727c3fcdcd19d.zip
gcc-3da77f217c8b2089ecba3eb201e727c3fcdcd19d.tar.gz
gcc-3da77f217c8b2089ecba3eb201e727c3fcdcd19d.tar.bz2
vect: inbranch SIMD clones
There has been support for generating "inbranch" SIMD clones for a long time, but nothing actually uses them (as far as I can see). This patch add supports for a sub-set of possible cases (those using mask_mode == VOIDmode). The other cases fail to vectorize, just as before, so there should be no regressions. The sub-set of support should cover all cases needed by amdgcn, at present. gcc/ChangeLog: * internal-fn.cc (expand_MASK_CALL): New. * internal-fn.def (MASK_CALL): New. * internal-fn.h (expand_MASK_CALL): New prototype. * omp-simd-clone.cc (simd_clone_adjust_argument_types): Set vector_type for mask arguments also. * tree-if-conv.cc: Include cgraph.h. (if_convertible_stmt_p): Do if conversions for calls to SIMD calls. (predicate_statements): Convert functions to IFN_MASK_CALL. * tree-vect-loop.cc (vect_get_datarefs_in_loop): Recognise IFN_MASK_CALL as a SIMD function call. * tree-vect-stmts.cc (vectorizable_simd_clone_call): Handle IFN_MASK_CALL as an inbranch SIMD function call. Generate the mask vector arguments. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-simd-clone-16.c: New test. * gcc.dg/vect/vect-simd-clone-16b.c: New test. * gcc.dg/vect/vect-simd-clone-16c.c: New test. * gcc.dg/vect/vect-simd-clone-16d.c: New test. * gcc.dg/vect/vect-simd-clone-16e.c: New test. * gcc.dg/vect/vect-simd-clone-16f.c: New test. * gcc.dg/vect/vect-simd-clone-17.c: New test. * gcc.dg/vect/vect-simd-clone-17b.c: New test. * gcc.dg/vect/vect-simd-clone-17c.c: New test. * gcc.dg/vect/vect-simd-clone-17d.c: New test. * gcc.dg/vect/vect-simd-clone-17e.c: New test. * gcc.dg/vect/vect-simd-clone-17f.c: New test. * gcc.dg/vect/vect-simd-clone-18.c: New test. * gcc.dg/vect/vect-simd-clone-18b.c: New test. * gcc.dg/vect/vect-simd-clone-18c.c: New test. * gcc.dg/vect/vect-simd-clone-18d.c: New test. * gcc.dg/vect/vect-simd-clone-18e.c: New test. * gcc.dg/vect/vect-simd-clone-18f.c: New test.
-rw-r--r--gcc/internal-fn.cc7
-rw-r--r--gcc/internal-fn.def3
-rw-r--r--gcc/internal-fn.h1
-rw-r--r--gcc/omp-simd-clone.cc1
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c89
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c14
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c17
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c17
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c16
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c14
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c89
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c14
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c17
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c17
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c16
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c14
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c89
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c14
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c17
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c17
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c16
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c14
-rw-r--r--gcc/tree-if-conv.cc41
-rw-r--r--gcc/tree-vect-loop.cc8
-rw-r--r--gcc/tree-vect-stmts.cc152
25 files changed, 682 insertions, 32 deletions
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index a7936ff..6e81dc0 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4521,3 +4521,10 @@ void
expand_ASSUME (internal_fn, gcall *)
{
}
+
+void
+expand_MASK_CALL (internal_fn, gcall *)
+{
+ /* This IFN should only exist between ifcvt and vect passes. */
+ gcc_unreachable ();
+}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 22b4a2d..7fe742c 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -461,6 +461,9 @@ DEF_INTERNAL_FN (SPACESHIP, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (ASSUME, ECF_CONST | ECF_LEAF | ECF_NOTHROW
| ECF_LOOPING_CONST_OR_PURE, NULL)
+/* For if-conversion of inbranch SIMD clones. */
+DEF_INTERNAL_FN (MASK_CALL, ECF_NOVOPS, NULL)
+
#undef DEF_INTERNAL_INT_FN
#undef DEF_INTERNAL_FLT_FN
#undef DEF_INTERNAL_FLT_FLOATN_FN
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 2fd82a1..08922ed 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -244,6 +244,7 @@ extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
extern void expand_SPACESHIP (internal_fn, gcall *);
extern void expand_TRAP (internal_fn, gcall *);
extern void expand_ASSUME (internal_fn, gcall *);
+extern void expand_MASK_CALL (internal_fn, gcall *);
extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc
index 0949b8b..03ff86e 100644
--- a/gcc/omp-simd-clone.cc
+++ b/gcc/omp-simd-clone.cc
@@ -942,6 +942,7 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
}
sc->args[i].orig_type = base_type;
sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
+ sc->args[i].vector_type = adj.type;
}
if (node->definition)
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c
new file mode 100644
index 0000000..ce9a6da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c
@@ -0,0 +1,89 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Test that simd inbranch clones work correctly. */
+
+#ifndef TYPE
+#define TYPE int
+#endif
+
+/* A simple function that will be cloned. */
+#pragma omp declare simd
+TYPE __attribute__((noinline))
+foo (TYPE a)
+{
+ return a + 1;
+}
+
+/* Check that "inbranch" clones are called correctly. */
+
+void __attribute__((noipa))
+masked (TYPE * __restrict a, TYPE * __restrict b, int size)
+{
+ #pragma omp simd
+ for (int i = 0; i < size; i++)
+ b[i] = a[i]<1 ? foo(a[i]) : a[i];
+}
+
+/* Check that "inbranch" works when there might be unrolling. */
+
+void __attribute__((noipa))
+masked_fixed (TYPE * __restrict a, TYPE * __restrict b)
+{
+ #pragma omp simd
+ for (int i = 0; i < 128; i++)
+ b[i] = a[i]<1 ? foo(a[i]) : a[i];
+}
+
+/* Validate the outputs. */
+
+void
+check_masked (TYPE *b, int size)
+{
+ for (int i = 0; i < size; i++)
+ if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1))
+ || ((TYPE)i >= 1 && b[i] != (TYPE)i))
+ {
+ __builtin_printf ("error at %d\n", i);
+ __builtin_exit (1);
+ }
+}
+
+int
+main ()
+{
+ TYPE a[1024];
+ TYPE b[1024];
+
+ for (int i = 0; i < 1024; i++)
+ a[i] = i;
+
+ masked_fixed (a, b);
+ check_masked (b, 128);
+
+ /* Test various sizes to cover machines with different vectorization
+ factors. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size);
+ check_masked (b, size);
+ }
+
+ /* Test sizes that might exercise the partial vector code-path. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size-4);
+ check_masked (b, size-4);
+ }
+
+ return 0;
+}
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c
new file mode 100644
index 0000000..af543b6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE float
+#include "vect-simd-clone-16.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c
new file mode 100644
index 0000000..677548a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE short
+#include "vect-simd-clone-16.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=short. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c
new file mode 100644
index 0000000..a9ae993
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE char
+#include "vect-simd-clone-16.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=char. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c
new file mode 100644
index 0000000..c8b482b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE double
+#include "vect-simd-clone-16.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
new file mode 100644
index 0000000..f42ac08
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE __INT64_TYPE__
+#include "vect-simd-clone-16.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c
new file mode 100644
index 0000000..756225e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c
@@ -0,0 +1,89 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Test that simd inbranch clones work correctly. */
+
+#ifndef TYPE
+#define TYPE int
+#endif
+
+/* A simple function that will be cloned. */
+#pragma omp declare simd uniform(b)
+TYPE __attribute__((noinline))
+foo (TYPE a, TYPE b)
+{
+ return a + b;
+}
+
+/* Check that "inbranch" clones are called correctly. */
+
+void __attribute__((noipa))
+masked (TYPE * __restrict a, TYPE * __restrict b, int size)
+{
+ #pragma omp simd
+ for (int i = 0; i < size; i++)
+ b[i] = a[i]<1 ? foo(a[i], 1) : a[i];
+}
+
+/* Check that "inbranch" works when there might be unrolling. */
+
+void __attribute__((noipa))
+masked_fixed (TYPE * __restrict a, TYPE * __restrict b)
+{
+ #pragma omp simd
+ for (int i = 0; i < 128; i++)
+ b[i] = a[i]<1 ? foo(a[i], 1) : a[i];
+}
+
+/* Validate the outputs. */
+
+void
+check_masked (TYPE *b, int size)
+{
+ for (int i = 0; i < size; i++)
+ if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1))
+ || ((TYPE)i >= 1 && b[i] != (TYPE)i))
+ {
+ __builtin_printf ("error at %d\n", i);
+ __builtin_exit (1);
+ }
+}
+
+int
+main ()
+{
+ TYPE a[1024];
+ TYPE b[1024];
+
+ for (int i = 0; i < 1024; i++)
+ a[i] = i;
+
+ masked_fixed (a, b);
+ check_masked (b, 128);
+
+ /* Test various sizes to cover machines with different vectorization
+ factors. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size);
+ check_masked (b, size);
+ }
+
+ /* Test sizes that might exercise the partial vector code-path. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size-4);
+ check_masked (b, size-4);
+ }
+
+ return 0;
+}
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c
new file mode 100644
index 0000000..8731c26
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE float
+#include "vect-simd-clone-17.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c
new file mode 100644
index 0000000..6683d1a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE short
+#include "vect-simd-clone-17.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=short. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c
new file mode 100644
index 0000000..d38bde6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE char
+#include "vect-simd-clone-17.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=char. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c
new file mode 100644
index 0000000..f2a428c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE double
+#include "vect-simd-clone-17.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
new file mode 100644
index 0000000..cd05dec
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE __INT64_TYPE__
+#include "vect-simd-clone-17.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c
new file mode 100644
index 0000000..febf9fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c
@@ -0,0 +1,89 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Test that simd inbranch clones work correctly. */
+
+#ifndef TYPE
+#define TYPE int
+#endif
+
+/* A simple function that will be cloned. */
+#pragma omp declare simd uniform(b)
+TYPE __attribute__((noinline))
+foo (TYPE b, TYPE a)
+{
+ return a + b;
+}
+
+/* Check that "inbranch" clones are called correctly. */
+
+void __attribute__((noipa))
+masked (TYPE * __restrict a, TYPE * __restrict b, int size)
+{
+ #pragma omp simd
+ for (int i = 0; i < size; i++)
+ b[i] = a[i]<1 ? foo(1, a[i]) : a[i];
+}
+
+/* Check that "inbranch" works when there might be unrolling. */
+
+void __attribute__((noipa))
+masked_fixed (TYPE * __restrict a, TYPE * __restrict b)
+{
+ #pragma omp simd
+ for (int i = 0; i < 128; i++)
+ b[i] = a[i]<1 ? foo(1, a[i]) : a[i];
+}
+
+/* Validate the outputs. */
+
+void
+check_masked (TYPE *b, int size)
+{
+ for (int i = 0; i < size; i++)
+ if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1))
+ || ((TYPE)i >= 1 && b[i] != (TYPE)i))
+ {
+ __builtin_printf ("error at %d\n", i);
+ __builtin_exit (1);
+ }
+}
+
+int
+main ()
+{
+ TYPE a[1024];
+ TYPE b[1024];
+
+ for (int i = 0; i < 1024; i++)
+ a[i] = i;
+
+ masked_fixed (a, b);
+ check_masked (b, 128);
+
+ /* Test various sizes to cover machines with different vectorization
+ factors. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size);
+ check_masked (b, size);
+ }
+
+ /* Test sizes that might exercise the partial vector code-path. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size-4);
+ check_masked (b, size-4);
+ }
+
+ return 0;
+}
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c
new file mode 100644
index 0000000..120993e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE float
+#include "vect-simd-clone-18.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c
new file mode 100644
index 0000000..0d1fc6d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE short
+#include "vect-simd-clone-18.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=short. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c
new file mode 100644
index 0000000..1e6c028
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE char
+#include "vect-simd-clone-18.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=char. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c
new file mode 100644
index 0000000..9d20e52
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE double
+#include "vect-simd-clone-18.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
new file mode 100644
index 0000000..09ee7ff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE __INT64_TYPE__
+#include "vect-simd-clone-18.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index b19eebd..a7a8406 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -123,6 +123,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-ssa-dse.h"
#include "tree-vectorizer.h"
#include "tree-eh.h"
+#include "cgraph.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
@@ -1063,7 +1064,8 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt,
A statement is if-convertible if:
- it is an if-convertible GIMPLE_ASSIGN,
- it is a GIMPLE_LABEL or a GIMPLE_COND,
- - it is builtins call. */
+ - it is builtins call,
+ - it is a call to a function with a SIMD clone. */
static bool
if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
@@ -1083,13 +1085,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
tree fndecl = gimple_call_fndecl (stmt);
if (fndecl)
{
+ /* We can vectorize some builtins and functions with SIMD
+ "inbranch" clones. */
int flags = gimple_call_flags (stmt);
+ struct cgraph_node *node = cgraph_node::get (fndecl);
if ((flags & ECF_CONST)
&& !(flags & ECF_LOOPING_CONST_OR_PURE)
- /* We can only vectorize some builtins at the moment,
- so restrict if-conversion to those. */
&& fndecl_built_in_p (fndecl))
return true;
+ if (node && node->simd_clones != NULL)
+ /* Ensure that at least one clone can be "inbranch". */
+ for (struct cgraph_node *n = node->simd_clones; n != NULL;
+ n = n->simdclone->next_clone)
+ if (n->simdclone->inbranch)
+ {
+ need_to_predicate = true;
+ return true;
+ }
}
return false;
}
@@ -2613,6 +2625,29 @@ predicate_statements (loop_p loop)
gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
update_stmt (stmt);
}
+
+ /* Convert functions that have a SIMD clone to IFN_MASK_CALL. This
+ will cause the vectorizer to match the "in branch" clone variants,
+ and serves to build the mask vector in a natural way. */
+ gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
+ if (call && !gimple_call_internal_p (call))
+ {
+ tree orig_fn = gimple_call_fn (call);
+ int orig_nargs = gimple_call_num_args (call);
+ auto_vec<tree> args;
+ args.safe_push (orig_fn);
+ for (int i = 0; i < orig_nargs; i++)
+ args.safe_push (gimple_call_arg (call, i));
+ args.safe_push (cond);
+
+ /* Replace the call with a IFN_MASK_CALL that has the extra
+ condition parameter. */
+ gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
+ args);
+ gimple_call_set_lhs (new_call, gimple_call_lhs (call));
+ gsi_replace (&gsi, new_call, true);
+ }
+
lhs = gimple_get_lhs (gsi_stmt (gsi));
if (lhs && TREE_CODE (lhs) == SSA_NAME)
ssa_names.add (lhs);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 8387f76..ab7af0e 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2125,6 +2125,14 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
if (is_gimple_call (stmt) && loop->safelen)
{
tree fndecl = gimple_call_fndecl (stmt), op;
+ if (fndecl == NULL_TREE
+ && gimple_call_internal_p (stmt, IFN_MASK_CALL))
+ {
+ fndecl = gimple_call_arg (stmt, 0);
+ gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
+ fndecl = TREE_OPERAND (fndecl, 0);
+ gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
+ }
if (fndecl != NULL_TREE)
{
cgraph_node *node = cgraph_node::get (fndecl);
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index df6239a..9e5ffbe 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4004,6 +4004,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
size_t i, nargs;
tree lhs, rtype, ratype;
vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
+ int arg_offset = 0;
/* Is STMT a vectorizable call? */
gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
@@ -4011,6 +4012,15 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
fndecl = gimple_call_fndecl (stmt);
+ if (fndecl == NULL_TREE
+ && gimple_call_internal_p (stmt, IFN_MASK_CALL))
+ {
+ fndecl = gimple_call_arg (stmt, 0);
+ gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
+ fndecl = TREE_OPERAND (fndecl, 0);
+ gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
+ arg_offset = 1;
+ }
if (fndecl == NULL_TREE)
return false;
@@ -4041,7 +4051,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
/* Process function arguments. */
- nargs = gimple_call_num_args (stmt);
+ nargs = gimple_call_num_args (stmt) - arg_offset;
/* Bail out if the function has zero arguments. */
if (nargs == 0)
@@ -4059,7 +4069,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
thisarginfo.op = NULL_TREE;
thisarginfo.simd_lane_linear = false;
- op = gimple_call_arg (stmt, i);
+ op = gimple_call_arg (stmt, i + arg_offset);
if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
&thisarginfo.vectype)
|| thisarginfo.dt == vect_uninitialized_def)
@@ -4074,16 +4084,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
|| thisarginfo.dt == vect_external_def)
gcc_assert (thisarginfo.vectype == NULL_TREE);
else
- {
- gcc_assert (thisarginfo.vectype != NULL_TREE);
- if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "vector mask arguments are not supported\n");
- return false;
- }
- }
+ gcc_assert (thisarginfo.vectype != NULL_TREE);
/* For linear arguments, the analyze phase should have saved
the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
@@ -4176,9 +4177,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
if (target_badness < 0)
continue;
this_badness += target_badness * 512;
- /* FORNOW: Have to add code to add the mask argument. */
- if (n->simdclone->inbranch)
- continue;
for (i = 0; i < nargs; i++)
{
switch (n->simdclone->args[i].arg_type)
@@ -4186,7 +4184,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
case SIMD_CLONE_ARG_TYPE_VECTOR:
if (!useless_type_conversion_p
(n->simdclone->args[i].orig_type,
- TREE_TYPE (gimple_call_arg (stmt, i))))
+ TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
i = -1;
else if (arginfo[i].dt == vect_constant_def
|| arginfo[i].dt == vect_external_def
@@ -4216,7 +4214,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
i = -1;
break;
case SIMD_CLONE_ARG_TYPE_MASK:
- gcc_unreachable ();
+ break;
}
if (i == (size_t) -1)
break;
@@ -4242,18 +4240,55 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
for (i = 0; i < nargs; i++)
- if ((arginfo[i].dt == vect_constant_def
- || arginfo[i].dt == vect_external_def)
- && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
- {
- tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
- arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
- slp_node);
- if (arginfo[i].vectype == NULL
- || !constant_multiple_p (bestn->simdclone->simdlen,
- simd_clone_subparts (arginfo[i].vectype)))
+ {
+ if ((arginfo[i].dt == vect_constant_def
+ || arginfo[i].dt == vect_external_def)
+ && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
+ {
+ tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
+ arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
+ slp_node);
+ if (arginfo[i].vectype == NULL
+ || !constant_multiple_p (bestn->simdclone->simdlen,
+ simd_clone_subparts (arginfo[i].vectype)))
+ return false;
+ }
+
+ if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
+ && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "vector mask arguments are not supported.\n");
return false;
- }
+ }
+
+ if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
+ && bestn->simdclone->mask_mode == VOIDmode
+ && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
+ != simd_clone_subparts (arginfo[i].vectype)))
+ {
+ /* FORNOW we only have partial support for vector-type masks that
+ can't hold all of simdlen. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+ vect_location,
+ "in-branch vector clones are not yet"
+ " supported for mismatched vector sizes.\n");
+ return false;
+ }
+ if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
+ && bestn->simdclone->mask_mode != VOIDmode)
+ {
+ /* FORNOW don't support integer-type masks. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+ vect_location,
+ "in-branch vector clones are not yet"
+ " supported for integer mask modes.\n");
+ return false;
+ }
+ }
fndecl = bestn->decl;
nunits = bestn->simdclone->simdlen;
@@ -4343,7 +4378,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
{
unsigned int k, l, m, o;
tree atype;
- op = gimple_call_arg (stmt, i);
+ op = gimple_call_arg (stmt, i + arg_offset);
switch (bestn->simdclone->args[i].arg_type)
{
case SIMD_CLONE_ARG_TYPE_VECTOR:
@@ -4442,6 +4477,65 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
}
}
break;
+ case SIMD_CLONE_ARG_TYPE_MASK:
+ atype = bestn->simdclone->args[i].vector_type;
+ if (bestn->simdclone->mask_mode != VOIDmode)
+ {
+ /* FORNOW: this is disabled above. */
+ gcc_unreachable ();
+ }
+ else
+ {
+ tree elt_type = TREE_TYPE (atype);
+ tree one = fold_convert (elt_type, integer_one_node);
+ tree zero = fold_convert (elt_type, integer_zero_node);
+ o = vector_unroll_factor (nunits,
+ simd_clone_subparts (atype));
+ for (m = j * o; m < (j + 1) * o; m++)
+ {
+ if (simd_clone_subparts (atype)
+ < simd_clone_subparts (arginfo[i].vectype))
+ {
+ /* The mask type has fewer elements than simdlen. */
+
+ /* FORNOW */
+ gcc_unreachable ();
+ }
+ else if (simd_clone_subparts (atype)
+ == simd_clone_subparts (arginfo[i].vectype))
+ {
+ /* The SIMD clone function has the same number of
+ elements as the current function. */
+ if (m == 0)
+ {
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ o * ncopies,
+ op,
+ &vec_oprnds[i]);
+ vec_oprnds_i[i] = 0;
+ }
+ vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
+ vec_oprnd0
+ = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
+ build_vector_from_val (atype, one),
+ build_vector_from_val (atype, zero));
+ gassign *new_stmt
+ = gimple_build_assign (make_ssa_name (atype),
+ vec_oprnd0);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ vargs.safe_push (gimple_assign_lhs (new_stmt));
+ }
+ else
+ {
+ /* The mask type has more elements than simdlen. */
+
+ /* FORNOW */
+ gcc_unreachable ();
+ }
+ }
+ }
+ break;
case SIMD_CLONE_ARG_TYPE_UNIFORM:
vargs.safe_push (op);
break;