diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2022-07-28 16:07:22 +0100 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2023-02-22 13:57:11 +0000 |
commit | 3da77f217c8b2089ecba3eb201e727c3fcdcd19d (patch) | |
tree | 2f3fb8460f25af2a5bf8a8bc9e8fe19f8b5b41f1 | |
parent | c878c6586dee353e685364910e02ad1a611d4634 (diff) | |
download | gcc-3da77f217c8b2089ecba3eb201e727c3fcdcd19d.zip gcc-3da77f217c8b2089ecba3eb201e727c3fcdcd19d.tar.gz gcc-3da77f217c8b2089ecba3eb201e727c3fcdcd19d.tar.bz2 |
vect: inbranch SIMD clones
There has been support for generating "inbranch" SIMD clones for a long time,
but nothing actually uses them (as far as I can see).
This patch add supports for a sub-set of possible cases (those using
mask_mode == VOIDmode). The other cases fail to vectorize, just as before,
so there should be no regressions.
The sub-set of support should cover all cases needed by amdgcn, at present.
gcc/ChangeLog:
* internal-fn.cc (expand_MASK_CALL): New.
* internal-fn.def (MASK_CALL): New.
* internal-fn.h (expand_MASK_CALL): New prototype.
* omp-simd-clone.cc (simd_clone_adjust_argument_types): Set vector_type
for mask arguments also.
* tree-if-conv.cc: Include cgraph.h.
(if_convertible_stmt_p): Do if conversions for calls to SIMD calls.
(predicate_statements): Convert functions to IFN_MASK_CALL.
* tree-vect-loop.cc (vect_get_datarefs_in_loop): Recognise
IFN_MASK_CALL as a SIMD function call.
* tree-vect-stmts.cc (vectorizable_simd_clone_call): Handle
IFN_MASK_CALL as an inbranch SIMD function call.
Generate the mask vector arguments.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/vect-simd-clone-16.c: New test.
* gcc.dg/vect/vect-simd-clone-16b.c: New test.
* gcc.dg/vect/vect-simd-clone-16c.c: New test.
* gcc.dg/vect/vect-simd-clone-16d.c: New test.
* gcc.dg/vect/vect-simd-clone-16e.c: New test.
* gcc.dg/vect/vect-simd-clone-16f.c: New test.
* gcc.dg/vect/vect-simd-clone-17.c: New test.
* gcc.dg/vect/vect-simd-clone-17b.c: New test.
* gcc.dg/vect/vect-simd-clone-17c.c: New test.
* gcc.dg/vect/vect-simd-clone-17d.c: New test.
* gcc.dg/vect/vect-simd-clone-17e.c: New test.
* gcc.dg/vect/vect-simd-clone-17f.c: New test.
* gcc.dg/vect/vect-simd-clone-18.c: New test.
* gcc.dg/vect/vect-simd-clone-18b.c: New test.
* gcc.dg/vect/vect-simd-clone-18c.c: New test.
* gcc.dg/vect/vect-simd-clone-18d.c: New test.
* gcc.dg/vect/vect-simd-clone-18e.c: New test.
* gcc.dg/vect/vect-simd-clone-18f.c: New test.
25 files changed, 682 insertions, 32 deletions
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index a7936ff..6e81dc0 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4521,3 +4521,10 @@ void expand_ASSUME (internal_fn, gcall *) { } + +void +expand_MASK_CALL (internal_fn, gcall *) +{ + /* This IFN should only exist between ifcvt and vect passes. */ + gcc_unreachable (); +} diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 22b4a2d..7fe742c 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -461,6 +461,9 @@ DEF_INTERNAL_FN (SPACESHIP, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (ASSUME, ECF_CONST | ECF_LEAF | ECF_NOTHROW | ECF_LOOPING_CONST_OR_PURE, NULL) +/* For if-conversion of inbranch SIMD clones. */ +DEF_INTERNAL_FN (MASK_CALL, ECF_NOVOPS, NULL) + #undef DEF_INTERNAL_INT_FN #undef DEF_INTERNAL_FLT_FN #undef DEF_INTERNAL_FLT_FLOATN_FN diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index 2fd82a1..08922ed 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -244,6 +244,7 @@ extern void expand_SHUFFLEVECTOR (internal_fn, gcall *); extern void expand_SPACESHIP (internal_fn, gcall *); extern void expand_TRAP (internal_fn, gcall *); extern void expand_ASSUME (internal_fn, gcall *); +extern void expand_MASK_CALL (internal_fn, gcall *); extern bool vectorized_internal_fn_supported_p (internal_fn, tree); diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc index 0949b8b..03ff86e 100644 --- a/gcc/omp-simd-clone.cc +++ b/gcc/omp-simd-clone.cc @@ -942,6 +942,7 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) } sc->args[i].orig_type = base_type; sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK; + sc->args[i].vector_type = adj.type; } if (node->definition) diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c new file mode 100644 index 0000000..ce9a6da --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c @@ -0,0 +1,89 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +/* Test that simd inbranch clones work correctly. */ + +#ifndef TYPE +#define TYPE int +#endif + +/* A simple function that will be cloned. */ +#pragma omp declare simd +TYPE __attribute__((noinline)) +foo (TYPE a) +{ + return a + 1; +} + +/* Check that "inbranch" clones are called correctly. */ + +void __attribute__((noipa)) +masked (TYPE * __restrict a, TYPE * __restrict b, int size) +{ + #pragma omp simd + for (int i = 0; i < size; i++) + b[i] = a[i]<1 ? foo(a[i]) : a[i]; +} + +/* Check that "inbranch" works when there might be unrolling. */ + +void __attribute__((noipa)) +masked_fixed (TYPE * __restrict a, TYPE * __restrict b) +{ + #pragma omp simd + for (int i = 0; i < 128; i++) + b[i] = a[i]<1 ? foo(a[i]) : a[i]; +} + +/* Validate the outputs. */ + +void +check_masked (TYPE *b, int size) +{ + for (int i = 0; i < size; i++) + if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1)) + || ((TYPE)i >= 1 && b[i] != (TYPE)i)) + { + __builtin_printf ("error at %d\n", i); + __builtin_exit (1); + } +} + +int +main () +{ + TYPE a[1024]; + TYPE b[1024]; + + for (int i = 0; i < 1024; i++) + a[i] = i; + + masked_fixed (a, b); + check_masked (b, 128); + + /* Test various sizes to cover machines with different vectorization + factors. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size); + check_masked (b, size); + } + + /* Test sizes that might exercise the partial vector code-path. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size-4); + check_masked (b, size-4); + } + + return 0; +} + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c new file mode 100644 index 0000000..af543b6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE float +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c new file mode 100644 index 0000000..677548a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c @@ -0,0 +1,17 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE short +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ + +/* x86_64 fails to use in-branch clones for TYPE=short. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c new file mode 100644 index 0000000..a9ae993 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c @@ -0,0 +1,17 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE char +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ + +/* x86_64 fails to use in-branch clones for TYPE=char. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c new file mode 100644 index 0000000..c8b482b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE double +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. + Some targets use pairs of vectors and do twice the calls. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c new file mode 100644 index 0000000..f42ac08 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE __INT64_TYPE__ +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use pairs of vectors and do twice the calls. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c new file mode 100644 index 0000000..756225e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c @@ -0,0 +1,89 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +/* Test that simd inbranch clones work correctly. */ + +#ifndef TYPE +#define TYPE int +#endif + +/* A simple function that will be cloned. */ +#pragma omp declare simd uniform(b) +TYPE __attribute__((noinline)) +foo (TYPE a, TYPE b) +{ + return a + b; +} + +/* Check that "inbranch" clones are called correctly. */ + +void __attribute__((noipa)) +masked (TYPE * __restrict a, TYPE * __restrict b, int size) +{ + #pragma omp simd + for (int i = 0; i < size; i++) + b[i] = a[i]<1 ? foo(a[i], 1) : a[i]; +} + +/* Check that "inbranch" works when there might be unrolling. */ + +void __attribute__((noipa)) +masked_fixed (TYPE * __restrict a, TYPE * __restrict b) +{ + #pragma omp simd + for (int i = 0; i < 128; i++) + b[i] = a[i]<1 ? foo(a[i], 1) : a[i]; +} + +/* Validate the outputs. */ + +void +check_masked (TYPE *b, int size) +{ + for (int i = 0; i < size; i++) + if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1)) + || ((TYPE)i >= 1 && b[i] != (TYPE)i)) + { + __builtin_printf ("error at %d\n", i); + __builtin_exit (1); + } +} + +int +main () +{ + TYPE a[1024]; + TYPE b[1024]; + + for (int i = 0; i < 1024; i++) + a[i] = i; + + masked_fixed (a, b); + check_masked (b, 128); + + /* Test various sizes to cover machines with different vectorization + factors. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size); + check_masked (b, size); + } + + /* Test sizes that might exercise the partial vector code-path. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size-4); + check_masked (b, size-4); + } + + return 0; +} + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c new file mode 100644 index 0000000..8731c26 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE float +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c new file mode 100644 index 0000000..6683d1a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c @@ -0,0 +1,17 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE short +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ + +/* x86_64 fails to use in-branch clones for TYPE=short. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c new file mode 100644 index 0000000..d38bde6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c @@ -0,0 +1,17 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE char +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ + +/* x86_64 fails to use in-branch clones for TYPE=char. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c new file mode 100644 index 0000000..f2a428c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE double +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. + Some targets use pairs of vectors and do twice the calls. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c new file mode 100644 index 0000000..cd05dec --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE __INT64_TYPE__ +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use pairs of vectors and do twice the calls. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c new file mode 100644 index 0000000..febf9fd --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c @@ -0,0 +1,89 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +/* Test that simd inbranch clones work correctly. */ + +#ifndef TYPE +#define TYPE int +#endif + +/* A simple function that will be cloned. */ +#pragma omp declare simd uniform(b) +TYPE __attribute__((noinline)) +foo (TYPE b, TYPE a) +{ + return a + b; +} + +/* Check that "inbranch" clones are called correctly. */ + +void __attribute__((noipa)) +masked (TYPE * __restrict a, TYPE * __restrict b, int size) +{ + #pragma omp simd + for (int i = 0; i < size; i++) + b[i] = a[i]<1 ? foo(1, a[i]) : a[i]; +} + +/* Check that "inbranch" works when there might be unrolling. */ + +void __attribute__((noipa)) +masked_fixed (TYPE * __restrict a, TYPE * __restrict b) +{ + #pragma omp simd + for (int i = 0; i < 128; i++) + b[i] = a[i]<1 ? foo(1, a[i]) : a[i]; +} + +/* Validate the outputs. */ + +void +check_masked (TYPE *b, int size) +{ + for (int i = 0; i < size; i++) + if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1)) + || ((TYPE)i >= 1 && b[i] != (TYPE)i)) + { + __builtin_printf ("error at %d\n", i); + __builtin_exit (1); + } +} + +int +main () +{ + TYPE a[1024]; + TYPE b[1024]; + + for (int i = 0; i < 1024; i++) + a[i] = i; + + masked_fixed (a, b); + check_masked (b, 128); + + /* Test various sizes to cover machines with different vectorization + factors. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size); + check_masked (b, size); + } + + /* Test sizes that might exercise the partial vector code-path. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size-4); + check_masked (b, size-4); + } + + return 0; +} + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c new file mode 100644 index 0000000..120993e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE float +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c new file mode 100644 index 0000000..0d1fc6d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c @@ -0,0 +1,17 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE short +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ + +/* x86_64 fails to use in-branch clones for TYPE=short. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c new file mode 100644 index 0000000..1e6c028 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c @@ -0,0 +1,17 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE char +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ + +/* x86_64 fails to use in-branch clones for TYPE=char. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c new file mode 100644 index 0000000..9d20e52 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE double +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use another call for the epilogue loops. + Some targets use pairs of vectors and do twice the calls. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c new file mode 100644 index 0000000..09ee7ff --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE __INT64_TYPE__ +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support them. + Some targets use pairs of vectors and do twice the calls. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index b19eebd..a7a8406 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -123,6 +123,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-dse.h" #include "tree-vectorizer.h" #include "tree-eh.h" +#include "cgraph.h" /* For lang_hooks.types.type_for_mode. */ #include "langhooks.h" @@ -1063,7 +1064,8 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt, A statement is if-convertible if: - it is an if-convertible GIMPLE_ASSIGN, - it is a GIMPLE_LABEL or a GIMPLE_COND, - - it is builtins call. */ + - it is builtins call, + - it is a call to a function with a SIMD clone. */ static bool if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs) @@ -1083,13 +1085,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs) tree fndecl = gimple_call_fndecl (stmt); if (fndecl) { + /* We can vectorize some builtins and functions with SIMD + "inbranch" clones. */ int flags = gimple_call_flags (stmt); + struct cgraph_node *node = cgraph_node::get (fndecl); if ((flags & ECF_CONST) && !(flags & ECF_LOOPING_CONST_OR_PURE) - /* We can only vectorize some builtins at the moment, - so restrict if-conversion to those. */ && fndecl_built_in_p (fndecl)) return true; + if (node && node->simd_clones != NULL) + /* Ensure that at least one clone can be "inbranch". */ + for (struct cgraph_node *n = node->simd_clones; n != NULL; + n = n->simdclone->next_clone) + if (n->simdclone->inbranch) + { + need_to_predicate = true; + return true; + } } return false; } @@ -2613,6 +2625,29 @@ predicate_statements (loop_p loop) gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi)); update_stmt (stmt); } + + /* Convert functions that have a SIMD clone to IFN_MASK_CALL. This + will cause the vectorizer to match the "in branch" clone variants, + and serves to build the mask vector in a natural way. */ + gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi)); + if (call && !gimple_call_internal_p (call)) + { + tree orig_fn = gimple_call_fn (call); + int orig_nargs = gimple_call_num_args (call); + auto_vec<tree> args; + args.safe_push (orig_fn); + for (int i = 0; i < orig_nargs; i++) + args.safe_push (gimple_call_arg (call, i)); + args.safe_push (cond); + + /* Replace the call with a IFN_MASK_CALL that has the extra + condition parameter. */ + gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL, + args); + gimple_call_set_lhs (new_call, gimple_call_lhs (call)); + gsi_replace (&gsi, new_call, true); + } + lhs = gimple_get_lhs (gsi_stmt (gsi)); if (lhs && TREE_CODE (lhs) == SSA_NAME) ssa_names.add (lhs); diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 8387f76..ab7af0e 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -2125,6 +2125,14 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, if (is_gimple_call (stmt) && loop->safelen) { tree fndecl = gimple_call_fndecl (stmt), op; + if (fndecl == NULL_TREE + && gimple_call_internal_p (stmt, IFN_MASK_CALL)) + { + fndecl = gimple_call_arg (stmt, 0); + gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR); + fndecl = TREE_OPERAND (fndecl, 0); + gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL); + } if (fndecl != NULL_TREE) { cgraph_node *node = cgraph_node::get (fndecl); diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index df6239a..9e5ffbe 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -4004,6 +4004,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, size_t i, nargs; tree lhs, rtype, ratype; vec<constructor_elt, va_gc> *ret_ctor_elts = NULL; + int arg_offset = 0; /* Is STMT a vectorizable call? */ gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt); @@ -4011,6 +4012,15 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, return false; fndecl = gimple_call_fndecl (stmt); + if (fndecl == NULL_TREE + && gimple_call_internal_p (stmt, IFN_MASK_CALL)) + { + fndecl = gimple_call_arg (stmt, 0); + gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR); + fndecl = TREE_OPERAND (fndecl, 0); + gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL); + arg_offset = 1; + } if (fndecl == NULL_TREE) return false; @@ -4041,7 +4051,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, return false; /* Process function arguments. */ - nargs = gimple_call_num_args (stmt); + nargs = gimple_call_num_args (stmt) - arg_offset; /* Bail out if the function has zero arguments. */ if (nargs == 0) @@ -4059,7 +4069,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, thisarginfo.op = NULL_TREE; thisarginfo.simd_lane_linear = false; - op = gimple_call_arg (stmt, i); + op = gimple_call_arg (stmt, i + arg_offset); if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt, &thisarginfo.vectype) || thisarginfo.dt == vect_uninitialized_def) @@ -4074,16 +4084,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, || thisarginfo.dt == vect_external_def) gcc_assert (thisarginfo.vectype == NULL_TREE); else - { - gcc_assert (thisarginfo.vectype != NULL_TREE); - if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "vector mask arguments are not supported\n"); - return false; - } - } + gcc_assert (thisarginfo.vectype != NULL_TREE); /* For linear arguments, the analyze phase should have saved the base and step in STMT_VINFO_SIMD_CLONE_INFO. */ @@ -4176,9 +4177,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, if (target_badness < 0) continue; this_badness += target_badness * 512; - /* FORNOW: Have to add code to add the mask argument. */ - if (n->simdclone->inbranch) - continue; for (i = 0; i < nargs; i++) { switch (n->simdclone->args[i].arg_type) @@ -4186,7 +4184,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, case SIMD_CLONE_ARG_TYPE_VECTOR: if (!useless_type_conversion_p (n->simdclone->args[i].orig_type, - TREE_TYPE (gimple_call_arg (stmt, i)))) + TREE_TYPE (gimple_call_arg (stmt, i + arg_offset)))) i = -1; else if (arginfo[i].dt == vect_constant_def || arginfo[i].dt == vect_external_def @@ -4216,7 +4214,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, i = -1; break; case SIMD_CLONE_ARG_TYPE_MASK: - gcc_unreachable (); + break; } if (i == (size_t) -1) break; @@ -4242,18 +4240,55 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, return false; for (i = 0; i < nargs; i++) - if ((arginfo[i].dt == vect_constant_def - || arginfo[i].dt == vect_external_def) - && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) - { - tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i)); - arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, - slp_node); - if (arginfo[i].vectype == NULL - || !constant_multiple_p (bestn->simdclone->simdlen, - simd_clone_subparts (arginfo[i].vectype))) + { + if ((arginfo[i].dt == vect_constant_def + || arginfo[i].dt == vect_external_def) + && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) + { + tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset)); + arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, + slp_node); + if (arginfo[i].vectype == NULL + || !constant_multiple_p (bestn->simdclone->simdlen, + simd_clone_subparts (arginfo[i].vectype))) + return false; + } + + if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR + && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "vector mask arguments are not supported.\n"); return false; - } + } + + if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK + && bestn->simdclone->mask_mode == VOIDmode + && (simd_clone_subparts (bestn->simdclone->args[i].vector_type) + != simd_clone_subparts (arginfo[i].vectype))) + { + /* FORNOW we only have partial support for vector-type masks that + can't hold all of simdlen. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "in-branch vector clones are not yet" + " supported for mismatched vector sizes.\n"); + return false; + } + if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK + && bestn->simdclone->mask_mode != VOIDmode) + { + /* FORNOW don't support integer-type masks. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "in-branch vector clones are not yet" + " supported for integer mask modes.\n"); + return false; + } + } fndecl = bestn->decl; nunits = bestn->simdclone->simdlen; @@ -4343,7 +4378,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, { unsigned int k, l, m, o; tree atype; - op = gimple_call_arg (stmt, i); + op = gimple_call_arg (stmt, i + arg_offset); switch (bestn->simdclone->args[i].arg_type) { case SIMD_CLONE_ARG_TYPE_VECTOR: @@ -4442,6 +4477,65 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, } } break; + case SIMD_CLONE_ARG_TYPE_MASK: + atype = bestn->simdclone->args[i].vector_type; + if (bestn->simdclone->mask_mode != VOIDmode) + { + /* FORNOW: this is disabled above. */ + gcc_unreachable (); + } + else + { + tree elt_type = TREE_TYPE (atype); + tree one = fold_convert (elt_type, integer_one_node); + tree zero = fold_convert (elt_type, integer_zero_node); + o = vector_unroll_factor (nunits, + simd_clone_subparts (atype)); + for (m = j * o; m < (j + 1) * o; m++) + { + if (simd_clone_subparts (atype) + < simd_clone_subparts (arginfo[i].vectype)) + { + /* The mask type has fewer elements than simdlen. */ + + /* FORNOW */ + gcc_unreachable (); + } + else if (simd_clone_subparts (atype) + == simd_clone_subparts (arginfo[i].vectype)) + { + /* The SIMD clone function has the same number of + elements as the current function. */ + if (m == 0) + { + vect_get_vec_defs_for_operand (vinfo, stmt_info, + o * ncopies, + op, + &vec_oprnds[i]); + vec_oprnds_i[i] = 0; + } + vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; + vec_oprnd0 + = build3 (VEC_COND_EXPR, atype, vec_oprnd0, + build_vector_from_val (atype, one), + build_vector_from_val (atype, zero)); + gassign *new_stmt + = gimple_build_assign (make_ssa_name (atype), + vec_oprnd0); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + vargs.safe_push (gimple_assign_lhs (new_stmt)); + } + else + { + /* The mask type has more elements than simdlen. */ + + /* FORNOW */ + gcc_unreachable (); + } + } + } + break; case SIMD_CLONE_ARG_TYPE_UNIFORM: vargs.safe_push (op); break; |