-rw-r--r-- | gcc/ChangeLog | 22
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 20
-rw-r--r-- | gcc/config/aarch64/iterators.md | 14
-rw-r--r-- | gcc/doc/md.texi | 11
-rw-r--r-- | gcc/doc/sourcebuild.texi | 3
-rw-r--r-- | gcc/fold-const-call.c | 9
-rw-r--r-- | gcc/internal-fn.def | 6
-rw-r--r-- | gcc/optabs.def | 3
-rw-r--r-- | gcc/testsuite/ChangeLog | 16
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c | 16
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c | 13
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c | 61
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c | 14
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c | 58
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c | 14
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 6
-rw-r--r-- | gcc/tree-vect-loop.c | 15
17 files changed, 283 insertions, 18 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 77eae2d..c7a5895 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -2,6 +2,28 @@
 	Alan Hayward  <alan.hayward@arm.com>
 	David Sherwood  <david.sherwood@arm.com>

+	* optabs.def (reduc_and_scal_optab, reduc_ior_scal_optab)
+	(reduc_xor_scal_optab): New optabs.
+	* doc/md.texi (reduc_and_scal_@var{m}, reduc_ior_scal_@var{m})
+	(reduc_xor_scal_@var{m}): Document.
+	* doc/sourcebuild.texi (vect_logical_reduc): Likewise.
+	* internal-fn.def (IFN_REDUC_AND, IFN_REDUC_IOR, IFN_REDUC_XOR): New
+	internal functions.
+	* fold-const-call.c (fold_const_call): Handle them.
+	* tree-vect-loop.c (reduction_fn_for_scalar_code): Return the new
+	internal functions for BIT_AND_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
+	* config/aarch64/aarch64-sve.md (reduc_<bit_reduc>_scal_<mode>):
+	(*reduc_<bit_reduc>_scal_<mode>): New patterns.
+	* config/aarch64/iterators.md (UNSPEC_ANDV, UNSPEC_ORV)
+	(UNSPEC_XORV): New unspecs.
+	(optab): Add entries for them.
+	(BITWISEV): New int iterator.
+	(bit_reduc_op): New int attributes.
+
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+	    Alan Hayward  <alan.hayward@arm.com>
+	    David Sherwood  <david.sherwood@arm.com>
+
 	* doc/md.texi (vec_shl_insert_@var{m}): New optab.
 	* internal-fn.def (VEC_SHL_INSERT): New internal function.
 	* optabs.def (vec_shl_insert_optab): New optab.
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index fc35902..b312ab7 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1505,6 +1505,26 @@
   "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
 )

+(define_expand "reduc_<optab>_scal_<mode>"
+  [(set (match_operand:<VEL> 0 "register_operand")
+	(unspec:<VEL> [(match_dup 2)
+		       (match_operand:SVE_I 1 "register_operand")]
+		      BITWISEV))]
+  "TARGET_SVE"
+  {
+    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+  }
+)
+
+(define_insn "*reduc_<optab>_scal_<mode>"
+  [(set (match_operand:<VEL> 0 "register_operand" "=w")
+	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+		       (match_operand:SVE_I 2 "register_operand" "w")]
+		      BITWISEV))]
+  "TARGET_SVE"
+  "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
+)
+
 ;; Unpredicated floating-point addition.
 (define_expand "add<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 607f9c3..406c515 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -426,6 +426,9 @@
     UNSPEC_FMLAL2	; Used in aarch64-simd.md.
     UNSPEC_FMLSL2	; Used in aarch64-simd.md.
     UNSPEC_SEL		; Used in aarch64-sve.md.
+    UNSPEC_ANDV		; Used in aarch64-sve.md.
+    UNSPEC_IORV		; Used in aarch64-sve.md.
+    UNSPEC_XORV		; Used in aarch64-sve.md.
     UNSPEC_ANDF		; Used in aarch64-sve.md.
     UNSPEC_IORF		; Used in aarch64-sve.md.
     UNSPEC_XORF		; Used in aarch64-sve.md.
@@ -1344,6 +1347,8 @@
 (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
			       UNSPEC_FMAXNMV UNSPEC_FMINNMV])

+(define_int_iterator BITWISEV [UNSPEC_ANDV UNSPEC_IORV UNSPEC_XORV])
+
 (define_int_iterator LOGICALF [UNSPEC_ANDF UNSPEC_IORF UNSPEC_XORF])

 (define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
@@ -1476,7 +1481,10 @@
 ;; name for consistency with the integer patterns.
 (define_int_attr optab [(UNSPEC_ANDF "and")
			(UNSPEC_IORF "ior")
-			(UNSPEC_XORF "xor")])
+			(UNSPEC_XORF "xor")
+			(UNSPEC_ANDV "and")
+			(UNSPEC_IORV "ior")
+			(UNSPEC_XORV "xor")])

 (define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax")
			     (UNSPEC_UMINV "umin")
@@ -1504,6 +1512,10 @@
			    (UNSPEC_FMAXNM "fmaxnm")
			    (UNSPEC_FMINNM "fminnm")])

+(define_int_attr bit_reduc_op [(UNSPEC_ANDV "andv")
+			       (UNSPEC_IORV "orv")
+			       (UNSPEC_XORV "eorv")])
+
 ;; The SVE logical instruction that implements an unspec.
 (define_int_attr logicalf_op [(UNSPEC_ANDF "and")
			      (UNSPEC_IORF "orr")
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 4f635b0..c66c0b3 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5195,6 +5195,17 @@
 Compute the sum of the elements of a vector.  The vector is operand 1, and
 operand 0 is the scalar result, with mode equal to the mode of the elements of
 the input vector.

+@cindex @code{reduc_and_scal_@var{m}} instruction pattern
+@item @samp{reduc_and_scal_@var{m}}
+@cindex @code{reduc_ior_scal_@var{m}} instruction pattern
+@itemx @samp{reduc_ior_scal_@var{m}}
+@cindex @code{reduc_xor_scal_@var{m}} instruction pattern
+@itemx @samp{reduc_xor_scal_@var{m}}
+Compute the bitwise @code{AND}/@code{IOR}/@code{XOR} reduction of the elements
+of a vector of mode @var{m}.  Operand 1 is the vector input and operand 0
+is the scalar result.  The mode of the scalar result is the same as one
+element of @var{m}.
+
 @cindex @code{sdot_prod@var{m}} instruction pattern
 @item @samp{sdot_prod@var{m}}
 @cindex @code{udot_prod@var{m}} instruction pattern
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index e4a3835..25b6af3 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1581,6 +1581,9 @@
 Target supports 16- and 8-bytes vectors.

 @item vect_sizes_32B_16B
 Target supports 32- and 16-bytes vectors.
+
+@item vect_logical_reduc
+Target supports AND, IOR and XOR reduction on vectors.
 @end table

 @subsubsection Thread Local Storage attributes
diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
index 7e3cd1e..60acf96 100644
--- a/gcc/fold-const-call.c
+++ b/gcc/fold-const-call.c
@@ -1181,6 +1181,15 @@
     case CFN_REDUC_MIN:
       return fold_const_reduction (type, arg, MIN_EXPR);

+    case CFN_REDUC_AND:
+      return fold_const_reduction (type, arg, BIT_AND_EXPR);
+
+    case CFN_REDUC_IOR:
+      return fold_const_reduction (type, arg, BIT_IOR_EXPR);
+
+    case CFN_REDUC_XOR:
+      return fold_const_reduction (type, arg, BIT_XOR_EXPR);
+
     default:
       return fold_const_call_1 (fn, type, arg);
     }
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 925a230..ef79b62 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -127,6 +127,12 @@
 DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MAX, ECF_CONST | ECF_NOTHROW, first,
			      reduc_smax_scal, reduc_umax_scal, unary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MIN, ECF_CONST | ECF_NOTHROW, first,
			      reduc_smin_scal, reduc_umin_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_AND, ECF_CONST | ECF_NOTHROW,
+		       reduc_and_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_IOR, ECF_CONST | ECF_NOTHROW,
+		       reduc_ior_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_XOR, ECF_CONST | ECF_NOTHROW,
+		       reduc_xor_scal, unary)

 /* Unary math functions.  */
 DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index ec5f5f5..035c8e9 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -292,6 +292,9 @@
 OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")
 OPTAB_D (reduc_plus_scal_optab, "reduc_plus_scal_$a")
 OPTAB_D (reduc_umax_scal_optab, "reduc_umax_scal_$a")
 OPTAB_D (reduc_umin_scal_optab, "reduc_umin_scal_$a")
+OPTAB_D (reduc_and_scal_optab, "reduc_and_scal_$a")
+OPTAB_D (reduc_ior_scal_optab, "reduc_ior_scal_$a")
+OPTAB_D (reduc_xor_scal_optab, "reduc_xor_scal_$a")

 OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
 OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d3ec83c..76f3c8a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -2,6 +2,22 @@
 	Alan Hayward  <alan.hayward@arm.com>
 	David Sherwood  <david.sherwood@arm.com>

+	* lib/target-supports.exp (check_effective_target_vect_logical_reduc):
+	New proc.
+	* gcc.dg/vect/vect-reduc-or_1.c: Also run for vect_logical_reduc
+	and add an associated scan-dump test.  Prevent vectorization
+	of the first two loops.
+	* gcc.dg/vect/vect-reduc-or_2.c: Likewise.
+	* gcc.target/aarch64/sve/reduc_1.c: Add AND, IOR and XOR reductions.
+	* gcc.target/aarch64/sve/reduc_2.c: Likewise.
+	* gcc.target/aarch64/sve/reduc_1_run.c: Likewise.
+	(INIT_VECTOR): Tweak initial value so that some bits are always set.
+	* gcc.target/aarch64/sve/reduc_2_run.c: Likewise.
+
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+	    Alan Hayward  <alan.hayward@arm.com>
+	    David Sherwood  <david.sherwood@arm.com>
+
 	* gcc.dg/vect/pr37027.c: Remove XFAIL for variable-length vectors.
 	* gcc.dg/vect/pr67790.c: Likewise.
 	* gcc.dg/vect/slp-reduc-1.c: Likewise.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c
index aad1451..cff3f16 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c
@@ -1,4 +1,4 @@
-/* { dg-require-effective-target whole_vector_shift } */
+/* { dg-do run { target { whole_vector_shift || vect_logical_reduc } } } */

 /* Write a reduction loop to be reduced using vector shifts.  */

@@ -24,17 +24,17 @@ main (unsigned char argc, char **argv)
   check_vect ();

   for (i = 0; i < N; i++)
-    in[i] = (i + i + 1) & 0xfd;
+    {
+      in[i] = (i + i + 1) & 0xfd;
+      asm volatile ("" ::: "memory");
+    }

   for (i = 0; i < N; i++)
     {
       expected |= in[i];
-      asm volatile ("");
+      asm volatile ("" ::: "memory");
     }

-  /* Prevent constant propagation of the entire loop below.  */
-  asm volatile ("" : : : "memory");
-
   for (i = 0; i < N; i++)
     sum |= in[i];

@@ -47,5 +47,5 @@
   return 0;
 }

-/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
-
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" { target { ! vect_logical_reduc } } } } */
+/* { dg-final { scan-tree-dump "Reduce using direct vector reduction" "vect" { target vect_logical_reduc } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c
index ff3dfb2..cd1af6d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c
@@ -1,4 +1,4 @@
-/* { dg-require-effective-target whole_vector_shift } */
+/* { dg-do run { target { whole_vector_shift || vect_logical_reduc } } } */

 /* Write a reduction loop to be reduced using vector shifts and folded.  */

@@ -23,12 +23,15 @@ main (unsigned char argc, char **argv)
   check_vect ();

   for (i = 0; i < N; i++)
-    in[i] = (i + i + 1) & 0xfd;
+    {
+      in[i] = (i + i + 1) & 0xfd;
+      asm volatile ("" ::: "memory");
+    }

   for (i = 0; i < N; i++)
     {
       expected |= in[i];
-      asm volatile ("");
+      asm volatile ("" ::: "memory");
     }

   for (i = 0; i < N; i++)
@@ -43,5 +46,5 @@
   return 0;
 }

-/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
-
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" { target { ! vect_logical_reduc } } } } */
+/* { dg-final { scan-tree-dump "Reduce using direct vector reduction" "vect" { target vect_logical_reduc } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
index f86966b..72dc793 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
@@ -65,6 +65,46 @@ reduc_##NAME##_##TYPE (TYPE *a, int n)	\

 TEST_MAXMIN (DEF_REDUC_MAXMIN)

+#define DEF_REDUC_BITWISE(TYPE, NAME, BIT_OP)	\
+TYPE __attribute__ ((noinline, noclone))	\
+reduc_##NAME##_##TYPE (TYPE *a, int n)		\
+{						\
+  TYPE r = 13;					\
+  for (int i = 0; i < n; ++i)			\
+    r BIT_OP a[i];				\
+  return r;					\
+}
+
+#define TEST_BITWISE(T)		\
+  T (int8_t, and, &=)		\
+  T (int16_t, and, &=)		\
+  T (int32_t, and, &=)		\
+  T (int64_t, and, &=)		\
+  T (uint8_t, and, &=)		\
+  T (uint16_t, and, &=)		\
+  T (uint32_t, and, &=)		\
+  T (uint64_t, and, &=)		\
+				\
+  T (int8_t, ior, |=)		\
+  T (int16_t, ior, |=)		\
+  T (int32_t, ior, |=)		\
+  T (int64_t, ior, |=)		\
+  T (uint8_t, ior, |=)		\
+  T (uint16_t, ior, |=)		\
+  T (uint32_t, ior, |=)		\
+  T (uint64_t, ior, |=)		\
+				\
+  T (int8_t, xor, ^=)		\
+  T (int16_t, xor, ^=)		\
+  T (int32_t, xor, ^=)		\
+  T (int64_t, xor, ^=)		\
+  T (uint8_t, xor, ^=)		\
+  T (uint16_t, xor, ^=)		\
+  T (uint32_t, xor, ^=)		\
+  T (uint64_t, xor, ^=)
+
+TEST_BITWISE (DEF_REDUC_BITWISE)
+
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
@@ -102,6 +142,12 @@ TEST_MAXMIN (DEF_REDUC_MAXMIN)
 /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */

+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+
 /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
@@ -133,3 +179,18 @@
 /* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c
index 3fcb7fb..c0fdada 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c
@@ -9,7 +9,7 @@
     TYPE a[NUM_ELEMS (TYPE) + 1];			\
     for (int i = 0; i < NUM_ELEMS (TYPE) + 1; i++)	\
       {							\
-	a[i] = (i * 2) * (i & 1 ? 1 : -1);		\
+	a[i] = ((i * 2) * (i & 1 ? 1 : -1) | 3);	\
	asm volatile ("" ::: "memory");			\
       }

@@ -35,10 +35,22 @@
	__builtin_abort ();				\
   }

+#define TEST_REDUC_BITWISE(TYPE, NAME, BIT_OP)			\
+  {								\
+    INIT_VECTOR (TYPE);						\
+    TYPE r1 = reduc_##NAME##_##TYPE (a, NUM_ELEMS (TYPE));	\
+    volatile TYPE r2 = 13;					\
+    for (int i = 0; i < NUM_ELEMS (TYPE); ++i)			\
+      r2 BIT_OP a[i];						\
+    if (r1 != r2)						\
+      __builtin_abort ();					\
+  }
+
 int main ()
 {
   TEST_PLUS (TEST_REDUC_PLUS)
   TEST_MAXMIN (TEST_REDUC_MAXMIN)
+  TEST_BITWISE (TEST_REDUC_BITWISE)
   return 0;
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
index adc3699..376a453 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
@@ -73,6 +73,49 @@ reduc_##NAME##_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)],	\

 TEST_MAXMIN (DEF_REDUC_MAXMIN)

+#define DEF_REDUC_BITWISE(TYPE,NAME,BIT_OP)			\
+void __attribute__ ((noinline, noclone))			\
+reduc_##NAME##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)],	\
+		    TYPE *restrict r, int n)			\
+{								\
+  for (int i = 0; i < n; i++)					\
+    {								\
+      r[i] = a[i][0];						\
+      for (int j = 0; j < NUM_ELEMS(TYPE); j++)			\
+	r[i] BIT_OP a[i][j];					\
+    }								\
+}
+
+#define TEST_BITWISE(T)		\
+  T (int8_t, and, &=)		\
+  T (int16_t, and, &=)		\
+  T (int32_t, and, &=)		\
+  T (int64_t, and, &=)		\
+  T (uint8_t, and, &=)		\
+  T (uint16_t, and, &=)		\
+  T (uint32_t, and, &=)		\
+  T (uint64_t, and, &=)		\
+				\
+  T (int8_t, ior, |=)		\
+  T (int16_t, ior, |=)		\
+  T (int32_t, ior, |=)		\
+  T (int64_t, ior, |=)		\
+  T (uint8_t, ior, |=)		\
+  T (uint16_t, ior, |=)		\
+  T (uint32_t, ior, |=)		\
+  T (uint64_t, ior, |=)		\
+				\
+  T (int8_t, xor, ^=)		\
+  T (int16_t, xor, ^=)		\
+  T (int32_t, xor, ^=)		\
+  T (int64_t, xor, ^=)		\
+  T (uint8_t, xor, ^=)		\
+  T (uint16_t, xor, ^=)		\
+  T (uint32_t, xor, ^=)		\
+  T (uint64_t, xor, ^=)
+
+TEST_BITWISE (DEF_REDUC_BITWISE)
+
 /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
@@ -104,3 +147,18 @@
 /* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c
index f48e348..c4a0426 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c
@@ -56,6 +56,20 @@
	  }						\
       }							\
   }

+#define TEST_REDUC_BITWISE(TYPE, NAME, BIT_OP)		\
+  {							\
+    INIT_MATRIX (TYPE);					\
+    reduc_##NAME##_##TYPE (mat, r, NROWS);		\
+    for (int i = 0; i < NROWS; i++)			\
+      {							\
+	volatile TYPE r2 = mat[i][0];			\
+	for (int j = 0; j < NUM_ELEMS (TYPE); ++j)	\
+	  r2 BIT_OP mat[i][j];				\
+	if (r[i] != r2)					\
+	  __builtin_abort ();				\
+      }							\
+  }
+
 int main ()
 {
   TEST_PLUS (TEST_REDUC_PLUS)
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index aedb798..73dbb2c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7222,6 +7222,12 @@ proc check_effective_target_vect_call_roundf { } {
     return $et_vect_call_roundf_saved($et_index)
 }

+# Return 1 if the target supports AND, OR and XOR reduction.
+
+proc check_effective_target_vect_logical_reduc { } {
+    return [check_effective_target_aarch64_sve]
+}
+
 # Return 1 if the target supports section-anchors

 proc check_effective_target_section_anchors { } {
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 9219a0d..d679115 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2438,11 +2438,20 @@ reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn)
       *reduc_fn = IFN_REDUC_PLUS;
       return true;

-    case MULT_EXPR:
-    case MINUS_EXPR:
+    case BIT_AND_EXPR:
+      *reduc_fn = IFN_REDUC_AND;
+      return true;
+
     case BIT_IOR_EXPR:
+      *reduc_fn = IFN_REDUC_IOR;
+      return true;
+
     case BIT_XOR_EXPR:
-    case BIT_AND_EXPR:
+      *reduc_fn = IFN_REDUC_XOR;
+      return true;
+
+    case MULT_EXPR:
+    case MINUS_EXPR:
       *reduc_fn = IFN_LAST;
       return true;
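
Illustrative note (not part of the patch): the kind of scalar loop that the new reduc_and/ior/xor_scal optabs and IFN_REDUC_* internal functions cover is sketched below. The function name and compile flags are hypothetical; with SVE enabled (for example -O3 -march=armv8.2-a+sve) the vectorizer is expected to accumulate with vector ORR in the loop and finish with a single ORV reduction, which is what the orv scan-assembler tests above look for.

#include <stdint.h>

/* Hypothetical example of a bitwise IOR reduction.  The in-loop
   accumulation can be done with vector ORR; the final scalar result
   is expected to come from one ORV via reduc_ior_scal_<mode>.  */
uint32_t __attribute__ ((noinline, noclone))
reduce_ior (const uint32_t *a, int n)
{
  uint32_t res = 0;
  for (int i = 0; i < n; ++i)
    res |= a[i];
  return res;
}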