aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2018-01-13 17:58:42 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2018-01-13 17:58:42 +0000
commit898f07b0458a48a87df334301ada3414ff08d3de (patch)
tree3bfed61a2b1fc63096b23d92b6983a7e34e2f6c6 /gcc
parentf1739b4829105fa95d6ff6244632d5977169277f (diff)
downloadgcc-898f07b0458a48a87df334301ada3414ff08d3de.zip
gcc-898f07b0458a48a87df334301ada3414ff08d3de.tar.gz
gcc-898f07b0458a48a87df334301ada3414ff08d3de.tar.bz2
Add support for bitwise reductions
This patch adds support for the SVE bitwise reduction instructions (ANDV, ORV and EORV). It's a fairly mechanical extension of existing REDUC_* operators. 2018-01-13 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * optabs.def (reduc_and_scal_optab, reduc_ior_scal_optab) (reduc_xor_scal_optab): New optabs. * doc/md.texi (reduc_and_scal_@var{m}, reduc_ior_scal_@var{m}) (reduc_xor_scal_@var{m}): Document. * doc/sourcebuild.texi (vect_logical_reduc): Likewise. * internal-fn.def (IFN_REDUC_AND, IFN_REDUC_IOR, IFN_REDUC_XOR): New internal functions. * fold-const-call.c (fold_const_call): Handle them. * tree-vect-loop.c (reduction_fn_for_scalar_code): Return the new internal functions for BIT_AND_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR. * config/aarch64/aarch64-sve.md (reduc_<bit_reduc>_scal_<mode>): (*reduc_<bit_reduc>_scal_<mode>): New patterns. * config/aarch64/iterators.md (UNSPEC_ANDV, UNSPEC_ORV) (UNSPEC_XORV): New unspecs. (optab): Add entries for them. (BITWISEV): New int iterator. (bit_reduc_op): New int attributes. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_vect_logical_reduc): New proc. * gcc.dg/vect/vect-reduc-or_1.c: Also run for vect_logical_reduc and add an associated scan-dump test. Prevent vectorization of the first two loops. * gcc.dg/vect/vect-reduc-or_2.c: Likewise. * gcc.target/aarch64/sve/reduc_1.c: Add AND, IOR and XOR reductions. * gcc.target/aarch64/sve/reduc_2.c: Likewise. * gcc.target/aarch64/sve/reduc_1_run.c: Likewise. (INIT_VECTOR): Tweak initial value so that some bits are always set. * gcc.target/aarch64/sve/reduc_2_run.c: Likewise. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r256624
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog22
-rw-r--r--gcc/config/aarch64/aarch64-sve.md20
-rw-r--r--gcc/config/aarch64/iterators.md14
-rw-r--r--gcc/doc/md.texi11
-rw-r--r--gcc/doc/sourcebuild.texi3
-rw-r--r--gcc/fold-const-call.c9
-rw-r--r--gcc/internal-fn.def6
-rw-r--r--gcc/optabs.def3
-rw-r--r--gcc/testsuite/ChangeLog16
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c16
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c61
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c14
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c58
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c14
-rw-r--r--gcc/testsuite/lib/target-supports.exp6
-rw-r--r--gcc/tree-vect-loop.c15
17 files changed, 283 insertions, 18 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 77eae2d..c7a5895 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -2,6 +2,28 @@
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
+ * optabs.def (reduc_and_scal_optab, reduc_ior_scal_optab)
+ (reduc_xor_scal_optab): New optabs.
+ * doc/md.texi (reduc_and_scal_@var{m}, reduc_ior_scal_@var{m})
+ (reduc_xor_scal_@var{m}): Document.
+ * doc/sourcebuild.texi (vect_logical_reduc): Likewise.
+ * internal-fn.def (IFN_REDUC_AND, IFN_REDUC_IOR, IFN_REDUC_XOR): New
+ internal functions.
+ * fold-const-call.c (fold_const_call): Handle them.
+ * tree-vect-loop.c (reduction_fn_for_scalar_code): Return the new
+ internal functions for BIT_AND_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
+ * config/aarch64/aarch64-sve.md (reduc_<optab>_scal_<mode>)
+ (*reduc_<optab>_scal_<mode>): New patterns.
+ * config/aarch64/iterators.md (UNSPEC_ANDV, UNSPEC_IORV)
+ (UNSPEC_XORV): New unspecs.
+ (optab): Add entries for them.
+ (BITWISEV): New int iterator.
+ (bit_reduc_op): New int attribute.
+
+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
* doc/md.texi (vec_shl_insert_@var{m}): New optab.
* internal-fn.def (VEC_SHL_INSERT): New internal function.
* optabs.def (vec_shl_insert_optab): New optab.
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index fc35902..b312ab7 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1505,6 +1505,26 @@
"<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)
+(define_expand "reduc_<optab>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_I 1 "register_operand")]
+ BITWISEV))]
+ "TARGET_SVE"
+ {
+ operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+ }
+)
+
+(define_insn "*reduc_<optab>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "register_operand" "w")]
+ BITWISEV))]
+ "TARGET_SVE"
+ "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
+)
+
;; Unpredicated floating-point addition.
(define_expand "add<mode>3"
[(set (match_operand:SVE_F 0 "register_operand")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 607f9c3..406c515 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -426,6 +426,9 @@
UNSPEC_FMLAL2 ; Used in aarch64-simd.md.
UNSPEC_FMLSL2 ; Used in aarch64-simd.md.
UNSPEC_SEL ; Used in aarch64-sve.md.
+ UNSPEC_ANDV ; Used in aarch64-sve.md.
+ UNSPEC_IORV ; Used in aarch64-sve.md.
+ UNSPEC_XORV ; Used in aarch64-sve.md.
UNSPEC_ANDF ; Used in aarch64-sve.md.
UNSPEC_IORF ; Used in aarch64-sve.md.
UNSPEC_XORF ; Used in aarch64-sve.md.
@@ -1344,6 +1347,8 @@
(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
UNSPEC_FMAXNMV UNSPEC_FMINNMV])
+(define_int_iterator BITWISEV [UNSPEC_ANDV UNSPEC_IORV UNSPEC_XORV])
+
(define_int_iterator LOGICALF [UNSPEC_ANDF UNSPEC_IORF UNSPEC_XORF])
(define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
@@ -1476,7 +1481,10 @@
;; name for consistency with the integer patterns.
(define_int_attr optab [(UNSPEC_ANDF "and")
(UNSPEC_IORF "ior")
- (UNSPEC_XORF "xor")])
+ (UNSPEC_XORF "xor")
+ (UNSPEC_ANDV "and")
+ (UNSPEC_IORV "ior")
+ (UNSPEC_XORV "xor")])
(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax")
(UNSPEC_UMINV "umin")
@@ -1504,6 +1512,10 @@
(UNSPEC_FMAXNM "fmaxnm")
(UNSPEC_FMINNM "fminnm")])
+(define_int_attr bit_reduc_op [(UNSPEC_ANDV "andv")
+ (UNSPEC_IORV "orv")
+ (UNSPEC_XORV "eorv")])
+
;; The SVE logical instruction that implements an unspec.
(define_int_attr logicalf_op [(UNSPEC_ANDF "and")
(UNSPEC_IORF "orr")
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 4f635b0..c66c0b3 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5195,6 +5195,17 @@ Compute the sum of the elements of a vector. The vector is operand 1, and
operand 0 is the scalar result, with mode equal to the mode of the elements of
the input vector.
+@cindex @code{reduc_and_scal_@var{m}} instruction pattern
+@item @samp{reduc_and_scal_@var{m}}
+@cindex @code{reduc_ior_scal_@var{m}} instruction pattern
+@itemx @samp{reduc_ior_scal_@var{m}}
+@cindex @code{reduc_xor_scal_@var{m}} instruction pattern
+@itemx @samp{reduc_xor_scal_@var{m}}
+Compute the bitwise @code{AND}/@code{IOR}/@code{XOR} reduction of the elements
+of a vector of mode @var{m}. Operand 1 is the vector input and operand 0
+is the scalar result. The mode of the scalar result is the same as one
+element of @var{m}.
+
@cindex @code{sdot_prod@var{m}} instruction pattern
@item @samp{sdot_prod@var{m}}
@cindex @code{udot_prod@var{m}} instruction pattern
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index e4a3835..25b6af3 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1581,6 +1581,9 @@ Target supports 16- and 8-bytes vectors.
@item vect_sizes_32B_16B
Target supports 32- and 16-bytes vectors.
+
+@item vect_logical_reduc
+Target supports AND, IOR and XOR reduction on vectors.
@end table
@subsubsection Thread Local Storage attributes
diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
index 7e3cd1e..60acf96 100644
--- a/gcc/fold-const-call.c
+++ b/gcc/fold-const-call.c
@@ -1181,6 +1181,15 @@ fold_const_call (combined_fn fn, tree type, tree arg)
case CFN_REDUC_MIN:
return fold_const_reduction (type, arg, MIN_EXPR);
+ case CFN_REDUC_AND:
+ return fold_const_reduction (type, arg, BIT_AND_EXPR);
+
+ case CFN_REDUC_IOR:
+ return fold_const_reduction (type, arg, BIT_IOR_EXPR);
+
+ case CFN_REDUC_XOR:
+ return fold_const_reduction (type, arg, BIT_XOR_EXPR);
+
default:
return fold_const_call_1 (fn, type, arg);
}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 925a230..ef79b62 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -127,6 +127,12 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MAX, ECF_CONST | ECF_NOTHROW, first,
reduc_smax_scal, reduc_umax_scal, unary)
DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MIN, ECF_CONST | ECF_NOTHROW, first,
reduc_smin_scal, reduc_umin_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_AND, ECF_CONST | ECF_NOTHROW,
+ reduc_and_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_IOR, ECF_CONST | ECF_NOTHROW,
+ reduc_ior_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_XOR, ECF_CONST | ECF_NOTHROW,
+ reduc_xor_scal, unary)
/* Unary math functions. */
DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index ec5f5f5..035c8e9 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -292,6 +292,9 @@ OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")
OPTAB_D (reduc_plus_scal_optab, "reduc_plus_scal_$a")
OPTAB_D (reduc_umax_scal_optab, "reduc_umax_scal_$a")
OPTAB_D (reduc_umin_scal_optab, "reduc_umin_scal_$a")
+OPTAB_D (reduc_and_scal_optab, "reduc_and_scal_$a")
+OPTAB_D (reduc_ior_scal_optab, "reduc_ior_scal_$a")
+OPTAB_D (reduc_xor_scal_optab, "reduc_xor_scal_$a")
OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d3ec83c..76f3c8a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -2,6 +2,22 @@
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
+ * lib/target-supports.exp (check_effective_target_vect_logical_reduc):
+ New proc.
+ * gcc.dg/vect/vect-reduc-or_1.c: Also run for vect_logical_reduc
+ and add an associated scan-dump test. Prevent vectorization
+ of the first two loops.
+ * gcc.dg/vect/vect-reduc-or_2.c: Likewise.
+ * gcc.target/aarch64/sve/reduc_1.c: Add AND, IOR and XOR reductions.
+ * gcc.target/aarch64/sve/reduc_2.c: Likewise.
+ * gcc.target/aarch64/sve/reduc_1_run.c: Likewise.
+ (INIT_VECTOR): Tweak initial value so that some bits are always set.
+ * gcc.target/aarch64/sve/reduc_2_run.c: Likewise.
+
+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
* gcc.dg/vect/pr37027.c: Remove XFAIL for variable-length vectors.
* gcc.dg/vect/pr67790.c: Likewise.
* gcc.dg/vect/slp-reduc-1.c: Likewise.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c
index aad1451..cff3f16 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c
@@ -1,4 +1,4 @@
-/* { dg-require-effective-target whole_vector_shift } */
+/* { dg-do run { target { whole_vector_shift || vect_logical_reduc } } } */
/* Write a reduction loop to be reduced using vector shifts. */
@@ -24,17 +24,17 @@ main (unsigned char argc, char **argv)
check_vect ();
for (i = 0; i < N; i++)
- in[i] = (i + i + 1) & 0xfd;
+ {
+ in[i] = (i + i + 1) & 0xfd;
+ asm volatile ("" ::: "memory");
+ }
for (i = 0; i < N; i++)
{
expected |= in[i];
- asm volatile ("");
+ asm volatile ("" ::: "memory");
}
- /* Prevent constant propagation of the entire loop below. */
- asm volatile ("" : : : "memory");
-
for (i = 0; i < N; i++)
sum |= in[i];
@@ -47,5 +47,5 @@ main (unsigned char argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
-
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" { target { ! vect_logical_reduc } } } } */
+/* { dg-final { scan-tree-dump "Reduce using direct vector reduction" "vect" { target vect_logical_reduc } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c
index ff3dfb2..cd1af6d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c
@@ -1,4 +1,4 @@
-/* { dg-require-effective-target whole_vector_shift } */
+/* { dg-do run { target { whole_vector_shift || vect_logical_reduc } } } */
/* Write a reduction loop to be reduced using vector shifts and folded. */
@@ -23,12 +23,15 @@ main (unsigned char argc, char **argv)
check_vect ();
for (i = 0; i < N; i++)
- in[i] = (i + i + 1) & 0xfd;
+ {
+ in[i] = (i + i + 1) & 0xfd;
+ asm volatile ("" ::: "memory");
+ }
for (i = 0; i < N; i++)
{
expected |= in[i];
- asm volatile ("");
+ asm volatile ("" ::: "memory");
}
for (i = 0; i < N; i++)
@@ -43,5 +46,5 @@ main (unsigned char argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
-
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" { target { ! vect_logical_reduc } } } } */
+/* { dg-final { scan-tree-dump "Reduce using direct vector reduction" "vect" { target vect_logical_reduc } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
index f86966b..72dc793 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
@@ -65,6 +65,46 @@ reduc_##NAME##_##TYPE (TYPE *a, int n) \
TEST_MAXMIN (DEF_REDUC_MAXMIN)
+#define DEF_REDUC_BITWISE(TYPE, NAME, BIT_OP) \
+TYPE __attribute__ ((noinline, noclone)) \
+reduc_##NAME##_##TYPE (TYPE *a, int n) \
+{ \
+ TYPE r = 13; \
+ for (int i = 0; i < n; ++i) \
+ r BIT_OP a[i]; \
+ return r; \
+}
+
+#define TEST_BITWISE(T) \
+ T (int8_t, and, &=) \
+ T (int16_t, and, &=) \
+ T (int32_t, and, &=) \
+ T (int64_t, and, &=) \
+ T (uint8_t, and, &=) \
+ T (uint16_t, and, &=) \
+ T (uint32_t, and, &=) \
+ T (uint64_t, and, &=) \
+ \
+ T (int8_t, ior, |=) \
+ T (int16_t, ior, |=) \
+ T (int32_t, ior, |=) \
+ T (int64_t, ior, |=) \
+ T (uint8_t, ior, |=) \
+ T (uint16_t, ior, |=) \
+ T (uint32_t, ior, |=) \
+ T (uint64_t, ior, |=) \
+ \
+ T (int8_t, xor, ^=) \
+ T (int16_t, xor, ^=) \
+ T (int32_t, xor, ^=) \
+ T (int64_t, xor, ^=) \
+ T (uint8_t, xor, ^=) \
+ T (uint16_t, xor, ^=) \
+ T (uint32_t, xor, ^=) \
+ T (uint64_t, xor, ^=)
+
+TEST_BITWISE (DEF_REDUC_BITWISE)
+
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
@@ -102,6 +142,12 @@ TEST_MAXMIN (DEF_REDUC_MAXMIN)
/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
@@ -133,3 +179,18 @@ TEST_MAXMIN (DEF_REDUC_MAXMIN)
/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c
index 3fcb7fb..c0fdada 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c
@@ -9,7 +9,7 @@
TYPE a[NUM_ELEMS (TYPE) + 1]; \
for (int i = 0; i < NUM_ELEMS (TYPE) + 1; i++) \
{ \
- a[i] = (i * 2) * (i & 1 ? 1 : -1); \
+ a[i] = ((i * 2) * (i & 1 ? 1 : -1) | 3); \
asm volatile ("" ::: "memory"); \
}
@@ -35,10 +35,22 @@
__builtin_abort (); \
}
+#define TEST_REDUC_BITWISE(TYPE, NAME, BIT_OP) \
+ { \
+ INIT_VECTOR (TYPE); \
+ TYPE r1 = reduc_##NAME##_##TYPE (a, NUM_ELEMS (TYPE)); \
+ volatile TYPE r2 = 13; \
+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \
+ r2 BIT_OP a[i]; \
+ if (r1 != r2) \
+ __builtin_abort (); \
+ }
+
int main ()
{
TEST_PLUS (TEST_REDUC_PLUS)
TEST_MAXMIN (TEST_REDUC_MAXMIN)
+ TEST_BITWISE (TEST_REDUC_BITWISE)
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
index adc3699..376a453 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
@@ -73,6 +73,49 @@ reduc_##NAME##_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)], \
TEST_MAXMIN (DEF_REDUC_MAXMIN)
+#define DEF_REDUC_BITWISE(TYPE,NAME,BIT_OP) \
+void __attribute__ ((noinline, noclone)) \
+reduc_##NAME##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)], \
+ TYPE *restrict r, int n) \
+{ \
+ for (int i = 0; i < n; i++) \
+ { \
+ r[i] = a[i][0]; \
+ for (int j = 0; j < NUM_ELEMS(TYPE); j++) \
+ r[i] BIT_OP a[i][j]; \
+ } \
+}
+
+#define TEST_BITWISE(T) \
+ T (int8_t, and, &=) \
+ T (int16_t, and, &=) \
+ T (int32_t, and, &=) \
+ T (int64_t, and, &=) \
+ T (uint8_t, and, &=) \
+ T (uint16_t, and, &=) \
+ T (uint32_t, and, &=) \
+ T (uint64_t, and, &=) \
+ \
+ T (int8_t, ior, |=) \
+ T (int16_t, ior, |=) \
+ T (int32_t, ior, |=) \
+ T (int64_t, ior, |=) \
+ T (uint8_t, ior, |=) \
+ T (uint16_t, ior, |=) \
+ T (uint32_t, ior, |=) \
+ T (uint64_t, ior, |=) \
+ \
+ T (int8_t, xor, ^=) \
+ T (int16_t, xor, ^=) \
+ T (int32_t, xor, ^=) \
+ T (int64_t, xor, ^=) \
+ T (uint8_t, xor, ^=) \
+ T (uint16_t, xor, ^=) \
+ T (uint32_t, xor, ^=) \
+ T (uint64_t, xor, ^=)
+
+TEST_BITWISE (DEF_REDUC_BITWISE)
+
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
@@ -104,3 +147,18 @@ TEST_MAXMIN (DEF_REDUC_MAXMIN)
/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c
index f48e348..c4a0426 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c
@@ -56,6 +56,20 @@
} \
}
+#define TEST_REDUC_BITWISE(TYPE, NAME, BIT_OP) \
+ { \
+ INIT_MATRIX (TYPE); \
+ reduc_##NAME##_##TYPE (mat, r, NROWS); \
+ for (int i = 0; i < NROWS; i++) \
+ { \
+ volatile TYPE r2 = mat[i][0]; \
+ for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \
+ r2 BIT_OP mat[i][j]; \
+ if (r[i] != r2) \
+ __builtin_abort (); \
+ } \
+ }
+
int main ()
{
TEST_PLUS (TEST_REDUC_PLUS)
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index aedb798..73dbb2c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7222,6 +7222,12 @@ proc check_effective_target_vect_call_roundf { } {
return $et_vect_call_roundf_saved($et_index)
}
+# Return 1 if the target supports AND, OR and XOR reduction.
+
+proc check_effective_target_vect_logical_reduc { } {
+ return [check_effective_target_aarch64_sve]
+}
+
# Return 1 if the target supports section-anchors
proc check_effective_target_section_anchors { } {
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 9219a0d..d679115 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2438,11 +2438,20 @@ reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn)
*reduc_fn = IFN_REDUC_PLUS;
return true;
- case MULT_EXPR:
- case MINUS_EXPR:
+ case BIT_AND_EXPR:
+ *reduc_fn = IFN_REDUC_AND;
+ return true;
+
case BIT_IOR_EXPR:
+ *reduc_fn = IFN_REDUC_IOR;
+ return true;
+
case BIT_XOR_EXPR:
- case BIT_AND_EXPR:
+ *reduc_fn = IFN_REDUC_XOR;
+ return true;
+
+ case MULT_EXPR:
+ case MINUS_EXPR:
*reduc_fn = IFN_LAST;
return true;