about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jakub Jelinek <jakub@redhat.com> 2019-07-31 11:22:48 +0200
committer: Jakub Jelinek <jakub@gcc.gnu.org> 2019-07-31 11:22:48 +0200
commit: 1104467f3933078a019df1fb89149f5da39f7953 (patch)
tree: 45ba19a8b68b968ee8bf1d9e602a462501e1f814
parent: 89626179b6fe42cbd58c715808f7c6401879757f (diff)
download: gcc-1104467f3933078a019df1fb89149f5da39f7953.zip
          gcc-1104467f3933078a019df1fb89149f5da39f7953.tar.gz
          gcc-1104467f3933078a019df1fb89149f5da39f7953.tar.bz2
re PR tree-optimization/91201 (SIMD not generated for horizontal sum of bytes in array)
PR tree-optimization/91201
* config/i386/sse.md (reduc_plus_scal_v16qi): New expander.
(REDUC_PLUS_MODE): Add V32QImode for TARGET_AVX and V64QImode for
TARGET_AVX512F.
(reduc_plus_scal_<mode>): Improve formatting by introducing
a temporary.

* gcc.target/i386/sse2-pr91201.c: New test.
* gcc.target/i386/avx2-pr91201.c: New test.
* gcc.target/i386/avx512bw-pr91201.c: New test.

From-SVN: r273927
-rw-r--r--  gcc/ChangeLog  9
-rw-r--r--  gcc/config/i386/sse.md  27
-rw-r--r--  gcc/testsuite/ChangeLog  7
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx2-pr91201.c  6
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c  6
-rw-r--r--  gcc/testsuite/gcc.target/i386/sse2-pr91201.c  18
6 files changed, 70 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3e166c3..c0b8688 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2019-07-31 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/91201
+ * config/i386/sse.md (reduc_plus_scal_v16qi): New expander.
+ (REDUC_PLUS_MODE): Add V32QImode for TARGET_AVX and V64QImode for
+ TARGET_AVX512F.
+ (reduc_plus_scal_<mode>): Improve formatting by introducing
+ a temporary.
+
2019-07-31 Sudakshina Das <sudi.das@arm.com>
* config/aarch64/aarch64-builtins.c (enum aarch64_builtins): Add
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fa8f13f..56a8915 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2728,9 +2728,30 @@
DONE;
})
+(define_expand "reduc_plus_scal_v16qi"
+ [(plus:V16QI
+ (match_operand:QI 0 "register_operand")
+ (match_operand:V16QI 1 "register_operand"))]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
+ GEN_INT (64)));
+ rtx tmp2 = gen_reg_rtx (V16QImode);
+ emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
+ rtx tmp3 = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp3, CONST0_RTX (V16QImode));
+ rtx tmp4 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
+ tmp4 = gen_lowpart (V16QImode, tmp4);
+ emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
+ DONE;
+})
+
(define_mode_iterator REDUC_PLUS_MODE
[(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
- (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
+ (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
(define_expand "reduc_plus_scal_<mode>"
[(plus:REDUC_PLUS_MODE
@@ -2741,8 +2762,8 @@
rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_add<ssehalfvecmodelower>3
- (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
+ rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
+ emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
DONE;
})
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 632164a..e09f3bf 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2019-07-31 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/91201
+ * gcc.target/i386/sse2-pr91201.c: New test.
+ * gcc.target/i386/avx2-pr91201.c: New test.
+ * gcc.target/i386/avx512bw-pr91201.c: New test.
+
2019-07-31 Sudakshina Das <sudi.das@arm.com>
* gcc.target/aarch64/acle/tme.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr91201.c b/gcc/testsuite/gcc.target/i386/avx2-pr91201.c
new file mode 100644
index 0000000..4cf0a3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr91201.c
@@ -0,0 +1,6 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2 -mno-avx512f" } */
+/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
+
+#include "sse2-pr91201.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c
new file mode 100644
index 0000000..9829a5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c
@@ -0,0 +1,6 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512bw -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
+
+#include "sse2-pr91201.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr91201.c b/gcc/testsuite/gcc.target/i386/sse2-pr91201.c
new file mode 100644
index 0000000..016b187
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-pr91201.c
@@ -0,0 +1,18 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2 -mno-sse3" } */
+/* { dg-final { scan-assembler "\tpsadbw\t" } } */
+
+unsigned char bytes[1024];
+
+unsigned char
+sum (void)
+{
+ unsigned char r = 0;
+ unsigned char *p = (unsigned char *) bytes;
+ int n;
+
+ for (n = 0; n < sizeof (bytes); ++n)
+ r += p[n];
+ return r;
+}