diff options
author | Jakub Jelinek <jakub@redhat.com> | 2019-07-31 11:22:48 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2019-07-31 11:22:48 +0200 |
commit | 1104467f3933078a019df1fb89149f5da39f7953 (patch) | |
tree | 45ba19a8b68b968ee8bf1d9e602a462501e1f814 /gcc | |
parent | 89626179b6fe42cbd58c715808f7c6401879757f (diff) | |
download | gcc-1104467f3933078a019df1fb89149f5da39f7953.zip gcc-1104467f3933078a019df1fb89149f5da39f7953.tar.gz gcc-1104467f3933078a019df1fb89149f5da39f7953.tar.bz2 |
re PR tree-optimization/91201 (SIMD not generated for horizontal sum of bytes in array)
PR tree-optimization/91201
* config/i386/sse.md (reduc_plus_scal_v16qi): New expander.
(REDUC_PLUS_MODE): Add V32QImode for TARGET_AVX and V64QImode for
TARGET_AVX512F.
(reduc_plus_scal_<mode>): Improve formatting by introducing
a temporary.
* gcc.target/i386/sse2-pr91201.c: New test.
* gcc.target/i386/avx2-pr91201.c: New test.
* gcc.target/i386/avx512bw-pr91201.c: New test.
From-SVN: r273927
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 27 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx2-pr91201.c | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse2-pr91201.c | 18 |
6 files changed, 70 insertions, 3 deletions
```diff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3e166c3..c0b8688 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2019-07-31  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/91201
+	* config/i386/sse.md (reduc_plus_scal_v16qi): New expander.
+	(REDUC_PLUS_MODE): Add V32QImode for TARGET_AVX and V64QImode for
+	TARGET_AVX512F.
+	(reduc_plus_scal_<mode>): Improve formatting by introducing
+	a temporary.
+
 2019-07-31  Sudakshina Das  <sudi.das@arm.com>
 
 	* config/aarch64/aarch64-builtins.c (enum aarch64_builtins): Add
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fa8f13f..56a8915 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2728,9 +2728,30 @@
   DONE;
 })
 
+(define_expand "reduc_plus_scal_v16qi"
+ [(plus:V16QI
+    (match_operand:QI 0 "register_operand")
+    (match_operand:V16QI 1 "register_operand"))]
+ "TARGET_SSE2"
+{
+  rtx tmp = gen_reg_rtx (V1TImode);
+  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
+                                 GEN_INT (64)));
+  rtx tmp2 = gen_reg_rtx (V16QImode);
+  emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
+  rtx tmp3 = gen_reg_rtx (V16QImode);
+  emit_move_insn (tmp3, CONST0_RTX (V16QImode));
+  rtx tmp4 = gen_reg_rtx (V2DImode);
+  emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
+  tmp4 = gen_lowpart (V16QImode, tmp4);
+  emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
+  DONE;
+})
+
 (define_mode_iterator REDUC_PLUS_MODE
  [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
-  (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
+  (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+  (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
 
 (define_expand "reduc_plus_scal_<mode>"
  [(plus:REDUC_PLUS_MODE
@@ -2741,8 +2762,8 @@
   rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
   emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
   rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
-  emit_insn (gen_add<ssehalfvecmodelower>3
-    (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
+  rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
+  emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
   emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
   DONE;
 })
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 632164a..e09f3bf 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2019-07-31  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/91201
+	* gcc.target/i386/sse2-pr91201.c: New test.
+	* gcc.target/i386/avx2-pr91201.c: New test.
+	* gcc.target/i386/avx512bw-pr91201.c: New test.
+
 2019-07-31  Sudakshina Das  <sudi.das@arm.com>
 
 	* gcc.target/aarch64/acle/tme.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr91201.c b/gcc/testsuite/gcc.target/i386/avx2-pr91201.c
new file mode 100644
index 0000000..4cf0a3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr91201.c
@@ -0,0 +1,6 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2 -mno-avx512f" } */
+/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
+
+#include "sse2-pr91201.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c
new file mode 100644
index 0000000..9829a5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c
@@ -0,0 +1,6 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512bw -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
+
+#include "sse2-pr91201.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr91201.c b/gcc/testsuite/gcc.target/i386/sse2-pr91201.c
new file mode 100644
index 0000000..016b187
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-pr91201.c
@@ -0,0 +1,18 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2 -mno-sse3" } */
+/* { dg-final { scan-assembler "\tpsadbw\t" } } */
+
+unsigned char bytes[1024];
+
+unsigned char
+sum (void)
+{
+  unsigned char r = 0;
+  unsigned char *p = (unsigned char *) bytes;
+  int n;
+
+  for (n = 0; n < sizeof (bytes); ++n)
+    r += p[n];
+  return r;
+}
```