aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2020-06-15 13:48:45 +0800
committerliuhongt <hongtao.liu@intel.com>2020-06-17 16:03:26 +0800
commitc7199fb6e694d1a0964351200648c24c3ee97973 (patch)
tree7e65b3bf5db037efe16f199599ec519325a5d988 /gcc
parent5fc312a98e9b06eac9e865f511a327b264056d66 (diff)
downloadgcc-c7199fb6e694d1a0964351200648c24c3ee97973.zip
gcc-c7199fb6e694d1a0964351200648c24c3ee97973.tar.gz
gcc-c7199fb6e694d1a0964351200648c24c3ee97973.tar.bz2
Optimize V16QI/V32QI/V64QI shift by constant.
gcc/ChangeLog: PR target/95524 * config/i386/i386-expand.c (ix86_expand_vec_shift_qihi_constant): New function. * config/i386/i386-protos.h (ix86_expand_vec_shift_qihi_constant): Declare. * config/i386/sse.md (<shift_insn><mode>3): Optimize shift V*QImode by constant. gcc/testsuite/ChangeLog: * gcc.target/i386/avx2-shiftqihi-constant-1.c: New test. * gcc.target/i386/avx2-shiftqihi-constant-2.c: Ditto. * gcc.target/i386/avx512bw-shiftqihi-constant-1.c: Ditto. * gcc.target/i386/avx512bw-shiftqihi-constant-2.c: Ditto. * gcc.target/i386/sse2-shiftqihi-constant-1.c: Ditto. * gcc.target/i386/sse2-shiftqihi-constant-2.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/i386-expand.c99
-rw-r--r--gcc/config/i386/i386-protos.h1
-rw-r--r--gcc/config/i386/sse.md3
-rw-r--r--gcc/testsuite/gcc.target/i386/avx2-shiftqihi-constant-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx2-shiftqihi-constant-2.c62
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-2.c62
-rw-r--r--gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-2.c62
9 files changed, 381 insertions, 1 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 3a414f6..d707798 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -19532,6 +19532,105 @@ ix86_expand_vecmul_qihi (rtx dest, rtx op1, rtx op2)
return true;
}
+/* Expand a vector operation shift by constant for a V*QImode in terms of the
+ same operation on V*HImode. Return true if success. */
+bool
+ix86_expand_vec_shift_qihi_constant (enum rtx_code code, rtx dest, rtx op1, rtx op2)
+{
+ machine_mode qimode, himode;
+ unsigned int and_constant, xor_constant;
+ HOST_WIDE_INT shift_amount;
+ rtx vec_const_and, vec_const_xor;
+ rtx tmp, op1_subreg;
+ rtx (*gen_shift) (rtx, rtx, rtx);
+ rtx (*gen_and) (rtx, rtx, rtx);
+ rtx (*gen_xor) (rtx, rtx, rtx);
+ rtx (*gen_sub) (rtx, rtx, rtx);
+
+ /* Only optimize shift by constant. */
+ if (!CONST_INT_P (op2))
+ return false;
+
+ qimode = GET_MODE (dest);
+ shift_amount = INTVAL (op2);
+ /* Do nothing when shift amount greater equal 8. */
+ if (shift_amount > 7)
+ return false;
+
+ gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
+ /* Record sign bit. */
+ xor_constant = 1 << (8 - shift_amount - 1);
+
+ /* Zero upper/lower bits shift from left/right element. */
+ and_constant
+ = (code == ASHIFT ? 256 - (1 << shift_amount)
+ : (1 << (8 - shift_amount)) - 1);
+
+ switch (qimode)
+ {
+ case V16QImode:
+ himode = V8HImode;
+ gen_shift =
+ ((code == ASHIFT)
+ ? gen_ashlv8hi3
+ : (code == ASHIFTRT) ? gen_ashrv8hi3 : gen_lshrv8hi3);
+ gen_and = gen_andv16qi3;
+ gen_xor = gen_xorv16qi3;
+ gen_sub = gen_subv16qi3;
+ break;
+ case V32QImode:
+ himode = V16HImode;
+ gen_shift =
+ ((code == ASHIFT)
+ ? gen_ashlv16hi3
+ : (code == ASHIFTRT) ? gen_ashrv16hi3 : gen_lshrv16hi3);
+ gen_and = gen_andv32qi3;
+ gen_xor = gen_xorv32qi3;
+ gen_sub = gen_subv32qi3;
+ break;
+ case V64QImode:
+ himode = V32HImode;
+ gen_shift =
+ ((code == ASHIFT)
+ ? gen_ashlv32hi3
+ : (code == ASHIFTRT) ? gen_ashrv32hi3 : gen_lshrv32hi3);
+ gen_and = gen_andv64qi3;
+ gen_xor = gen_xorv64qi3;
+ gen_sub = gen_subv64qi3;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ tmp = gen_reg_rtx (himode);
+ vec_const_and = gen_reg_rtx (qimode);
+ op1_subreg = lowpart_subreg (himode, op1, qimode);
+
+ /* For ASHIFT and LSHIFTRT, perform operation like
+ vpsllw/vpsrlw $shift_amount, %op1, %dest.
+ vpand %vec_const_and, %dest. */
+ emit_insn (gen_shift (tmp, op1_subreg, op2));
+ emit_move_insn (dest, simplify_gen_subreg (qimode, tmp, himode, 0));
+ emit_move_insn (vec_const_and,
+ ix86_build_const_vector (qimode, true,
+ GEN_INT (and_constant)));
+ emit_insn (gen_and (dest, dest, vec_const_and));
+
+ /* For ASHIFTRT, perform extra operation like
+ vpxor %vec_const_xor, %dest, %dest
+ vpsubb %vec_const_xor, %dest, %dest */
+ if (code == ASHIFTRT)
+ {
+ vec_const_xor = gen_reg_rtx (qimode);
+ emit_move_insn (vec_const_xor,
+ ix86_build_const_vector (qimode, true,
+ GEN_INT (xor_constant)));
+ emit_insn (gen_xor (dest, dest, vec_const_xor));
+ emit_insn (gen_sub (dest, dest, vec_const_xor));
+ }
+ return true;
+}
+
/* Expand a vector operation CODE for a V*QImode in terms of the
same operation on V*HImode. */
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index f532049..7c2ce61 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -206,6 +206,7 @@ extern void ix86_expand_round_sse4 (rtx, rtx);
extern bool ix86_expand_vecmul_qihi (rtx, rtx, rtx);
extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
+extern bool ix86_expand_vec_shift_qihi_constant (enum rtx_code, rtx, rtx, rtx);
extern rtx ix86_split_stack_guard (void);
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index aa9fdc8..431571a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -19863,7 +19863,8 @@
gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
emit_insn (gen (operands[0], operands[1], tmp));
}
- else
+ else if (!ix86_expand_vec_shift_qihi_constant (<CODE>, operands[0],
+ operands[1], operands[2]))
ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
DONE;
})
diff --git a/gcc/testsuite/gcc.target/i386/avx2-shiftqihi-constant-1.c b/gcc/testsuite/gcc.target/i386/avx2-shiftqihi-constant-1.c
new file mode 100644
index 0000000..7206503
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-shiftqihi-constant-1.c
@@ -0,0 +1,31 @@
+/* PR target/95524 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-final { scan-assembler-times "vpand\[^\n\]*%ymm" 3 } } */
+typedef char v32qi __attribute__ ((vector_size (32)));
+typedef unsigned char v32uqi __attribute__ ((vector_size (32)));
+
+__attribute__((noipa)) v32qi
+foo_ashiftrt_256 (v32qi a)
+{
+ return a >> 2;
+}
+/* { dg-final { scan-assembler-times "vpsraw\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpxor\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubb\[^\n\]*%ymm" 1 } } */
+
+__attribute__((noipa)) v32qi
+foo_ashift_256 (v32qi a)
+{
+ return a << 7;
+}
+
+/* { dg-final { scan-assembler-times "vpsllw\[^\n\]*%ymm" 1 } } */
+
+__attribute__((noipa)) v32uqi
+foo_lshiftrt_256 (v32uqi a)
+{
+ return a >> 2;
+}
+
+/* { dg-final { scan-assembler-times "vpsrlw\[^\n\]*%ymm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-shiftqihi-constant-2.c b/gcc/testsuite/gcc.target/i386/avx2-shiftqihi-constant-2.c
new file mode 100644
index 0000000..509d5a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-shiftqihi-constant-2.c
@@ -0,0 +1,62 @@
+/* PR target/95524 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -Wno-shift-count-overflow" } */
+
+#ifndef CHECK
+#define CHECK "avx512bw-check.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512bw_test
+#endif
+
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef unsigned char v64uqi __attribute__ ((vector_size (64)));
+
+#define TEST_SHIFT(N) \
+ do \
+ { \
+ int i; \
+ for (i = 0; i < 64; i++) \
+ exp1.a[i] = op1.a[i] << N; \
+ res1.x = (__m512i) (((v64qi) op1.x) << N); \
+ if (check_union512i_b (res1, exp1.a)) \
+ abort (); \
+ \
+ for (i = 0; i < 64; i++) \
+ exp1.a[i] = op1.a[i] >> N; \
+ res1.x = (__m512i) (((v64qi) op1.x) >> N); \
+ if (check_union512i_b (res1, exp1.a)) \
+ abort (); \
+ \
+ for (i = 0; i < 64; i++) \
+ exp2.a[i] = op2.a[i] >> N; \
+ res2.x = (__m512i) (((v64uqi) op2.x >> N)); \
+ if (check_union512i_ub (res2, exp2.a)) \
+ abort (); \
+ } \
+ while (0)
+
+static void
+TEST (void)
+{
+ union512i_b op1, exp1, res1;
+ union512i_ub op2, exp2, res2;
+ for (int i = 0; i != 64; i++)
+ {
+ op2.a[i] = i * i;
+ op1.a[i] = i * i + 200 * i;
+ }
+ TEST_SHIFT (0);
+ TEST_SHIFT (1);
+ TEST_SHIFT (2);
+ TEST_SHIFT (3);
+ TEST_SHIFT (4);
+ TEST_SHIFT (5);
+ TEST_SHIFT (6);
+ TEST_SHIFT (7);
+ TEST_SHIFT (8);
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
new file mode 100644
index 0000000..78bf5d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
@@ -0,0 +1,31 @@
+/* PR target/95524 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 3 } } */
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef unsigned char v64uqi __attribute__ ((vector_size (64)));
+
+__attribute__((noipa)) v64qi
+foo_ashiftrt_512 (v64qi a)
+{
+ return a >> 2;
+}
+/* { dg-final { scan-assembler-times "vpsraw\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpxor\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubb\[^\n\]*%zmm" 1 } } */
+
+__attribute__((noipa)) v64qi
+foo_ashift_512 (v64qi a)
+{
+ return a << 7;
+}
+
+/* { dg-final { scan-assembler-times "vpsllw\[^\n\]*%zmm" 1 } } */
+
+__attribute__((noipa)) v64uqi
+foo_lshiftrt_512 (v64uqi a)
+{
+ return a >> 2;
+}
+
+/* { dg-final { scan-assembler-times "vpsrlw\[^\n\]*%zmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-2.c
new file mode 100644
index 0000000..d6f7934
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-2.c
@@ -0,0 +1,62 @@
+/* PR target/95524 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -Wno-shift-count-overflow" } */
+
+#ifndef CHECK
+#define CHECK "avx2-check.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx2_test
+#endif
+
+typedef char v32qi __attribute__ ((vector_size (32)));
+typedef unsigned char v32uqi __attribute__ ((vector_size (32)));
+
+#define TEST_SHIFT(N) \
+ do \
+ { \
+ int i; \
+ for (i = 0; i < 32; i++) \
+ exp1.a[i] = op1.a[i] << N; \
+ res1.x = (__m256i) (((v32qi) op1.x) << N); \
+ if (check_union256i_b (res1, exp1.a)) \
+ abort (); \
+ \
+ for (i = 0; i < 32; i++) \
+ exp1.a[i] = op1.a[i] >> N; \
+ res1.x = (__m256i) (((v32qi) op1.x) >> N); \
+ if (check_union256i_b (res1, exp1.a)) \
+ abort (); \
+ \
+ for (i = 0; i < 32; i++) \
+ exp2.a[i] = op2.a[i] >> N; \
+ res2.x = (__m256i) (((v32uqi) op2.x >> N)); \
+ if (check_union256i_ub (res2, exp2.a)) \
+ abort (); \
+ } \
+ while (0)
+
+static void
+TEST (void)
+{
+ union256i_b op1, exp1, res1;
+ union256i_ub op2, exp2, res2;
+ for (int i = 0; i != 32; i++)
+ {
+ op2.a[i] = i * i;
+ op1.a[i] = i * i + 200 * i;
+ }
+ TEST_SHIFT (0);
+ TEST_SHIFT (1);
+ TEST_SHIFT (2);
+ TEST_SHIFT (3);
+ TEST_SHIFT (4);
+ TEST_SHIFT (5);
+ TEST_SHIFT (6);
+ TEST_SHIFT (7);
+ TEST_SHIFT (8);
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-1.c b/gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-1.c
new file mode 100644
index 0000000..f1c68cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-1.c
@@ -0,0 +1,31 @@
+/* PR target/95524 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-final { scan-assembler-times "pand\[^\n\]*%xmm" 3 { xfail *-*-* } } } */
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef unsigned char v16uqi __attribute__ ((vector_size (16)));
+
+__attribute__((noipa)) v16qi
+foo_ashiftrt_128 (v16qi a)
+{
+ return a >> 2;
+}
+/* { dg-final { scan-assembler-times "psraw\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "pxor\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "psubb\[^\n\]*%xmm" 1 } } */
+
+__attribute__((noipa)) v16qi
+foo_ashift_128 (v16qi a)
+{
+ return a << 7;
+}
+
+/* { dg-final { scan-assembler-times "psllw\[^\n\]*%xmm" 1 { xfail *-*-* } } } */
+
+__attribute__((noipa)) v16uqi
+foo_lshiftrt_128 (v16uqi a)
+{
+ return a >> 2;
+}
+
+/* { dg-final { scan-assembler-times "psrlw\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-2.c b/gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-2.c
new file mode 100644
index 0000000..d95171f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-2.c
@@ -0,0 +1,62 @@
+/* PR target/95524 */
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2 -Wno-shift-count-overflow" } */
+
+#ifndef CHECK
+#define CHECK "sse2-check.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef unsigned char v16uqi __attribute__ ((vector_size (16)));
+
+#define TEST_SHIFT(N) \
+ do \
+ { \
+ int i; \
+ for (i = 0; i < 16; i++) \
+ exp1.a[i] = op1.a[i] << N; \
+ res1.x = (__m128i) (((v16qi) op1.x) << N); \
+ if (check_union128i_b (res1, exp1.a)) \
+ abort (); \
+ \
+ for (i = 0; i < 16; i++) \
+ exp1.a[i] = op1.a[i] >> N; \
+ res1.x = (__m128i) (((v16qi) op1.x) >> N); \
+ if (check_union128i_b (res1, exp1.a)) \
+ abort (); \
+ \
+ for (i = 0; i < 16; i++) \
+ exp2.a[i] = op2.a[i] >> N; \
+ res2.x = (__m128i) (((v16uqi) op2.x >> N)); \
+ if (check_union128i_ub (res2, exp2.a)) \
+ abort (); \
+ } \
+ while (0)
+
+static void
+TEST (void)
+{
+ union128i_b op1, exp1, res1;
+ union128i_ub op2, exp2, res2;
+ for (int i = 0; i != 16; i++)
+ {
+ op2.a[i] = i * i;
+ op1.a[i] = i * i + 200 * i;
+ }
+ TEST_SHIFT (0);
+ TEST_SHIFT (1);
+ TEST_SHIFT (2);
+ TEST_SHIFT (3);
+ TEST_SHIFT (4);
+ TEST_SHIFT (5);
+ TEST_SHIFT (6);
+ TEST_SHIFT (7);
+ TEST_SHIFT (8);
+}
+