aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author liuhongt <hongtao.liu@intel.com> 2024-05-14 18:39:54 +0800
committer liuhongt <hongtao.liu@intel.com> 2024-05-16 08:41:01 +0800
commit 0cc0956b3bb8bcbc9196075b9073a227d799e042 (patch)
tree 147d95c51138f455c3ed676537a672d49c3b4e41
parent 25456c0e6d18ecc40215a2ad945502edbab39e88 (diff)
download gcc-0cc0956b3bb8bcbc9196075b9073a227d799e042.zip
gcc-0cc0956b3bb8bcbc9196075b9073a227d799e042.tar.gz
gcc-0cc0956b3bb8bcbc9196075b9073a227d799e042.tar.bz2
Optimize arithmetic right shift by 7 (ashiftrt) to vpcmpgtb for vector int8.
Since there is no corresponding instruction, the shift operation for vector int8 is implemented using the instructions for vector int16. For an arithmetic right shift by 7, however, each result byte is just the sign bit of the input byte replicated (0 or -1), which is the signed comparison 0 > x, so it can be emitted as vpcmpgtb against zero (or, for 64-byte vectors, as a vpmovb2m/vpmovm2b pair). gcc/ChangeLog: PR target/114514 * config/i386/i386-expand.cc (ix86_expand_vec_shift_qihi_constant): Optimize ashiftrt >> 7 to vpcmpgtb. (ix86_expand_vecop_qihi_partial): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr114514-shift.c: New test.
-rw-r--r-- gcc/config/i386/i386-expand.cc 32
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr114514-shift.c 49
2 files changed, 81 insertions, 0 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index e846a94..4c47cfe 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -24246,6 +24246,28 @@ ix86_expand_vec_shift_qihi_constant (enum rtx_code code,
return false;
gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
+
+
+ if (shift_amount == 7
+ && code == ASHIFTRT)
+ {
+ if (qimode == V16QImode
+ || qimode == V32QImode)
+ {
+ rtx zero = gen_reg_rtx (qimode);
+ emit_move_insn (zero, CONST0_RTX (qimode));
+ emit_move_insn (dest, gen_rtx_fmt_ee (GT, qimode, zero, op1));
+ }
+ else
+ {
+ gcc_assert (qimode == V64QImode);
+ rtx kmask = gen_reg_rtx (DImode);
+ emit_insn (gen_avx512bw_cvtb2maskv64qi (kmask, op1));
+ emit_insn (gen_avx512bw_cvtmask2bv64qi (dest, kmask));
+ }
+ return true;
+ }
+
/* Record sign bit. */
xor_constant = 1 << (8 - shift_amount - 1);
@@ -24356,6 +24378,16 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2)
return;
}
+ if (CONST_INT_P (op2)
+ && code == ASHIFTRT
+ && INTVAL (op2) == 7)
+ {
+ rtx zero = gen_reg_rtx (qimode);
+ emit_move_insn (zero, CONST0_RTX (qimode));
+ emit_move_insn (dest, gen_rtx_fmt_ee (GT, qimode, zero, op1));
+ return;
+ }
+
switch (code)
{
case MULT:
diff --git a/gcc/testsuite/gcc.target/i386/pr114514-shift.c b/gcc/testsuite/gcc.target/i386/pr114514-shift.c
new file mode 100644
index 0000000..cf8b32b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr114514-shift.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpxor" 4 } } */
+/* { dg-final { scan-assembler-times "vpcmpgtb" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpcmpgtb" 5 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vpmovb2m" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovm2b" 1 } } */
+
+
+typedef char v16qi __attribute__((vector_size(16))); /* 16-byte vector of char.  */
+typedef char v32qi __attribute__((vector_size(32))); /* 32-byte vector of char.  */
+typedef char v64qi __attribute__((vector_size(64))); /* 64-byte vector of char.  */
+typedef char v8qi __attribute__((vector_size(8))); /* 8-byte vector of char.  */
+typedef char v4qi __attribute__((vector_size(4))); /* 4-byte vector of char.  */
+
+v4qi
+__attribute__((noipa))
+foo1 (v4qi a)
+{
+ return a >> 7; /* Element-wise >> 7 on a 4-byte char vector; presumably exercises the partial-vector expander path -- confirm.  */
+}
+
+v8qi
+__attribute__((noipa))
+foo2 (v8qi a)
+{
+ return a >> 7; /* Element-wise >> 7 on an 8-byte char vector; presumably exercises the partial-vector expander path -- confirm.  */
+}
+
+v16qi
+__attribute__((noipa))
+foo3 (v16qi a)
+{
+ return a >> 7; /* Element-wise >> 7 on a 16-byte char vector; presumably the V16QImode compare-against-zero case -- confirm.  */
+}
+
+v32qi
+__attribute__((noipa))
+foo4 (v32qi a)
+{
+ return a >> 7; /* Element-wise >> 7 on a 32-byte char vector; presumably the V32QImode compare-against-zero case -- confirm.  */
+}
+
+v64qi
+__attribute__((noipa))
+foo5 (v64qi a)
+{
+ return a >> 7; /* Element-wise >> 7 on a 64-byte char vector; presumably the case behind the single vpmovb2m/vpmovm2b scans -- confirm.  */
+}