diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2023-07-10 09:04:29 +0100 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2023-07-10 09:04:29 +0100 |
commit | 4814b63c3c2326cb5d7baa63882da60ac011bd97 (patch) | |
tree | d695b58c309390bf2e3368bfc8e2505ec8ec8cd4 | |
parent | 0cafc3b6272d1dd738e8d7e66e1d8741e08f74d3 (diff) | |
download | gcc-4814b63c3c2326cb5d7baa63882da60ac011bd97.zip gcc-4814b63c3c2326cb5d7baa63882da60ac011bd97.tar.gz gcc-4814b63c3c2326cb5d7baa63882da60ac011bd97.tar.bz2 |
i386: Add AVX512 support for STV of SI/DImode rotation by constant.
Following Uros' suggestion, this patch adds support for AVX512VL's
vpro[lr][dq] instructions to the recently added scalar-to-vector (STV)
enhancements to handle DImode and SImode rotations by a constant.
For the test cases:
unsigned long long rot1(unsigned long long x) {
return (x>>1) | (x<<63);
}
void mem1(unsigned long long *p) {
*p = rot1(*p);
}
with -m32 -O2 -mavx512vl, we currently generate:
rot1: movl 4(%esp), %eax
movl 8(%esp), %edx
movl %eax, %ecx
shrdl $1, %edx, %eax
shrdl $1, %ecx, %edx
ret
mem1: movl 4(%esp), %eax
vmovq (%eax), %xmm0
vpshufd $20, %xmm0, %xmm0
vpsrlq $1, %xmm0, %xmm0
vpshufd $136, %xmm0, %xmm0
vmovq %xmm0, (%eax)
ret
with this patch, we now generate:
rot1: vmovq 4(%esp), %xmm0
vprorq $1, %xmm0, %xmm0
vmovd %xmm0, %eax
vpextrd $1, %xmm0, %edx
ret
mem1: movl 4(%esp), %eax
vmovq (%eax), %xmm0
vprorq $1, %xmm0, %xmm0
vmovq %xmm0, (%eax)
ret
2023-07-10 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
* config/i386/i386-features.cc (compute_convert_gain): Tweak
gains/costs for ROTATE/ROTATERT by integer constant on AVX512VL.
(general_scalar_chain::convert_rotate): On TARGET_AVX512F generate
avx512vl_rolv2di or avx512vl_rolv4si when appropriate.
gcc/testsuite/ChangeLog
* gcc.target/i386/avx512vl-stv-rotatedi-1.c: New test case.
-rw-r--r-- | gcc/config/i386/i386-features.cc | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512vl-stv-rotatedi-1.c | 35 |
2 files changed, 42 insertions, 1 deletions
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 2e751d1..4d69251 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -585,7 +585,9 @@ general_scalar_chain::compute_convert_gain () case ROTATE: case ROTATERT: igain += m * ix86_cost->shift_const; - if (smode == DImode) + if (TARGET_AVX512F) + igain -= ix86_cost->sse_op; + else if (smode == DImode) { int bits = INTVAL (XEXP (src, 1)); if ((bits & 0x0f) == 0) @@ -1225,6 +1227,8 @@ general_scalar_chain::convert_rotate (enum rtx_code code, rtx op0, rtx op1, emit_insn_before (pat, insn); result = gen_lowpart (V2DImode, tmp1); } + else if (TARGET_AVX512F) + result = simplify_gen_binary (code, V2DImode, op0, op1); else if (bits == 16 || bits == 48) { rtx tmp1 = gen_reg_rtx (V8HImode); @@ -1269,6 +1273,8 @@ general_scalar_chain::convert_rotate (enum rtx_code code, rtx op0, rtx op1, emit_insn_before (pat, insn); result = gen_lowpart (V4SImode, tmp1); } + else if (TARGET_AVX512F) + result = simplify_gen_binary (code, V4SImode, op0, op1); else { if (code == ROTATE) diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-stv-rotatedi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-stv-rotatedi-1.c new file mode 100644 index 0000000..2f0ead8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-stv-rotatedi-1.c @@ -0,0 +1,35 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -mavx512vl" } */ + +unsigned long long rot1(unsigned long long x) { return (x>>1) | (x<<63); } +unsigned long long rot2(unsigned long long x) { return (x>>2) | (x<<62); } +unsigned long long rot3(unsigned long long x) { return (x>>3) | (x<<61); } +unsigned long long rot4(unsigned long long x) { return (x>>4) | (x<<60); } +unsigned long long rot5(unsigned long long x) { return (x>>5) | (x<<59); } +unsigned long long rot6(unsigned long long x) { return (x>>6) | (x<<58); } +unsigned long long rot7(unsigned long long x) { return (x>>7) | (x<<57); } +unsigned long long 
rot8(unsigned long long x) { return (x>>8) | (x<<56); } +unsigned long long rot9(unsigned long long x) { return (x>>9) | (x<<55); } +unsigned long long rot10(unsigned long long x) { return (x>>10) | (x<<54); } +unsigned long long rot15(unsigned long long x) { return (x>>15) | (x<<49); } +unsigned long long rot16(unsigned long long x) { return (x>>16) | (x<<48); } +unsigned long long rot17(unsigned long long x) { return (x>>17) | (x<<47); } +unsigned long long rot20(unsigned long long x) { return (x>>20) | (x<<44); } +unsigned long long rot24(unsigned long long x) { return (x>>24) | (x<<40); } +unsigned long long rot30(unsigned long long x) { return (x>>30) | (x<<34); } +unsigned long long rot31(unsigned long long x) { return (x>>31) | (x<<33); } +unsigned long long rot32(unsigned long long x) { return (x>>32) | (x<<32); } +unsigned long long rot33(unsigned long long x) { return (x>>33) | (x<<31); } +unsigned long long rot34(unsigned long long x) { return (x>>34) | (x<<30); } +unsigned long long rot40(unsigned long long x) { return (x>>40) | (x<<24); } +unsigned long long rot42(unsigned long long x) { return (x>>42) | (x<<22); } +unsigned long long rot48(unsigned long long x) { return (x>>48) | (x<<16); } +unsigned long long rot50(unsigned long long x) { return (x>>50) | (x<<14); } +unsigned long long rot56(unsigned long long x) { return (x>>56) | (x<<8); } +unsigned long long rot58(unsigned long long x) { return (x>>58) | (x<<6); } +unsigned long long rot60(unsigned long long x) { return (x>>60) | (x<<4); } +unsigned long long rot61(unsigned long long x) { return (x>>61) | (x<<3); } +unsigned long long rot62(unsigned long long x) { return (x>>62) | (x<<2); } +unsigned long long rot63(unsigned long long x) { return (x>>63) | (x<<1); } + +/* { dg-final { scan-assembler-times "vpro\[lr\]q" 29 } } */ |