aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2021-11-29 10:45:11 +0000
committerRoger Sayle <roger@nextmovesoftware.com>2021-11-29 10:48:06 +0000
commita5d269f0c1cda545a86da960e8989bea862dd75e (patch)
treedc3307b64b09747b2572afa6a88dacea79dfd275
parenta3b31fe3692894e80de16b4059a89a309e409687 (diff)
downloadgcc-a5d269f0c1cda545a86da960e8989bea862dd75e.zip
gcc-a5d269f0c1cda545a86da960e8989bea862dd75e.tar.gz
gcc-a5d269f0c1cda545a86da960e8989bea862dd75e.tar.bz2
x86_64: Improved V1TImode rotations by non-constant amounts.
This patch builds on the recent improvements to TImode rotations (and Jakub's fixes to shldq/shrdq patterns). Now that expanding a TImode rotation can never fail, it is safe to allow general_operand constraints on the QImode shift amounts in rotlv1ti3 and rotrv1ti3 patterns. I've also made an additional tweak to ix86_expand_v1ti_to_ti to use vec_extract via V2DImode, which avoids using memory and takes advantage of vpextrq on recent hardware.

For the following test case:

typedef unsigned __int128 uv1ti __attribute__ ((__vector_size__ (16)));
uv1ti rotr(uv1ti x, unsigned int i) { return (x >> i) | (x << (128-i)); }

GCC with -O2 -mavx2 would previously generate:

rotr:   vmovdqa %xmm0, -24(%rsp)
        movq    -16(%rsp), %rdx
        movl    %edi, %ecx
        xorl    %esi, %esi
        movq    -24(%rsp), %rax
        shrdq   %rdx, %rax
        shrq    %cl, %rdx
        testb   $64, %dil
        cmovne  %rdx, %rax
        cmovne  %rsi, %rdx
        negl    %ecx
        xorl    %edi, %edi
        andl    $127, %ecx
        vmovq   %rax, %xmm2
        movq    -24(%rsp), %rax
        vpinsrq $1, %rdx, %xmm2, %xmm1
        movq    -16(%rsp), %rdx
        shldq   %rax, %rdx
        salq    %cl, %rax
        testb   $64, %cl
        cmovne  %rax, %rdx
        cmovne  %rdi, %rax
        vmovq   %rax, %xmm3
        vpinsrq $1, %rdx, %xmm3, %xmm0
        vpor    %xmm1, %xmm0, %xmm0
        ret

with this patch, we now generate:

rotr:   movl    %edi, %ecx
        vpextrq $1, %xmm0, %rax
        vmovq   %xmm0, %rdx
        shrdq   %rax, %rdx
        vmovq   %xmm0, %rsi
        shrdq   %rsi, %rax
        andl    $64, %ecx
        movq    %rdx, %rsi
        cmovne  %rax, %rsi
        cmove   %rax, %rdx
        vmovq   %rsi, %xmm0
        vpinsrq $1, %rdx, %xmm0, %xmm0
        ret

2021-11-29  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386-expand.c (ix86_expand_v1ti_to_ti): Perform the conversion via V2DImode using vec_extractv2didi on TARGET_SSE2.
        * config/i386/sse.md (rotlv1ti3, rotrv1ti3): Change constraint on QImode shift amounts from const_int_operand to general_operand.

gcc/testsuite/ChangeLog
        * gcc.target/i386/sse2-v1ti-rotate.c: New test case.
-rw-r--r--gcc/config/i386/i386-expand.c12
-rw-r--r--gcc/config/i386/sse.md4
-rw-r--r--gcc/testsuite/gcc.target/i386/sse2-v1ti-rotate.c11
3 files changed, 24 insertions, 3 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 0d5d1a0..354a9a7 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -6162,7 +6162,17 @@ static rtx
ix86_expand_v1ti_to_ti (rtx x)
{
rtx result = gen_reg_rtx (TImode);
- emit_move_insn (result, gen_lowpart (TImode, x));
+ if (TARGET_SSE2)
+ {
+ rtx temp = gen_reg_rtx (V2DImode);
+ emit_move_insn (temp, gen_lowpart (V2DImode, x));
+ rtx lo = gen_lowpart (DImode, result);
+ emit_insn (gen_vec_extractv2didi (lo, temp, const0_rtx));
+ rtx hi = gen_highpart (DImode, result);
+ emit_insn (gen_vec_extractv2didi (hi, temp, const1_rtx));
+ }
+ else
+ emit_move_insn (result, gen_lowpart (TImode, x));
return result;
}
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b371b14..b6d03b8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15296,7 +15296,7 @@
[(set (match_operand:V1TI 0 "register_operand")
(rotate:V1TI
(match_operand:V1TI 1 "register_operand")
- (match_operand:QI 2 "const_int_operand")))]
+ (match_operand:QI 2 "general_operand")))]
"TARGET_SSE2 && TARGET_64BIT"
{
ix86_expand_v1ti_rotate (ROTATE, operands);
@@ -15307,7 +15307,7 @@
[(set (match_operand:V1TI 0 "register_operand")
(rotatert:V1TI
(match_operand:V1TI 1 "register_operand")
- (match_operand:QI 2 "const_int_operand")))]
+ (match_operand:QI 2 "general_operand")))]
"TARGET_SSE2 && TARGET_64BIT"
{
ix86_expand_v1ti_rotate (ROTATERT, operands);
diff --git a/gcc/testsuite/gcc.target/i386/sse2-v1ti-rotate.c b/gcc/testsuite/gcc.target/i386/sse2-v1ti-rotate.c
new file mode 100644
index 0000000..b4b2814
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-v1ti-rotate.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+typedef unsigned __int128 uv1ti __attribute__ ((__vector_size__ (16)));
+
+uv1ti rotr(uv1ti x, unsigned int i) { return (x >> i) | (x << (128-i)); }
+uv1ti rotl(uv1ti x, unsigned int i) { return (x << i) | (x >> (128-i)); }
+
+/* { dg-final { scan-assembler-not "shrq" } } */
+/* { dg-final { scan-assembler-not "salq" } } */