aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2024-08-15 22:02:05 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2024-08-15 22:02:05 +0100
commitb6fb4f7f651d2aa89548c5833fe2679af2638df5 (patch)
tree5e0265e427ea0a1463259ab09770a626fa28d342 /gcc
parent0f8b11968472ff12674d67fd856610646b373bd0 (diff)
downloadgcc-b6fb4f7f651d2aa89548c5833fe2679af2638df5.zip
gcc-b6fb4f7f651d2aa89548c5833fe2679af2638df5.tar.gz
gcc-b6fb4f7f651d2aa89548c5833fe2679af2638df5.tar.bz2
i386: Improve split of *extendv2di2_highpart_stv_noavx512vl.
This patch follows up on the previous patch to fix PR target/116275 by improving the code STV (ultimately) generates for highpart sign extensions like (x<<8)>>8. The arithmetic right shift is able to take advantage of the available common subexpressions from the preceding left shift.

Hence previously with -O2 -m32 -mavx -mno-avx512vl we'd generate:

        vpsllq  $8, %xmm0, %xmm0
        vpsrad  $8, %xmm0, %xmm1
        vpsrlq  $8, %xmm0, %xmm0
        vpblendw        $51, %xmm0, %xmm1, %xmm0

But with improved splitting, we now generate three instructions:

        vpslld  $8, %xmm1, %xmm0
        vpsrad  $8, %xmm0, %xmm0
        vpblendw        $51, %xmm1, %xmm0, %xmm0

This patch also implements Uros' suggestion that the pre-reload splitter could introduce a new pseudo to hold the intermediate to potentially help reload with register allocation, which applies when not performing the above optimization, i.e. on TARGET_XOP.

2024-08-15  Roger Sayle  <roger@nextmovesoftware.com>
            Uros Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog
        * config/i386/i386.md (*extendv2di2_highpart_stv_noavx512vl): Split to an improved implementation on !TARGET_XOP. On TARGET_XOP, use a new pseudo for the intermediate to simplify register allocation.

gcc/testsuite/ChangeLog
        * g++.target/i386/pr116275-2.C: New test case.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/i386.md32
-rw-r--r--gcc/testsuite/g++.target/i386/pr116275-2.C19
2 files changed, 49 insertions, 2 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index efbab2f..36108e5 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17872,10 +17872,38 @@
&& ix86_pre_reload_split ()"
"#"
"&& 1"
- [(set (match_dup 0)
+ [(set (match_dup 4)
(ashift:V2DI (match_dup 1) (match_dup 2)))
(set (match_dup 0)
- (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
+ (ashiftrt:V2DI (match_dup 4) (match_dup 2)))]
+{ /* Split preparation: without XOP, emit a V4SI shift/shift/permute sequence here; with XOP, only allocate the intermediate used by the two-shift template. */
+ if (!TARGET_XOP)
+ {
+ rtx op0 = operands[0]; /* Destination of the split. */
+ rtx op2 = operands[2]; /* Shift count, shared by the left and right shifts. */
+ rtx tmp1 = gen_reg_rtx (V4SImode); /* Input reinterpreted as four 32-bit elements. */
+ rtx tmp2 = gen_reg_rtx (V4SImode); /* tmp1 shifted left by op2. */
+ rtx tmp3 = gen_reg_rtx (V4SImode); /* tmp2 arithmetically shifted right by op2. */
+ rtx tmp4 = gen_reg_rtx (V4SImode); /* Permute result combining tmp1 and tmp3. */
+ emit_move_insn (tmp1, lowpart_subreg (V4SImode, operands[1], V2DImode)); /* View the V2DI input as V4SI. */
+ emit_insn (gen_ashlv4si3 (tmp2, tmp1, op2)); /* Per-element left shift (vpslld in the commit message's expected output). */
+ emit_insn (gen_ashrv4si3 (tmp3, tmp2, op2)); /* Per-element arithmetic right shift (vpsrad), sign-extending each element. */
+ vec_perm_builder sel (4, 4, 1);
+ sel.quick_grow (4);
+ sel[0] = 0; /* Selectors 0-3 index the first permute input (tmp1), 4-7 the second (tmp3). */
+ sel[1] = 5; /* Element 1 comes from tmp3, i.e. the sign-extended value. */
+ sel[2] = 2; /* Element 2 comes from tmp1, i.e. the unshifted input. */
+ sel[3] = 7; /* Element 3 comes from tmp3, i.e. the sign-extended value. */
+ vec_perm_indices indices(sel, 2, 4); /* Two input vectors, four elements each. */
+ bool ok = targetm.vectorize.vec_perm_const (V4SImode, V4SImode, tmp4,
+ tmp1, tmp3, indices); /* Expand the constant permute into tmp4 (vpblendw in the expected output). */
+ gcc_assert (ok); /* The code below relies on tmp4 having been set, so expansion must succeed. */
+ emit_move_insn (op0, lowpart_subreg (V2DImode, tmp4, V4SImode)); /* Reinterpret the V4SI result as V2DI and store it. */
+ DONE; /* The instructions emitted above are the whole split; the template is not used. */
+ }
+ else
+ operands[4] = gen_reg_rtx (V2DImode); /* XOP: keep the two-shift template, routing the left-shift result through a fresh pseudo (match_dup 4). */
+})
;; Rotate instructions
diff --git a/gcc/testsuite/g++.target/i386/pr116275-2.C b/gcc/testsuite/g++.target/i386/pr116275-2.C
new file mode 100644
index 0000000..98d3c19
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr116275-2.C
@@ -0,0 +1,19 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -mavx -mno-avx512vl -std=c++11" } */
+
+struct SymbolDesc push_back(SymbolDesc); /* Declared only; takes and returns a SymbolDesc by value. */
+struct SymbolDesc {
+ long long ELFLocalSymIdx; /* The only member; receives the shifted value below. */
+};
+struct Expected {
+ long long &operator*(); /* Declared only; dereference yields a reference to a long long. */
+};
+void SymbolizableObjectFileaddSymbol() { /* Reduced reproducer: performs the shift pair and passes the result on. */
+ Expected SymbolAddressOrErr;
+ long long SymbolAddress = *SymbolAddressOrErr << 8 >> 8; /* (x << 8) >> 8 on a long long: the highpart sign extension whose code generation the dg-final scans check. */
+ push_back({SymbolAddress}); /* Hands the value to the declared-only push_back so the computation is used. */
+}
+
+/* { dg-final { scan-assembler "vpslld" } } */
+/* { dg-final { scan-assembler-not "vpsllq" } } */
+/* { dg-final { scan-assembler-not "vpsrlq" } } */