aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2023-07-20 20:54:51 +0200
committer: Uros Bizjak <ubizjak@gmail.com> 2023-07-20 20:56:19 +0200
commit: b50a851eef4b70aabf28fa875d9b2a302d17b66a (patch)
tree: 8c3f51fc31de58182883d6ee23c7a1299189f532
parent: 4b8878fbf7b74ea5c3405c9f558df0517036f131 (diff)
download: gcc-b50a851eef4b70aabf28fa875d9b2a302d17b66a.zip
          gcc-b50a851eef4b70aabf28fa875d9b2a302d17b66a.tar.gz
          gcc-b50a851eef4b70aabf28fa875d9b2a302d17b66a.tar.bz2
i386: Double-word sign-extension missed-optimization [PR110717]
When sign-extending the value in a double-word register pair using shift and
ashiftrt sequence with the same count immediate value less than word width,
there is no need to shift the lower word of the value. The sign-extension
could be limited to the upper word, but we uselessly shift the lower word
with it as well:

    movq    %rdi, %rax
    movq    %rsi, %rdx
    shldq   $59, %rdi, %rdx
    salq    $59, %rax
    shrdq   $59, %rdx, %rax
    sarq    $59, %rdx
    ret

for -m64 and

    movl    4(%esp), %eax
    movl    8(%esp), %edx
    shldl   $27, %eax, %edx
    sall    $27, %eax
    shrdl   $27, %edx, %eax
    sarl    $27, %edx
    ret

for -m32.

The patch introduces a new post-reload splitter to provide the combined
ASHIFTRT/SHIFT instruction pattern. The instruction is split to a sequence
of SAL and SAR insns with the same count immediate operand:

    movq    %rsi, %rdx
    movq    %rdi, %rax
    salq    $59, %rdx
    sarq    $59, %rdx
    ret

Some complication is required to properly handle STV transform, where we
emit a sequence with DImode PSLLQ and PSRAQ insns for 32-bit AVX512VL
targets when profitable.

The patch also fixes a small oversight and enables STV transform of SImode
ASHIFTRT to PSRAD also for SSE2 targets.

    PR target/110717

gcc/ChangeLog:

    * config/i386/i386-features.cc
    (general_scalar_chain::compute_convert_gain): Calculate gain
    for extend highpart case.
    (general_scalar_chain::convert_op): Handle
    ASHIFTRT/ASHIFT combined RTX.
    (general_scalar_to_vector_candidate_p): Enable ASHIFTRT for
    SImode for SSE2 targets. Handle ASHIFTRT/ASHIFT combined RTX.
    * config/i386/i386.md (*extend<dwi>2_doubleword_highpart):
    New define_insn_and_split pattern.
    (*extendv2di2_highpart_stv): Ditto.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/pr110717.c: New test.
-rw-r--r--  gcc/config/i386/i386-features.cc          16
-rw-r--r--  gcc/config/i386/i386.md                   35
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr110717.c  21
3 files changed, 70 insertions, 2 deletions
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 4d69251..f801a8f 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -572,6 +572,9 @@ general_scalar_chain::compute_convert_gain ()
{
if (INTVAL (XEXP (src, 1)) >= 32)
igain += ix86_cost->add;
+ /* Gain for extend highpart case. */
+ else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
+ igain += ix86_cost->shift_const - ix86_cost->sse_op;
else
igain += ix86_cost->shift_const;
}
@@ -951,7 +954,8 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
*op = copy_rtx_if_shared (*op);
- if (GET_CODE (*op) == NOT)
+ if (GET_CODE (*op) == NOT
+ || GET_CODE (*op) == ASHIFT)
{
convert_op (&XEXP (*op, 0), insn);
PUT_MODE (*op, vmode);
@@ -2120,7 +2124,7 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
switch (GET_CODE (src))
{
case ASHIFTRT:
- if (!TARGET_AVX512VL)
+ if (mode == DImode && !TARGET_AVX512VL)
return false;
/* FALLTHRU */
@@ -2131,6 +2135,14 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
if (!CONST_INT_P (XEXP (src, 1))
|| !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
return false;
+
+ /* Check for extend highpart case. */
+ if (mode != DImode
+ || GET_CODE (src) != ASHIFTRT
+ || GET_CODE (XEXP (src, 0)) != ASHIFT)
+ break;
+
+ src = XEXP (src, 0);
break;
case SMAX:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8c54aa5..4db210c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15292,6 +15292,41 @@
(const_string "0")
(const_string "*")))
(set_attr "mode" "QI")])
+
+(define_insn_and_split "*extend<dwi>2_doubleword_highpart"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+ (ashiftrt:<DWI>
+ (ashift:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand"))
+ (match_operand:QI 3 "const_int_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "INTVAL (operands[2]) == INTVAL (operands[3])
+ && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 4)
+ (ashift:DWIH (match_dup 4) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 4)
+ (ashiftrt:DWIH (match_dup 4) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[4]);")
+
+(define_insn_and_split "*extendv2di2_highpart_stv"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (ashiftrt:V2DI
+ (ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
+ (match_operand:QI 2 "const_int_operand"))
+ (match_operand:QI 3 "const_int_operand")))]
+ "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
+ && INTVAL (operands[2]) == INTVAL (operands[3])
+ && UINTVAL (operands[2]) < 32"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (ashift:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
;; Rotate instructions
diff --git a/gcc/testsuite/gcc.target/i386/pr110717.c b/gcc/testsuite/gcc.target/i386/pr110717.c
new file mode 100644
index 0000000..233f0ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110717.c
@@ -0,0 +1,21 @@
+/* PR target/110717 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#ifdef __SIZEOF_INT128__
+unsigned __int128
+foo (unsigned __int128 x)
+{
+ x <<= 59;
+ return ((__int128) x) >> 59;
+}
+#else
+unsigned long long
+foo (unsigned long long x)
+{
+ x <<= 27;
+ return ((long long) x) >> 27;
+}
+#endif
+
+/* { dg-final { scan-assembler-not "sh\[lr\]d" } } */