aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2023-10-06 20:11:44 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2023-10-06 20:48:27 +0100
commitce658accc7beed19341b6847e6f257f08306a96e (patch)
tree996adfe7efff0889fbc10149b97832a7f8e9d1aa
parent2551e10038a70901f30b2168e6e3af4536748f3c (diff)
downloadgcc-ce658accc7beed19341b6847e6f257f08306a96e.zip
gcc-ce658accc7beed19341b6847e6f257f08306a96e.tar.gz
gcc-ce658accc7beed19341b6847e6f257f08306a96e.tar.bz2
i386: Implement doubleword shift left by 1 bit using add+adc.
This patch tweaks the i386 back-end's ix86_split_ashl to implement doubleword left shifts by 1 bit, using an add followed by an add-with-carry (i.e. a doubleword x+x) instead of using the x86's shld instruction. The replacement sequence both requires fewer bytes and is faster on both Intel and AMD architectures (from Agner Fog's latency tables and confirmed by my own micro-benchmarking). For the test case: __int128 foo(__int128 x) { return x << 1; } with -O2 we previously generated: foo: movq %rdi, %rax movq %rsi, %rdx shldq $1, %rdi, %rdx addq %rdi, %rax ret with this patch we now generate: foo: movq %rdi, %rax movq %rsi, %rdx addq %rdi, %rax adcq %rsi, %rdx ret This patch has been tested on x86_64-pc-linux-gnu with make bootstrap and make -k check, both with and without --target_board=unix{-m32} with no new failures. Ok for mainline? 2023-10-06 Roger Sayle <roger@nextmovesoftware.com> gcc/ChangeLog * config/i386/i386-expand.cc (ix86_split_ashl): Split shifts by one into add3_cc_overflow_1 followed by add3_carry. * config/i386/i386.md (@add<mode>3_cc_overflow_1): Renamed from "*add<mode>3_cc_overflow_1" to provide generator function. gcc/testsuite/ChangeLog * gcc.target/i386/ashldi3-2.c: New 32-bit test case. * gcc.target/i386/ashlti3-3.c: New 64-bit test case.
-rw-r--r--gcc/config/i386/i386-expand.cc12
-rw-r--r--gcc/config/i386/i386.md2
-rw-r--r--gcc/testsuite/gcc.target/i386/ashldi3-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/ashlti3-3.c10
4 files changed, 33 insertions, 1 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 9a98834..425f353 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -6342,6 +6342,18 @@ ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
if (count > half_width)
ix86_expand_ashl_const (high[0], count - half_width, mode);
}
+ else if (count == 1)
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ rtx x3 = gen_rtx_REG (CCCmode, FLAGS_REG);
+ rtx x4 = gen_rtx_LTU (mode, x3, const0_rtx);
+ half_mode = mode == DImode ? SImode : DImode;
+ emit_insn (gen_add3_cc_overflow_1 (half_mode, low[0],
+ low[0], low[0]));
+ emit_insn (gen_add3_carry (half_mode, high[0], high[0], high[0],
+ x3, x4));
+ }
else
{
gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index eef8a0e..6a5bc16 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8864,7 +8864,7 @@
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
-(define_insn "*add<mode>3_cc_overflow_1"
+(define_insn "@add<mode>3_cc_overflow_1"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SWI
diff --git a/gcc/testsuite/gcc.target/i386/ashldi3-2.c b/gcc/testsuite/gcc.target/i386/ashldi3-2.c
new file mode 100644
index 0000000..053389d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ashldi3-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -mno-stv" } */
+
+long long foo(long long x)
+{
+ return x << 1;
+}
+
+/* { dg-final { scan-assembler "adcl" } } */
+/* { dg-final { scan-assembler-not "shldl" } } */
diff --git a/gcc/testsuite/gcc.target/i386/ashlti3-3.c b/gcc/testsuite/gcc.target/i386/ashlti3-3.c
new file mode 100644
index 0000000..4f14ca0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ashlti3-3.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+
+__int128 foo(__int128 x)
+{
+ return x << 1;
+}
+
+/* { dg-final { scan-assembler "adcq" } } */
+/* { dg-final { scan-assembler-not "shldq" } } */