diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2023-10-06 20:07:53 +0100 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2023-10-06 20:07:53 +0100 |
commit | fa8c99c4a426750adb5137f601b6f96b13e0da5c (patch) | |
tree | 9af108992dd4e0e23e39a555e2f968a054437171 | |
parent | c1bc7513b1d76812752f70cc2ef7e0173dfb0014 (diff) | |
download | gcc-fa8c99c4a426750adb5137f601b6f96b13e0da5c.zip gcc-fa8c99c4a426750adb5137f601b6f96b13e0da5c.tar.gz gcc-fa8c99c4a426750adb5137f601b6f96b13e0da5c.tar.bz2 |
i386: Split lea into shorter left shift by 2 or 3 bits with -Oz.
This patch avoids long lea instructions for performing x<<2 and x<<3
by splitting them into shorter sal and move (or xchg) instructions.
Because this increases the number of instructions, but reduces the
total size, it's suitable for -Oz (but not -Os).
The impact can be seen in the new test case:
int foo(int x) { return x<<2; }
int bar(int x) { return x<<3; }
long long fool(long long x) { return x<<2; }
long long barl(long long x) { return x<<3; }
where with -O2 we generate:
foo: lea 0x0(,%rdi,4),%eax // 7 bytes
retq
bar: lea 0x0(,%rdi,8),%eax // 7 bytes
retq
fool: lea 0x0(,%rdi,4),%rax // 8 bytes
retq
barl: lea 0x0(,%rdi,8),%rax // 8 bytes
retq
and with -Oz we now generate:
foo: xchg %eax,%edi // 1 byte
shl $0x2,%eax // 3 bytes
retq
bar: xchg %eax,%edi // 1 byte
shl $0x3,%eax // 3 bytes
retq
fool: xchg %rax,%rdi // 2 bytes
shl $0x2,%rax // 4 bytes
retq
barl: xchg %rax,%rdi // 2 bytes
shl $0x3,%rax // 4 bytes
retq
Over the entirety of the CSiBE code size benchmark this saves 1347
bytes (0.037%) for x86_64, and 1312 bytes (0.036%) with -m32.
Conveniently, there's already a backend function in i386.cc for
deciding whether to split an lea into its component instructions,
ix86_avoid_lea_for_addr; all that's required is an additional
clause checking for -Oz (i.e. optimize_size > 1).
2023-10-06 Roger Sayle <roger@nextmovesoftware.com>
Uros Bizjak <ubizjak@gmail.com>
gcc/ChangeLog
* config/i386/i386.cc (ix86_avoid_lea_for_addr): Split LEAs used
to perform left shifts into shorter instructions with -Oz.
gcc/testsuite/ChangeLog
* gcc.target/i386/lea-2.c: New test case.
-rw-r--r-- | gcc/config/i386/i386.cc | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/lea-2.c | 7 |
2 files changed, 14 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 477e6ce..9557bff 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -15543,6 +15543,13 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[]) && (regno0 == regno1 || regno0 == regno2)) return true; + /* Split with -Oz if the encoding requires fewer bytes. */ + if (optimize_size > 1 + && parts.scale > 1 + && !parts.base + && (!parts.disp || parts.disp == const0_rtx)) + return true; + /* Check we need to optimize. */ if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun)) return false; diff --git a/gcc/testsuite/gcc.target/i386/lea-2.c b/gcc/testsuite/gcc.target/i386/lea-2.c new file mode 100644 index 0000000..e9f12d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/lea-2.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-Oz" } */ +int foo(int x) { return x<<2; } +int bar(int x) { return x<<3; } +long long fool(long long x) { return x<<2; } +long long barl(long long x) { return x<<3; } +/* { dg-final { scan-assembler-not "lea\[lq\]" } } */ |