about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2019-12-16 17:29:09 -0800
committer: Hongtao Liu <liuhongt@gcc.gnu.org> 2019-12-17 01:29:09 +0000
commit: 62dd2904f05ee752bbeff43a196b90ecd7b94616 (patch)
tree: 6a5a2ca8520fd7b5c84211a45d663e981d1173fc
parent: d187dab3873eb9737eb80ba02bb1ae11eae4e133 (diff)
download: gcc-62dd2904f05ee752bbeff43a196b90ecd7b94616.zip
          gcc-62dd2904f05ee752bbeff43a196b90ecd7b94616.tar.gz
          gcc-62dd2904f05ee752bbeff43a196b90ecd7b94616.tar.bz2
Use add for a = a + b and a = b + a when possible.

Since except for Bonnell,

    01 fb       add    %edi,%ebx

is faster and shorter than

    8d 1c 1f    lea    (%rdi,%rbx,1),%ebx

we should use add for a = a + b and a = b + a when possible if not
optimizing for Bonnell.

Tested on x86-64.

2019-12-17  H.J. Lu  <hjl.tools@gmail.com>

gcc/

    PR target/92807
    * config/i386/i386.c (ix86_lea_outperforms): Check !TARGET_BONNELL.
    (ix86_avoid_lea_for_addr): When not optimizing for Bonnell, use add
    for a = a + b and a = b + a.

gcc/testsuite/

    PR target/92807
    * gcc.target/i386/pr92807-1.c: New test.

From-SVN: r279451

-rw-r--r--  gcc/ChangeLog  7
-rw-r--r--  gcc/config/i386/i386.c  27
-rw-r--r--  gcc/testsuite/ChangeLog  5
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr92807-1.c  11
4 files changed, 41 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 26dd711..9a37d82 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-12-17 H.J. Lu <hjl.tools@gmail.com>
+
+ PR target/92807
+ * config/i386/i386.c (ix86_lea_outperforms): Check !TARGET_BONNELL.
+ (ix86_avoid_lea_for_addr): When not optimizing for Bonnell, use add
+ for a = a + b and a = b + a.
+
2019-12-16 Martin Sebor <msebor@redhat.com>
PR middle-end/92952
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 551e77a..02f4d00 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14433,11 +14433,10 @@ ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
{
int dist_define, dist_use;
- /* For Silvermont if using a 2-source or 3-source LEA for
- non-destructive destination purposes, or due to wanting
- ability to use SCALE, the use of LEA is justified. */
- if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
- || TARGET_TREMONT || TARGET_INTEL)
+ /* For Atom processors newer than Bonnell, if using a 2-source or
+ 3-source LEA for non-destructive destination purposes, or due to
+ wanting ability to use SCALE, the use of LEA is justified. */
+ if (!TARGET_BONNELL)
{
if (has_scale)
return true;
@@ -14572,10 +14571,6 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
struct ix86_address parts;
int ok;
- /* Check we need to optimize. */
- if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
- return false;
-
/* The "at least two components" test below might not catch simple
move or zero extension insns if parts.base is non-NULL and parts.disp
is const0_rtx as the only components in the address, e.g. if the
@@ -14612,6 +14607,20 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
if (parts.index)
regno2 = true_regnum (parts.index);
+ /* Use add for a = a + b and a = b + a since it is faster and shorter
+ than lea for most processors. For the processors like BONNELL, if
+ the destination register of LEA holds an actual address which will
+ be used soon, LEA is better and otherwise ADD is better. */
+ if (!TARGET_BONNELL
+ && parts.scale == 1
+ && (!parts.disp || parts.disp == const0_rtx)
+ && (regno0 == regno1 || regno0 == regno2))
+ return true;
+
+ /* Check we need to optimize. */
+ if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
+ return false;
+
split_cost = 0;
/* Compute how many cycles we will add to execution time
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7272aba..4142c50 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-12-17 H.J. Lu <hjl.tools@gmail.com>
+
+ PR target/92807
+ * gcc.target/i386/pr92807-1.c: New test.
+
2019-12-16 Jozef Lawrynowicz <jozef.l@mittosystems.com>
* g++.dg/init/dso_handle1.C: Require cxa_atexit support.
diff --git a/gcc/testsuite/gcc.target/i386/pr92807-1.c b/gcc/testsuite/gcc.target/i386/pr92807-1.c
new file mode 100644
index 0000000..00f9293
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92807-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned int
+abs2 (unsigned int a)
+{
+ unsigned int s = ((a>>15)&0x10001)*0xffff;
+ return (a+s)^s;
+}
+
+/* { dg-final { scan-assembler-not "leal" } } */