diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2021-07-30 22:46:32 +0100 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2021-07-30 22:46:32 +0100 |
commit | f7bf03cf69ccb7dcfa0320774aa7f3c51344dada (patch) | |
tree | f8ef93f335a6e662a6f470d17f3b5973c417b015 /gcc | |
parent | 5b2515f5ae86c21fc13021e1a8728a48e68aa7d6 (diff) | |
download | gcc-f7bf03cf69ccb7dcfa0320774aa7f3c51344dada.zip gcc-f7bf03cf69ccb7dcfa0320774aa7f3c51344dada.tar.gz gcc-f7bf03cf69ccb7dcfa0320774aa7f3c51344dada.tar.bz2 |
Decrement followed by cmov improvements.
The following patch to the x86_64 backend improves the code generated
for a decrement followed by a conditional move. The primary change is
to recognize that after subtracting one, checking the result is -1 (or
equivalently that the original value was zero) can be implemented using
the borrow/carry flag instead of requiring an explicit test instruction.
This is achieved by a new define_insn_and_split that allows combine to
split the desired sequence/composite into a *subsi_3 and *movsicc_noc.
The other change with this patch is/are a pair of peephole2 optimizations
to eliminate register-to-register moves generated during register
allocation. During reload, the compiler doesn't know that inverting
the condition of a conditional cmove can sometimes reduce register
pressure, but this is easy to tidy up during the peephole2 pass (where
swapping the order of the insn's operands performs the required
logic inversion).
Both improvements are demonstrated by the case below:
int foo(int x) {
if (x == 0)
x = 16;
else x--;
return x;
}
Before:
foo: leal -1(%rdi), %eax
testl %edi, %edi
movl $16, %edx
cmove %edx, %eax
ret
After:
foo: subl $1, %edi
movl $16, %eax
cmovnc %edi, %eax
ret
And the value of the peephole2 clean-up can be seen on its own in:
int bar(int x) {
x--;
if (x == 0)
x = 16;
return x;
}
Before:
bar: movl %edi, %eax
movl $16, %edx
subl $1, %eax
cmove %edx, %eax
ret
After:
bar: subl $1, %edi
movl $16, %eax
cmovne %edi, %eax
ret
These idioms were inspired by the source code of NIST SciMark4's
Random_nextDouble function, where the tweaks above result in
a ~1% improvement in the MonteCarlo benchmark kernel.
2021-07-30 Roger Sayle <roger@nextmovesoftware.com>
Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog
* config/i386/i386.md (*dec_cmov<mode>): New define_insn_and_split
to generate a conditional move using the carry flag after sub $1.
(peephole2): Eliminate a register-to-register move by inverting
the condition of a conditional move.
gcc/testsuite/ChangeLog
* gcc.target/i386/dec-cmov-1.c: New test.
* gcc.target/i386/dec-cmov-2.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.md | 87 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/dec-cmov-1.c | 105 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/dec-cmov-2.c | 57 |
3 files changed, 249 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 44ae18e..73a4953 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6756,6 +6756,29 @@ ? GEU : LTU, VOIDmode, cc, const0_rtx); }) +;; Help combine use borrow flag to test for -1 after dec (add $-1). +(define_insn_and_split "*dec_cmov<mode>" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (if_then_else:SWI248 + (match_operator 1 "bt_comparison_operator" + [(match_operand:SWI248 2 "register_operand" "0") (const_int 0)]) + (plus:SWI248 (match_dup 2) (const_int -1)) + (match_operand:SWI248 3 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMOVE" + "#" + "&& reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) (const_int 1))) + (set (match_dup 0) (minus:SWI248 (match_dup 2) (const_int 1)))]) + (set (match_dup 0) + (if_then_else:SWI248 (match_dup 4) (match_dup 0) (match_dup 3)))] +{ + rtx cc = gen_rtx_REG (CCCmode, FLAGS_REG); + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE + ? GEU : LTU, VOIDmode, cc, const0_rtx); +}) + (define_insn "*subsi_3_zext" [(set (reg FLAGS_REG) (compare (match_operand:SI 1 "register_operand" "0") @@ -19182,6 +19205,70 @@ gcc_unreachable (); }) +;; Eliminate a reg-reg mov by inverting the condition of a cmov (#1). +;; mov r0,r1; dec r0; mov r2,r3; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1 +(define_peephole2 + [(set (match_operand:SWI248 0 "register_operand") + (match_operand:SWI248 1 "register_operand")) + (parallel [(set (reg FLAGS_REG) (match_operand 5)) + (set (match_dup 0) (match_operand:SWI248 6))]) + (set (match_operand:SWI248 2 "register_operand") + (match_operand:SWI248 3)) + (set (match_dup 0) + (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_dup 0) + (match_dup 2)))] + "TARGET_CMOVE + && REGNO (operands[2]) != REGNO (operands[0]) + && REGNO (operands[2]) != REGNO (operands[1]) + && peep2_reg_dead_p (1, operands[1]) + && peep2_reg_dead_p (4, operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + [(parallel [(set (match_dup 7) (match_dup 8)) + (set (match_dup 1) (match_dup 9))]) + (set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (if_then_else:SWI248 (match_dup 4) + (match_dup 1) + (match_dup 0)))] +{ + operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0)); + operands[8] = replace_rtx (operands[5], operands[0], operands[1]); + operands[9] = replace_rtx (operands[6], operands[0], operands[1]); +}) + +;; Eliminate a reg-reg mov by inverting the condition of a cmov (#2). +;; mov r2,r3; mov r0,r1; dec r0; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1 +(define_peephole2 + [(set (match_operand:SWI248 2 "register_operand") + (match_operand:SWI248 3)) + (set (match_operand:SWI248 0 "register_operand") + (match_operand:SWI248 1 "register_operand")) + (parallel [(set (reg FLAGS_REG) (match_operand 5)) + (set (match_dup 0) (match_operand:SWI248 6))]) + (set (match_dup 0) + (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_dup 0) + (match_dup 2)))] + "TARGET_CMOVE + && REGNO (operands[2]) != REGNO (operands[0]) + && REGNO (operands[2]) != REGNO (operands[1]) + && peep2_reg_dead_p (2, operands[1]) + && peep2_reg_dead_p (4, operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + [(parallel [(set (match_dup 7) (match_dup 8)) + (set (match_dup 1) (match_dup 9))]) + (set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (if_then_else:SWI248 (match_dup 4) + (match_dup 1) + (match_dup 0)))] +{ + operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (2)), 0, 0)); + operands[8] = replace_rtx (operands[5], operands[0], operands[1]); + operands[9] = replace_rtx (operands[6], operands[0], operands[1]); +}) + (define_expand "mov<mode>cc" [(set (match_operand:X87MODEF 0 "register_operand") (if_then_else:X87MODEF diff --git a/gcc/testsuite/gcc.target/i386/dec-cmov-1.c b/gcc/testsuite/gcc.target/i386/dec-cmov-1.c new file mode 100644 index 0000000..0eae63f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/dec-cmov-1.c @@ -0,0 +1,105 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-march=pentiumpro -mregparm=3" { target ia32 } } */ + +int foo_m1(int x) +{ + x--; + if (x == 0) + x = 16; + return x; +} + +int foo_m2(int x) +{ + x -= 2; + if (x == 0) + x = 16; + return x; +} + +int foo_p1(int x) +{ + x++; + if (x == 0) + x = 16; + return x; +} + +int foo_p2(int x) +{ + x += 2; + if (x == 0) + x = 16; + return x; +} + + +#ifdef __x86_64__ +long long fool_m1(long long x) +{ + x--; + if (x == 0) + x = 16; + return x; +} + +long long fool_m2(long long x) +{ + x -= 2; + if (x == 0) + x = 16; + return x; +} + +long long fool_p1(long long x) +{ + x++; + if (x == 0) + x = 16; + return x; +} + +long long fool_p2(long long x) +{ + x += 2; + if (x == 0) + x = 16; + return x; +} +#endif /* __X86_64__ */ + +short foos_m1(short x) +{ + x--; + if (x == 0) + x = 16; + return x; +} + +short foos_m2(short x) +{ + x -= 2; + if (x == 0) + x = 16; + return x; +} + +short foos_p1(short x) +{ + x++; + if (x == 0) + x = 16; + return x; +} + +short foos_p2(short x) +{ + x += 2; + if (x == 0) + x = 16; + return x; +} + +/* { dg-final { scan-assembler-not "mov(l|q)\[ \\t\]*%(e|r)(cx|di), %(e|r)ax" } } */ + diff --git a/gcc/testsuite/gcc.target/i386/dec-cmov-2.c b/gcc/testsuite/gcc.target/i386/dec-cmov-2.c new file mode 100644 index 0000000..115db99 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/dec-cmov-2.c @@ -0,0 +1,57 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-march=pentiumpro -mregparm=3" { target ia32 } } */ + +int foo(int x) +{ + x--; + if (x == -1) + x = 16; + return x; +} + +int bar(int x) +{ + if (x == 0) + x = 16; + else x--; + return x; +} + +#ifdef __x86_64__ +long long fool(long long x) +{ + x--; + if (x == -1) + x = 16; + return x; +} + +long long barl(long long x) +{ + if (x == 0) + x = 16; + else x--; + return x; +} +#endif + +short foos(short x) +{ + x--; + if (x == -1) + x = 16; + return x; +} + +short bars(short x) +{ + if (x == 0) + x = 16; + else x--; + return x; +} + +/* { dg-final { scan-assembler-not "lea(l|q)" } } */ +/* { dg-final { scan-assembler-not "test(l|q|w)" } } */ + |