diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2023-01-01 17:00:28 +0000 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2023-01-01 17:00:28 +0000 |
commit | 4f1314f547f69d3a2b1f16ce301267e3bfb4e427 (patch) | |
tree | cd81f61942e259e4949991d71322753d0f895857 /gcc/config | |
parent | d64f877906111645af0217d4d54993a7b2ecbd2e (diff) | |
download | gcc-4f1314f547f69d3a2b1f16ce301267e3bfb4e427.zip gcc-4f1314f547f69d3a2b1f16ce301267e3bfb4e427.tar.gz gcc-4f1314f547f69d3a2b1f16ce301267e3bfb4e427.tar.bz2 |
Add post-reload splitter for extendditi2 on x86_64.
This is another step towards a possible solution for PR 105137.
This patch introduces a define_insn for extendditi2 that allows
DImode to TImode sign-extension to be represented in the early
RTL optimizers, before being split post-reload into the exact
same idiom as currently produced by RTL expansion.
Typically this produces identical code, so the first new
test case:
__int128 foo(long long x) { return (__int128)x; }
continues to generate:
foo: movq %rdi, %rax
cqto
ret
The "magic" is that this representation allows combine and the
other RTL optimizers to do a better job. Hence, the second
test case:
__int128 foo(__int128 a, long long b) {
a += ((__int128)b) << 70;
return a;
}
which mainline with -O2 currently generates as:
foo: movq %rsi, %rax
movq %rdx, %rcx
movq %rdi, %rsi
salq $6, %rcx
movq %rax, %rdi
xorl %eax, %eax
movq %rcx, %rdx
addq %rsi, %rax
adcq %rdi, %rdx
ret
with this patch now becomes:
foo: movl $0, %eax
salq $6, %rdx
addq %rdi, %rax
adcq %rsi, %rdx
ret
i.e. the same code for the signed and unsigned extension variants.
2023-01-01 Roger Sayle <roger@nextmovesoftware.com>
Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog
* config/i386/i386.md (extendditi2): New define_insn.
(define_split): Use DWIH mode iterator to treat new extendditi2
identically to existing extendsidi2_1.
(define_peephole2): Likewise.
(define_peephole2): Likewise.
(define_split): Likewise.
gcc/testsuite/ChangeLog
* gcc.target/i386/extendditi2-1.c: New test case.
* gcc.target/i386/extendditi2-2.c: Likewise.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386.md | 70 |
1 files changed, 45 insertions, 25 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ca40c4f..890c4c8 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4548,17 +4548,27 @@ "!TARGET_64BIT" "#") +(define_insn "extendditi2" + [(set (match_operand:TI 0 "nonimmediate_operand" "=*A,r,?r,?*o") + (sign_extend:TI (match_operand:DI 1 "register_operand" "0,0,r,r"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:DI 2 "=X,X,X,&r"))] + "TARGET_64BIT" + "#") + ;; Split the memory case. If the source register doesn't die, it will stay ;; this way, if it does die, following peephole2s take care of it. (define_split - [(set (match_operand:DI 0 "memory_operand") - (sign_extend:DI (match_operand:SI 1 "register_operand"))) + [(set (match_operand:<DWI> 0 "memory_operand") + (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand"))) (clobber (reg:CC FLAGS_REG)) - (clobber (match_operand:SI 2 "register_operand"))] + (clobber (match_operand:DWIH 2 "register_operand"))] "reload_completed" [(const_int 0)] { - split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); + rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1); + + split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]); emit_move_insn (operands[3], operands[1]); @@ -4567,12 +4577,12 @@ && REGNO (operands[1]) == AX_REG && REGNO (operands[2]) == DX_REG) { - emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31))); + emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[1], bits)); } else { emit_move_insn (operands[2], operands[1]); - emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31))); + emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[2], bits)); } emit_move_insn (operands[4], operands[2]); DONE; @@ -4581,69 +4591,79 @@ ;; Peepholes for the case where the source register does die, after ;; being split with the above splitter. 
(define_peephole2 - [(set (match_operand:SI 0 "memory_operand") - (match_operand:SI 1 "general_reg_operand")) - (set (match_operand:SI 2 "general_reg_operand") (match_dup 1)) + [(set (match_operand:DWIH 0 "memory_operand") + (match_operand:DWIH 1 "general_reg_operand")) + (set (match_operand:DWIH 2 "general_reg_operand") (match_dup 1)) (parallel [(set (match_dup 2) - (ashiftrt:SI (match_dup 2) (const_int 31))) + (ashiftrt:DWIH (match_dup 2) + (match_operand 4 "const_int_operand"))) (clobber (reg:CC FLAGS_REG))]) - (set (match_operand:SI 3 "memory_operand") (match_dup 2))] + (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))] "REGNO (operands[1]) != REGNO (operands[2]) + && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1) && peep2_reg_dead_p (2, operands[1]) && peep2_reg_dead_p (4, operands[2]) && !reg_mentioned_p (operands[2], operands[3])" [(set (match_dup 0) (match_dup 1)) - (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31))) + (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4))) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 3) (match_dup 1))]) (define_peephole2 - [(set (match_operand:SI 0 "memory_operand") - (match_operand:SI 1 "general_reg_operand")) - (parallel [(set (match_operand:SI 2 "general_reg_operand") - (ashiftrt:SI (match_dup 1) (const_int 31))) + [(set (match_operand:DWIH 0 "memory_operand") + (match_operand:DWIH 1 "general_reg_operand")) + (parallel [(set (match_operand:DWIH 2 "general_reg_operand") + (ashiftrt:DWIH (match_dup 1) + (match_operand 4 "const_int_operand"))) (clobber (reg:CC FLAGS_REG))]) - (set (match_operand:SI 3 "memory_operand") (match_dup 2))] + (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))] "/* cltd is shorter than sarl $31, %eax */ !optimize_function_for_size_p (cfun) && REGNO (operands[1]) == AX_REG && REGNO (operands[2]) == DX_REG + && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1) && peep2_reg_dead_p (2, operands[1]) && peep2_reg_dead_p (3, 
operands[2]) && !reg_mentioned_p (operands[2], operands[3])" [(set (match_dup 0) (match_dup 1)) - (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31))) + (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4))) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 3) (match_dup 1))]) ;; Extend to register case. Optimize case where source and destination ;; registers match and cases where we can use cltd. (define_split - [(set (match_operand:DI 0 "register_operand") - (sign_extend:DI (match_operand:SI 1 "register_operand"))) + [(set (match_operand:<DWI> 0 "register_operand") + (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand"))) (clobber (reg:CC FLAGS_REG)) - (clobber (match_scratch:SI 2))] + (clobber (match_scratch:DWIH 2))] "reload_completed" [(const_int 0)] { - split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); + rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1); + + split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]); if (REGNO (operands[3]) != REGNO (operands[1])) emit_move_insn (operands[3], operands[1]); + rtx src = operands[1]; + if (REGNO (operands[3]) == AX_REG) + src = operands[3]; + /* Generate a cltd if possible and doing so it profitable. */ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) - && REGNO (operands[3]) == AX_REG + && REGNO (src) == AX_REG && REGNO (operands[4]) == DX_REG) { - emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31))); + emit_insn (gen_ashr<mode>3_cvt (operands[4], src, bits)); DONE; } if (REGNO (operands[4]) != REGNO (operands[1])) emit_move_insn (operands[4], operands[1]); - emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31))); + emit_insn (gen_ashr<mode>3_cvt (operands[4], operands[4], bits)); DONE; }) |