diff options
Diffstat (limited to 'gcc/config/i386/i386.c')
| -rw-r--r-- | gcc/config/i386/i386.c | 109 |
1 file changed, 77 insertions, 32 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e034fa9..c191109 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -10019,30 +10019,88 @@ ix86_split_ashldi (rtx *operands, rtx scratch) emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count))); ix86_expand_ashlsi3_const (low[0], count); } + return; } - else + + split_di (operands, 1, low, high); + + if (operands[1] == const1_rtx) { - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); + /* Assuming we've chosen a QImode capable registers, then 1LL << N + can be done with two 32-bit shifts, no branches, no cmoves. */ + if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) + { + rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); - split_di (operands, 1, low, high); + ix86_expand_clear (low[0]); + ix86_expand_clear (high[0]); + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32))); + + d = gen_lowpart (QImode, low[0]); + d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); + s = gen_rtx_EQ (QImode, flags, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, d, s)); - emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2])); - emit_insn (gen_ashlsi3 (low[0], low[0], operands[2])); + d = gen_lowpart (QImode, high[0]); + d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); + s = gen_rtx_NE (QImode, flags, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, d, s)); + } - if (TARGET_CMOVE && (! no_new_pseudos || scratch)) + /* Otherwise, we can get the same results by manually performing + a bit extract operation on bit 5, and then performing the two + shifts. The two methods of getting 0/1 into low/high are exactly + the same size. Avoiding the shift in the bit extract case helps + pentium4 a bit; no one else seems to care much either way. */ + else { - if (! 
no_new_pseudos) - scratch = force_reg (SImode, const0_rtx); + rtx x; + + if (TARGET_PARTIAL_REG_STALL && !optimize_size) + x = gen_rtx_ZERO_EXTEND (SImode, operands[2]); else - emit_move_insn (scratch, const0_rtx); + x = gen_lowpart (SImode, operands[2]); + emit_insn (gen_rtx_SET (VOIDmode, high[0], x)); - emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], - scratch)); + emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5))); + emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1))); + emit_move_insn (low[0], high[0]); + emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1))); } + + emit_insn (gen_ashlsi3 (low[0], low[0], operands[2])); + emit_insn (gen_ashlsi3 (high[0], high[0], operands[2])); + return; + } + + if (operands[1] == constm1_rtx) + { + /* For -1LL << N, we can avoid the shld instruction, because we + know that we're shifting 0...31 ones into a -1. */ + emit_move_insn (low[0], constm1_rtx); + if (optimize_size) + emit_move_insn (high[0], low[0]); else - emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); + emit_move_insn (high[0], constm1_rtx); } + else + { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_di (operands, 1, low, high); + emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2])); + } + + emit_insn (gen_ashlsi3 (low[0], low[0], operands[2])); + + if (TARGET_CMOVE && scratch) + { + ix86_expand_clear (scratch); + emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch)); + } + else + emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); } void @@ -10066,15 +10124,8 @@ ix86_split_ashrdi (rtx *operands, rtx scratch) else if (count >= 32) { emit_move_insn (low[0], high[1]); - - if (! 
reload_completed) - emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31))); - else - { - emit_move_insn (high[0], low[0]); - emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31))); - } - + emit_move_insn (high[0], low[0]); + emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31))); if (count > 32) emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32))); } @@ -10096,10 +10147,8 @@ ix86_split_ashrdi (rtx *operands, rtx scratch) emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2])); emit_insn (gen_ashrsi3 (high[0], high[0], operands[2])); - if (TARGET_CMOVE && (! no_new_pseudos || scratch)) + if (TARGET_CMOVE && scratch) { - if (! no_new_pseudos) - scratch = gen_reg_rtx (SImode); emit_move_insn (scratch, high[0]); emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31))); emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], @@ -10124,7 +10173,7 @@ ix86_split_lshrdi (rtx *operands, rtx scratch) if (count >= 32) { emit_move_insn (low[0], high[1]); - emit_move_insn (high[0], const0_rtx); + ix86_expand_clear (high[0]); if (count > 32) emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32))); @@ -10148,13 +10197,9 @@ ix86_split_lshrdi (rtx *operands, rtx scratch) emit_insn (gen_lshrsi3 (high[0], high[0], operands[2])); /* Heh. By reversing the arguments, we can reuse this pattern. */ - if (TARGET_CMOVE && (! no_new_pseudos || scratch)) + if (TARGET_CMOVE && scratch) { - if (! no_new_pseudos) - scratch = force_reg (SImode, const0_rtx); - else - emit_move_insn (scratch, const0_rtx); - + ix86_expand_clear (scratch); emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], scratch)); } |
