aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/i386.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r-- gcc/config/i386/i386.c 109
1 files changed, 77 insertions, 32 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e034fa9..c191109 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10019,30 +10019,88 @@ ix86_split_ashldi (rtx *operands, rtx scratch)
emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
ix86_expand_ashlsi3_const (low[0], count);
}
+ return;
}
- else
+
+ split_di (operands, 1, low, high);
+
+ if (operands[1] == const1_rtx)
{
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
+ /* Assuming we've chosen QImode-capable registers, then 1LL << N
+ can be done with two 32-bit shifts, no branches, no cmoves. */
+ if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
+ {
+ rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
- split_di (operands, 1, low, high);
+ ix86_expand_clear (low[0]);
+ ix86_expand_clear (high[0]);
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+
+ d = gen_lowpart (QImode, low[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_EQ (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
- emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
- emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
+ d = gen_lowpart (QImode, high[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_NE (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
+ }
- if (TARGET_CMOVE && (! no_new_pseudos || scratch))
+ /* Otherwise, we can get the same results by manually performing
+ a bit extract operation on bit 5, and then performing the two
+ shifts. The two methods of getting 0/1 into low/high are exactly
+ the same size. Avoiding the shift in the bit extract case helps
+ pentium4 a bit; no one else seems to care much either way. */
+ else
{
- if (! no_new_pseudos)
- scratch = force_reg (SImode, const0_rtx);
+ rtx x;
+
+ if (TARGET_PARTIAL_REG_STALL && !optimize_size)
+ x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
else
- emit_move_insn (scratch, const0_rtx);
+ x = gen_lowpart (SImode, operands[2]);
+ emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
- emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
- scratch));
+ emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
+ emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
+ emit_move_insn (low[0], high[0]);
+ emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
}
+
+ emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
+ emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
+ return;
+ }
+
+ if (operands[1] == constm1_rtx)
+ {
+ /* For -1LL << N, we can avoid the shld instruction, because we
+ know that we're shifting 0...31 ones into a -1. */
+ emit_move_insn (low[0], constm1_rtx);
+ if (optimize_size)
+ emit_move_insn (high[0], low[0]);
else
- emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
+ emit_move_insn (high[0], constm1_rtx);
}
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ split_di (operands, 1, low, high);
+ emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
+ }
+
+ emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ ix86_expand_clear (scratch);
+ emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
+ }
+ else
+ emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}
void
@@ -10066,15 +10124,8 @@ ix86_split_ashrdi (rtx *operands, rtx scratch)
else if (count >= 32)
{
emit_move_insn (low[0], high[1]);
-
- if (! reload_completed)
- emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
- else
- {
- emit_move_insn (high[0], low[0]);
- emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
- }
-
+ emit_move_insn (high[0], low[0]);
+ emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
if (count > 32)
emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
}
@@ -10096,10 +10147,8 @@ ix86_split_ashrdi (rtx *operands, rtx scratch)
emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
- if (TARGET_CMOVE && (! no_new_pseudos || scratch))
+ if (TARGET_CMOVE && scratch)
{
- if (! no_new_pseudos)
- scratch = gen_reg_rtx (SImode);
emit_move_insn (scratch, high[0]);
emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
@@ -10124,7 +10173,7 @@ ix86_split_lshrdi (rtx *operands, rtx scratch)
if (count >= 32)
{
emit_move_insn (low[0], high[1]);
- emit_move_insn (high[0], const0_rtx);
+ ix86_expand_clear (high[0]);
if (count > 32)
emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
@@ -10148,13 +10197,9 @@ ix86_split_lshrdi (rtx *operands, rtx scratch)
emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
/* Heh. By reversing the arguments, we can reuse this pattern. */
- if (TARGET_CMOVE && (! no_new_pseudos || scratch))
+ if (TARGET_CMOVE && scratch)
{
- if (! no_new_pseudos)
- scratch = force_reg (SImode, const0_rtx);
- else
- emit_move_insn (scratch, const0_rtx);
-
+ ix86_expand_clear (scratch);
emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
scratch));
}