aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
authorUros Bizjak <ubizjak@gmail.com>2017-04-23 09:25:30 +0200
committerUros Bizjak <uros@gcc.gnu.org>2017-04-23 09:25:30 +0200
commit2eb8a34363025d04482a798dec1c885e1e3a3803 (patch)
treeb5da08f631dab69d29e95297a4be8d4f01c857d2 /gcc/config/i386
parenta1687c59a105c3131f0f33be8cae4f0a7ea660cc (diff)
downloadgcc-2eb8a34363025d04482a798dec1c885e1e3a3803.zip
gcc-2eb8a34363025d04482a798dec1c885e1e3a3803.tar.gz
gcc-2eb8a34363025d04482a798dec1c885e1e3a3803.tar.bz2
re PR target/70799 (STV pass does not convert DImode shifts)
PR target/70799 * config/i386/i386.c (dimode_scalar_to_vector_candidate_p) <case ASHIFT, case LSHIFTRT>: Also consider variable shifts. Check "XEXP (src, 1)" operand here. <case PLUS, case MINUS, case IOR, case XOR, case AND>: Check "XEXP (src, 1)" operand here. (dimode_scalar_chain::make_vector_copies): Detect count register of a shift instruction. Zero extend count register from QImode to DImode to satisfy vector shift pattern count operand predicate. Substitute vector shift count operand with a DImode copy. (dimode_scalar_chain::convert_reg): Ditto, zero-extend from vector register. testsuite/ChangeLog: PR target/70799 * gcc.target/i186/pr70799-4.c: New test. From-SVN: r247082
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--gcc/config/i386/i386.c159
1 files changed, 136 insertions, 23 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 535181c..3bebb47 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2811,9 +2811,16 @@ dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
case ASHIFT:
case LSHIFTRT:
- /* FIXME: consider also variable shifts. */
- if (!CONST_INT_P (XEXP (src, 1))
- || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63))
+ if (!REG_P (XEXP (src, 1))
+ && (!SUBREG_P (XEXP (src, 1))
+ || SUBREG_BYTE (XEXP (src, 1)) != 0
+ || !REG_P (SUBREG_REG (XEXP (src, 1))))
+ && (!CONST_INT_P (XEXP (src, 1))
+ || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
+ return false;
+
+ if (GET_MODE (XEXP (src, 1)) != QImode
+ && !CONST_INT_P (XEXP (src, 1)))
return false;
break;
@@ -2826,6 +2833,10 @@ dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
&& !MEM_P (XEXP (src, 1))
&& !CONST_INT_P (XEXP (src, 1)))
return false;
+
+ if (GET_MODE (XEXP (src, 1)) != DImode
+ && !CONST_INT_P (XEXP (src, 1)))
+ return false;
break;
case NEG:
@@ -2852,12 +2863,8 @@ dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
|| !REG_P (XEXP (XEXP (src, 0), 0))))
return false;
- if ((GET_MODE (XEXP (src, 0)) != DImode
- && !CONST_INT_P (XEXP (src, 0)))
- || (GET_CODE (src) != NEG
- && GET_CODE (src) != NOT
- && GET_MODE (XEXP (src, 1)) != DImode
- && !CONST_INT_P (XEXP (src, 1))))
+ if (GET_MODE (XEXP (src, 0)) != DImode
+ && !CONST_INT_P (XEXP (src, 0)))
return false;
return true;
@@ -3407,12 +3414,17 @@ dimode_scalar_chain::compute_convert_gain ()
else if (GET_CODE (src) == ASHIFT
|| GET_CODE (src) == LSHIFTRT)
{
- gain += ix86_cost->shift_const;
if (CONST_INT_P (XEXP (src, 0)))
gain -= vector_const_cost (XEXP (src, 0));
- if (CONST_INT_P (XEXP (src, 1))
- && INTVAL (XEXP (src, 1)) >= 32)
- gain -= COSTS_N_INSNS (1);
+ if (CONST_INT_P (XEXP (src, 1)))
+ {
+ gain += ix86_cost->shift_const;
+ if (INTVAL (XEXP (src, 1)) >= 32)
+ gain -= COSTS_N_INSNS (1);
+ }
+ else
+ /* Additional gain for omitting two CMOVs. */
+ gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
}
else if (GET_CODE (src) == PLUS
|| GET_CODE (src) == MINUS
@@ -3528,15 +3540,59 @@ dimode_scalar_chain::make_vector_copies (unsigned regno)
{
rtx reg = regno_reg_rtx[regno];
rtx vreg = gen_reg_rtx (DImode);
+ bool count_reg = false;
df_ref ref;
for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
{
- rtx_insn *insn = DF_REF_INSN (ref);
+ df_ref use;
+
+ /* Detect the count register of a shift instruction. */
+ for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
+ if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
+ {
+ rtx_insn *insn = DF_REF_INSN (use);
+ rtx def_set = single_set (insn);
+
+ gcc_assert (def_set);
+
+ rtx src = SET_SRC (def_set);
+
+ if ((GET_CODE (src) == ASHIFT
+ || GET_CODE (src) == LSHIFTRT)
+ && !CONST_INT_P (XEXP (src, 1))
+ && reg_or_subregno (XEXP (src, 1)) == regno)
+ count_reg = true;
+ }
start_sequence ();
- if (TARGET_SSE4_1)
+ if (count_reg)
+ {
+ rtx qreg = gen_lowpart (QImode, reg);
+ rtx tmp = gen_reg_rtx (SImode);
+
+ if (TARGET_ZERO_EXTEND_WITH_AND
+ && optimize_function_for_speed_p (cfun))
+ {
+ emit_move_insn (tmp, const0_rtx);
+ emit_insn (gen_movstrictqi
+ (gen_lowpart (QImode, tmp), qreg));
+ }
+ else
+ emit_insn (gen_rtx_SET
+ (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));
+
+ if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
+ {
+ rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
+ emit_move_insn (slot, tmp);
+ tmp = copy_rtx (slot);
+ }
+
+ emit_insn (gen_zero_extendsidi2 (vreg, tmp));
+ }
+ else if (TARGET_SSE4_1)
{
emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
CONST0_RTX (V4SImode),
@@ -3571,22 +3627,38 @@ dimode_scalar_chain::make_vector_copies (unsigned regno)
}
rtx_insn *seq = get_insns ();
end_sequence ();
+ rtx_insn *insn = DF_REF_INSN (ref);
emit_conversion_insns (seq, insn);
if (dump_file)
fprintf (dump_file,
" Copied r%d to a vector register r%d for insn %d\n",
- regno, REGNO (vreg), DF_REF_INSN_UID (ref));
+ regno, REGNO (vreg), INSN_UID (insn));
}
for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
{
- replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg);
+ rtx_insn *insn = DF_REF_INSN (ref);
+ if (count_reg)
+ {
+ rtx def_set = single_set (insn);
+ gcc_assert (def_set);
+
+ rtx src = SET_SRC (def_set);
+
+ if ((GET_CODE (src) == ASHIFT
+ || GET_CODE (src) == LSHIFTRT)
+ && !CONST_INT_P (XEXP (src, 1))
+ && reg_or_subregno (XEXP (src, 1)) == regno)
+ XEXP (src, 1) = vreg;
+ }
+ else
+ replace_with_subreg_in_insn (insn, reg, vreg);
if (dump_file)
fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
- regno, REGNO (vreg), DF_REF_INSN_UID (ref));
+ regno, REGNO (vreg), INSN_UID (insn));
}
}
@@ -3677,11 +3749,52 @@ dimode_scalar_chain::convert_reg (unsigned regno)
{
if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
{
- rtx def_set = single_set (DF_REF_INSN (ref));
- if (!MEM_P (SET_DEST (def_set))
- || !REG_P (SET_SRC (def_set)))
- replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg);
- bitmap_clear_bit (conv, DF_REF_INSN_UID (ref));
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ rtx def_set = single_set (insn);
+ gcc_assert (def_set);
+
+ rtx src = SET_SRC (def_set);
+ rtx dst = SET_DEST (def_set);
+
+ if ((GET_CODE (src) == ASHIFT
+ || GET_CODE (src) == LSHIFTRT)
+ && !CONST_INT_P (XEXP (src, 1))
+ && reg_or_subregno (XEXP (src, 1)) == regno)
+ {
+ rtx tmp2 = gen_reg_rtx (V2DImode);
+
+ start_sequence ();
+
+ if (TARGET_SSE4_1)
+ emit_insn (gen_sse4_1_zero_extendv2qiv2di2
+ (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
+ else
+ {
+ rtx vec_cst
+ = gen_rtx_CONST_VECTOR (V2DImode,
+ gen_rtvec (2, GEN_INT (0xff),
+ const0_rtx));
+ vec_cst
+ = validize_mem (force_const_mem (V2DImode, vec_cst));
+
+ emit_insn (gen_rtx_SET
+ (tmp2,
+ gen_rtx_AND (V2DImode,
+ gen_rtx_SUBREG (V2DImode, reg, 0),
+ vec_cst)));
+ }
+ rtx_insn *seq = get_insns ();
+ end_sequence ();
+
+ emit_insn_before (seq, insn);
+
+ XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
+ }
+ else if (!MEM_P (dst) || !REG_P (src))
+ replace_with_subreg_in_insn (insn, reg, reg);
+
+ bitmap_clear_bit (conv, INSN_UID (insn));
}
}
/* Skip debug insns and uninitialized uses. */