-rw-r--r--  gcc/ChangeLog           14
-rw-r--r--  gcc/config/i386/i386.c  77
-rw-r--r--  gcc/config/i386/sse.md  29
3 files changed, 79 insertions, 41 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3719552..2eb5259 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,19 @@
2012-06-26 Richard Henderson <rth@redhat.com>

+ * config/i386/i386.c (bdesc_args): Update. Change
+ IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI to OPTION_MASK_ISA_SSE2.
+ (IX86_BUILTIN_VEC_WIDEN_SMUL_EVEN_V4SI): New.
+ (ix86_builtin_mul_widen_even): Use it.
+ (ix86_builtin_mul_widen_odd): Relax SMUL_ODD from sse4 to sse2.
+ (ix86_expand_mul_widen_evenodd): Handle signed for sse2.
+ * config/i386/sse.md (vec_widen_<s>mult_hi_<V124_AVX2>): Allow
+ for all SSE2.
+ (vec_widen_<s>mult_lo_<V124_AVX2>): Likewise.
+ (vec_widen_<s>mult_odd_<VI4_AVX2>): Likewise. Relax from V124_AVX2.
+ (vec_widen_smult_even_v4si): New.
+
+2012-06-26 Richard Henderson <rth@redhat.com>
+
* config/i386/sse.md (mul<VI8_AVX2>3): Change from insn_and_split
to expander; move guts to ...
* config/i386/i386.c (ix86_expand_sse2_mulvxdi3): ... here. Add
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5cf230f..b96fc6e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25758,6 +25758,7 @@ enum ix86_builtins
IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI,
IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI,
IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI,
+ IX86_BUILTIN_VEC_WIDEN_SMUL_EVEN_V4SI,
IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI,
IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI,
@@ -26620,7 +26621,9 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_vw_umul_even_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_smult_even_v4si, "__builtin_ia32_vw_smul_even_v4si", IX86_BUILTIN_VEC_WIDEN_SMUL_EVEN_V4SI, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_odd_v4si, "__builtin_ia32_vw_umul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_smult_odd_v4si, "__builtin_ia32_vw_smul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
@@ -26747,7 +26750,6 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_vec_widen_smult_odd_v4si, "__builtin_ia32_vw_smul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
/* SSE4.1 */
@@ -31067,18 +31069,10 @@ ix86_builtin_mul_widen_even (tree type)
switch (TYPE_MODE (type))
{
case V4SImode:
- if (uns_p)
- {
- if (!TARGET_SSE2)
- return NULL;
- code = IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI;
- }
- else
- {
- if (!TARGET_SSE4_1)
- return NULL;
- code = IX86_BUILTIN_PMULDQ128;
- }
+ if (!TARGET_SSE2)
+ return NULL;
+ code = (uns_p ? IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI
+ : IX86_BUILTIN_VEC_WIDEN_SMUL_EVEN_V4SI);
break;
case V8SImode:
@@ -31103,18 +31097,10 @@ ix86_builtin_mul_widen_odd (tree type)
switch (TYPE_MODE (type))
{
case V4SImode:
- if (uns_p)
- {
- if (!TARGET_SSE2)
- return NULL;
- code = IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI;
- }
- else
- {
- if (!TARGET_SSE4_1)
- return NULL;
- code = IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI;
- }
+ if (!TARGET_SSE2)
+ return NULL;
+ code = (uns_p ? IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI
+ : IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI);
break;
case V8SImode:
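
The net effect of the two hook changes above is that the signed even/odd widening builtins are now handed to the vectorizer under plain SSE2. A minimal sketch of calling the new builtin directly (assuming, as for ia32 builtins generally, that it is callable from C; the typedefs and function name are illustrative, not from the patch):

typedef int v4si __attribute__ ((vector_size (16)));
typedef long long v2di __attribute__ ((vector_size (16)));

/* Signed widening multiply of elements 0 and 2 of each operand;
   before this patch the signed form required -msse4.1.  */
v2di
smul_even (v4si a, v4si b)
{
  return __builtin_ia32_vw_smul_even_v4si (a, b);
}
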
@@ -38774,12 +38760,12 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
return;
}
+
+ x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
- GEN_INT (GET_MODE_UNIT_BITSIZE (mode)), NULL,
- 1, OPTAB_DIRECT);
+ x, NULL, 1, OPTAB_DIRECT);
op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
- GEN_INT (GET_MODE_UNIT_BITSIZE (mode)), NULL,
- 1, OPTAB_DIRECT);
+ x, NULL, 1, OPTAB_DIRECT);
op1 = gen_lowpart (mode, op1);
op2 = gen_lowpart (mode, op2);
}
@@ -38801,7 +38787,38 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
x = gen_xop_pmacsdql (dest, op1, op2, x);
}
else
- gcc_unreachable ();
+ {
+ rtx s1, s2, t0, t1, t2;
+
+ /* The easiest way to implement this without PMULDQ is to go through
+ the motions as if we are performing a full 64-bit multiply, except
+ that we need to do less shuffling of the elements. */
+
+ /* Compute the sign-extension, aka highparts, of the two operands. */
+ s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
+ op1, pc_rtx, pc_rtx);
+ s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
+ op2, pc_rtx, pc_rtx);
+
+ /* Multiply LO(A) * HI(B), and vice versa. */
+ t1 = gen_reg_rtx (wmode);
+ t2 = gen_reg_rtx (wmode);
+ emit_insn (gen_sse2_umulv2siv2di3 (t1, s1, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (t2, s2, op1));
+
+ /* Multiply LO(A) * LO(B). */
+ t0 = gen_reg_rtx (wmode);
+ emit_insn (gen_sse2_umulv2siv2di3 (t0, op1, op2));
+
+ /* Combine and shift the highparts into place. */
+ t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
+ t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
+ 1, OPTAB_DIRECT);
+
+ /* Combine high and low parts. */
+ force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
+ return;
+ }
emit_insn (x);
}
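
The new SSE2 fallback above is the standard identity for recovering a signed widening product from unsigned multiplies: writing the sign-extended operand as HI(a)*2^32 + LO(a), where HI(a) is the all-ones mask that the PCMPGTD comparison produces when a is negative, the 64-bit product reduces to LO(a)*LO(b) + ((HI(a)*LO(b) + HI(b)*LO(a)) << 32), and every partial product is a PMULUDQ. A scalar model of that arithmetic (illustrative names, not part of the patch):

#include <stdint.h>
#include <assert.h>

/* s1/s2 mirror the PCMPGTD results: an all-ones 32-bit mask, i.e. the
   high part of the sign-extended operand, when the operand is negative.  */
static uint64_t
smul_widen_via_umul (int32_t a, int32_t b)
{
  uint32_t s1 = a < 0 ? 0xffffffffu : 0;        /* HI(a) */
  uint32_t s2 = b < 0 ? 0xffffffffu : 0;        /* HI(b) */
  uint64_t t1 = (uint64_t) s1 * (uint32_t) b;   /* PMULUDQ: HI(a)*LO(b) */
  uint64_t t2 = (uint64_t) s2 * (uint32_t) a;   /* PMULUDQ: HI(b)*LO(a) */
  uint64_t t0 = (uint64_t) (uint32_t) a
                * (uint32_t) b;                 /* PMULUDQ: LO(a)*LO(b) */
  return t0 + ((t1 + t2) << 32);  /* shift highparts into place, combine */
}

int
main (void)
{
  assert (smul_widen_via_umul (-3, 7) == (uint64_t) -21);
  assert (smul_widen_via_umul (-40000, -50000) == 2000000000u);
  return 0;
}
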
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 81e7dc0..754b8b4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5607,9 +5607,7 @@
(any_extend:<sseunpackmode>
(match_operand:VI124_AVX2 1 "register_operand"))
(match_operand:VI124_AVX2 2 "register_operand")]
- ; Note that SSE2 does not have signed SI multiply
- "TARGET_XOP || TARGET_SSE4_1
- || (TARGET_SSE2 && (<u_bool> || <MODE>mode != V4SImode))"
+ "TARGET_SSE2"
{
ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
<u_bool>, true);
@@ -5621,23 +5619,32 @@
(any_extend:<sseunpackmode>
(match_operand:VI124_AVX2 1 "register_operand"))
(match_operand:VI124_AVX2 2 "register_operand")]
- ; Note that SSE2 does not have signed SI multiply
- "TARGET_XOP || TARGET_SSE4_1
- || (TARGET_SSE2 && (<u_bool> || <MODE>mode != V4SImode))"
+ "TARGET_SSE2"
{
ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
<u_bool>, false);
DONE;
})

+;; Most widen_<s>mult_even_<mode> can be handled directly from other
+;; named patterns, but signed V4SI needs special help for plain SSE2.
+(define_expand "vec_widen_smult_even_v4si"
+ [(match_operand:V2DI 0 "register_operand")
+ (match_operand:V4SI 1 "register_operand")
+ (match_operand:V4SI 2 "register_operand")]
+ "TARGET_SSE2"
+{
+ ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
+ false, false);
+ DONE;
+})
+
(define_expand "vec_widen_<s>mult_odd_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(any_extend:<sseunpackmode>
- (match_operand:VI124_AVX2 1 "register_operand"))
- (match_operand:VI124_AVX2 2 "register_operand")]
- ; Note that SSE2 does not have signed SI multiply
- "TARGET_AVX || TARGET_XOP || TARGET_SSE4_1
- || (TARGET_SSE2 && (<u_bool> || <MODE>mode != V4SImode))"
+ (match_operand:VI4_AVX2 1 "register_operand"))
+ (match_operand:VI4_AVX2 2 "register_operand")]
+ "TARGET_SSE2"
{
ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
<u_bool>, true);
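
Taken together with the relaxed conditions on the hi/lo/odd expanders, this lets a signed widening-multiply loop such as the following (a hypothetical test case, not from the patch) vectorize with plain -O3 -msse2, where -msse4.1 was previously required:

void
widen_mul (long long *restrict r, const int *restrict a,
           const int *restrict b, int n)
{
  /* Signed 32x32 -> 64 widening multiply, element by element.  */
  for (int i = 0; i < n; ++i)
    r[i] = (long long) a[i] * b[i];
}
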