aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Henderson <rth@redhat.com> 2009-11-30 08:56:37 -0800
committer: Richard Henderson <rth@gcc.gnu.org> 2009-11-30 08:56:37 -0800
commit: 784e5ae13ae4f05fbd5aaebdc98e176b513b3c62 (patch)
tree: 3204678d58c7a4ef511bdb984c0d1b77f92d014a
parent: 1feb57517d546f3c026833ea9cc5880c8c01b050 (diff)
download: gcc-784e5ae13ae4f05fbd5aaebdc98e176b513b3c62.zip
gcc-784e5ae13ae4f05fbd5aaebdc98e176b513b3c62.tar.gz
gcc-784e5ae13ae4f05fbd5aaebdc98e176b513b3c62.tar.bz2
i386-builtin-types.def (V4DF_FTYPE_V4DF_V4DF_V4DI): New.
* config/i386/i386-builtin-types.def (V4DF_FTYPE_V4DF_V4DF_V4DI): New.
(V8SF_FTYPE_V8SF_V8SF_V8SI): New.
* config/i386/i386.c (ix86_vectorize_builtin_vec_perm): Support
V4DF and V8SF for AVX; relax constraint on V4SF to SSE1 from SSE2.
(IX86_BUILTIN_VEC_PERM_V4DF, IX86_BUILTIN_VEC_PERM_V8SF): New.
(bdesc_args): Add them.
(ix86_expand_builtin): Expand them.
(expand_vec_perm_pshufb2): Only operate on 16-byte vectors.

From-SVN: r154831
-rw-r--r-- gcc/ChangeLog 11
-rw-r--r-- gcc/config/i386/i386-builtin-types.def 2
-rw-r--r-- gcc/config/i386/i386.c 41
3 files changed, 45 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0995c52..8260bb6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2009-11-30 Richard Henderson <rth@redhat.com>
+
+ * config/i386/i386-builtin-types.def (V4DF_FTYPE_V4DF_V4DF_V4DI): New.
+ (V8SF_FTYPE_V8SF_V8SF_V8SI): New.
+ * config/i386/i386.c (ix86_vectorize_builtin_vec_perm): Support
+ V4DF and V8SF for AVX; relax constraint on V4SF to SSE1 from SSE2.
+ (IX86_BUILTIN_VEC_PERM_V4DF, IX86_BUILTIN_VEC_PERM_V8SF): New.
+ (bdesc_args): Add them.
+ (ix86_expand_builtin): Expand them.
+ (expand_vec_perm_pshufb2): Only operate on 16-byte vectors.
+
2009-11-30 Martin Jambor <mjambor@suse.cz>
PR middle-end/42206
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 9f45a13..0843d4c 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -351,6 +351,8 @@ DEF_FUNCTION_TYPE (V2UDI, V2UDI, V2UDI, V2UDI)
DEF_FUNCTION_TYPE (V4USI, V4USI, V4USI, V4USI)
DEF_FUNCTION_TYPE (V8UHI, V8UHI, V8UHI, V8UHI)
DEF_FUNCTION_TYPE (V16UQI, V16UQI, V16UQI, V16UQI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT)
DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 37fe24f..be8f38b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21047,6 +21047,8 @@ enum ix86_builtins
IX86_BUILTIN_VEC_PERM_V4SI_U,
IX86_BUILTIN_VEC_PERM_V8HI_U,
IX86_BUILTIN_VEC_PERM_V16QI_U,
+ IX86_BUILTIN_VEC_PERM_V4DF,
+ IX86_BUILTIN_VEC_PERM_V8SF,
/* FMA4 and XOP instructions. */
IX86_BUILTIN_VFMADDSS,
@@ -21722,7 +21724,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
@@ -21731,6 +21733,8 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
@@ -24151,6 +24155,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_VEC_PERM_V4SI_U:
case IX86_BUILTIN_VEC_PERM_V8HI_U:
case IX86_BUILTIN_VEC_PERM_V16QI_U:
+ case IX86_BUILTIN_VEC_PERM_V4DF:
+ case IX86_BUILTIN_VEC_PERM_V8SF:
return ix86_expand_vec_perm_builtin (exp);
case IX86_BUILTIN_INFQ:
@@ -28976,21 +28982,33 @@ ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
{
tree itype = TREE_TYPE (vec_type);
bool u = TYPE_UNSIGNED (itype);
+ enum machine_mode vmode = TYPE_MODE (vec_type);
enum ix86_builtins fcode;
+ bool ok = TARGET_SSE2;
- if (!TARGET_SSE2)
- return NULL_TREE;
-
- switch (TYPE_MODE (vec_type))
+ switch (vmode)
{
+ case V4DFmode:
+ ok = TARGET_AVX;
+ fcode = IX86_BUILTIN_VEC_PERM_V4DF;
+ goto get_di;
case V2DFmode:
- itype = ix86_get_builtin_type (IX86_BT_DI);
fcode = IX86_BUILTIN_VEC_PERM_V2DF;
+ get_di:
+ itype = ix86_get_builtin_type (IX86_BT_DI);
break;
+
+ case V8SFmode:
+ ok = TARGET_AVX;
+ fcode = IX86_BUILTIN_VEC_PERM_V8SF;
+ goto get_si;
case V4SFmode:
- itype = ix86_get_builtin_type (IX86_BT_SI);
+ ok = TARGET_SSE;
fcode = IX86_BUILTIN_VEC_PERM_V4SF;
+ get_si:
+ itype = ix86_get_builtin_type (IX86_BT_SI);
break;
+
case V2DImode:
fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
break;
@@ -29004,9 +29022,13 @@ ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
break;
default:
- return NULL_TREE;
+ ok = false;
+ break;
}
+ if (!ok)
+ return NULL_TREE;
+
*mask_type = itype;
return ix86_builtins[(int) fcode];
}
@@ -29619,8 +29641,9 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
rtx rperm[2][16], vperm, l, h, op, m128;
unsigned int i, nelt, eltsz;
- if (!TARGET_SSSE3)
+ if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
return false;
+ gcc_assert (d->op0 != d->op1);
nelt = d->nelt;
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));