aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Henderson <rth@redhat.com>2009-11-22 16:01:47 -0800
committerRichard Henderson <rth@gcc.gnu.org>2009-11-22 16:01:47 -0800
commit8a67ca92a10c0387243b482ffe3f8c1bfc0ac313 (patch)
tree87c45b877f688ae9cf524b6f49c09a695a6c6e5e /gcc
parentd90a2c59eb3e014cd145aed7086966790bef42f7 (diff)
downloadgcc-8a67ca92a10c0387243b482ffe3f8c1bfc0ac313.zip
gcc-8a67ca92a10c0387243b482ffe3f8c1bfc0ac313.tar.gz
gcc-8a67ca92a10c0387243b482ffe3f8c1bfc0ac313.tar.bz2
i386.c (avx_vpermilp_parallel): New function.
* i386.c (avx_vpermilp_parallel): New function. * i386-protos.h: Declare it. * predicates.md (avx_vpermilp_v8sf_operand, avx_vpermilp_v4df_operand, avx_vpermilp_v4sf_operand, avx_vpermilp_v2df_operand): New. * sse.md (AVXMODEFDP, AVXMODEFSP): New iterators. (ssescalarnum, ssedoublesizemode): Add AVX modes. (vpermilbits): Remove. (avx_vpermil<mode>): Change insns to expanders. (*avx_vpermil<mode>): New. Use vec_select. From-SVN: r154427
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog12
-rw-r--r--gcc/config/i386/i386-protos.h3
-rw-r--r--gcc/config/i386/i386.c76
-rw-r--r--gcc/config/i386/predicates.md21
-rw-r--r--gcc/config/i386/sse.md83
5 files changed, 180 insertions, 15 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a2bc0f8..9a26636 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2009-11-22 Richard Henderson <rth@redhat.com>
+
+ * i386.c (avx_vpermilp_parallel): New function.
+ * i386-protos.h: Declare it.
+ * predicates.md (avx_vpermilp_v8sf_operand, avx_vpermilp_v4df_operand,
+ avx_vpermilp_v4sf_operand, avx_vpermilp_v2df_operand): New.
+ * sse.md (AVXMODEFDP, AVXMODEFSP): New iterators.
+ (ssescalarnum, ssedoublesizemode): Add AVX modes.
+ (vpermilbits): Remove.
+ (avx_vpermil<mode>): Change insns to expanders.
+ (*avx_vpermil<mode>): New. Use vec_select.
+
2009-11-22 Richard Earnshaw <rearnsha@arm.com>
* opts.c (decode_options): Don't enable flag_schedule_insns
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 58da131..d36b269 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -47,6 +47,8 @@ extern bool x86_extended_QIreg_mentioned_p (rtx);
extern bool x86_extended_reg_mentioned_p (rtx);
extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
+extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode);
+
extern int ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx);
extern int ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
extern int ix86_expand_strlen (rtx, rtx, rtx, rtx);
@@ -275,3 +277,4 @@ extern int asm_preferred_eh_data_format (int, int);
#ifdef HAVE_ATTR_cpu
extern enum attr_cpu ix86_schedule;
#endif
+
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b85ef63..b149924 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -24527,6 +24527,82 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
return NULL_TREE;
}
}
+
+/* Helper for the avx_vpermilp_{v8sf,v4df,v4sf,v2df}_operand predicates.
+ This is also used by the *avx_vpermilp<mode> output code to turn the
+ parallel back into a mask. PAR is the PARALLEL of a VEC_SELECT and
+ MODE is the vector mode being permuted.
+ The return value is 0 for no match and the imm8+1 for a match. */
+
+int
+avx_vpermilp_parallel (rtx par, enum machine_mode mode)
+{
+ unsigned i, nelt = GET_MODE_NUNITS (mode);
+ unsigned mask = 0;
+ unsigned char ipar[8];
+
+ if (XVECLEN (par, 0) != nelt)
+ return 0;
+
+ /* Validate that all of the elements are constants, and not totally
+ out of range. Copy the data into an integral array to make the
+ subsequent checks easier. IPAR holds at most 8 entries, which
+ covers the widest mode handled below (V8SF); NOTE(review): this
+ presumes callers only pass the four modes in the switch. */
+ for (i = 0; i < nelt; ++i)
+ {
+ rtx er = XVECEXP (par, 0, i);
+ unsigned HOST_WIDE_INT ei;
+
+ if (!CONST_INT_P (er))
+ return 0;
+ ei = INTVAL (er);
+ if (ei >= nelt)
+ return 0;
+ ipar[i] = ei;
+ }
+
+ switch (mode)
+ {
+ case V4DFmode:
+ /* In the 256-bit DFmode case, we can only move elements within
+ a 128-bit lane. Results 0-1 must pick from elements 0-1 and
+ results 2-3 from elements 2-3; bit I of the mask records which
+ element of its lane result I takes. */
+ for (i = 0; i < 2; ++i)
+ {
+ if (ipar[i] >= 2)
+ return 0;
+ mask |= ipar[i] << i;
+ }
+ for (i = 2; i < 4; ++i)
+ {
+ if (ipar[i] < 2)
+ return 0;
+ mask |= (ipar[i] - 2) << i;
+ }
+ break;
+
+ case V8SFmode:
+ /* In the 256-bit SFmode case, we have full freedom of movement
+ within the low 128-bit lane, but the high 128-bit lane must
+ mirror the exact same pattern. Once that is verified, only the
+ low four elements need encoding, so shrink NELT to 4 and share
+ the 128-bit code below. */
+ for (i = 0; i < 4; ++i)
+ if (ipar[i] + 4 != ipar[i + 4])
+ return 0;
+ nelt = 4;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V4SFmode:
+ /* In the 128-bit case, we've full freedom in the placement of
+ the elements from the source operand. Each element index takes
+ NELT/2 bits of the mask: 2 bits for four elements, 1 bit for
+ V2DF. */
+ for (i = 0; i < nelt; ++i)
+ mask |= ipar[i] << (i * (nelt / 2));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Make sure success has a non-zero value by adding one. A mask of
+ zero is itself a valid permute, so it cannot double as "no match". */
+ return mask + 1;
+}
+
/* Store OPERAND to the memory after reload is completed. This means
that we can't easily use assign_stack_local. */
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 31a2037..1e8200a 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1148,3 +1148,24 @@
return 1;
})
+
+;; Return 1 if OP is a parallel for a vpermilp[ds] permute, i.e. one
+;; that avx_vpermilp_parallel accepts (nonzero result) for the mode.
+;; ??? It would be much easier if the PARALLEL for a VEC_SELECT had a
+;; mode, but it doesn't. So we have 4 copies and install the mode by hand.
+
+(define_predicate "avx_vpermilp_v8sf_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vpermilp_parallel (op, V8SFmode)")))
+
+(define_predicate "avx_vpermilp_v4df_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vpermilp_parallel (op, V4DFmode)")))
+
+(define_predicate "avx_vpermilp_v4sf_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vpermilp_parallel (op, V4SFmode)")))
+
+(define_predicate "avx_vpermilp_v2df_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vpermilp_parallel (op, V2DFmode)")))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7144799..79adc77 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -58,6 +58,8 @@
(define_mode_iterator AVX256MODE8P [V8SI V8SF])
(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
(define_mode_iterator AVXMODEF4P [V4SF V4DF])
+(define_mode_iterator AVXMODEFDP [V2DF V4DF])
+(define_mode_iterator AVXMODEFSP [V4SF V8SF])
(define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
(define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
@@ -95,13 +97,16 @@
(V4SI "SI") (V2DI "DI")])
;; Mapping of vector modes to a vector mode of double size
-(define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
- (V4SF "V8SF") (V4SI "V8SI")])
+(define_mode_attr ssedoublesizemode
+ [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
+ (V8HI "V16HI") (V16QI "V32QI")
+ (V4DF "V8DF") (V8SF "V16SF")
+ (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
;; Number of scalar elements in each vector type
-(define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
- (V16QI "16") (V8HI "8")
- (V4SI "4") (V2DI "2")])
+(define_mode_attr ssescalarnum
+ [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
+ (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Mapping for AVX
(define_mode_attr avxvecmode
@@ -134,10 +139,6 @@
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
-;; Mapping of immediate bits for vpermil instructions
-(define_mode_attr vpermilbits
- [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
-
;; Mapping of immediate bits for pinsr instructions
(define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
@@ -12088,14 +12089,66 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "avx_vpermil<mode>"
+(define_expand "avx_vpermil<mode>"
+ [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
+ (vec_select:AVXMODEFDP
+ (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_255_operand" "")))]
+ "TARGET_AVX"
+{ /* Replace the immediate in operands[2] with a PARALLEL of element
+ indices for the vec_select: mask bit I gives the source element
+ chosen for result element I. */
+ int mask = INTVAL (operands[2]);
+ rtx perm[<ssescalarnum>];
+
+ perm[0] = GEN_INT (mask & 1);
+ perm[1] = GEN_INT ((mask >> 1) & 1);
+ if (<MODE>mode == V4DFmode) /* Upper pair indexes elements 2-3, hence + 2. */
+ {
+ perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
+ perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
+ }
+
+ operands[2]
+ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
+})
+
+(define_expand "avx_vpermil<mode>"
+ [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
+ (vec_select:AVXMODEFSP
+ (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_255_operand" "")))]
+ "TARGET_AVX"
+{ /* Replace the immediate in operands[2] with a PARALLEL of element
+ indices for the vec_select: each 2-bit field of the mask gives the
+ source element chosen for one of the four low result elements. */
+ int mask = INTVAL (operands[2]);
+ rtx perm[<ssescalarnum>];
+
+ perm[0] = GEN_INT (mask & 3);
+ perm[1] = GEN_INT ((mask >> 2) & 3);
+ perm[2] = GEN_INT ((mask >> 4) & 3);
+ perm[3] = GEN_INT ((mask >> 6) & 3);
+ if (<MODE>mode == V8SFmode) /* High four reuse the same fields, offset by 4. */
+ {
+ perm[4] = GEN_INT ((mask & 3) + 4);
+ perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
+ perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
+ perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
+ }
+
+ operands[2]
+ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
+})
+
+(define_insn "*avx_vpermilp<mode>"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
- (unspec:AVXMODEF2P
- [(match_operand:AVXMODEF2P 1 "register_operand" "xm")
- (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
- UNSPEC_VPERMIL))]
+ (vec_select:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
+ (match_parallel 2 "avx_vpermilp_<mode>_operand"
+ [(match_operand 3 "const_int_operand" "")])))]
"TARGET_AVX"
- "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+{
+ int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
+ operands[2] = GEN_INT (mask);
+ return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")