author    Richard Henderson <rth@redhat.com>    2011-10-06 10:12:36 -0700
committer Richard Henderson <rth@gcc.gnu.org>  2011-10-06 10:12:36 -0700
commit    44167383a18a5e2af75ccccaf2c1ea7b6c334695 (patch)
tree      79d2344f83536551f5ddcd1e7afd6976092174f1 /gcc
parent    cf9899972bcb7ec5e577519737cd9443eb783d6d (diff)
i386: Add AVX2 support to ix86_expand_vshuffle.
From-SVN: r179624
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog           |   9
-rw-r--r--  gcc/config/i386/i386.c  | 112
-rw-r--r--  gcc/config/i386/sse.md  |  31
3 files changed, 135 insertions(+), 17 deletions(-)
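
For orientation before the diff: the vshuffle<mode> expander modified below is
what services GCC's generic __builtin_shuffle on vector_size types (lowered via
expand_vec_shuffle_expr in optabs.c, per the ChangeLog entry this patch sits on
top of). A minimal user-level sketch of the two shuffle forms the expander must
handle; the function names are illustrative, not part of the patch:

typedef int v8si __attribute__ ((vector_size (32)));

/* One-operand form: result[i] = x[idx[i] & 7].  With -mavx2 this can
   now expand to a single VPERMD.  */
v8si
reverse_v8si (v8si x)
{
  v8si idx = { 7, 6, 5, 4, 3, 2, 1, 0 };
  return __builtin_shuffle (x, idx);
}

/* Two-operand form: indices 0..7 select from a, 8..15 select from b.  */
v8si
interleave_lo_v8si (v8si a, v8si b)
{
  v8si idx = { 0, 8, 1, 9, 2, 10, 3, 11 };
  return __builtin_shuffle (a, b, idx);
}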
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 32e80ea..89905f5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,14 @@
2011-10-06 Richard Henderson <rth@redhat.com>
+ * config/i386/i386.c (ix86_expand_vshuffle): Add AVX2 support.
+ * config/i386/sse.md (sseshuffint): Remove.
+ (sseintvecmode): Support V16HI, V8HI, V32QI, V16QI.
+ (VSHUFFLE_AVX2): New mode iterator.
+ (vshuffle<mode>): Use it.
+ (avx_vec_concat<V_256>): Rename from *vec_concat<V_256>_avx.
+
+2011-10-06 Richard Henderson <rth@redhat.com>
+
* optabs.c (expand_vec_shuffle_expr): Use the proper mode for the
mask operand. Tidy the code.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 688fba1..9960fd2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19312,17 +19312,120 @@ ix86_expand_vshuffle (rtx operands[])
rtx op0 = operands[1];
rtx op1 = operands[2];
rtx mask = operands[3];
- rtx vt, vec[16];
+ rtx t1, t2, vt, vec[16];
enum machine_mode mode = GET_MODE (op0);
enum machine_mode maskmode = GET_MODE (mask);
int w, e, i;
bool one_operand_shuffle = rtx_equal_p (op0, op1);
- gcc_checking_assert (GET_MODE_BITSIZE (mode) == 128);
-
/* Number of elements in the vector. */
w = GET_MODE_NUNITS (mode);
e = GET_MODE_UNIT_SIZE (mode);
+ gcc_assert (w <= 16);
+
+ if (TARGET_AVX2)
+ {
+ if (mode == V4DImode || mode == V4DFmode)
+ {
+ /* Unfortunately, the VPERMQ and VPERMPD instructions only support
+ a constant shuffle operand. With a tiny bit of effort we can
+ use VPERMD instead. A re-interpretation stall for V4DFmode is
+ unfortunate but there's no avoiding it. */
+ t1 = gen_reg_rtx (V8SImode);
+
+ /* Replicate the low bits of the V4DImode mask into V8SImode:
+ mask = { A B C D }
+ t1 = { A A B B C C D D }. */
+ for (i = 0; i < 4; ++i)
+ vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
+ vt = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, vec));
+ vt = force_reg (V8SImode, vt);
+ mask = gen_lowpart (V8SImode, mask);
+ emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
+
+ /* Multiply the shuffle indices by two. */
+ emit_insn (gen_avx2_lshlv8si3 (t1, t1, const1_rtx));
+
+ /* Add one to the odd shuffle indices:
+ t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
+ for (i = 0; i < 4; ++i)
+ {
+ vec[i * 2] = const0_rtx;
+ vec[i * 2 + 1] = const1_rtx;
+ }
+ vt = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, vec));
+ vt = force_const_mem (V8SImode, vt);
+ emit_insn (gen_addv8si3 (t1, t1, vt));
+
+ /* Continue as if V8SImode had been used initially. */
+ operands[3] = mask = t1;
+ target = gen_lowpart (V8SImode, target);
+ op0 = gen_lowpart (V8SImode, op0);
+ op1 = gen_lowpart (V8SImode, op1);
+ maskmode = mode = V8SImode;
+ w = 8;
+ e = 4;
+ }
+
+ switch (mode)
+ {
+ case V8SImode:
+ /* The VPERMD and VPERMPS instructions already properly ignore
+ the high bits of the shuffle elements. No need for us to
+ perform an AND ourselves. */
+ if (one_operand_shuffle)
+ emit_insn (gen_avx2_permvarv8si (target, mask, op0));
+ else
+ {
+ t1 = gen_reg_rtx (V8SImode);
+ t2 = gen_reg_rtx (V8SImode);
+ emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
+ emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
+ goto merge_two;
+ }
+ return;
+
+ case V8SFmode:
+ mask = gen_lowpart (V8SFmode, mask);
+ if (one_operand_shuffle)
+ emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
+ else
+ {
+ t1 = gen_reg_rtx (V8SFmode);
+ t2 = gen_reg_rtx (V8SFmode);
+ emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
+ emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
+ goto merge_two;
+ }
+ return;
+
+ case V4SImode:
+ /* By combining the two 128-bit input vectors into one 256-bit
+ input vector, we can use VPERMD and VPERMPS for the full
+ two-operand shuffle. */
+ t1 = gen_reg_rtx (V8SImode);
+ t2 = gen_reg_rtx (V8SImode);
+ emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
+ emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
+ emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
+ emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
+ return;
+
+ case V4SFmode:
+ t1 = gen_reg_rtx (V8SFmode);
+ t2 = gen_reg_rtx (V8SFmode);
+ mask = gen_lowpart (V4SFmode, mask);
+ emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
+ emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
+ emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
+ emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
+ return;
+
+ default:
+ gcc_assert (GET_MODE_SIZE (mode) <= 16);
+ break;
+ }
+ }
if (TARGET_XOP)
{
@@ -19394,7 +19497,7 @@ ix86_expand_vshuffle (rtx operands[])
}
else
{
- rtx xops[6], t1, t2;
+ rtx xops[6];
bool ok;
/* Shuffle the two input vectors independently. */
@@ -19403,6 +19506,7 @@ ix86_expand_vshuffle (rtx operands[])
emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
+ merge_two:
/* Then merge them together. The key is whether any given control
element contained a bit set that indicates the second word. */
mask = operands[3];
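
Two steps in the i386.c changes above may be easier to follow with a worked
example. For V4DImode/V4DFmode, VPERMQ and VPERMPD take only immediate
selectors, so the patch rewrites each 64-bit index d as the 32-bit index pair
{ 2d, 2d+1 } and uses VPERMD instead. And at merge_two, each result element is
taken from the second shuffled vector exactly when the original index addressed
the second input, i.e. when the index has the bit for w set. A scalar model of
both transformations (standalone C; widen_mask and merge_two here are
hypothetical names, not the GCC code):

#include <stdio.h>

/* V4DI mask widening: { A B C D } -> { 2A, 2A+1, 2B, 2B+1, ... },
   mirroring the replicate / shift-left-by-one / add-{0,1,0,1,...}
   steps emitted above.  */
static void
widen_mask (const unsigned mask[4], unsigned out[8])
{
  for (int i = 0; i < 4; ++i)
    {
      out[2 * i]     = 2 * mask[i];      /* low 32-bit half  */
      out[2 * i + 1] = 2 * mask[i] + 1;  /* high 32-bit half */
    }
}

/* merge_two for w = 8 elements: bit 8 of an index says "take from
   the shuffle of the second input".  */
static void
merge_two (const unsigned t1[8], const unsigned t2[8],
           const unsigned mask[8], unsigned out[8])
{
  for (int i = 0; i < 8; ++i)
    out[i] = (mask[i] & 8) ? t2[i] : t1[i];
}

int
main (void)
{
  unsigned m4[4] = { 1, 3, 0, 2 }, m8[8];
  widen_mask (m4, m8);
  for (int i = 0; i < 8; ++i)
    printf ("%u ", m8[i]);    /* prints: 2 3 6 7 0 1 4 5 */
  printf ("\n");

  unsigned t1[8]  = { 0, 1, 2, 3, 4, 5, 6, 7 };
  unsigned t2[8]  = { 10, 11, 12, 13, 14, 15, 16, 17 };
  unsigned sel[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
  unsigned out[8];
  merge_two (t1, t2, sel, out);
  for (int i = 0; i < 8; ++i)
    printf ("%u ", out[i]);   /* prints: 0 11 2 13 4 15 6 17 */
  printf ("\n");
  return 0;
}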
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 88f4d6c..bf1d448 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -230,19 +230,16 @@
(V4SF "V4SF") (V2DF "V2DF")
(TI "TI")])
-;; All 128bit vector modes
-(define_mode_attr sseshuffint
- [(V16QI "V16QI") (V8HI "V8HI")
- (V4SI "V4SI") (V2DI "V2DI")
- (V4SF "V4SI") (V2DF "V2DI")])
-
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
[(V8SF "V8SI") (V4DF "V4DI")
(V4SF "V4SI") (V2DF "V2DI")
(V4DF "V4DI") (V8SF "V8SI")
(V8SI "V8SI") (V4DI "V4DI")
- (V4SI "V4SI") (V2DI "V2DI")])
+ (V4SI "V4SI") (V2DI "V2DI")
+ (V16HI "V16HI") (V8HI "V8HI")
+ (V32QI "V32QI") (V16QI "V16QI")
+ ])
;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublevecmode
@@ -6226,12 +6223,20 @@
DONE;
})
+;; ??? Irritatingly, the 256-bit VPSHUFB only shuffles within the 128-bit
+;; lanes. For now, we don't try to support V32QImode or V16HImode. So we
+;; don't want to use VI_AVX2.
+(define_mode_iterator VSHUFFLE_AVX2
+ [V16QI V8HI V4SI V2DI V4SF V2DF
+ (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
+ (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
+
(define_expand "vshuffle<mode>"
- [(match_operand:V_128 0 "register_operand" "")
- (match_operand:V_128 1 "register_operand" "")
- (match_operand:V_128 2 "register_operand" "")
- (match_operand:<sseshuffint> 3 "register_operand" "")]
- "TARGET_SSSE3 || TARGET_AVX"
+ [(match_operand:VSHUFFLE_AVX2 0 "register_operand" "")
+ (match_operand:VSHUFFLE_AVX2 1 "register_operand" "")
+ (match_operand:VSHUFFLE_AVX2 2 "register_operand" "")
+ (match_operand:<sseintvecmode> 3 "register_operand" "")]
+ "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
{
ix86_expand_vshuffle (operands);
DONE;
@@ -12397,7 +12402,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
-(define_insn "*vec_concat<mode>_avx"
+(define_insn "avx_vec_concat<mode>"
[(set (match_operand:V_256 0 "register_operand" "=x,x")
(vec_concat:V_256
(match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
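
A closing note on the instruction the new V8SImode path leans on: VPERMD
indexes modulo the element count, which is why the comment above observes that
no explicit AND of the mask is needed. A small sketch of its semantics through
the AVX2 intrinsic (compile with -mavx2; illustrative test values, not part of
the patch):

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m256i src = _mm256_setr_epi32 (10, 11, 12, 13, 14, 15, 16, 17);
  /* High index bits are ignored: 15 selects element 15 & 7 = 7.  */
  __m256i idx = _mm256_setr_epi32 (7, 6, 5, 4, 3, 2, 1, 15);
  __m256i res = _mm256_permutevar8x32_epi32 (src, idx);  /* VPERMD */

  int out[8];
  _mm256_storeu_si256 ((__m256i *) out, res);
  for (int i = 0; i < 8; ++i)
    printf ("%d ", out[i]);   /* prints: 17 16 15 14 13 12 11 17 */
  printf ("\n");
  return 0;
}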