diff options
author | Richard Henderson <rth@redhat.com> | 2009-11-27 07:58:50 -0800 |
---|---|---|
committer | Richard Henderson <rth@gcc.gnu.org> | 2009-11-27 07:58:50 -0800 |
commit | b0d49a6e80c68e623c8930006f2d4536e658ed47 (patch) | |
tree | c9133c447b0c99c9125d9fb22569fa8b15c6d6c3 | |
parent | 315adedaf3f2072b55095f101d7df04c39d4e3ae (diff) | |
download | gcc-b0d49a6e80c68e623c8930006f2d4536e658ed47.zip gcc-b0d49a6e80c68e623c8930006f2d4536e658ed47.tar.gz gcc-b0d49a6e80c68e623c8930006f2d4536e658ed47.tar.bz2 |
sse.md (vec_interleave_highv16qi, [...]): Remove expanders.
* config/i386/sse.md (vec_interleave_highv16qi,
vec_interleave_lowv16qi, vec_interleave_highv8hi,
vec_interleave_lowv8hi, vec_interleave_highv4si,
vec_interleave_lowv4si, vec_interleave_highv2di,
vec_interleave_lowv2di, vec_interleave_highv4sf,
vec_interleave_lowv4sf, vec_interleave_highv2df,
vec_interleave_lowv2df): Remove expanders.
(sse_unpcklps, sse_unpckhps, sse2_unpckhpd_exp, sse2_unpcklpd_exp,
sse2_punpckhqdq, sse2_punpcklqdq, sse2_punpckhbw, sse2_punpcklbw,
sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq):
Rename to their proper vec_interleave equivalent. Update all users
to the new name.
* config/i386/i386.c, config/i386/i386.md: Update all users.
From-SVN: r154711
-rw-r--r-- | gcc/ChangeLog | 16 |
-rw-r--r-- | gcc/config/i386/i386.c | 71 |
-rw-r--r-- | gcc/config/i386/i386.md | 6 |
-rw-r--r-- | gcc/config/i386/sse.md | 254 |
4 files changed, 103 insertions, 244 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8165df5..c38e970 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2009-11-27 Richard Henderson <rth@redhat.com> + + * config/i386/sse.md (vec_interleave_highv16qi, + vec_interleave_lowv16qi, vec_interleave_highv8hi, + vec_interleave_lowv8hi, vec_interleave_highv4si, + vec_interleave_lowv4si, vec_interleave_highv2di, + vec_interleave_lowv2di, vec_interleave_highv4sf, + vec_interleave_lowv4sf, vec_interleave_highv2df, + vec_interleave_lowv2df): Remove expanders. + (sse_unpcklps, sse_unpckhps, sse2_unpckhpd_exp, sse2_unpcklpd_exp, + sse2_punpckhqdq, sse2_punpcklqdq, sse2_punpckhbw, sse2_punpcklbw, + sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq): + Rename to their proper vec_interleave equivalent. Update all users + to the new name. + * config/i386/i386.c, config/i386/i386.md: Update all users. + 2009-11-27 Michael Matz <matz@suse.de> PR rtl-optimization/42084 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6cbc2dc..443b4eb 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -13944,7 +13944,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input) exponents = validize_mem (force_const_mem (V4SImode, x)); /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ - emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents)); + emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents)); /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). 
@@ -13970,7 +13970,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input) else { x = copy_to_mode_reg (V2DFmode, fp_xmm); - emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm)); + emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm)); emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); } @@ -21690,8 +21690,8 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI }, @@ -21799,8 +21799,8 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, 
CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF }, @@ -21845,14 +21845,14 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, 
CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI }, @@ -26483,6 +26483,7 @@ x86_emit_floatuns (rtx operands[2]) /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector with all elements equal to VAR. 
Return true if successful. */ +/* ??? Call into the vec_perm support to implement the broadcast. */ static bool ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, @@ -26552,7 +26553,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, tmp1 = gen_reg_rtx (V8HImode); emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2)); /* Duplicate the low short through the whole low SImode word. */ - emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1)); + emit_insn (gen_vec_interleave_lowv8hi (tmp1, tmp1, tmp1)); /* Cast the V8HImode vector back to a V4SImode vector. */ tmp2 = gen_reg_rtx (V4SImode); emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); @@ -26584,8 +26585,8 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, tmp1 = gen_reg_rtx (V16QImode); emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2)); /* Duplicate the low byte through the whole low SImode word. */ - emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); - emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); + emit_insn (gen_vec_interleave_lowv16qi (tmp1, tmp1, tmp1)); + emit_insn (gen_vec_interleave_lowv16qi (tmp1, tmp1, tmp1)); /* Cast the V16QImode vector back to a V4SImode vector. 
*/ tmp2 = gen_reg_rtx (V4SImode); emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); @@ -27417,7 +27418,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) /* tmp = target = A B C D */ tmp = copy_to_reg (target); /* target = A A B B */ - emit_insn (gen_sse_unpcklps (target, target, target)); + emit_insn (gen_vec_interleave_lowv4sf (target, target, target)); /* target = X A B B */ ix86_expand_vector_set (false, target, val, 0); /* target = A X C D */ @@ -27627,7 +27628,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) case 2: tmp = gen_reg_rtx (mode); - emit_insn (gen_sse_unpckhps (tmp, vec, vec)); + emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec)); break; default: @@ -27661,7 +27662,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) case 2: tmp = gen_reg_rtx (mode); - emit_insn (gen_sse2_punpckhdq (tmp, vec, vec)); + emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec)); break; default: @@ -29730,14 +29731,15 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) with interleave. 
*/ t1 = gen_reg_rtx (V8HImode); t2 = gen_reg_rtx (V8HImode); - emit_insn (gen_sse2_punpckhwd (t1, d->op0, d->op1)); - emit_insn (gen_sse2_punpcklwd (d->target, d->op0, d->op1)); - emit_insn (gen_sse2_punpckhwd (t2, d->target, t1)); - emit_insn (gen_sse2_punpcklwd (d->target, d->target, t1)); + emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1)); + emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1)); + emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1)); + emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1)); if (odd) - emit_insn (gen_sse2_punpckhwd (d->target, d->target, t2)); + t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2); else - emit_insn (gen_sse2_punpcklwd (d->target, d->target, t2)); + t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2); + emit_insn (t3); } break; @@ -29749,16 +29751,17 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) t1 = gen_reg_rtx (V16QImode); t2 = gen_reg_rtx (V16QImode); t3 = gen_reg_rtx (V16QImode); - emit_insn (gen_sse2_punpckhbw (t1, d->op0, d->op1)); - emit_insn (gen_sse2_punpcklbw (d->target, d->op0, d->op1)); - emit_insn (gen_sse2_punpckhbw (t2, d->target, t1)); - emit_insn (gen_sse2_punpcklbw (d->target, d->target, t1)); - emit_insn (gen_sse2_punpckhbw (t3, d->target, t2)); - emit_insn (gen_sse2_punpcklbw (d->target, d->target, t2)); + emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1)); + emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1)); + emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1)); + emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1)); + emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2)); + emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2)); if (odd) - emit_insn (gen_sse2_punpckhbw (d->target, d->target, t3)); + t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3); else - emit_insn (gen_sse2_punpcklbw (d->target, d->target, t3)); + t3 = 
gen_vec_interleave_lowv16qi (d->target, d->target, t3); + emit_insn (t3); } break; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6059dd4..d401f92 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4372,7 +4372,8 @@ } else operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3])); + emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3], + operands[3])); } else emit_insn (gen_vec_setv4sf_0 (operands[3], @@ -5769,7 +5770,8 @@ gen_rtx_SUBREG (SImode, operands[1], 0))); emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), gen_rtx_SUBREG (SImode, operands[1], 4))); - emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4])); + emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3], + operands[4])); operands[3] = gen_rtx_REG (DImode, REGNO (operands[3])); }) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 12c5b17..27c7a8b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -279,12 +279,14 @@ gen_rtx_SUBREG (SImode, operands[1], 0))); emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), gen_rtx_SUBREG (SImode, operands[1], 4))); - emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2])); + emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0], + operands[2])); } else if (memory_operand (operands[1], DImode)) - emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx)); + emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), + operands[1], const0_rtx)); else - gcc_unreachable (); + gcc_unreachable (); }) (define_split @@ -3453,9 +3455,9 @@ emit_insn (gen_sse2_cvttpd2dq (r1, operands[1])); emit_insn (gen_sse2_cvttpd2dq (r2, operands[2])); - emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]), - gen_lowpart (V2DImode, r1), - gen_lowpart (V2DImode, r2))); + emit_insn 
(gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), + gen_lowpart (V2DImode, r1), + gen_lowpart (V2DImode, r2))); DONE; }) @@ -3472,9 +3474,9 @@ emit_insn (gen_sse2_cvtpd2dq (r1, operands[1])); emit_insn (gen_sse2_cvtpd2dq (r2, operands[2])); - emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]), - gen_lowpart (V2DImode, r1), - gen_lowpart (V2DImode, r2))); + emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), + gen_lowpart (V2DImode, r1), + gen_lowpart (V2DImode, r2))); DONE; }) @@ -3584,6 +3586,7 @@ [(set_attr "type" "ssemov") (set_attr "mode" "V4SF,V2SF,V2SF")]) +;; Recall that the 256-bit unpck insns only shuffle within their lanes. (define_insn "avx_unpckhps256" [(set (match_operand:V8SF 0 "register_operand" "=x") (vec_select:V8SF @@ -3600,7 +3603,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) -(define_insn "*avx_unpckhps" +(define_insn "*avx_interleave_highv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_select:V4SF (vec_concat:V8SF @@ -3614,7 +3617,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4SF")]) -(define_insn "sse_unpckhps" +(define_insn "vec_interleave_highv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_select:V4SF (vec_concat:V8SF @@ -3627,6 +3630,7 @@ [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) +;; Recall that the 256-bit unpck insns only shuffle within their lanes. 
(define_insn "avx_unpcklps256" [(set (match_operand:V8SF 0 "register_operand" "=x") (vec_select:V8SF @@ -3643,7 +3647,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) -(define_insn "*avx_unpcklps" +(define_insn "*avx_interleave_lowv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_select:V4SF (vec_concat:V8SF @@ -3657,7 +3661,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4SF")]) -(define_insn "sse_unpcklps" +(define_insn "vec_interleave_lowv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_select:V4SF (vec_concat:V8SF @@ -4464,6 +4468,7 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Recall that the 256-bit unpck insns only shuffle within their lanes. (define_insn "avx_unpckhpd256" [(set (match_operand:V4DF 0 "register_operand" "=x") (vec_select:V4DF @@ -4478,7 +4483,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) -(define_expand "sse2_unpckhpd_exp" +(define_expand "vec_interleave_highv2df" [(set (match_operand:V2DF 0 "nonimmediate_operand" "") (vec_select:V2DF (vec_concat:V4DF @@ -4489,7 +4494,7 @@ "TARGET_SSE2" "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);") -(define_insn "*avx_unpckhpd" +(define_insn "*avx_interleave_highv2df" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") (vec_select:V2DF (vec_concat:V4DF @@ -4506,7 +4511,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V2DF,V1DF,V1DF")]) -(define_insn "sse2_unpckhpd" +(define_insn "*sse2_interleave_highv2df" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") (vec_select:V2DF (vec_concat:V4DF @@ -4585,6 +4590,7 @@ DONE; }) +;; Recall that the 256-bit unpck insns only shuffle within their lanes. 
(define_insn "avx_unpcklpd256" [(set (match_operand:V4DF 0 "register_operand" "=x") (vec_select:V4DF @@ -4599,7 +4605,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) -(define_expand "sse2_unpcklpd_exp" +(define_expand "vec_interleave_lowv2df" [(set (match_operand:V2DF 0 "nonimmediate_operand" "") (vec_select:V2DF (vec_concat:V4DF @@ -4610,7 +4616,7 @@ "TARGET_SSE2" "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);") -(define_insn "*avx_unpcklpd" +(define_insn "*avx_interleave_lowv2df" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") (vec_select:V2DF (vec_concat:V4DF @@ -4627,7 +4633,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V2DF,V1DF,V1DF")]) -(define_insn "sse2_unpcklpd" +(define_insn "*sse2_interleave_lowv2df" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") (vec_select:V2DF (vec_concat:V4DF @@ -4721,7 +4727,7 @@ }) ;; punpcklqdq and punpckhqdq are shorter than shufpd. -(define_insn "*avx_punpckhqdq" +(define_insn "*avx_interleave_highv2di" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_select:V2DI (vec_concat:V4DI @@ -4735,7 +4741,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpckhqdq" +(define_insn "vec_interleave_highv2di" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_select:V2DI (vec_concat:V4DI @@ -4749,7 +4755,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpcklqdq" +(define_insn "*avx_interleave_lowv2di" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_select:V2DI (vec_concat:V4DI @@ -4763,7 +4769,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpcklqdq" +(define_insn "vec_interleave_lowv2di" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_select:V2DI (vec_concat:V4DI @@ -5247,10 +5253,10 @@ each word. We don't care what goes into the high byte of each word. Rather than trying to get zero in there, most convenient is to let it be a copy of the low byte. 
*/ - emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1])); - emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2])); - emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1])); - emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2])); + emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1])); + emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2])); + emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1])); + emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2])); /* Multiply words. The end-of-line annotations here give a picture of what the output of that instruction looks like. Dot means don't care; the @@ -5670,7 +5676,7 @@ const0_rtx, const0_rtx)); /* Merge the parts back together. */ - emit_insn (gen_sse2_punpckldq (op0, t5, t6)); + emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6)); DONE; }) @@ -6589,174 +6595,6 @@ DONE; }) -(define_expand "vec_interleave_highv16qi" - [(set (match_operand:V16QI 0 "register_operand" "") - (vec_select:V16QI - (vec_concat:V32QI - (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "nonimmediate_operand" "")) - (parallel [(const_int 8) (const_int 24) - (const_int 9) (const_int 25) - (const_int 10) (const_int 26) - (const_int 11) (const_int 27) - (const_int 12) (const_int 28) - (const_int 13) (const_int 29) - (const_int 14) (const_int 30) - (const_int 15) (const_int 31)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_lowv16qi" - [(set (match_operand:V16QI 0 "register_operand" "") - (vec_select:V16QI - (vec_concat:V32QI - (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) (const_int 16) - (const_int 1) (const_int 17) - (const_int 2) (const_int 18) - (const_int 3) (const_int 19) - (const_int 4) (const_int 20) - (const_int 5) (const_int 
21) - (const_int 6) (const_int 22) - (const_int 7) (const_int 23)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_highv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=") - (vec_select:V8HI - (vec_concat:V16HI - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "nonimmediate_operand" "")) - (parallel [(const_int 4) (const_int 12) - (const_int 5) (const_int 13) - (const_int 6) (const_int 14) - (const_int 7) (const_int 15)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_lowv8hi" - [(set (match_operand:V8HI 0 "register_operand" "") - (vec_select:V8HI - (vec_concat:V16HI - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) (const_int 8) - (const_int 1) (const_int 9) - (const_int 2) (const_int 10) - (const_int 3) (const_int 11)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_highv4si" - [(set (match_operand:V4SI 0 "register_operand" "") - (vec_select:V4SI - (vec_concat:V8SI - (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "nonimmediate_operand" "")) - (parallel [(const_int 2) (const_int 6) - (const_int 3) (const_int 7)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_lowv4si" - [(set (match_operand:V4SI 0 "register_operand" "") - (vec_select:V4SI - (vec_concat:V8SI - (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) (const_int 4) - (const_int 1) (const_int 5)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand 
"vec_interleave_highv2di" - [(set (match_operand:V2DI 0 "register_operand" "") - (vec_select:V2DI - (vec_concat:V4DI - (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "nonimmediate_operand" "")) - (parallel [(const_int 1) - (const_int 3)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_lowv2di" - [(set (match_operand:V2DI 0 "register_operand" "") - (vec_select:V2DI - (vec_concat:V4DI - (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) - (const_int 2)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_highv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")) - (parallel [(const_int 2) (const_int 6) - (const_int 3) (const_int 7)])))] - "TARGET_SSE") - -(define_expand "vec_interleave_lowv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) (const_int 4) - (const_int 1) (const_int 5)])))] - "TARGET_SSE") - -(define_expand "vec_interleave_highv2df" - [(set (match_operand:V2DF 0 "register_operand" "") - (vec_select:V2DF - (vec_concat:V4DF - (match_operand:V2DF 1 "register_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")) - (parallel [(const_int 1) - (const_int 3)])))] - "TARGET_SSE2") - -(define_expand "vec_interleave_lowv2df" - [(set (match_operand:V2DF 0 "register_operand" "") - (vec_select:V2DF - (vec_concat:V4DF - (match_operand:V2DF 1 "register_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) - (const_int 2)])))] - 
"TARGET_SSE2") - (define_insn "*avx_packsswb" [(set (match_operand:V16QI 0 "register_operand" "=x") (vec_concat:V16QI @@ -6835,7 +6673,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpckhbw" +(define_insn "*avx_interleave_highv16qi" [(set (match_operand:V16QI 0 "register_operand" "=x") (vec_select:V16QI (vec_concat:V32QI @@ -6855,7 +6693,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpckhbw" +(define_insn "vec_interleave_highv16qi" [(set (match_operand:V16QI 0 "register_operand" "=x") (vec_select:V16QI (vec_concat:V32QI @@ -6875,7 +6713,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpcklbw" +(define_insn "*avx_interleave_lowv16qi" [(set (match_operand:V16QI 0 "register_operand" "=x") (vec_select:V16QI (vec_concat:V32QI @@ -6895,7 +6733,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpcklbw" +(define_insn "vec_interleave_lowv16qi" [(set (match_operand:V16QI 0 "register_operand" "=x") (vec_select:V16QI (vec_concat:V32QI @@ -6915,7 +6753,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpckhwd" +(define_insn "*avx_interleave_highv8hi" [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_select:V8HI (vec_concat:V16HI @@ -6931,7 +6769,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpckhwd" +(define_insn "vec_interleave_highv8hi" [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_select:V8HI (vec_concat:V16HI @@ -6947,7 +6785,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpcklwd" +(define_insn "*avx_interleave_lowv8hi" [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_select:V8HI (vec_concat:V16HI @@ -6963,7 +6801,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpcklwd" +(define_insn "vec_interleave_lowv8hi" [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_select:V8HI 
(vec_concat:V16HI @@ -6979,7 +6817,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpckhdq" +(define_insn "*avx_interleave_highv4si" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_select:V4SI (vec_concat:V8SI @@ -6993,7 +6831,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpckhdq" +(define_insn "vec_interleave_highv4si" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_select:V4SI (vec_concat:V8SI @@ -7007,7 +6845,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpckldq" +(define_insn "*avx_interleave_lowv4si" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_select:V4SI (vec_concat:V8SI @@ -7021,7 +6859,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpckldq" +(define_insn "vec_interleave_lowv4si" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_select:V4SI (vec_concat:V8SI |