diff options
-rw-r--r-- | gcc/cfgexpand.c | 2 | ||||
-rw-r--r-- | gcc/config/arm/arm-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 526 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 59 | ||||
-rw-r--r-- | gcc/config/arm/vec-common.md | 26 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 26 | ||||
-rw-r--r-- | gcc/doc/generic.texi | 13 | ||||
-rw-r--r-- | gcc/doc/md.texi | 14 | ||||
-rw-r--r-- | gcc/expr.c | 2 | ||||
-rw-r--r-- | gcc/fold-const.c | 10 | ||||
-rw-r--r-- | gcc/genopinit.c | 4 | ||||
-rw-r--r-- | gcc/gimple-pretty-print.c | 2 | ||||
-rw-r--r-- | gcc/optabs.c | 28 | ||||
-rw-r--r-- | gcc/optabs.h | 5 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 9 | ||||
-rw-r--r-- | gcc/tree-cfg.c | 2 | ||||
-rw-r--r-- | gcc/tree-inline.c | 2 | ||||
-rw-r--r-- | gcc/tree-pretty-print.c | 16 | ||||
-rw-r--r-- | gcc/tree-vect-data-refs.c | 86 | ||||
-rw-r--r-- | gcc/tree-vect-generic.c | 4 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 14 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 1 | ||||
-rw-r--r-- | gcc/tree.def | 4 |
23 files changed, 162 insertions, 696 deletions
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index dfe5442..295d624 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -3451,6 +3451,8 @@ expand_debug_expr (tree exp) case VEC_COND_EXPR: case VEC_EXTRACT_EVEN_EXPR: case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: case VEC_LSHIFT_EXPR: case VEC_PACK_FIX_TRUNC_EXPR: case VEC_PACK_SAT_EXPR: diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 8c3e412..296550a 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -244,7 +244,4 @@ extern const struct tune_params *current_tune; extern int vfp3_const_double_for_fract_bits (rtx); #endif /* RTX_CODE */ -extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); -extern bool arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); - #endif /* ! GCC_ARM_PROTOS_H */ diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index f8de09e..65b4e9d 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -267,9 +267,6 @@ static unsigned int arm_autovectorize_vector_sizes (void); static int arm_default_branch_cost (bool, bool); static int arm_cortex_a5_branch_cost (bool, bool); -static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, - const unsigned char *sel); - /* Table of machine attributes. */ static const struct attribute_spec arm_attribute_table[] = @@ -607,10 +604,6 @@ static const struct attribute_spec arm_attribute_table[] = #define TARGET_PREFERRED_RENAME_CLASS \ arm_preferred_rename_class -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ - arm_vectorize_vec_perm_const_ok - struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -25071,523 +25064,6 @@ vfp3_const_double_for_fract_bits (rtx operand) } return 0; } - -#define MAX_VECT_LEN 16 - -struct expand_vec_perm_d -{ - rtx target, op0, op1; - unsigned char perm[MAX_VECT_LEN]; - enum machine_mode vmode; - unsigned char nelt; - bool one_vector_p; - bool testing_p; -}; - -/* Generate a variable permutation. */ - -static void -arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) -{ - enum machine_mode vmode = GET_MODE (target); - bool one_vector_p = rtx_equal_p (op0, op1); - - gcc_checking_assert (vmode == V8QImode || vmode == V16QImode); - gcc_checking_assert (GET_MODE (op0) == vmode); - gcc_checking_assert (GET_MODE (op1) == vmode); - gcc_checking_assert (GET_MODE (sel) == vmode); - gcc_checking_assert (TARGET_NEON); - - if (one_vector_p) - { - if (vmode == V8QImode) - emit_insn (gen_neon_vtbl1v8qi (target, op0, sel)); - else - emit_insn (gen_neon_vtbl1v16qi (target, op0, sel)); - } - else - { - enum machine_mode mode1, mode2; - rtx pair, part; - - if (vmode == V8QImode) - mode1 = DImode, mode2 = TImode; - else - mode1 = TImode, mode2 = OImode; - - pair = gen_reg_rtx (mode2); - emit_insn (gen_rtx_CLOBBER (VOIDmode, pair)); - - part = simplify_gen_subreg (mode1, pair, mode2, - subreg_lowpart_offset (mode1, mode2)); - emit_move_insn (part, gen_lowpart (mode1, op0)); - - part = simplify_gen_subreg (mode1, pair, mode2, - subreg_highpart_offset (mode1, mode2)); - emit_move_insn (part, gen_lowpart (mode1, op1)); - - if (vmode == V8QImode) - emit_insn (gen_neon_vtbl2v8qi (target, pair, sel)); - else - emit_insn (gen_neon_vtbl2v16qi (target, pair, sel)); - } -} - -void -arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) -{ - enum machine_mode vmode = GET_MODE (target); - unsigned int i, nelt = GET_MODE_NUNITS (vmode); - bool one_vector_p = rtx_equal_p (op0, op1); - rtx rmask[MAX_VECT_LEN], mask; - - /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's - numbering of elements for big-endian, we must reverse the order. */ - gcc_checking_assert (!BYTES_BIG_ENDIAN); - - /* The VTBL instruction does not use a modulo index, so we must take care - of that ourselves. */ - mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1); - for (i = 0; i < nelt; ++i) - rmask[i] = mask; - mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask)); - sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN); - - arm_expand_vec_perm_1 (target, op0, op1, sel); -} - -/* Generate or test for an insn that supports a constant permutation. */ - -/* Recognize patterns for the VUZP insns. */ - -static bool -arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) -{ - unsigned int i, odd, mask, nelt = d->nelt; - rtx out0, out1, in0, in1, x; - rtx (*gen)(rtx, rtx, rtx, rtx); - - if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) - return false; - - /* Note that these are little-endian tests. Adjust for big-endian later. */ - if (d->perm[0] == 0) - odd = 0; - else if (d->perm[0] == 1) - odd = 1; - else - return false; - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); - - for (i = 0; i < nelt; i++) - { - unsigned elt = (i * 2 + odd) & mask; - if (d->perm[i] != elt) - return false; - } - - /* Success! */ - if (d->testing_p) - return true; - - switch (d->vmode) - { - case V16QImode: gen = gen_neon_vuzpv16qi_internal; break; - case V8QImode: gen = gen_neon_vuzpv8qi_internal; break; - case V8HImode: gen = gen_neon_vuzpv8hi_internal; break; - case V4HImode: gen = gen_neon_vuzpv4hi_internal; break; - case V4SImode: gen = gen_neon_vuzpv4si_internal; break; - case V2SImode: gen = gen_neon_vuzpv2si_internal; break; - case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break; - case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break; - default: - gcc_unreachable (); - } - - in0 = d->op0; - in1 = d->op1; - if (BYTES_BIG_ENDIAN) - { - x = in0, in0 = in1, in1 = x; - odd = !odd; - } - out0 = d->target; - out1 = gen_reg_rtx (d->vmode); - if (odd) - x = out0, out0 = out1, out1 = x; - - emit_insn (gen (out0, in0, in1, out1)); - return true; -} - -/* Recognize patterns for the VZIP insns. */ - -static bool -arm_evpc_neon_vzip (struct expand_vec_perm_d *d) -{ - unsigned int i, high, mask, nelt = d->nelt; - rtx out0, out1, in0, in1, x; - rtx (*gen)(rtx, rtx, rtx, rtx); - - if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) - return false; - - /* Note that these are little-endian tests. Adjust for big-endian later. */ - high = nelt / 2; - if (d->perm[0] == high) - ; - else if (d->perm[0] == 0) - high = 0; - else - return false; - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); - - for (i = 0; i < nelt / 2; i++) - { - unsigned elt = (i + high) & mask; - if (d->perm[i * 2] != elt) - return false; - elt = (elt + nelt) & mask; - if (d->perm[i * 2 + 1] != elt) - return false; - } - - /* Success! */ - if (d->testing_p) - return true; - - switch (d->vmode) - { - case V16QImode: gen = gen_neon_vzipv16qi_internal; break; - case V8QImode: gen = gen_neon_vzipv8qi_internal; break; - case V8HImode: gen = gen_neon_vzipv8hi_internal; break; - case V4HImode: gen = gen_neon_vzipv4hi_internal; break; - case V4SImode: gen = gen_neon_vzipv4si_internal; break; - case V2SImode: gen = gen_neon_vzipv2si_internal; break; - case V2SFmode: gen = gen_neon_vzipv2sf_internal; break; - case V4SFmode: gen = gen_neon_vzipv4sf_internal; break; - default: - gcc_unreachable (); - } - - in0 = d->op0; - in1 = d->op1; - if (BYTES_BIG_ENDIAN) - { - x = in0, in0 = in1, in1 = x; - high = !high; - } - - out0 = d->target; - out1 = gen_reg_rtx (d->vmode); - if (high) - x = out0, out0 = out1, out1 = x; - - emit_insn (gen (out0, in0, in1, out1)); - return true; -} - -/* Recognize patterns for the VREV insns. */ - -static bool -arm_evpc_neon_vrev (struct expand_vec_perm_d *d) -{ - unsigned int i, j, diff, nelt = d->nelt; - rtx (*gen)(rtx, rtx, rtx); - - if (!d->one_vector_p) - return false; - - diff = d->perm[0]; - switch (diff) - { - case 7: - switch (d->vmode) - { - case V16QImode: gen = gen_neon_vrev64v16qi; break; - case V8QImode: gen = gen_neon_vrev64v8qi; break; - default: - return false; - } - break; - case 3: - switch (d->vmode) - { - case V16QImode: gen = gen_neon_vrev32v16qi; break; - case V8QImode: gen = gen_neon_vrev32v8qi; break; - case V8HImode: gen = gen_neon_vrev64v8hi; break; - case V4HImode: gen = gen_neon_vrev64v4hi; break; - default: - return false; - } - break; - case 1: - switch (d->vmode) - { - case V16QImode: gen = gen_neon_vrev16v16qi; break; - case V8QImode: gen = gen_neon_vrev16v8qi; break; - case V8HImode: gen = gen_neon_vrev32v8hi; break; - case V4HImode: gen = gen_neon_vrev32v4hi; break; - case V4SImode: gen = gen_neon_vrev64v4si; break; - case V2SImode: gen = gen_neon_vrev64v2si; break; - case V4SFmode: gen = gen_neon_vrev64v4sf; break; - case V2SFmode: gen = gen_neon_vrev64v2sf; break; - default: - return false; - } - break; - default: - return false; - } - - for (i = 0; i < nelt; i += diff) - for (j = 0; j <= diff; j += 1) - if (d->perm[i + j] != i + diff - j) - return false; - - /* Success! */ - if (d->testing_p) - return true; - - /* ??? The third operand is an artifact of the builtin infrastructure - and is ignored by the actual instruction. */ - emit_insn (gen (d->target, d->op0, const0_rtx)); - return true; -} - -/* Recognize patterns for the VTRN insns. */ - -static bool -arm_evpc_neon_vtrn (struct expand_vec_perm_d *d) -{ - unsigned int i, odd, mask, nelt = d->nelt; - rtx out0, out1, in0, in1, x; - rtx (*gen)(rtx, rtx, rtx, rtx); - - if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) - return false; - - /* Note that these are little-endian tests. Adjust for big-endian later. */ - if (d->perm[0] == 0) - odd = 0; - else if (d->perm[0] == 1) - odd = 1; - else - return false; - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); - - for (i = 0; i < nelt; i += 2) - { - if (d->perm[i] != i + odd) - return false; - if (d->perm[i + 1] != ((i + nelt + odd) & mask)) - return false; - } - - /* Success! */ - if (d->testing_p) - return true; - - switch (d->vmode) - { - case V16QImode: gen = gen_neon_vtrnv16qi_internal; break; - case V8QImode: gen = gen_neon_vtrnv8qi_internal; break; - case V8HImode: gen = gen_neon_vtrnv8hi_internal; break; - case V4HImode: gen = gen_neon_vtrnv4hi_internal; break; - case V4SImode: gen = gen_neon_vtrnv4si_internal; break; - case V2SImode: gen = gen_neon_vtrnv2si_internal; break; - case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break; - case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break; - default: - gcc_unreachable (); - } - - in0 = d->op0; - in1 = d->op1; - if (BYTES_BIG_ENDIAN) - { - x = in0, in0 = in1, in1 = x; - odd = !odd; - } - - out0 = d->target; - out1 = gen_reg_rtx (d->vmode); - if (odd) - x = out0, out0 = out1, out1 = x; - - emit_insn (gen (out0, in0, in1, out1)); - return true; -} - -/* The NEON VTBL instruction is a fully variable permuation that's even - stronger than what we expose via VEC_PERM_EXPR. What it doesn't do - is mask the index operand as VEC_PERM_EXPR requires. Therefore we - can do slightly better by expanding this as a constant where we don't - have to apply a mask. */ - -static bool -arm_evpc_neon_vtbl (struct expand_vec_perm_d *d) -{ - rtx rperm[MAX_VECT_LEN], sel; - enum machine_mode vmode = d->vmode; - unsigned int i, nelt = d->nelt; - - /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's - numbering of elements for big-endian, we must reverse the order. */ - if (BYTES_BIG_ENDIAN) - return false; - - if (d->testing_p) - return true; - - /* Generic code will try constant permutation twice. Once with the - original mode and again with the elements lowered to QImode. - So wait and don't do the selector expansion ourselves. */ - if (vmode != V8QImode && vmode != V16QImode) - return false; - - for (i = 0; i < nelt; ++i) - rperm[i] = GEN_INT (d->perm[i]); - sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); - sel = force_reg (vmode, sel); - - arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel); - return true; -} - -static bool -arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) -{ - /* The pattern matching functions above are written to look for a small - number to begin the sequence (0, 1, N/2). If we begin with an index - from the second operand, we can swap the operands. */ - if (d->perm[0] >= d->nelt) - { - unsigned i, nelt = d->nelt; - rtx x; - - for (i = 0; i < nelt; ++i) - d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1); - - x = d->op0; - d->op0 = d->op1; - d->op1 = x; - } - - if (TARGET_NEON) - { - if (arm_evpc_neon_vuzp (d)) - return true; - if (arm_evpc_neon_vzip (d)) - return true; - if (arm_evpc_neon_vrev (d)) - return true; - if (arm_evpc_neon_vtrn (d)) - return true; - return arm_evpc_neon_vtbl (d); - } - return false; -} - -/* Expand a vec_perm_const pattern. */ - -bool -arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) -{ - struct expand_vec_perm_d d; - int i, nelt, which; - - d.target = target; - d.op0 = op0; - d.op1 = op1; - - d.vmode = GET_MODE (target); - gcc_assert (VECTOR_MODE_P (d.vmode)); - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = false; - - for (i = which = 0; i < nelt; ++i) - { - rtx e = XVECEXP (sel, 0, i); - int ei = INTVAL (e) & (2 * nelt - 1); - which |= (ei < nelt ? 1 : 2); - d.perm[i] = ei; - } - - switch (which) - { - default: - gcc_unreachable(); - - case 3: - d.one_vector_p = false; - if (!rtx_equal_p (op0, op1)) - break; - - /* The elements of PERM do not suggest that only the first operand - is used, but both operands are identical. Allow easier matching - of the permutation by folding the permutation into the single - input vector. */ - /* FALLTHRU */ - case 2: - for (i = 0; i < nelt; ++i) - d.perm[i] &= nelt - 1; - d.op0 = op1; - d.one_vector_p = true; - break; - - case 1: - d.op1 = op0; - d.one_vector_p = true; - break; - } - - return arm_expand_vec_perm_const_1 (&d); -} - -/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ - -static bool -arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, - const unsigned char *sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = true; - memcpy (d.perm, sel, nelt); - - /* Categorize the set of elements in the selector. */ - for (i = which = 0; i < nelt; ++i) - { - unsigned char e = d.perm[i]; - gcc_assert (e < 2 * nelt); - which |= (e < nelt ? 1 : 2); - } - - /* For all elements from second vector, fold the elements to first. */ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; - - /* Check whether the mask can be applied to the vector type. */ - d.one_vector_p = (which != 3); - - d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); - d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); - if (!d.one_vector_p) - d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); - - start_sequence (); - ret = arm_expand_vec_perm_const_1 (&d); - end_sequence (); - - return ret; -} - - #include "gt-arm.h" + diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index bd68d39..94e0a5f 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -3876,65 +3876,6 @@ [(set_attr "neon_type" "neon_bp_3cycle")] ) -;; These two are used by the vec_perm infrastructure for V16QImode. -(define_insn_and_split "neon_vtbl1v16qi" - [(set (match_operand:V16QI 0 "s_register_operand" "=w") - (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w") - (match_operand:V16QI 2 "s_register_operand" "w")] - UNSPEC_VTBL))] - "TARGET_NEON" - "#" - "&& reload_completed" - [(const_int 0)] -{ - rtx op0, op1, op2, part0, part2; - unsigned ofs; - - op0 = operands[0]; - op1 = gen_lowpart (TImode, operands[1]); - op2 = operands[2]; - - ofs = subreg_lowpart_offset (V8QImode, V16QImode); - part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); - part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); - emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); - - ofs = subreg_highpart_offset (V8QImode, V16QImode); - part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); - part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); - emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); - DONE; -}) - -(define_insn_and_split "neon_vtbl2v16qi" - [(set (match_operand:V16QI 0 "s_register_operand" "=w") - (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w") - (match_operand:V16QI 2 "s_register_operand" "w")] - UNSPEC_VTBL))] - "TARGET_NEON" - "#" - "&& reload_completed" - [(const_int 0)] -{ - rtx op0, op1, op2, part0, part2; - unsigned ofs; - - op0 = operands[0]; - op1 = operands[1]; - op2 = operands[2]; - - ofs = subreg_lowpart_offset (V8QImode, V16QImode); - part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); - part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); - emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); - - ofs = subreg_highpart_offset (V8QImode, V16QImode); - part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); - part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); - emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); - DONE; -}) - (define_insn "neon_vtbx1v8qi" [(set (match_operand:V8QI 0 "s_register_operand" "=w") (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index eb29900..c27c414 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -108,29 +108,3 @@ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" { }) - -(define_expand "vec_perm_const<mode>" - [(match_operand:VALL 0 "s_register_operand" "") - (match_operand:VALL 1 "s_register_operand" "") - (match_operand:VALL 2 "s_register_operand" "") - (match_operand:<V_cmp_result> 3 "" "")] - "TARGET_NEON - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" -{ - if (arm_expand_vec_perm_const (operands[0], operands[1], - operands[2], operands[3])) - DONE; - else - FAIL; -}) - -(define_expand "vec_perm<mode>" - [(match_operand:VE 0 "s_register_operand" "") - (match_operand:VE 1 "s_register_operand" "") - (match_operand:VE 2 "s_register_operand" "") - (match_operand:VE 3 "s_register_operand" "")] - "TARGET_NEON && !BYTES_BIG_ENDIAN" -{ - arm_expand_vec_perm (operands[0], operands[1], operands[2], operands[3]); - DONE; -}) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 986604e..216ab0b 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -35984,8 +35984,6 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d) return ok; } -static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d); - /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify a two vector permutation into a single vector permutation by using an interleave operation to merge the vectors. */ @@ -36012,17 +36010,6 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) /* For 32-byte modes allow even d->op0 == d->op1. The lack of cross-lane shuffling in some instructions might prevent a single insn shuffle. */ - dfinal = *d; - dfinal.testing_p = true; - /* If expand_vec_perm_interleave3 can expand this into - a 3 insn sequence, give up and let it be expanded as - 3 insn sequence. While that is one insn longer, - it doesn't need a memory operand and in the common - case that both interleave low and high permutations - with the same operands are adjacent needs 4 insns - for both after CSE. */ - if (expand_vec_perm_interleave3 (&dfinal)) - return false; } else return false; @@ -36862,23 +36849,18 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) stopping once we have promoted to V4SImode and then use pshufd. */ do { - rtx dest; - rtx (*gen) (rtx, rtx, rtx) - = vmode == V16QImode ? gen_vec_interleave_lowv16qi - : gen_vec_interleave_lowv8hi; + optab otab = vec_interleave_low_optab; if (elt >= nelt2) { - gen = vmode == V16QImode ? gen_vec_interleave_highv16qi - : gen_vec_interleave_highv8hi; + otab = vec_interleave_high_optab; elt -= nelt2; } nelt2 /= 2; - dest = gen_reg_rtx (vmode); - emit_insn (gen (dest, op0, op0)); + op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT); vmode = get_mode_wider_vector (vmode); - op0 = gen_lowpart (vmode, dest); + op0 = gen_lowpart (vmode, op0); } while (vmode != V4SImode); diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi index 4f26238..82b2663 100644 --- a/gcc/doc/generic.texi +++ b/gcc/doc/generic.texi @@ -1697,6 +1697,8 @@ its sole argument yields the representation for @code{ap}. @tindex VEC_PACK_FIX_TRUNC_EXPR @tindex VEC_EXTRACT_EVEN_EXPR @tindex VEC_EXTRACT_ODD_EXPR +@tindex VEC_INTERLEAVE_HIGH_EXPR +@tindex VEC_INTERLEAVE_LOW_EXPR @table @code @item VEC_LSHIFT_EXPR @@ -1772,6 +1774,17 @@ These nodes represent extracting of the even/odd elements of the two input vectors, respectively. Their operands and result are vectors that contain the same number of elements of the same type. +@item VEC_INTERLEAVE_HIGH_EXPR +@itemx VEC_INTERLEAVE_LOW_EXPR +These nodes represent merging and interleaving of the high/low elements of the +two input vectors, respectively. The operands and the result are vectors that +contain the same number of elements (@code{N}) of the same type. +In the case of @code{VEC_INTERLEAVE_HIGH_EXPR}, the high @code{N/2} elements of +the first input vector are interleaved with the high @code{N/2} elements of the +second input vector. In the case of @code{VEC_INTERLEAVE_LOW_EXPR}, the low +@code{N/2} elements of the first input vector are interleaved with the low +@code{N/2} elements of the second input vector. + @end table diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 6dd6a58..dc87ca7 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -4159,6 +4159,20 @@ The odd elements of operand 2 are concatenated to the odd elements of operand 1 in their original order. The result is stored in operand 0. The output and input vectors should have the same modes. +@cindex @code{vec_interleave_high@var{m}} instruction pattern +@item @samp{vec_interleave_high@var{m}} +Merge high elements of the two input vectors into the output vector. The output +and input vectors should have the same modes (@code{N} elements). The high +@code{N/2} elements of the first input vector are interleaved with the high +@code{N/2} elements of the second input vector. + +@cindex @code{vec_interleave_low@var{m}} instruction pattern +@item @samp{vec_interleave_low@var{m}} +Merge low elements of the two input vectors into the output vector. The output +and input vectors should have the same modes (@code{N} elements). The low +@code{N/2} elements of the first input vector are interleaved with the low +@code{N/2} elements of the second input vector. + @cindex @code{vec_init@var{m}} instruction pattern @item @samp{vec_init@var{m}} Initialize the vector to given values. Operand 0 is the vector to initialize @@ -8647,6 +8647,8 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, case VEC_EXTRACT_EVEN_EXPR: case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: goto binop; case VEC_LSHIFT_EXPR: diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 89c68cf..a32ea90 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -13503,6 +13503,8 @@ fold_binary_loc (location_t loc, case VEC_EXTRACT_EVEN_EXPR: case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: if ((TREE_CODE (arg0) == VECTOR_CST || TREE_CODE (arg0) == CONSTRUCTOR) && (TREE_CODE (arg1) == VECTOR_CST @@ -13520,6 +13522,14 @@ fold_binary_loc (location_t loc, case VEC_EXTRACT_ODD_EXPR: sel[i] = i * 2 + 1; break; + case VEC_INTERLEAVE_HIGH_EXPR: + sel[i] = (i + (BYTES_BIG_ENDIAN ? 0 : nelts)) / 2 + + ((i & 1) ? nelts : 0); + break; + case VEC_INTERLEAVE_LOW_EXPR: + sel[i] = (i + (BYTES_BIG_ENDIAN ? nelts : 0)) / 2 + + ((i & 1) ? nelts : 0); + break; default: gcc_unreachable (); } diff --git a/gcc/genopinit.c b/gcc/genopinit.c index 9cd77fa..63c58a8 100644 --- a/gcc/genopinit.c +++ b/gcc/genopinit.c @@ -1,6 +1,6 @@ /* Generate code to initialize optabs from machine description. Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, - 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011 + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010 Free Software Foundation, Inc. This file is part of GCC. @@ -269,6 +269,8 @@ static const char * const optabs[] = "set_optab_handler (vec_extract_optab, $A, CODE_FOR_$(vec_extract$a$))", "set_optab_handler (vec_extract_even_optab, $A, CODE_FOR_$(vec_extract_even$a$))", "set_optab_handler (vec_extract_odd_optab, $A, CODE_FOR_$(vec_extract_odd$a$))", + "set_optab_handler (vec_interleave_high_optab, $A, CODE_FOR_$(vec_interleave_high$a$))", + "set_optab_handler (vec_interleave_low_optab, $A, CODE_FOR_$(vec_interleave_low$a$))", "set_optab_handler (vec_init_optab, $A, CODE_FOR_$(vec_init$a$))", "set_optab_handler (vec_shl_optab, $A, CODE_FOR_$(vec_shl_$a$))", "set_optab_handler (vec_shr_optab, $A, CODE_FOR_$(vec_shr_$a$))", diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index b93d66d..3b5f670 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -347,6 +347,8 @@ dump_binary_rhs (pretty_printer *buffer, gimple gs, int spc, int flags) case VEC_PACK_FIX_TRUNC_EXPR: case VEC_EXTRACT_EVEN_EXPR: case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: case VEC_WIDEN_LSHIFT_HI_EXPR: case VEC_WIDEN_LSHIFT_LO_EXPR: for (p = tree_code_name [(int) code]; *p; p++) diff --git a/gcc/optabs.c b/gcc/optabs.c index 1c13b5a..0d5cd73 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -553,6 +553,12 @@ optab_for_tree_code (enum tree_code code, const_tree type, case VEC_EXTRACT_ODD_EXPR: return vec_extract_odd_optab; + case VEC_INTERLEAVE_HIGH_EXPR: + return vec_interleave_high_optab; + + case VEC_INTERLEAVE_LOW_EXPR: + return vec_interleave_low_optab; + default: return NULL; } @@ -1606,7 +1612,11 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1, enum tree_code tcode = ERROR_MARK; rtx sel; - if (binoptab == vec_extract_even_optab) + if (binoptab == vec_interleave_high_optab) + tcode = VEC_INTERLEAVE_HIGH_EXPR; + else if (binoptab == vec_interleave_low_optab) + tcode = VEC_INTERLEAVE_LOW_EXPR; + else if (binoptab == vec_extract_even_optab) tcode = VEC_EXTRACT_EVEN_EXPR; else if (binoptab == vec_extract_odd_optab) tcode = VEC_EXTRACT_ODD_EXPR; @@ -6261,6 +6271,8 @@ init_optabs (void) init_optab (vec_extract_optab, UNKNOWN); init_optab (vec_extract_even_optab, UNKNOWN); init_optab (vec_extract_odd_optab, UNKNOWN); + init_optab (vec_interleave_high_optab, UNKNOWN); + init_optab (vec_interleave_low_optab, UNKNOWN); init_optab (vec_set_optab, UNKNOWN); init_optab (vec_init_optab, UNKNOWN); init_optab (vec_shl_optab, UNKNOWN); @@ -6868,7 +6880,8 @@ can_vec_perm_p (enum machine_mode mode, bool variable, return true; } -/* Return true if we can implement with VEC_PERM_EXPR for this target. +/* Return true if we can implement VEC_INTERLEAVE_{HIGH,LOW}_EXPR or + VEC_EXTRACT_{EVEN,ODD}_EXPR with VEC_PERM_EXPR for this target. If PSEL is non-null, return the selector for the permutation. */ bool @@ -6918,6 +6931,17 @@ can_vec_perm_for_code_p (enum tree_code code, enum machine_mode mode, data[i] = i * 2 + alt; break; + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: + if ((BYTES_BIG_ENDIAN != 0) ^ (code == VEC_INTERLEAVE_HIGH_EXPR)) + alt = nelt / 2; + for (i = 0; i < nelt / 2; ++i) + { + data[i * 2] = i + alt; + data[i * 2 + 1] = i + nelt + alt; + } + break; + default: gcc_unreachable (); } diff --git a/gcc/optabs.h b/gcc/optabs.h index a7c43ac..ec13f6f 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -335,6 +335,9 @@ enum optab_index /* Extract even/odd fields of vector operands. */ OTI_vec_extract_even, OTI_vec_extract_odd, + /* Interleave fields of vector operands. */ + OTI_vec_interleave_high, + OTI_vec_interleave_low, /* Initialize vector operand. */ OTI_vec_init, /* Whole vector shift. The shift amount is in bits. */ @@ -561,6 +564,8 @@ enum optab_index #define vec_extract_optab (&optab_table[OTI_vec_extract]) #define vec_extract_even_optab (&optab_table[OTI_vec_extract_even]) #define vec_extract_odd_optab (&optab_table[OTI_vec_extract_odd]) +#define vec_interleave_high_optab (&optab_table[OTI_vec_interleave_high]) +#define vec_interleave_low_optab (&optab_table[OTI_vec_interleave_low]) #define vec_init_optab (&optab_table[OTI_vec_init]) #define vec_shl_optab (&optab_table[OTI_vec_shl]) #define vec_shr_optab (&optab_table[OTI_vec_shr]) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index d99a0b3..78223af 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2725,8 +2725,7 @@ proc check_effective_target_vect_perm { } { verbose "check_effective_target_vect_perm: using cached result" 2 } else { set et_vect_perm_saved 0 - if { [is-effective-target arm_neon_ok] - || [istarget powerpc*-*-*] + if { [istarget powerpc*-*-*] || [istarget spu-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] } { @@ -2749,8 +2748,7 @@ proc check_effective_target_vect_perm_byte { } { verbose "check_effective_target_vect_perm_byte: using cached result" 2 } else { set et_vect_perm_byte_saved 0 - if { [is-effective-target arm_neon_ok] - || [istarget powerpc*-*-*] + if { [istarget powerpc*-*-*] || [istarget spu-*-*] } { set et_vect_perm_byte_saved 1 } @@ -2771,8 +2769,7 @@ proc check_effective_target_vect_perm_short { } { verbose "check_effective_target_vect_perm_short: using cached result" 2 } else { set et_vect_perm_short_saved 0 - if { [is-effective-target arm_neon_ok] - || [istarget powerpc*-*-*] + if { [istarget powerpc*-*-*] || [istarget spu-*-*] } { set et_vect_perm_short_saved 1 } diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 65ad0c0..db10daf 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -3704,6 +3704,8 @@ do_pointer_plus_expr_check: case VEC_PACK_FIX_TRUNC_EXPR: case VEC_EXTRACT_EVEN_EXPR: case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: /* FIXME. */ return false; diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 7be13bf..13ad815 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -3401,6 +3401,8 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights, case VEC_PACK_FIX_TRUNC_EXPR: case VEC_EXTRACT_EVEN_EXPR: case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: case VEC_WIDEN_LSHIFT_HI_EXPR: case VEC_WIDEN_LSHIFT_LO_EXPR: diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 9363aea..9abe004 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -2404,6 +2404,22 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags, pp_string (buffer, " > "); break; + case VEC_INTERLEAVE_HIGH_EXPR: + pp_string (buffer, " VEC_INTERLEAVE_HIGH_EXPR < "); + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (buffer, ", "); + dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); + pp_string (buffer, " > "); + break; + + case VEC_INTERLEAVE_LOW_EXPR: + pp_string (buffer, " VEC_INTERLEAVE_LOW_EXPR < "); + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (buffer, ", "); + dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); + pp_string (buffer, " > "); + break; + default: NIY; } diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index e6f0381..6a85b7b 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -3800,6 +3800,7 @@ vect_create_destination_var (tree scalar_dest, tree vectype) bool vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count) { + optab ih_optab, il_optab; enum machine_mode mode; mode = TYPE_MODE (vectype); @@ -3814,23 +3815,18 @@ vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count) } /* Check that the operation is supported. */ - if (VECTOR_MODE_P (mode)) - { - unsigned int i, nelt = GET_MODE_NUNITS (mode); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); - for (i = 0; i < nelt / 2; i++) - { - sel[i * 2] = i; - sel[i * 2 + 1] = i + nelt; - } - if (can_vec_perm_p (mode, false, sel)) - { - for (i = 0; i < nelt; i++) - sel[i] += nelt / 2; - if (can_vec_perm_p (mode, false, sel)) - return true; - } - } + ih_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR, + vectype, optab_default); + il_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR, + vectype, optab_default); + if (il_optab && ih_optab + && optab_handler (ih_optab, mode) != CODE_FOR_nothing + && optab_handler (il_optab, mode) != CODE_FOR_nothing) + return true; + + if (can_vec_perm_for_code_p (VEC_INTERLEAVE_HIGH_EXPR, mode, NULL) + && can_vec_perm_for_code_p (VEC_INTERLEAVE_LOW_EXPR, mode, NULL)) + return true; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "interleave op not supported by target."); @@ -3921,26 +3917,15 @@ vect_permute_store_chain (VEC(tree,heap) *dr_chain, tree perm_dest, vect1, vect2, high, low; gimple perm_stmt; tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); - tree perm_mask_low, perm_mask_high; - unsigned int i, n; - unsigned int j, nelt = GET_MODE_NUNITS (TYPE_MODE (vectype)); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + int i; + unsigned int j; + enum tree_code high_code, low_code; gcc_assert (vect_strided_store_supported (vectype, length)); *result_chain = VEC_copy (tree, heap, dr_chain); - for (i = 0, n = nelt / 2; i < n; i++) - { - sel[i * 2] = i; - sel[i * 2 + 1] = i + nelt; - } - perm_mask_high = vect_gen_perm_mask (vectype, sel); - for (i = 0; i < nelt; i++) - sel[i] += nelt / 2; - perm_mask_low = vect_gen_perm_mask (vectype, sel); - - for (i = 0, n = exact_log2 (length); i < n; i++) + for (i = 0; i < exact_log2 (length); i++) { for (j = 0; j < length/2; j++) { @@ -3948,27 +3933,42 @@ vect_permute_store_chain (VEC(tree,heap) *dr_chain, vect2 = VEC_index (tree, dr_chain, j+length/2); /* Create interleaving stmt: - high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */ + in the case of big endian: + high = interleave_high (vect1, vect2) + and in the case of little endian: + high = interleave_low (vect1, vect2). */ perm_dest = create_tmp_var (vectype, "vect_inter_high"); DECL_GIMPLE_REG_P (perm_dest) = 1; add_referenced_var (perm_dest); - high = make_ssa_name (perm_dest, NULL); - perm_stmt - = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high, - vect1, vect2, perm_mask_high); + if (BYTES_BIG_ENDIAN) + { + high_code = VEC_INTERLEAVE_HIGH_EXPR; + low_code = VEC_INTERLEAVE_LOW_EXPR; + } + else + { + low_code = VEC_INTERLEAVE_HIGH_EXPR; + high_code = VEC_INTERLEAVE_LOW_EXPR; + } + perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest, + vect1, vect2); + high = make_ssa_name (perm_dest, perm_stmt); + gimple_assign_set_lhs (perm_stmt, high); vect_finish_stmt_generation (stmt, perm_stmt, gsi); VEC_replace (tree, *result_chain, 2*j, high); /* Create interleaving stmt: - low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1, - nelt*3/2+1, ...}> */ + in the case of big endian: + low = interleave_low (vect1, vect2) + and in the case of little endian: + low = interleave_high (vect1, vect2). */ perm_dest = create_tmp_var (vectype, "vect_inter_low"); DECL_GIMPLE_REG_P (perm_dest) = 1; add_referenced_var (perm_dest); - low = make_ssa_name (perm_dest, NULL); - perm_stmt - = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low, - vect1, vect2, perm_mask_low); + perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest, + vect1, vect2); + low = make_ssa_name (perm_dest, perm_stmt); + gimple_assign_set_lhs (perm_stmt, low); vect_finish_stmt_generation (stmt, perm_stmt, gsi); VEC_replace (tree, *result_chain, 2*j+1, low); } diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index bc917d9..dc01ce7 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -776,7 +776,9 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi) /* These are only created by the vectorizer, after having queried the target support. It's more than just looking at the optab, and there's no need to do it again. */ - if (code == VEC_EXTRACT_EVEN_EXPR + if (code == VEC_INTERLEAVE_HIGH_EXPR + || code == VEC_INTERLEAVE_LOW_EXPR + || code == VEC_EXTRACT_EVEN_EXPR || code == VEC_EXTRACT_ODD_EXPR) return; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index ed61080..046a76f 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -3828,8 +3828,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, Then permutation statements are generated: - VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} > - VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} > + VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 > + VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 > ... And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts @@ -4026,8 +4026,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, the VECTOR_CST mask that implements the permutation of the vector elements. If that is impossible to do, returns NULL. */ -tree -vect_gen_perm_mask (tree vectype, unsigned char *sel) +static tree +gen_perm_mask (tree vectype, unsigned char *sel) { tree mask_elt_type, mask_type, mask_vec; int i, nunits; @@ -4067,7 +4067,7 @@ perm_mask_for_reverse (tree vectype) for (i = 0; i < nunits; ++i) sel[i] = nunits - 1 - i; - return vect_gen_perm_mask (vectype, sel); + return gen_perm_mask (vectype, sel); } /* Given a vector variable X and Y, that was generated for the scalar @@ -4314,7 +4314,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, for (i = 0; i < gather_off_nunits; ++i) sel[i] = i | nunits; - perm_mask = vect_gen_perm_mask (gather_off_vectype, sel); + perm_mask = gen_perm_mask (gather_off_vectype, sel); gcc_assert (perm_mask != NULL_TREE); } else if (nunits == gather_off_nunits * 2) @@ -4326,7 +4326,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, sel[i] = i < gather_off_nunits ? i : i + nunits - gather_off_nunits; - perm_mask = vect_gen_perm_mask (vectype, sel); + perm_mask = gen_perm_mask (vectype, sel); gcc_assert (perm_mask != NULL_TREE); ncopies *= 2; } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index fe1a6bcc..927c0bd 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -848,7 +848,6 @@ extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); extern bool vect_supportable_shift (enum tree_code, tree); extern void vect_get_vec_defs (tree, tree, gimple, VEC (tree, heap) **, VEC (tree, heap) **, slp_tree, int); -extern tree vect_gen_perm_mask (tree, unsigned char *); /* In tree-vect-data-refs.c. */ extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); diff --git a/gcc/tree.def b/gcc/tree.def index 2f096f9..11ce8b5 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1192,6 +1192,10 @@ DEFTREECODE (VEC_PACK_FIX_TRUNC_EXPR, "vec_pack_fix_trunc_expr", tcc_binary, 2) DEFTREECODE (VEC_EXTRACT_EVEN_EXPR, "vec_extract_even_expr", tcc_binary, 2) DEFTREECODE (VEC_EXTRACT_ODD_EXPR, "vec_extract_odd_expr", tcc_binary, 2) +/* Merge input vectors interleaving their fields. */ +DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleave_high_expr", tcc_binary, 2) +DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleave_low_expr", tcc_binary, 2) + /* Widening vector shift left in bits. Operand 0 is a vector to be shifted with N elements of size S. Operand 1 is an integer shift amount in bits. |