diff options
author | Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> | 2022-05-25 00:26:28 +0530 |
---|---|---|
committer | Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> | 2022-05-25 00:42:00 +0530 |
commit | ae8decf1d2b8329af59592b4fa78ee8dfab3ba5e (patch) | |
tree | 276aaf3fc98f7ef3454430e574fc66affe4265b5 | |
parent | 2f4f7de787e5844515d27b2269fc472f95a9916a (diff) | |
download | gcc-ae8decf1d2b8329af59592b4fa78ee8dfab3ba5e.zip gcc-ae8decf1d2b8329af59592b4fa78ee8dfab3ba5e.tar.gz gcc-ae8decf1d2b8329af59592b4fa78ee8dfab3ba5e.tar.bz2 |
Add new parameter to vec_perm_const hook for specifying operand mode.
The rationale of the patch is to support vec_perm_expr of the form:
lhs = vec_perm_expr<rhs, mask>
where lhs and rhs are vector types with different lengths but have
same element type. For example, lhs is SVE vector and rhs
is corresponding AdvSIMD vector.
It would also allow to express extract even/odd and interleave operations
with a VEC_PERM_EXPR. The interleave currently has the issue that we have
to artificially widen the inputs with "dont-care" elements.
gcc/ChangeLog:
* target.def (vec_perm_const): Define new parameter op_mode and
update doc.
* doc/tm.texi: Regenerate.
* config/aarch64/aarch64.cc (aarch64_vectorize_vec_perm_const): Adjust
vec_perm_const hook to add new parameter op_mode and return false
if result and operand modes do not match.
* config/arm/arm.cc (arm_vectorize_vec_perm_const): Likewise.
* config/gcn/gcn.cc (gcn_vectorize_vec_perm_const): Likewise.
* config/ia64/ia64.cc (ia64_vectorize_vec_perm_const): Likewise.
* config/mips/mips.cc (mips_vectorize_vec_perm_const): Likewise.
* config/rs6000/rs6000.cc (rs6000_vectorize_vec_perm_const): Likewise
* config/s390/s390.cc (s390_vectorize_vec_perm_const): Likewise.
* config/sparc/sparc.cc (sparc_vectorize_vec_perm_const): Likewise.
* config/i386/i386-expand.cc (ix86_vectorize_vec_perm_const): Likewise.
* config/i386/i386-expand.h (ix86_vectorize_vec_perm_const): Adjust
prototype.
* config/i386/sse.md (ashrv4di3): Adjust call to vec_perm_const hook.
(ashrv2di3): Likewise.
* optabs.cc (expand_vec_perm_const): Likewise.
* optabs-query.h (can_vec_perm_const_p): Adjust prototype.
* optabs-query.cc (can_vec_perm_const_p): Define new parameter
op_mode and pass it to vec_perm_const hook.
(can_mult_highpart_p): Adjust call to can_vec_perm_const_p.
* match.pd (vec_perm X Y CST): Likewise.
* tree-ssa-forwprop.cc (simplify_vector_constructor): Likewise.
* tree-vect-data-refs.cc (vect_grouped_store_supported): Likewise.
(vect_grouped_load_supported): Likewise.
(vect_shift_permute_load_chain): Likewise.
* tree-vect-generic.cc (lower_vec_perm): Likewise.
* tree-vect-loop-manip.cc (interleave_supported_p): Likewise.
* tree-vect-loop.cc (have_whole_vector_shift): Likewise.
* tree-vect-patterns.cc (vect_recog_rotate_pattern): Likewise.
* tree-vect-slp.cc (can_duplicate_and_interleave_p): Likewise.
(vect_transform_slp_perm_load): Likewise.
(vectorizable_slp_permutation): Likewise.
* tree-vect-stmts.cc (perm_mask_for_reverse): Likewise.
(vectorizable_bswap): Likewise.
(scan_store_can_perm_p): Likewise.
(vect_gen_perm_mask_checked): Likewise.
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 8 | ||||
-rw-r--r-- | gcc/config/arm/arm.cc | 6 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.cc | 7 | ||||
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 8 | ||||
-rw-r--r-- | gcc/config/i386/i386-expand.h | 5 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 10 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.cc | 12 | ||||
-rw-r--r-- | gcc/config/mips/mips.cc | 8 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.cc | 8 | ||||
-rw-r--r-- | gcc/config/s390/s390.cc | 6 | ||||
-rw-r--r-- | gcc/config/sparc/sparc.cc | 11 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 18 | ||||
-rw-r--r-- | gcc/match.pd | 8 | ||||
-rw-r--r-- | gcc/optabs-query.cc | 16 | ||||
-rw-r--r-- | gcc/optabs-query.h | 4 | ||||
-rw-r--r-- | gcc/optabs.cc | 7 | ||||
-rw-r--r-- | gcc/target.def | 18 | ||||
-rw-r--r-- | gcc/tree-ssa-forwprop.cc | 6 | ||||
-rw-r--r-- | gcc/tree-vect-data-refs.cc | 35 | ||||
-rw-r--r-- | gcc/tree-vect-generic.cc | 5 | ||||
-rw-r--r-- | gcc/tree-vect-loop-manip.cc | 3 | ||||
-rw-r--r-- | gcc/tree-vect-loop.cc | 2 | ||||
-rw-r--r-- | gcc/tree-vect-patterns.cc | 3 | ||||
-rw-r--r-- | gcc/tree-vect-slp.cc | 10 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.cc | 11 |
25 files changed, 150 insertions, 85 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index ba5b6be..d4c575c 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24115,9 +24115,13 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ static bool -aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, - rtx op1, const vec_perm_indices &sel) +aarch64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, + rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { + if (vmode != op_mode) + return false; + struct expand_vec_perm_d d; /* Check whether the mask can be applied to a single vector. */ diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 2afe044..70c2d50 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -31813,9 +31813,13 @@ arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ static bool -arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1, +arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, + rtx target, rtx op0, rtx op1, const vec_perm_indices &sel) { + if (vmode != op_mode) + return false; + struct expand_vec_perm_d d; int i, nelt, which; diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 5e75a1b..6fc20d3 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -4161,10 +4161,13 @@ gcn_make_vec_perm_address (unsigned int *perm) permutations. */ static bool -gcn_vectorize_vec_perm_const (machine_mode vmode, rtx dst, - rtx src0, rtx src1, +gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, + rtx dst, rtx src0, rtx src1, const vec_perm_indices & sel) { + if (vmode != op_mode) + return false; + unsigned int nelt = GET_MODE_NUNITS (vmode); gcc_assert (VECTOR_MODE_P (vmode)); diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index e3bd661..5cd7b99 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -22069,9 +22069,13 @@ canonicalize_perm (struct expand_vec_perm_d *d) /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ bool -ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, - rtx op1, const vec_perm_indices &sel) +ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, + rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { + if (vmode != op_mode) + return false; + struct expand_vec_perm_d d; unsigned char perm[MAX_VECT_LEN]; unsigned int i, nelt, which; diff --git a/gcc/config/i386/i386-expand.h b/gcc/config/i386/i386-expand.h index 9d320c2..6c65019 100644 --- a/gcc/config/i386/i386-expand.h +++ b/gcc/config/i386/i386-expand.h @@ -48,8 +48,9 @@ rtx gen_push (rtx arg); rtx gen_pop (rtx arg); rtx ix86_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, int ignore); -bool ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, - rtx op1, const vec_perm_indices &sel); +bool ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, + rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel); bool ix86_notrack_prefixed_insn_p (rtx_insn *); machine_mode ix86_split_reduction (machine_mode mode); void ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode, rtx op0, diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f261ff6..8b2602b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15842,8 +15842,9 @@ sel[7] = 15; } vec_perm_indices indices (sel, 2, 8); - bool ok = targetm.vectorize.vec_perm_const (V8SImode, target, - arg0, arg1, indices); + bool ok = targetm.vectorize.vec_perm_const (V8SImode, V8SImode, + target, arg0, arg1, + indices); gcc_assert (ok); emit_move_insn (operands[0], lowpart_subreg (V4DImode, target, V8SImode)); @@ -24629,8 +24630,9 @@ sel[3] = 7; } vec_perm_indices indices (sel, arg0 != arg1 ? 2 : 1, 4); - bool ok = targetm.vectorize.vec_perm_const (V4SImode, target, - arg0, arg1, indices); + bool ok = targetm.vectorize.vec_perm_const (V4SImode, V4SImode, + target, arg0, arg1, + indices); gcc_assert (ok); emit_move_insn (operands[0], lowpart_subreg (V2DImode, target, V4SImode)); diff --git a/gcc/config/ia64/ia64.cc b/gcc/config/ia64/ia64.cc index f9fb681..25e4a47 100644 --- a/gcc/config/ia64/ia64.cc +++ b/gcc/config/ia64/ia64.cc @@ -332,8 +332,8 @@ static fixed_size_mode ia64_get_reg_raw_mode (int regno); static section * ia64_hpux_function_section (tree, enum node_frequency, bool, bool); -static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, - const vec_perm_indices &); +static bool ia64_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, + rtx, rtx, const vec_perm_indices &); static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode); static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode); @@ -11751,9 +11751,13 @@ ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ static bool -ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, - rtx op1, const vec_perm_indices &sel) +ia64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, + rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { + if (vmode != op_mode) + return false; + struct expand_vec_perm_d d; unsigned char perm[MAX_VECT_LEN]; unsigned int i, nelt, which; diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index e64928f..5eb8459 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -21790,9 +21790,13 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ static bool -mips_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, - rtx op1, const vec_perm_indices &sel) +mips_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, + rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { + if (vmode != op_mode) + return false; + struct expand_vec_perm_d d; int i, nelt, which; unsigned char orig_perm[MAX_VECT_LEN]; diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index d4defc8..0af2085 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -23294,9 +23294,13 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1, /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ static bool -rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, - rtx op1, const vec_perm_indices &sel) +rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, + rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { + if (vmode != op_mode) + return false; + bool testing_p = !target; /* AltiVec (and thus VSX) can handle arbitrary permutations. */ diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index d46aba6..444b1ec 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -17175,9 +17175,13 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d) hook is supposed to emit the required INSNs. */ bool -s390_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1, +s390_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, + rtx target, rtx op0, rtx op1, const vec_perm_indices &sel) { + if (vmode != op_mode) + return false; + struct expand_vec_perm_d d; unsigned int i, nelt; diff --git a/gcc/config/sparc/sparc.cc b/gcc/config/sparc/sparc.cc index aca925b..c72c38e 100644 --- a/gcc/config/sparc/sparc.cc +++ b/gcc/config/sparc/sparc.cc @@ -712,7 +712,8 @@ static bool sparc_modes_tieable_p (machine_mode, machine_mode); static bool sparc_can_change_mode_class (machine_mode, machine_mode, reg_class_t); static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT); -static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, +static bool sparc_vectorize_vec_perm_const (machine_mode, machine_mode, + rtx, rtx, rtx, const vec_perm_indices &); static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *); static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET); @@ -13035,9 +13036,13 @@ sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel) /* Implement TARGET_VEC_PERM_CONST. */ static bool -sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, - rtx op1, const vec_perm_indices &sel) +sparc_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, + rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { + if (vmode != op_mode) + return false; + if (!TARGET_VIS2) return false; diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index c5006af..b0ea398 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6088,14 +6088,18 @@ for the given scalar type @var{type}. @var{is_packed} is false if the scalar access using @var{type} is known to be naturally aligned. @end deftypefn -@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST (machine_mode @var{mode}, rtx @var{output}, rtx @var{in0}, rtx @var{in1}, const vec_perm_indices @var{&sel}) +@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST (machine_mode @var{mode}, machine_mode @var{op_mode}, rtx @var{output}, rtx @var{in0}, rtx @var{in1}, const vec_perm_indices @var{&sel}) This hook is used to test whether the target can permute up to two -vectors of mode @var{mode} using the permutation vector @code{sel}, and -also to emit such a permutation. In the former case @var{in0}, @var{in1} -and @var{out} are all null. In the latter case @var{in0} and @var{in1} are -the source vectors and @var{out} is the destination vector; all three are -operands of mode @var{mode}. @var{in1} is the same as @var{in0} if -@var{sel} describes a permutation on one vector instead of two. +vectors of mode @var{op_mode} using the permutation vector @code{sel}, +producing a vector of mode @var{mode}. The hook is also used to emit such +a permutation. + +When the hook is being used to test whether the target supports a permutation, +@var{in0}, @var{in1}, and @var{out} are all null. When the hook is being used +to emit a permutation, @var{in0} and @var{in1} are the source vectors of mode +@var{op_mode} and @var{out} is the destination vector of mode @var{mode}. +@var{in1} is the same as @var{in0} if @var{sel} describes a permutation on one +vector instead of two. Return true if the operation is possible, emitting instructions for it if rtxes are provided. diff --git a/gcc/match.pd b/gcc/match.pd index c2fed9b..183a0d4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -7642,6 +7642,8 @@ and, (with { tree op0 = @0, op1 = @1, op2 = @2; + machine_mode result_mode = TYPE_MODE (type); + machine_mode op_mode = TYPE_MODE (TREE_TYPE (op0)); /* Build a vector of integers from the tree mask. */ vec_perm_builder builder; @@ -7703,7 +7705,7 @@ and, insert from a CONSTRUCTOR or constant and use a BIT_INSERT_EXPR in that case. But only if the vector mode is supported, otherwise this is invalid GIMPLE. */ - if (TYPE_MODE (type) != BLKmode + if (op_mode != BLKmode && (TREE_CODE (cop0) == VECTOR_CST || TREE_CODE (cop0) == CONSTRUCTOR || TREE_CODE (cop1) == VECTOR_CST @@ -7749,12 +7751,12 @@ and, 2-argument version. */ tree oldop2 = op2; if (sel.ninputs () == 2 - || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) + || can_vec_perm_const_p (result_mode, op_mode, sel, false)) op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); else { vec_perm_indices sel2 (builder, 2, nelts); - if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) + if (can_vec_perm_const_p (result_mode, op_mode, sel2, false)) op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2); else /* Not directly supported with either encoding, diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc index 68dc679..809482b 100644 --- a/gcc/optabs-query.cc +++ b/gcc/optabs-query.cc @@ -407,9 +407,9 @@ can_vec_perm_var_p (machine_mode mode) } /* Return true if the target directly supports VEC_PERM_EXPRs on vectors - of mode MODE using the selector SEL. ALLOW_VARIABLE_P is true if it - is acceptable to force the selector into a register and use a variable - permute (if the target supports that). + of mode OP_MODE and result vector of mode MODE using the selector SEL. + ALLOW_VARIABLE_P is true if it is acceptable to force the selector into a + register and use a variable permute (if the target supports that). Note that additional permutations representing whole-vector shifts may also be handled via the vec_shr or vec_shl optab, but only where the @@ -417,8 +417,8 @@ can_vec_perm_var_p (machine_mode mode) with here. */ bool -can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel, - bool allow_variable_p) +can_vec_perm_const_p (machine_mode mode, machine_mode op_mode, + const vec_perm_indices &sel, bool allow_variable_p) { /* If the target doesn't implement a vector mode for the vector type, then no operations are supported. */ @@ -448,7 +448,7 @@ can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel, if (targetm.vectorize.vec_perm_const != NULL) { - if (targetm.vectorize.vec_perm_const (mode, NULL_RTX, NULL_RTX, + if (targetm.vectorize.vec_perm_const (mode, op_mode, NULL_RTX, NULL_RTX, NULL_RTX, sel)) return true; @@ -534,7 +534,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) + (i & ~1) + ((i & 1) ? nunits : 0)); vec_perm_indices indices (sel, 2, nunits); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) return 2; } } @@ -550,7 +550,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) for (unsigned int i = 0; i < 3; ++i) sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1)); vec_perm_indices indices (sel, 2, nunits); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) return 3; } } diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h index b9c9fd6..945d2a8 100644 --- a/gcc/optabs-query.h +++ b/gcc/optabs-query.h @@ -178,8 +178,8 @@ bool can_conditionally_move_p (machine_mode mode); opt_machine_mode qimode_for_vec_perm (machine_mode); bool selector_fits_mode_p (machine_mode, const vec_perm_indices &); bool can_vec_perm_var_p (machine_mode); -bool can_vec_perm_const_p (machine_mode, const vec_perm_indices &, - bool = true); +bool can_vec_perm_const_p (machine_mode, machine_mode, + const vec_perm_indices &, bool = true); /* Find a widening optab even if it doesn't widen as much as we want. */ #define find_widening_optab_handler(A, B, C) \ find_widening_optab_handler_and_mode (A, B, C, NULL) diff --git a/gcc/optabs.cc b/gcc/optabs.cc index 3d8fa3a..c0a6847 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -6250,7 +6250,10 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1, if (single_arg_p) v1 = v0; - if (targetm.vectorize.vec_perm_const (mode, target, v0, v1, indices)) + gcc_checking_assert (GET_MODE (v0) == GET_MODE (v1)); + machine_mode op_mode = GET_MODE (v0); + if (targetm.vectorize.vec_perm_const (mode, op_mode, target, v0, v1, + indices)) return target; } @@ -6264,7 +6267,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1, v0_qi = gen_lowpart (qimode, v0); v1_qi = gen_lowpart (qimode, v1); if (targetm.vectorize.vec_perm_const != NULL - && targetm.vectorize.vec_perm_const (qimode, target_qi, v0_qi, + && targetm.vectorize.vec_perm_const (qimode, qimode, target_qi, v0_qi, v1_qi, qimode_indices)) return gen_lowpart (mode, target_qi); } diff --git a/gcc/target.def b/gcc/target.def index d85adf3..2a7fa68 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1878,12 +1878,16 @@ access using @var{type} is known to be naturally aligned.", DEFHOOK (vec_perm_const, "This hook is used to test whether the target can permute up to two\n\ -vectors of mode @var{mode} using the permutation vector @code{sel}, and\n\ -also to emit such a permutation. In the former case @var{in0}, @var{in1}\n\ -and @var{out} are all null. In the latter case @var{in0} and @var{in1} are\n\ -the source vectors and @var{out} is the destination vector; all three are\n\ -operands of mode @var{mode}. @var{in1} is the same as @var{in0} if\n\ -@var{sel} describes a permutation on one vector instead of two.\n\ +vectors of mode @var{op_mode} using the permutation vector @code{sel},\n\ +producing a vector of mode @var{mode}. The hook is also used to emit such\n\ +a permutation.\n\ +\n\ +When the hook is being used to test whether the target supports a permutation,\n\ +@var{in0}, @var{in1}, and @var{out} are all null. When the hook is being used\n\ +to emit a permutation, @var{in0} and @var{in1} are the source vectors of mode\n\ +@var{op_mode} and @var{out} is the destination vector of mode @var{mode}.\n\ +@var{in1} is the same as @var{in0} if @var{sel} describes a permutation on one\n\ +vector instead of two.\n\ \n\ Return true if the operation is possible, emitting instructions for it\n\ if rtxes are provided.\n\ @@ -1894,7 +1898,7 @@ try the equivalent byte operation. If that also fails, it will try forcing\n\ the selector into a register and using the @var{vec_perm@var{mode}}\n\ instruction pattern. There is no need for the hook to handle these two\n\ implementation approaches itself.", - bool, (machine_mode mode, rtx output, rtx in0, rtx in1, + bool, (machine_mode mode, machine_mode op_mode, rtx output, rtx in0, rtx in1, const vec_perm_indices &sel), NULL) diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index d698a48..c387e84 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -2953,7 +2953,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) : (elts[0].second == 0 && elts[0].first == 0 ? 0 : refnelts) + i); vec_perm_indices indices (sel, orig[1] ? 2 : 1, refnelts); - if (!can_vec_perm_const_p (TYPE_MODE (perm_type), indices)) + machine_mode vmode = TYPE_MODE (perm_type); + if (!can_vec_perm_const_p (vmode, vmode, indices)) return false; mask_type = build_vector_type (build_nonstandard_integer_type (elem_size, 1), @@ -3002,7 +3003,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) sel.quick_push (elts[i].first ? elts[i].second + nelts : i); vec_perm_indices indices (sel, 2, nelts); - if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) + machine_mode vmode = TYPE_MODE (type); + if (!can_vec_perm_const_p (vmode, vmode, indices)) return false; mask_type = build_vector_type (build_nonstandard_integer_type (elem_size, 1), diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 09223ba..d20a10a 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -5347,7 +5347,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) sel[3 * i + nelt2] = 0; } indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (mode, indices)) + if (!can_vec_perm_const_p (mode, mode, indices)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, @@ -5365,7 +5365,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) sel[3 * i + nelt2] = nelt + j2++; } indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (mode, indices)) + if (!can_vec_perm_const_p (mode, mode, indices)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, @@ -5390,12 +5390,12 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) sel[i * 2 + 1] = i + nelt; } vec_perm_indices indices (sel, 2, nelt); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) { for (i = 0; i < 6; i++) sel[i] += exact_div (nelt, 2); indices.new_vector (sel, 2, nelt); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) return true; } } @@ -5963,7 +5963,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, else sel[i] = 0; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (mode, indices)) + if (!can_vec_perm_const_p (mode, mode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5977,7 +5977,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, else sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++); indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (mode, indices)) + if (!can_vec_perm_const_p (mode, mode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6000,12 +6000,12 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, for (i = 0; i < 3; i++) sel[i] = i * 2; vec_perm_indices indices (sel, 2, nelt); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) { for (i = 0; i < 3; i++) sel[i] = i * 2 + 1; indices.new_vector (sel, 2, nelt); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) return true; } } @@ -6327,6 +6327,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, gimple *perm_stmt; tree vectype = STMT_VINFO_VECTYPE (stmt_info); + machine_mode vmode = TYPE_MODE (vectype); unsigned int i; loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); @@ -6351,7 +6352,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2 + 1; vec_perm_indices indices (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6366,7 +6367,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6381,7 +6382,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, for (i = 0; i < nelt; i++) sel[i] = nelt / 2 + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6397,7 +6398,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, for (i = nelt / 2; i < nelt; i++) sel[i] = nelt + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6461,7 +6462,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, k++; } vec_perm_indices indices (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6476,7 +6477,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6490,7 +6491,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + 1 + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6504,7 +6505,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, for (i = 0; i < nelt; i++) sel[i] = (nelt / 3) + (nelt % 3) / 2 + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6518,7 +6519,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index f7de64c..92aba5d 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -1527,7 +1527,10 @@ lower_vec_perm (gimple_stmt_iterator *gsi) && tree_to_vec_perm_builder (&sel_int, mask)) { vec_perm_indices indices (sel_int, 2, elements); - if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices)) + machine_mode vmode = TYPE_MODE (vect_type); + tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt)); + machine_mode lhs_mode = TYPE_MODE (lhs_type); + if (can_vec_perm_const_p (lhs_mode, vmode, indices)) { gimple_assign_set_rhs3 (stmt, mask); update_stmt (stmt); diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 1d4337e..11dc6cb 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -312,7 +312,8 @@ interleave_supported_p (vec_perm_indices *indices, tree vectype, sel.quick_push (base + i + nelts); } indices->new_vector (sel, 2, nelts); - return can_vec_perm_const_p (TYPE_MODE (vectype), *indices); + return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), + *indices); } /* Try to use permutes to define the masks in DEST_RGM using the masks diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index f204b72..246347b 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -4527,7 +4527,7 @@ have_whole_vector_shift (machine_mode mode) { calc_vec_perm_mask_for_shift (i, nelt, &sel); indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (mode, indices, false)) + if (!can_vec_perm_const_p (mode, mode, indices, false)) return false; } return true; diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index ac49c1a..0fad4db 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -2649,7 +2649,8 @@ vect_recog_rotate_pattern (vec_info *vinfo, vec_perm_indices indices (elts, 1, TYPE_VECTOR_SUBPARTS (char_vectype)); - if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) + machine_mode vmode = TYPE_MODE (char_vectype); + if (can_vec_perm_const_p (vmode, vmode, indices)) { /* vectorizable_bswap can handle the __builtin_bswap16 if we undo the argument promotion. */ diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index cdfff1a..fe9361c 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -420,8 +420,9 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, } vec_perm_indices indices1 (sel1, 2, nelts); vec_perm_indices indices2 (sel2, 2, nelts); - if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1) - && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2)) + machine_mode vmode = TYPE_MODE (vector_type); + if (can_vec_perm_const_p (vmode, vmode, indices1) + && can_vec_perm_const_p (vmode, vmode, indices2)) { if (nvectors_out) *nvectors_out = nvectors; @@ -6762,7 +6763,7 @@ vect_transform_slp_perm_load (vec_info *vinfo, if (index == count && !noop_p) { indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits); - if (!can_vec_perm_const_p (mode, indices)) + if (!can_vec_perm_const_p (mode, mode, indices)) { if (dump_enabled_p ()) { @@ -7122,8 +7123,9 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, { indices.new_vector (mask, second_vec.first == -1U ? 1 : 2, nunits); bool identity_p = indices.series_p (0, 1, 0, 1); + machine_mode vmode = TYPE_MODE (vectype); if (!identity_p - && !can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + && !can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) { diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 8327e9d..346d8ce 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2016,7 +2016,8 @@ perm_mask_for_reverse (tree vectype) sel.quick_push (nunits - 1 - i); vec_perm_indices indices (sel, 1, nunits); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), + indices)) return NULL_TREE; return vect_gen_perm_mask_checked (vectype, indices); } @@ -3168,7 +3169,8 @@ vectorizable_bswap (vec_info *vinfo, elts.quick_push ((i + 1) * word_bytes - j - 1); vec_perm_indices indices (elts, 1, num_bytes); - if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) + machine_mode vmode = TYPE_MODE (char_vectype); + if (!can_vec_perm_const_p (vmode, vmode, indices)) return false; if (! vec_stmt) @@ -6712,7 +6714,7 @@ scan_store_can_perm_p (tree vectype, tree init, sel[j] = nunits + k; } vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); - if (!can_vec_perm_const_p (vec_mode, indices)) + if (!can_vec_perm_const_p (vec_mode, vec_mode, indices)) { if (i == units_log2) return -1; @@ -8582,7 +8584,8 @@ vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel) tree vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel) { - gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel)); + machine_mode vmode = TYPE_MODE (vectype); + gcc_assert (can_vec_perm_const_p (vmode, vmode, sel)); return vect_gen_perm_mask_any (vectype, sel); } |