diff options
author | Robin Dapp <rdapp@ventanamicro.com> | 2023-10-25 22:19:43 +0200 |
---|---|---|
committer | Robin Dapp <rdapp@ventanamicro.com> | 2023-11-06 12:21:57 +0100 |
commit | 0c42741ad95af3a1e3ac07350da4c3a94865ed63 (patch) | |
tree | 4dd137b298c08345aded697acd25d6c43006d32a | |
parent | 89abbaa5fb3823601710394683cf2e2101aba66a (diff) | |
download | gcc-0c42741ad95af3a1e3ac07350da4c3a94865ed63.zip gcc-0c42741ad95af3a1e3ac07350da4c3a94865ed63.tar.gz gcc-0c42741ad95af3a1e3ac07350da4c3a94865ed63.tar.bz2 |
internal-fn: Add VCOND_MASK_LEN.
In order to prevent simplification of a COND_OP with degenerate mask
(CONSTM1_RTX) into just an OP in the presence of length masking this
patch introduces a length-masked analog to VEC_COND_EXPR:
IFN_VCOND_MASK_LEN.
It also adds new match patterns that allow the combination of
unconditional unary, binary and ternary operations with the
VCOND_MASK_LEN into a conditional operation if the target supports it.
gcc/ChangeLog:
PR tree-optimization/111760
* config/riscv/autovec.md (vcond_mask_len_<mode><vm>): Add
expander.
* config/riscv/riscv-protos.h (enum insn_type): Add.
* config/riscv/riscv-v.cc (needs_fp_rounding): Add !pred_mov.
* doc/md.texi: Add vcond_mask_len.
* gimple-match-exports.cc (maybe_resimplify_conditional_op):
Create VCOND_MASK_LEN when length masking.
* gimple-match.h (gimple_match_op::gimple_match_op): Always
initialize len and bias.
* internal-fn.cc (vec_cond_mask_len_direct): Add.
(direct_vec_cond_mask_len_optab_supported_p): Add.
(internal_fn_len_index): Add VCOND_MASK_LEN.
(internal_fn_mask_index): Ditto.
* internal-fn.def (VCOND_MASK_LEN): New internal function.
* match.pd: Combine unconditional unary, binary and ternary
operations into the respective COND_LEN operations.
* optabs.def (OPTAB_D): Add vcond_mask_len optab.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/vect-cond-arith-2.c: No vect cost model for
riscv_v.
-rw-r--r-- | gcc/config/riscv/autovec.md | 26 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-v.cc | 3 | ||||
-rw-r--r-- | gcc/doc/md.texi | 15 | ||||
-rw-r--r-- | gcc/gimple-match-exports.cc | 13 | ||||
-rw-r--r-- | gcc/gimple-match.h | 6 | ||||
-rw-r--r-- | gcc/internal-fn.cc | 5 | ||||
-rw-r--r-- | gcc/internal-fn.def | 2 | ||||
-rw-r--r-- | gcc/match.pd | 51 | ||||
-rw-r--r-- | gcc/optabs.def | 1 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c | 1 |
11 files changed, 120 insertions, 6 deletions
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index f1f0523..13e91d1 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -565,6 +565,32 @@ [(set_attr "type" "vector")] ) +(define_expand "vcond_mask_len_<mode>" + [(match_operand:V 0 "register_operand") + (match_operand:<VM> 1 "nonmemory_operand") + (match_operand:V 2 "nonmemory_operand") + (match_operand:V 3 "autovec_else_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand")] + "TARGET_VECTOR" + { + if (satisfies_constraint_Wc1 (operands[1])) + riscv_vector::expand_cond_len_unop (code_for_pred_mov (<MODE>mode), + operands); + else + { + /* The order of then and else is opposite to pred_merge. */ + rtx ops[] = {operands[0], operands[3], operands[3], operands[2], + operands[1]}; + riscv_vector::emit_nonvlmax_insn (code_for_pred_merge (<MODE>mode), + riscv_vector::MERGE_OP_TU, + ops, operands[4]); + } + DONE; + } + [(set_attr "type" "vector")] +) + ;; ------------------------------------------------------------------------- ;; ---- [BOOL] Select based on masks ;; ------------------------------------------------------------------------- diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index d322b7e..52d2a2c 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -359,6 +359,9 @@ enum insn_type : unsigned int /* For vmerge, no mask operand, no mask policy operand. */ MERGE_OP = __NORMAL_OP_TA2 | TERNARY_OP_P, + /* For vmerge with TU policy. */ + MERGE_OP_TU = HAS_DEST_P | HAS_MERGE_P | TERNARY_OP_P | TU_POLICY_P, + /* For vm<compare>, no tail policy operand. 
*/ COMPARE_OP = __NORMAL_OP_MA | TERNARY_OP_P, COMPARE_OP_MU = __MASK_OP_MU | TERNARY_OP_P, diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 52eb2ac..eeefda64 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3214,7 +3214,8 @@ needs_fp_rounding (unsigned icode, machine_mode mode) && icode != maybe_code_for_pred_widen (FLOAT, mode) && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode) /* vfsgnj */ - && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode); + && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode) + && icode != maybe_code_for_pred_mov (mode); } /* Subroutine to expand COND_LEN_* patterns. */ diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index fab2513..a5c1d1f 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5306,6 +5306,21 @@ no need to define this instruction pattern if the others are supported. Similar to @code{vcond@var{m}@var{n}} but operand 3 holds a pre-computed result of vector comparison. +@cindex @code{vcond_mask_len_@var{m}@var{n}} instruction pattern +@item @samp{vcond_mask_len_@var{m}@var{n}} +Set each element of operand 0 to the corresponding element of operand 2 +or operand 3. Choose operand 2 if both the element index is less than +operand 4 plus operand 5 and the corresponding element of operand 1 +is nonzero: + +@smallexample +for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++) + op0[i] = i < op4 + op5 && op1[i] ? op2[i] : op3[i]; +@end smallexample + +Operands 0, 2 and 3 have mode @var{m}. Operand 1 has mode @var{n}. +Operands 4 and 5 have a target-dependent scalar integer mode.
+ @cindex @code{maskload@var{m}@var{n}} instruction pattern @item @samp{maskload@var{m}@var{n}} Perform a masked load of vector from memory operand 1 of mode @var{m} diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc index b36027b..d6dac08 100644 --- a/gcc/gimple-match-exports.cc +++ b/gcc/gimple-match-exports.cc @@ -307,9 +307,16 @@ maybe_resimplify_conditional_op (gimple_seq *seq, gimple_match_op *res_op, && VECTOR_TYPE_P (res_op->type) && gimple_simplified_result_is_gimple_val (res_op)) { - new_op.set_op (VEC_COND_EXPR, res_op->type, - res_op->cond.cond, res_op->ops[0], - res_op->cond.else_value); + tree len = res_op->cond.len; + if (!len) + new_op.set_op (VEC_COND_EXPR, res_op->type, + res_op->cond.cond, res_op->ops[0], + res_op->cond.else_value); + else + new_op.set_op (IFN_VCOND_MASK_LEN, res_op->type, + res_op->cond.cond, res_op->ops[0], + res_op->cond.else_value, + res_op->cond.len, res_op->cond.bias); *res_op = new_op; return gimple_resimplify3 (seq, res_op, valueize); } diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h index 9892c142..63a9f02 100644 --- a/gcc/gimple-match.h +++ b/gcc/gimple-match.h @@ -32,7 +32,8 @@ public: enum uncond { UNCOND }; /* Build an unconditional op. 
*/ - gimple_match_cond (uncond) : cond (NULL_TREE), else_value (NULL_TREE) {} + gimple_match_cond (uncond) : cond (NULL_TREE), else_value (NULL_TREE), len + (NULL_TREE), bias (NULL_TREE) {} gimple_match_cond (tree, tree); gimple_match_cond (tree, tree, tree, tree); @@ -56,7 +57,8 @@ public: inline gimple_match_cond::gimple_match_cond (tree cond_in, tree else_value_in) - : cond (cond_in), else_value (else_value_in) + : cond (cond_in), else_value (else_value_in), len (NULL_TREE), + bias (NULL_TREE) { } diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index c7d3564..5a998e7 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -170,6 +170,7 @@ init_internal_fns () #define store_lanes_direct { 0, 0, false } #define mask_store_lanes_direct { 0, 0, false } #define vec_cond_mask_direct { 1, 0, false } +#define vec_cond_mask_len_direct { 1, 1, false } #define vec_cond_direct { 2, 0, false } #define scatter_store_direct { 3, 1, false } #define len_store_direct { 3, 3, false } @@ -4690,6 +4691,7 @@ internal_fn_len_index (internal_fn fn) case IFN_MASK_LEN_STORE: case IFN_MASK_LEN_LOAD_LANES: case IFN_MASK_LEN_STORE_LANES: + case IFN_VCOND_MASK_LEN: return 3; default: @@ -4782,6 +4784,9 @@ internal_fn_mask_index (internal_fn fn) case IFN_MASK_LEN_SCATTER_STORE: return 4; + case IFN_VCOND_MASK_LEN: + return 0; + default: return (conditional_internal_fn_code (fn) != ERROR_MARK || get_unconditional_internal_fn (fn) != IFN_LAST ? 
0 : -1); diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index a2023ab..7f0e375 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -221,6 +221,8 @@ DEF_INTERNAL_OPTAB_FN (VCONDU, ECF_CONST | ECF_NOTHROW, vcondu, vec_cond) DEF_INTERNAL_OPTAB_FN (VCONDEQ, ECF_CONST | ECF_NOTHROW, vcondeq, vec_cond) DEF_INTERNAL_OPTAB_FN (VCOND_MASK, ECF_CONST | ECF_NOTHROW, vcond_mask, vec_cond_mask) +DEF_INTERNAL_OPTAB_FN (VCOND_MASK_LEN, ECF_CONST | ECF_NOTHROW, + vcond_mask_len, cond_len_unary) DEF_INTERNAL_OPTAB_FN (VEC_SET, ECF_CONST | ECF_NOTHROW, vec_set, vec_set) DEF_INTERNAL_OPTAB_FN (VEC_EXTRACT, ECF_CONST | ECF_NOTHROW, diff --git a/gcc/match.pd b/gcc/match.pd index 424bbd0..dbc811b 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -87,6 +87,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) negate bit_not) (define_operator_list COND_UNARY IFN_COND_NEG IFN_COND_NOT) +(define_operator_list COND_LEN_UNARY + IFN_COND_LEN_NEG IFN_COND_LEN_NOT) /* Binary operations and their associated IFN_COND_* function. */ (define_operator_list UNCOND_BINARY @@ -8961,6 +8963,21 @@ and, && is_truth_type_for (op_type, TREE_TYPE (@0))) (cond_op (bit_not @0) @2 @1))))) +(for uncond_op (UNCOND_UNARY) + cond_op (COND_LEN_UNARY) + (simplify + (IFN_VCOND_MASK_LEN @0 (view_convert? (uncond_op@3 @1)) @2 @4 @5) + (with { tree op_type = TREE_TYPE (@3); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) + && is_truth_type_for (op_type, TREE_TYPE (@0))) + (cond_op @0 @1 @2 @4 @5)))) + (simplify + (IFN_VCOND_MASK_LEN @0 @1 (view_convert? (uncond_op@3 @2)) @4 @5) + (with { tree op_type = TREE_TYPE (@3); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) + && is_truth_type_for (op_type, TREE_TYPE (@0))) + (cond_op (bit_not @0) @2 @1 @4 @5))))) + /* `(a ? -1 : 0) ^ b` can be converted into a conditional not. 
*/ (simplify (bit_xor:c (vec_cond @0 uniform_integer_cst_p@1 uniform_integer_cst_p@2) @3) @@ -9007,6 +9024,23 @@ and, && single_use (@4)) (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1))))))) +(for uncond_op (UNCOND_BINARY) + cond_op (COND_LEN_BINARY) + (simplify + (IFN_VCOND_MASK_LEN @0 (view_convert? (uncond_op@4 @1 @2)) @3 @5 @6) + (with { tree op_type = TREE_TYPE (@4); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) + && is_truth_type_for (op_type, TREE_TYPE (@0)) + && single_use (@4)) + (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3) @5 @6))))) + (simplify + (IFN_VCOND_MASK_LEN @0 @1 (view_convert? (uncond_op@4 @2 @3)) @5 @6) + (with { tree op_type = TREE_TYPE (@4); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) + && is_truth_type_for (op_type, TREE_TYPE (@0)) + && single_use (@4)) + (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1) @5 @6)))))) + /* Same for ternary operations. */ (for uncond_op (UNCOND_TERNARY) cond_op (COND_TERNARY) @@ -9025,6 +9059,23 @@ and, && single_use (@5)) (view_convert (cond_op (bit_not @0) @2 @3 @4 (view_convert:op_type @1))))))) + +(for uncond_op (UNCOND_TERNARY) + cond_op (COND_LEN_TERNARY) + (simplify + (IFN_VCOND_MASK_LEN @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4 @6 @7) + (with { tree op_type = TREE_TYPE (@5); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) + && is_truth_type_for (op_type, TREE_TYPE (@0)) + && single_use (@5)) + (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4) @6 @7))))) + (simplify + (IFN_VCOND_MASK_LEN @0 @1 (view_convert? 
(uncond_op@5 @2 @3 @4 @6 @7))) + (with { tree op_type = TREE_TYPE (@5); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) + && is_truth_type_for (op_type, TREE_TYPE (@0)) + && single_use (@5)) + (view_convert (cond_op (bit_not @0) @2 @3 @4 (view_convert:op_type @1) @6 @7)))))) #endif /* Detect cases in which a VEC_COND_EXPR effectively replaces the diff --git a/gcc/optabs.def b/gcc/optabs.def index 2ccbe41..8d5ceeb 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -282,6 +282,7 @@ OPTAB_D (cond_len_fnma_optab, "cond_len_fnma$a") OPTAB_D (cond_len_fnms_optab, "cond_len_fnms$a") OPTAB_D (cond_len_neg_optab, "cond_len_neg$a") OPTAB_D (cond_len_one_cmpl_optab, "cond_len_one_cmpl$a") +OPTAB_D (vcond_mask_len_optab, "vcond_mask_len_$a") OPTAB_D (cmov_optab, "cmov$a6") OPTAB_D (cstore_optab, "cstore$a4") OPTAB_D (ctrap_optab, "ctrap$a4") diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c index 7e16597..7b3d73a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-additional-options "-fgimple -fdump-tree-optimized -ffast-math" } */ +/* { dg-additional-options "-fno-vect-cost-model" { target { riscv_v } } } */ double __GIMPLE (ssa, startwith("loop")) neg_xi (double *x) |