diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2023-11-27 14:44:02 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2023-11-27 14:44:02 +0000 |
commit | e09007308c96a036a4a4e6fd4d6c09442b4c4420 (patch) | |
tree | b2ad14b739bf91c0627b1f0ea0a3bed98d8d1b1c /gcc/config | |
parent | 31e9074977bb7de83fa5d28d286323987d5d87f2 (diff) | |
download | gcc-e09007308c96a036a4a4e6fd4d6c09442b4c4420.zip gcc-e09007308c96a036a4a4e6fd4d6c09442b4c4420.tar.gz gcc-e09007308c96a036a4a4e6fd4d6c09442b4c4420.tar.bz2 |
aarch64: Remove redundant zeroing/merging in SVE intrinsics [PR106326]
Many predicated SVE intrinsics provide three forms of predication:
zeroing, merging, and any/dont-care. All three are equivalent when
the predicate is all-true, so this patch drops the zeroing and
merging in that case.
gcc/
PR target/106326
* config/aarch64/aarch64-sve-builtins.h (is_ptrue): Declare.
* config/aarch64/aarch64-sve-builtins.cc (is_ptrue): New function.
(gimple_folder::redirect_pred_x): Likewise.
(gimple_folder::fold): Use it.
gcc/testsuite/
PR target/106326
* gcc.target/aarch64/sve/acle/general/pr106326_1.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins.cc | 46 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins.h | 3 |
2 files changed, 49 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index b611563..ee81282 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -2561,6 +2561,17 @@ vector_cst_all_same (tree v, unsigned int step) return true; } +/* Return true if V is a constant predicate that acts as a ptrue when + predicating STEP-byte elements. */ +bool +is_ptrue (tree v, unsigned int step) +{ + return (TREE_CODE (v) == VECTOR_CST + && TYPE_MODE (TREE_TYPE (v)) == VNx16BImode + && integer_nonzerop (VECTOR_CST_ENCODED_ELT (v, 0)) + && vector_cst_all_same (v, step)); +} + gimple_folder::gimple_folder (const function_instance &instance, tree fndecl, gimple_stmt_iterator *gsi_in, gcall *call_in) : function_call_info (gimple_location (call_in), instance, fndecl), @@ -2635,6 +2646,37 @@ gimple_folder::redirect_call (const function_instance &instance) return call; } +/* Redirect _z and _m calls to _x functions if the predicate is all-true. + This allows us to use unpredicated instructions, where available. */ +gimple * +gimple_folder::redirect_pred_x () +{ + if (pred != PRED_z && pred != PRED_m) + return nullptr; + + if (gimple_call_num_args (call) < 2) + return nullptr; + + tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl)); + tree arg0_type = type_argument_type (TREE_TYPE (fndecl), 1); + tree arg1_type = type_argument_type (TREE_TYPE (fndecl), 2); + if (!VECTOR_TYPE_P (lhs_type) + || !VECTOR_TYPE_P (arg0_type) + || !VECTOR_TYPE_P (arg1_type)) + return nullptr; + + auto lhs_step = element_precision (lhs_type); + auto rhs_step = element_precision (arg1_type); + auto step = MAX (lhs_step, rhs_step); + if (!multiple_p (step, BITS_PER_UNIT) + || !is_ptrue (gimple_call_arg (call, 0), step / BITS_PER_UNIT)) + return nullptr; + + function_instance instance (*this); + instance.pred = PRED_x; + return redirect_call (instance); +} + /* Fold the call to constant VAL. */ gimple * gimple_folder::fold_to_cstu (poly_uint64 val) @@ -2707,6 +2749,10 @@ gimple_folder::fold () if (!lhs && TREE_TYPE (gimple_call_fntype (call)) != void_type_node) return NULL; + /* First try some simplifications that are common to many functions. */ + if (auto *call = redirect_pred_x ()) + return call; + return base->fold (*this); } diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h index d646df1..b9148c5 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.h +++ b/gcc/config/aarch64/aarch64-sve-builtins.h @@ -500,6 +500,8 @@ public: tree load_store_cookie (tree); gimple *redirect_call (const function_instance &); + gimple *redirect_pred_x (); + gimple *fold_to_cstu (poly_uint64); gimple *fold_to_pfalse (); gimple *fold_to_ptrue (); @@ -673,6 +675,7 @@ extern tree acle_svpattern; extern tree acle_svprfop; bool vector_cst_all_same (tree, unsigned int); +bool is_ptrue (tree, unsigned int); /* Return the ACLE type svbool_t. */ inline tree |