aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@arm.com> 2023-11-27 14:44:02 +0000
committer Richard Sandiford <richard.sandiford@arm.com> 2023-11-27 14:44:02 +0000
commit e09007308c96a036a4a4e6fd4d6c09442b4c4420 (patch)
tree b2ad14b739bf91c0627b1f0ea0a3bed98d8d1b1c /gcc/config
parent 31e9074977bb7de83fa5d28d286323987d5d87f2 (diff)
download gcc-e09007308c96a036a4a4e6fd4d6c09442b4c4420.zip
gcc-e09007308c96a036a4a4e6fd4d6c09442b4c4420.tar.gz
gcc-e09007308c96a036a4a4e6fd4d6c09442b4c4420.tar.bz2
aarch64: Remove redundant zeroing/merging in SVE intrinsics [PR106326]
Many predicated SVE intrinsics provide three forms of predication:
zeroing, merging, and any/dont-care.  All three are equivalent when
the predicate is all-true, so this patch drops the zeroing and merging
in that case.

gcc/
	PR target/106326
	* config/aarch64/aarch64-sve-builtins.h (is_ptrue): Declare.
	* config/aarch64/aarch64-sve-builtins.cc (is_ptrue): New function.
	(gimple_folder::redirect_pred_x): Likewise.
	(gimple_folder::fold): Use it.

gcc/testsuite/
	PR target/106326
	* gcc.target/aarch64/sve/acle/general/pr106326_1.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/aarch64/aarch64-sve-builtins.cc 46
-rw-r--r-- gcc/config/aarch64/aarch64-sve-builtins.h 3
2 files changed, 49 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index b611563..ee81282 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -2561,6 +2561,17 @@ vector_cst_all_same (tree v, unsigned int step)
return true;
}
+/* Return true if V is a constant predicate that acts as a ptrue when
+ predicating STEP-byte elements. */
+bool
+is_ptrue (tree v, unsigned int step)
+{
+ return (TREE_CODE (v) == VECTOR_CST
+ && TYPE_MODE (TREE_TYPE (v)) == VNx16BImode
+ && integer_nonzerop (VECTOR_CST_ENCODED_ELT (v, 0))
+ && vector_cst_all_same (v, step));
+}
+
gimple_folder::gimple_folder (const function_instance &instance, tree fndecl,
gimple_stmt_iterator *gsi_in, gcall *call_in)
: function_call_info (gimple_location (call_in), instance, fndecl),
@@ -2635,6 +2646,37 @@ gimple_folder::redirect_call (const function_instance &instance)
return call;
}
+/* Redirect _z and _m calls to _x functions if the predicate is all-true.
+ This allows us to use unpredicated instructions, where available. */
+gimple *
+gimple_folder::redirect_pred_x ()
+{
+ if (pred != PRED_z && pred != PRED_m)
+ return nullptr;
+
+ if (gimple_call_num_args (call) < 2)
+ return nullptr;
+
+ tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
+ tree arg0_type = type_argument_type (TREE_TYPE (fndecl), 1);
+ tree arg1_type = type_argument_type (TREE_TYPE (fndecl), 2);
+ if (!VECTOR_TYPE_P (lhs_type)
+ || !VECTOR_TYPE_P (arg0_type)
+ || !VECTOR_TYPE_P (arg1_type))
+ return nullptr;
+
+ auto lhs_step = element_precision (lhs_type);
+ auto rhs_step = element_precision (arg1_type);
+ auto step = MAX (lhs_step, rhs_step);
+ if (!multiple_p (step, BITS_PER_UNIT)
+ || !is_ptrue (gimple_call_arg (call, 0), step / BITS_PER_UNIT))
+ return nullptr;
+
+ function_instance instance (*this);
+ instance.pred = PRED_x;
+ return redirect_call (instance);
+}
+
/* Fold the call to constant VAL. */
gimple *
gimple_folder::fold_to_cstu (poly_uint64 val)
@@ -2707,6 +2749,10 @@ gimple_folder::fold ()
if (!lhs && TREE_TYPE (gimple_call_fntype (call)) != void_type_node)
return NULL;
+ /* First try some simplifications that are common to many functions. */
+ if (auto *call = redirect_pred_x ())
+ return call;
+
return base->fold (*this);
}
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index d646df1..b9148c5 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -500,6 +500,8 @@ public:
tree load_store_cookie (tree);
gimple *redirect_call (const function_instance &);
+ gimple *redirect_pred_x ();
+
gimple *fold_to_cstu (poly_uint64);
gimple *fold_to_pfalse ();
gimple *fold_to_ptrue ();
@@ -673,6 +675,7 @@ extern tree acle_svpattern;
extern tree acle_svprfop;
bool vector_cst_all_same (tree, unsigned int);
+bool is_ptrue (tree, unsigned int);
/* Return the ACLE type svbool_t. */
inline tree