aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVictor Do Nascimento <victor.donascimento@arm.com>2024-09-23 17:10:18 +0100
committerVictor Do Nascimento <victor.donascimento@arm.com>2024-10-02 15:27:42 +0100
commit4d9e473d125ec36ae4818d36d42bf4fea09cef1f (patch)
treea236678f2f85dbb262bae98e244941f9f21eb470
parent4e11ad7c345b6084ffe45ac569352dd316ee5cc6 (diff)
downloadgcc-4d9e473d125ec36ae4818d36d42bf4fea09cef1f.zip
gcc-4d9e473d125ec36ae4818d36d42bf4fea09cef1f.tar.gz
gcc-4d9e473d125ec36ae4818d36d42bf4fea09cef1f.tar.bz2
middle-end: Fix ifcvt predicate generation for masked function calls
Up until now, due to a latent bug in the code for the ifcvt pass,
irrespective of the branch taken in a conditional statement, the
original condition for the if statement was used in masking the
function call.

Thus, for code such as:

  if (a[i] > limit)
    b[i] = fixed_const;
  else
    b[i] = fn (a[i]);

we would generate the following (wrong) if-converted tree code:

  _1 = a[i_1];
  _2 = _1 > limit;
  _3 = .MASK_CALL (fn, _1, _2);
  cstore_4 = _2 ? fixed_const : _3;

as opposed to the correct expected sequence:

  _1 = a[i_1];
  _2 = _1 > limit;
  _3 = ~_2;
  _4 = .MASK_CALL (fn, _1, _3);
  cstore_5 = _2 ? fixed_const : _4;

This patch ensures that the correct predicate mask generation is
carried out such that, upon autovectorization, the correct vector
lanes are selected in the vectorized function call.

gcc/ChangeLog:

  * tree-if-conv.cc (predicate_statements): Fix handling of
  predicated function calls.

gcc/testsuite/ChangeLog:

  * gcc.dg/vect/vect-fncall-mask.c: New.
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c31
-rw-r--r--gcc/tree-if-conv.cc14
2 files changed, 44 insertions, 1 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c
new file mode 100644
index 0000000..554488e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-march=armv8.2-a+sve -fdump-tree-ifcvt-raw -Ofast" { target { aarch64*-*-* } } } */
+
+extern int __attribute__ ((simd, const)) fn (int); /* Declared only; no definition appears in this file. */
+
+const int N = 20; /* Loop trip count in main; equals the length of a and b. */
+const float lim = 101.0; /* Threshold that a[i] is compared against in main. */
+const float cst = -1.0; /* Value stored to b[i] when a[i] > lim. */
+float tot = 0.0; /* Running sum of b[i], accumulated in main. */
+
+float b[20]; /* Output array written by the loop in main. */
+float a[20] = { [0 ... 9] = 1.7014118e39, /* If branch: initializer is greater than lim. */
+ [10 ... 19] = 100.0 }; /* Else branch: 100.0 is not greater than lim. */
+
+int main (void) /* Loop whose else branch calls fn; the dg-final patterns below scan the ifcvt dump for the comparison, its negation and the .MASK_CALL. */
+{
+ #pragma omp simd
+ for (int i = 0; i < N; i += 1)
+ {
+ if (a[i] > lim) /* Elements above the threshold receive the constant. */
+ b[i] = cst;
+ else /* All other elements take their value from the call to fn. */
+ b[i] = fn (a[i]); /* fn is declared as int fn (int), so the float a[i] is converted to int and the result back to float. */
+ tot += b[i]; /* Accumulate into the global total on both paths. */
+ }
+ return (0);
+}
+
+/* { dg-final { scan-tree-dump {gimple_assign <gt_expr, _12, _1, 1.01e\+2, NULL>} ifcvt } } */
+/* { dg-final { scan-tree-dump {gimple_assign <bit_not_expr, _34, _12, NULL, NULL>} ifcvt } } */
+/* { dg-final { scan-tree-dump {gimple_call <.MASK_CALL, _3, fn, _2, _34>} ifcvt } } */
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 0346a13..3b04d1e 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -2907,6 +2907,7 @@ predicate_statements (loop_p loop)
This will cause the vectorizer to match the "in branch"
clone variants, and serves to build the mask vector
in a natural way. */
+ tree mask = cond;
gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
tree orig_fn = gimple_call_fn (call);
int orig_nargs = gimple_call_num_args (call);
@@ -2914,7 +2915,18 @@ predicate_statements (loop_p loop)
args.safe_push (orig_fn);
for (int i = 0; i < orig_nargs; i++)
args.safe_push (gimple_call_arg (call, i));
- args.safe_push (cond);
+ /* If `swap', we invert the mask used for the if branch for use
+ when masking the function call. */
+ if (swap)
+ {
+ gimple_seq stmts = NULL;
+ tree true_val
+ = constant_boolean_node (true, TREE_TYPE (mask));
+ mask = gimple_build (&stmts, BIT_XOR_EXPR,
+ TREE_TYPE (mask), mask, true_val);
+ gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+ }
+ args.safe_push (mask);
/* Replace the call with a IFN_MASK_CALL that has the extra
condition parameter. */