diff options
author | Victor Do Nascimento <victor.donascimento@arm.com> | 2024-09-23 17:10:18 +0100 |
---|---|---|
committer | Victor Do Nascimento <victor.donascimento@arm.com> | 2024-10-02 15:27:42 +0100 |
commit | 4d9e473d125ec36ae4818d36d42bf4fea09cef1f (patch) | |
tree | a236678f2f85dbb262bae98e244941f9f21eb470 | |
parent | 4e11ad7c345b6084ffe45ac569352dd316ee5cc6 (diff) | |
download | gcc-4d9e473d125ec36ae4818d36d42bf4fea09cef1f.zip gcc-4d9e473d125ec36ae4818d36d42bf4fea09cef1f.tar.gz gcc-4d9e473d125ec36ae4818d36d42bf4fea09cef1f.tar.bz2 |
middle-end: Fix ifcvt predicate generation for masked function calls
Up until now, due to a latent bug in the code for the ifcvt pass,
irrespective of the branch taken in a conditional statement, the
original condition for the if statement was used in masking the
function call.
Thus, for code such as:
    if (a[i] > limit)
      b[i] = fixed_const;
    else
      b[i] = fn (a[i]);
we would generate the following (wrong) if-converted tree code:
_1 = a[i_1];
_2 = _1 > limit;
_3 = .MASK_CALL (fn, _1, _2);
cstore_4 = _2 ? fixed_const : _3;
as opposed to the correct expected sequence:
_1 = a[i_1];
_2 = _1 > limit;
_3 = ~_2;
_4 = .MASK_CALL (fn, _1, _3);
cstore_5 = _2 ? fixed_const : _4;
This patch ensures that the correct predicate mask generation is
carried out such that, upon autovectorization, the correct vector
lanes are selected in the vectorized function call.
gcc/ChangeLog:
* tree-if-conv.cc (predicate_statements): Fix handling of
predicated function calls.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/vect-fncall-mask.c: New.
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c | 31 | ||||
-rw-r--r-- | gcc/tree-if-conv.cc | 14 |
2 files changed, 44 insertions, 1 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c
new file mode 100644
index 0000000..554488e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-march=armv8.2-a+sve -fdump-tree-ifcvt-raw -Ofast" { target { aarch64*-*-* } } } */
+
+extern int __attribute__ ((simd, const)) fn (int);
+
+const int N = 20;
+const float lim = 101.0;
+const float cst = -1.0;
+float tot = 0.0;
+
+float b[20];
+float a[20] = { [0 ... 9] = 1.7014118e39, /* If branch. */
+                [10 ... 19] = 100.0 }; /* Else branch. */
+
+int main (void)
+{
+  #pragma omp simd
+  for (int i = 0; i < N; i += 1)
+    {
+      if (a[i] > lim)
+        b[i] = cst;
+      else
+        b[i] = fn (a[i]);
+      tot += b[i];
+    }
+  return (0);
+}
+
+/* { dg-final { scan-tree-dump {gimple_assign <gt_expr, _12, _1, 1.01e\+2, NULL>} ifcvt } } */
+/* { dg-final { scan-tree-dump {gimple_assign <bit_not_expr, _34, _12, NULL, NULL>} ifcvt } } */
+/* { dg-final { scan-tree-dump {gimple_call <.MASK_CALL, _3, fn, _2, _34>} ifcvt } } */
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 0346a13..3b04d1e 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -2907,6 +2907,7 @@ predicate_statements (loop_p loop)
                  This will cause the vectorizer to match the "in branch"
                  clone variants, and serves to build the mask vector
                  in a natural way. */
+              tree mask = cond;
               gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
               tree orig_fn = gimple_call_fn (call);
               int orig_nargs = gimple_call_num_args (call);
@@ -2914,7 +2915,18 @@ predicate_statements (loop_p loop)
               args.safe_push (orig_fn);
               for (int i = 0; i < orig_nargs; i++)
                 args.safe_push (gimple_call_arg (call, i));
-              args.safe_push (cond);
+              /* If `swap', we invert the mask used for the if branch for use
+                 when masking the function call. */
+              if (swap)
+                {
+                  gimple_seq stmts = NULL;
+                  tree true_val
+                    = constant_boolean_node (true, TREE_TYPE (mask));
+                  mask = gimple_build (&stmts, BIT_XOR_EXPR,
+                                       TREE_TYPE (mask), mask, true_val);
+                  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+                }
+              args.safe_push (mask);
 
               /* Replace the call with a IFN_MASK_CALL that has the extra
                  condition parameter. */