diff options
author | Feng Xue <fxue@os.amperecomputing.com> | 2024-10-11 14:55:05 +0800 |
---|---|---|
committer | Feng Xue <fxue@os.amperecomputing.com> | 2024-10-12 23:05:37 +0800 |
commit | a9173a50e7e346a218323916e4d3add8552529ae (patch) | |
tree | 9dc5dc68e888fe4dfc368c6f83644570fa617db7 | |
parent | f54d42e00007e7a558b273d87f95b3e5b1938f5a (diff) | |
download | gcc-a9173a50e7e346a218323916e4d3add8552529ae.zip gcc-a9173a50e7e346a218323916e4d3add8552529ae.tar.gz gcc-a9173a50e7e346a218323916e4d3add8552529ae.tar.bz2 |
vect: Fix inconsistency in fully-masked lane-reducing op generation [PR116985]
To align vectorized def/use when a lane-reducing op is present in a loop
reduction, we may need to insert extra trivial pass-through copies. These
copies cause a mismatch between the index of the lane-reducing vector copy
and the loop mask index. Fix this by computing the loop mask index from a new
counter that is advanced only for effective lane-reducing vector copies.
2024-10-11 Feng Xue <fxue@os.amperecomputing.com>
gcc/
PR tree-optimization/116985
* tree-vect-loop.cc (vect_transform_reduction): Compute loop mask
index based on effective vector copies for reduction op.
gcc/testsuite/
PR tree-optimization/116985
* gcc.dg/vect/pr116985.c: New testcase.
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr116985.c | 23 | ||||
-rw-r--r-- | gcc/tree-vect-loop.cc | 7 |
2 files changed, 28 insertions, 2 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/pr116985.c b/gcc/testsuite/gcc.dg/vect/pr116985.c new file mode 100644 index 0000000..e6dfdaf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr116985.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "--param=vect-partial-vector-usage=2 -O2" } */ +/* { dg-additional-options "-mavx512vbmi2" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ + +signed int __attribute__ ((noipa)) +fn (signed int n, + signed char *restrict a, + signed char *restrict b, + signed char *restrict c, + signed char *restrict d) +{ + signed int res = 0; + + for (int i = 0; i < n; ++i) + { + res += a[i] * b[i]; + res += i + 1; + res += c[i] * d[i]; + } + return res; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index ade72a5..025442a 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -8916,6 +8916,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, bool emulated_mixed_dot_prod = vect_is_emulated_mixed_dot_prod (stmt_info); unsigned num = vec_oprnds[reduc_index == 0 ? 1 : 0].length (); + unsigned mask_index = 0; for (unsigned i = 0; i < num; ++i) { @@ -8954,7 +8955,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo, std::swap (vop[0], vop[1]); } tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, - vec_num * ncopies, vectype_in, i); + vec_num * ncopies, vectype_in, + mask_index++); gcall *call = gimple_build_call_internal (cond_fn, 4, mask, vop[0], vop[1], vop[0]); new_temp = make_ssa_name (vec_dest, call); @@ -8971,7 +8973,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo, if (masked_loop_p && mask_by_cond_expr) { tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, - vec_num * ncopies, vectype_in, i); + vec_num * ncopies, vectype_in, + mask_index++); build_vect_cond_expr (code, vop, mask, gsi); } |