aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2024-07-11 09:56:56 +0200
committerRichard Biener <rguenth@gcc.gnu.org>2024-07-13 13:29:49 +0200
commitabf3964711f05b6858d9775c3595ec2b45483e14 (patch)
treef3137dcc996b37403307abf3f36e15b9654f7919
parent2ee5b58be62ba83814fd3b09a604ae0858bbd058 (diff)
downloadgcc-abf3964711f05b6858d9775c3595ec2b45483e14.zip
gcc-abf3964711f05b6858d9775c3595ec2b45483e14.tar.gz
gcc-abf3964711f05b6858d9775c3595ec2b45483e14.tar.bz2
tree-optimization/115868 - ICE with .MASK_CALL in simdclone
The following adjusts mask recording which didn't take into account
that we can merge call arguments from two vectors like

  _50 = {vect_d_1.253_41, vect_d_1.254_43};
  _51 = VIEW_CONVERT_EXPR<unsigned char>(mask__19.257_49);
  _52 = (unsigned int) _51;
  _53 = _Z3bazd.simdclone.7 (_50, _52);
  _54 = BIT_FIELD_REF <_53, 256, 0>;
  _55 = BIT_FIELD_REF <_53, 256, 256>;

The testcase g++.dg/vect/pr68762-2.cc exercises this on x86_64 with
partial vector usage enabled and AVX512 support.

	PR tree-optimization/115868
	* tree-vect-stmts.cc (vectorizable_simd_clone_call): Correctly
	compute the number of mask copies required for
	vect_record_loop_mask.
-rw-r--r--gcc/tree-vect-stmts.cc11
1 files changed, 8 insertions, 3 deletions
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 2e4d500..8530a98 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4349,9 +4349,14 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
case SIMD_CLONE_ARG_TYPE_MASK:
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
- vect_record_loop_mask (loop_vinfo,
- &LOOP_VINFO_MASKS (loop_vinfo),
- ncopies, vectype, op);
+ {
+ unsigned nmasks
+ = exact_div (ncopies * bestn->simdclone->simdlen,
+ TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
+ vect_record_loop_mask (loop_vinfo,
+ &LOOP_VINFO_MASKS (loop_vinfo),
+ nmasks, vectype, op);
+ }
break;
}