diff options
author | Alexandre Oliva <oliva@adacore.com> | 2025-04-21 22:48:55 -0300 |
---|---|---|
committer | Alexandre Oliva <oliva@gnu.org> | 2025-04-21 22:48:55 -0300 |
commit | 14fa625bcb91028cb97f3575d2e394401bbb4a3a (patch) | |
tree | e13b770fe66241d9ce5a3ebf561ee2f1f1268f0e /gcc | |
parent | 5c430061648071408996c44f7094f7af7aae0868 (diff) | |
download | gcc-14fa625bcb91028cb97f3575d2e394401bbb4a3a.zip gcc-14fa625bcb91028cb97f3575d2e394401bbb4a3a.tar.gz gcc-14fa625bcb91028cb97f3575d2e394401bbb4a3a.tar.bz2 |
[riscv] vec_dup immediate constants in pred_broadcast expand [PR118182]
pr118182-2.c fails on gcc-14 because it lacks the late_combine passes,
particularly the one that runs after register allocation.
Even in the trunk, the predicate broadcast for the add reduction is
expanded and register-allocated as _zvfh, taking up an unneeded scalar
register to hold the constant to be vec_duplicated.
It is the late combine pass after register allocation that substitutes
this unneeded scalar register into the vec_duplicate, resolving to the
_zero or _imm insns.
It's easy enough and more efficient to expand pred_broadcast to the
insns that take the already-duplicated vector constant, when the
operands satisfy the predicates of the _zero or _imm insns.
for gcc/ChangeLog
PR target/118182
* config/riscv/vector.md (@pred_broadcast<mode>): Expand to
_zero and _imm variants without vec_duplicate.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/riscv/vector.md | 22 |
1 files changed, 20 insertions, 2 deletions
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 51eb64f..3ab4d76 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -2136,18 +2136,34 @@ (match_operand 7 "const_int_operand") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLS - (match_operand:<VEL> 3 "direct_broadcast_operand")) + ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below. + (match_operand:<VEL> 3 "direct_broadcast_operand") ;; ) (match_operand:V_VLS 2 "vector_merge_operand")))] "TARGET_VECTOR" { /* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f has better chances to do vsetvl fusion in vsetvl pass. */ + bool wrap_vec_dup = true; + rtx vec_cst = NULL_RTX; if (riscv_vector::splat_to_scalar_move_p (operands)) { operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode); operands[3] = force_reg (<VEL>mode, operands[3]); } + else if (immediate_operand (operands[3], <VEL>mode) + && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3])) + && (/* -> pred_broadcast<mode>_zero */ + (vector_least_significant_set_mask_operand (operands[1], + <VM>mode) + && vector_const_0_operand (vec_cst, <MODE>mode)) + || (/* pred_broadcast<mode>_imm */ + vector_all_trues_mask_operand (operands[1], <VM>mode) + && vector_const_int_or_double_0_operand (vec_cst, + <MODE>mode)))) + { + operands[3] = vec_cst; + wrap_vec_dup = false; + } /* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */ else if (satisfies_constraint_Wdm (operands[3])) { @@ -2191,6 +2207,8 @@ ; else operands[3] = force_reg (<VEL>mode, operands[3]); + if (wrap_vec_dup) + operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]); }) (define_insn_and_split "*pred_broadcast<mode>" |