aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorAlexandre Oliva <oliva@adacore.com>2025-04-21 22:48:55 -0300
committerAlexandre Oliva <oliva@gnu.org>2025-04-21 22:48:55 -0300
commit14fa625bcb91028cb97f3575d2e394401bbb4a3a (patch)
treee13b770fe66241d9ce5a3ebf561ee2f1f1268f0e /gcc
parent5c430061648071408996c44f7094f7af7aae0868 (diff)
downloadgcc-14fa625bcb91028cb97f3575d2e394401bbb4a3a.zip
gcc-14fa625bcb91028cb97f3575d2e394401bbb4a3a.tar.gz
gcc-14fa625bcb91028cb97f3575d2e394401bbb4a3a.tar.bz2
[riscv] vec_dup immediate constants in pred_broadcast expand [PR118182]
pr118182-2.c fails on gcc-14 because it lacks the late_combine passes, particularly the one that runs after register allocation.

Even in the trunk, the predicate broadcast for the add reduction is expanded and register-allocated as _zvfh, taking up an unneeded scalar register to hold the constant to be vec_duplicated.

It is the late combine pass after register allocation that substitutes this unneeded scalar register into the vec_duplicate, resolving to the _zero or _imm insns.

It's easy enough and more efficient to expand pred_broadcast to the insns that take the already-duplicated vector constant, when the operands satisfy the predicates of the _zero or _imm insns.

for gcc/ChangeLog

    PR target/118182
    * config/riscv/vector.md (@pred_broadcast<mode>): Expand to _zero and _imm variants without vec_duplicate.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/riscv/vector.md22
1 files changed, 20 insertions, 2 deletions
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 51eb64f..3ab4d76 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2136,18 +2136,34 @@
(match_operand 7 "const_int_operand")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLS
- (match_operand:<VEL> 3 "direct_broadcast_operand"))
+ ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below.
+ (match_operand:<VEL> 3 "direct_broadcast_operand") ;; )
(match_operand:V_VLS 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{
/* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f
has better chances to do vsetvl fusion in vsetvl pass. */
+ bool wrap_vec_dup = true;
+ rtx vec_cst = NULL_RTX;
if (riscv_vector::splat_to_scalar_move_p (operands))
{
operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
operands[3] = force_reg (<VEL>mode, operands[3]);
}
+ else if (immediate_operand (operands[3], <VEL>mode)
+ && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3]))
+ && (/* -> pred_broadcast<mode>_zero */
+ (vector_least_significant_set_mask_operand (operands[1],
+ <VM>mode)
+ && vector_const_0_operand (vec_cst, <MODE>mode))
+ || (/* pred_broadcast<mode>_imm */
+ vector_all_trues_mask_operand (operands[1], <VM>mode)
+ && vector_const_int_or_double_0_operand (vec_cst,
+ <MODE>mode))))
+ {
+ operands[3] = vec_cst;
+ wrap_vec_dup = false;
+ }
/* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */
else if (satisfies_constraint_Wdm (operands[3]))
{
@@ -2191,6 +2207,8 @@
;
else
operands[3] = force_reg (<VEL>mode, operands[3]);
+ if (wrap_vec_dup)
+ operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]);
})
(define_insn_and_split "*pred_broadcast<mode>"