diff options
author | Luke Lau <luke@igalia.com> | 2024-07-06 12:43:11 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-06 12:43:11 +0800 |
commit | a348824798e03c1ffd10e6a1c5340130b0f48bf9 (patch) | |
tree | 8fc6ad0f925f1b12e45ec5b1134c0a4045e9e687 | |
parent | 0b9f2847da79298ed09c29493245113f02b32d9f (diff) | |
download | llvm-a348824798e03c1ffd10e6a1c5340130b0f48bf9.zip llvm-a348824798e03c1ffd10e6a1c5340130b0f48bf9.tar.gz llvm-a348824798e03c1ffd10e6a1c5340130b0f48bf9.tar.bz2 |
[RISCV] Allow folding vmerge with implicit passthru when true has tied dest (#78565)
We currently don't fold a vmerge if it has an implicit-def passthru and
its true operand also has a passthru (i.e. tied dest).
This restriction was added in https://reviews.llvm.org/D151596, back
when we had separate TU/TA pseudos. It looks like it was added
because the policy might not have been handled correctly.
However the policy should be set correctly if we relax this restriction
today, since we compute the policy differently now that we have removed
the TU/TA distinction in our pseudos.
We use a TUMU policy, and relax it to TAMU iff the vmerge's passthru is
implicit-def.
The reasoning behind this is that the tail elements always come from
the vmerge's passthru[^1], so if vmerge's passthru is implicit-def then
the tail is also implicit-def. So a tail agnostic policy is OK.
[^1]: unless the VL was shrunk, in which case we
conservatively use TUMU.
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll | 34 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll | 110 |
3 files changed, 78 insertions, 74 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 8c1f8dc..7bdd4f8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3749,11 +3749,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { // If True has a merge operand then it needs to be the same as vmerge's False, // since False will be used for the result's merge operand. if (HasTiedDest && !isImplicitDef(True->getOperand(0))) { - // The vmerge instruction must be TU. - // FIXME: This could be relaxed, but we need to handle the policy for the - // resulting op correctly. - if (isImplicitDef(Merge)) - return false; SDValue MergeOpTrue = True->getOperand(0); if (False != MergeOpTrue) return false; @@ -3763,9 +3758,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { // going to keep the mask from True. if (IsMasked) { assert(HasTiedDest && "Expected tied dest"); - // The vmerge instruction must be TU. - if (isImplicitDef(Merge)) - return false; // FIXME: Support mask agnostic True instruction which would have an // undef merge operand. 
if (Mask && !usesAllOnesMask(Mask, Glue)) diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 183741d..b6921ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -1144,3 +1144,37 @@ define <vscale x 2 x double> @vpmerge_vfwsub.w_tied(<vscale x 2 x double> %passt %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) ret <vscale x 2 x double> %b } + +define <vscale x 2 x i32> @true_tied_dest_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) { +; CHECK-LABEL: true_tied_dest_vmerge_implicit_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret + %a = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %avl, i64 0) + %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32( + <vscale x 2 x i32> poison, + <vscale x 2 x i32> %passthru, + <vscale x 2 x i32> %a, + <vscale x 2 x i1> %m, + i64 %avl + ) + ret <vscale x 2 x i32> %b +} + +define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) { +; CHECK-LABEL: true_mask_vmerge_implicit_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret + %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl, i64 0) + %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32( + <vscale x 2 x i32> poison, + <vscale x 
2 x i32> %passthru, + <vscale x 2 x i32> %a, + <vscale x 2 x i1> shufflevector(<vscale x 2 x i1> insertelement(<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), + i64 %avl + ) + ret <vscale x 2 x i32> %b +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll index 0322c1a..22ed56a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll @@ -81,9 +81,8 @@ define <vscale x 1 x i8> @vmadd_vv_nxv1i8_ta(<vscale x 1 x i8> %a, <vscale x 1 x define <vscale x 1 x i8> @vmadd_vx_nxv1i8_ta(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv1i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmacc.vx v9, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer @@ -170,9 +169,8 @@ define <vscale x 2 x i8> @vmadd_vv_nxv2i8_ta(<vscale x 2 x i8> %a, <vscale x 2 x define <vscale x 2 x i8> @vmadd_vx_nxv2i8_ta(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv2i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmacc.vx v9, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer @@ -259,9 +257,8 @@ define <vscale x 4 x i8> @vmadd_vv_nxv4i8_ta(<vscale x 4 x i8> %a, <vscale x 4 x define <vscale x 4 x 
i8> @vmadd_vx_nxv4i8_ta(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv4i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmacc.vx v9, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer @@ -348,9 +345,8 @@ define <vscale x 8 x i8> @vmadd_vv_nxv8i8_ta(<vscale x 8 x i8> %a, <vscale x 8 x define <vscale x 8 x i8> @vmadd_vx_nxv8i8_ta(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv8i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmacc.vx v9, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer @@ -437,9 +433,8 @@ define <vscale x 16 x i8> @vmadd_vv_nxv16i8_ta(<vscale x 16 x i8> %a, <vscale x define <vscale x 16 x i8> @vmadd_vx_nxv16i8_ta(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv16i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmacc.vx v10, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer @@ -526,9 
+521,8 @@ define <vscale x 32 x i8> @vmadd_vv_nxv32i8_ta(<vscale x 32 x i8> %a, <vscale x define <vscale x 32 x i8> @vmadd_vx_nxv32i8_ta(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv32i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmacc.vx v12, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer @@ -618,9 +612,8 @@ define <vscale x 64 x i8> @vmadd_vv_nxv64i8_ta(<vscale x 64 x i8> %a, <vscale x define <vscale x 64 x i8> @vmadd_vx_nxv64i8_ta(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv64i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer @@ -707,9 +700,8 @@ define <vscale x 1 x i16> @vmadd_vv_nxv1i16_ta(<vscale x 1 x i16> %a, <vscale x define <vscale x 1 x i16> @vmadd_vx_nxv1i16_ta(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv1i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vmacc.vx v9, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 1 x i16> poison, i16 
%b, i32 0 %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer @@ -796,9 +788,8 @@ define <vscale x 2 x i16> @vmadd_vv_nxv2i16_ta(<vscale x 2 x i16> %a, <vscale x define <vscale x 2 x i16> @vmadd_vx_nxv2i16_ta(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv2i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vmacc.vx v9, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer @@ -885,9 +876,8 @@ define <vscale x 4 x i16> @vmadd_vv_nxv4i16_ta(<vscale x 4 x i16> %a, <vscale x define <vscale x 4 x i16> @vmadd_vx_nxv4i16_ta(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv4i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmacc.vx v9, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer @@ -974,9 +964,8 @@ define <vscale x 8 x i16> @vmadd_vv_nxv8i16_ta(<vscale x 8 x i16> %a, <vscale x define <vscale x 8 x i16> @vmadd_vx_nxv8i16_ta(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv8i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vmacc.vx v10, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a1, 
e16, m2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer @@ -1063,9 +1052,8 @@ define <vscale x 16 x i16> @vmadd_vv_nxv16i16_ta(<vscale x 16 x i16> %a, <vscale define <vscale x 16 x i16> @vmadd_vx_nxv16i16_ta(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv16i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vmacc.vx v12, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer @@ -1155,9 +1143,8 @@ define <vscale x 32 x i16> @vmadd_vv_nxv32i16_ta(<vscale x 32 x i16> %a, <vscale define <vscale x 32 x i16> @vmadd_vx_nxv32i16_ta(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv32i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer @@ -1244,9 +1231,8 @@ define <vscale x 1 x i32> @vmadd_vv_nxv1i32_ta(<vscale x 1 x i32> %a, <vscale x define <vscale x 1 x i32> @vmadd_vx_nxv1i32_ta(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv1i32_ta: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vmacc.vx v9, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer @@ -1333,9 +1319,8 @@ define <vscale x 2 x i32> @vmadd_vv_nxv2i32_ta(<vscale x 2 x i32> %a, <vscale x define <vscale x 2 x i32> @vmadd_vx_nxv2i32_ta(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv2i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vmacc.vx v9, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer @@ -1422,9 +1407,8 @@ define <vscale x 4 x i32> @vmadd_vv_nxv4i32_ta(<vscale x 4 x i32> %a, <vscale x define <vscale x 4 x i32> @vmadd_vx_nxv4i32_ta(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv4i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmacc.vx v10, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer @@ -1511,9 +1495,8 @@ define <vscale x 8 x i32> @vmadd_vv_nxv8i32_ta(<vscale x 8 x i32> %a, <vscale x define <vscale x 8 x i32> 
@vmadd_vx_nxv8i32_ta(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv8i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmacc.vx v12, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1603,9 +1586,8 @@ define <vscale x 16 x i32> @vmadd_vv_nxv16i32_ta(<vscale x 16 x i32> %a, <vscale define <vscale x 16 x i32> @vmadd_vx_nxv16i32_ta(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vx_nxv16i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmacc.vx v16, a0, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer @@ -1739,9 +1721,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <v ; ; RV64-LABEL: vmadd_vx_nxv1i64_ta: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; RV64-NEXT: vmacc.vx v9, a0, v8 -; RV64-NEXT: vmerge.vvm v8, v8, v9, v0 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vmadd.vx v8, a0, v9, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer @@ -1875,9 +1856,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <v ; ; RV64-LABEL: 
vmadd_vx_nxv2i64_ta: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; RV64-NEXT: vmacc.vx v10, a0, v8 -; RV64-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vmadd.vx v8, a0, v10, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer @@ -2011,9 +1991,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <v ; ; RV64-LABEL: vmadd_vx_nxv4i64_ta: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmacc.vx v12, a0, v8 -; RV64-NEXT: vmerge.vvm v8, v8, v12, v0 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vmadd.vx v8, a0, v12, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer @@ -2150,9 +2129,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <v ; ; RV64-LABEL: vmadd_vx_nxv8i64_ta: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmacc.vx v16, a0, v8 -; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vmadd.vx v8, a0, v16, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer |