author    | Craig Topper <craig.topper@sifive.com> | 2021-06-08 09:31:30 -0700
committer | Craig Topper <craig.topper@sifive.com> | 2021-06-08 09:43:43 -0700
commit    | 8b4c80d380a681e6ea6ea60e9d9f9424e7782980 (patch)
tree      | 04090c1f56b2232f9487f627d95024cc35aa83a0
parent    | 71fb98e0c1de97c8ba2aa3292447b0c5d0f248d5 (diff)
download  | llvm-8b4c80d380a681e6ea6ea60e9d9f9424e7782980.zip, llvm-8b4c80d380a681e6ea6ea60e9d9f9424e7782980.tar.gz, llvm-8b4c80d380a681e6ea6ea60e9d9f9424e7782980.tar.bz2
Further improve register allocation for vwadd(u).wv, vwsub(u).wv, vfwadd.wv, and vfwsub.wv.
The first source has the same EEW as the destination, but we're using an
earlyclobber constraint, which prevents them from ever being allocated to the
same register. This patch attempts to work around that.
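To illustrate, this is the codegen change the patch enables, taken from the
updated vfwadd.w tests in the diff below. With the earlyclobber constraint
alone the destination can never be the wide first source, so the result lands
in a temporary and a whole-register move follows; with the tied pseudo the
result can be written straight over the wide source:

```
vsetvli   zero, a0, e16,mf4,ta,mu
# Before: destination must differ from the wide source v8.
vfwadd.wv v25, v8, v9
vmv1r.v   v8, v25
# After: destination and wide source may share v8.
vfwadd.wv v8, v8, v9
```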
- For unmasked .wv, add a special TIED pseudo that pretends that the first
operand and the destination must be the same register. This disables the
earlyclobber for that source. Mark the instruction as convertible to
three-address form, which switches it back to the original untied pseudo when
TwoAddressInstructionPass decides that keeping the operands tied would require
an extra copy. Code in RISCVInstrInfo.cpp performs the conversion to the
untied opcode.
The untie test cases show that we can generate the untied version. It's not
clear that doing so was profitable in these cases, but they have very simple
IR.
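For reference, one of the new untie test cases from the diff below: the wide
operand arrives in v9 while the narrow operand is in v8, and the untied
(earlyclobber) form is selected, followed by a whole-register copy into the
return register v8:

```
# intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8
vsetvli  zero, a0, e8,mf8,ta,mu
vwadd.wv v25, v9, v8
vmv1r.v  v8, v25
ret
```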
Reviewed By: arcbbb
Differential Revision: https://reviews.llvm.org/D103552
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 81
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVInstrInfo.h | 4
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 47
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll | 155
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll | 155
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll | 155
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll | 155
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll | 269
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll | 269
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll | 269
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll | 269
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll | 269
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll | 269
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll | 269
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll | 269
15 files changed, 2588 insertions, 316 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 93e65cd..a14cd4a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -1353,6 +1354,86 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
 #undef CASE_VFMA_OPCODE_LMULS
 #undef CASE_VFMA_OPCODE_COMMON
 
+// clang-format off
+#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
+  RISCV::PseudoV##OP##_##LMUL##_TIED
+
+#define CASE_WIDEOP_OPCODE_LMULS(OP) \
+  CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
+  case CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
+  case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
+  case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
+  case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
+  case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
+// clang-format on
+
+#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
+  case RISCV::PseudoV##OP##_##LMUL##_TIED: \
+    NewOpc = RISCV::PseudoV##OP##_##LMUL; \
+    break;
+
+#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
+  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
+  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
+  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
+  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
+  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
+  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
+
+MachineInstr *RISCVInstrInfo::convertToThreeAddress(
+    MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const {
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case CASE_WIDEOP_OPCODE_LMULS(FWADD_WV):
+  case CASE_WIDEOP_OPCODE_LMULS(FWSUB_WV):
+  case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
+  case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
+  case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
+  case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
+    // clang-format off
+    unsigned NewOpc;
+    switch (MI.getOpcode()) {
+    default:
+      llvm_unreachable("Unexpected opcode");
+    CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
+    CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
+    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
+    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
+    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
+    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
+    }
+    //clang-format on
+
+    MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+                                  .add(MI.getOperand(0))
+                                  .add(MI.getOperand(1))
+                                  .add(MI.getOperand(2))
+                                  .add(MI.getOperand(3))
+                                  .add(MI.getOperand(4));
+    MIB.copyImplicitOps(MI);
+
+    if (LV) {
+      unsigned NumOps = MI.getNumOperands();
+      for (unsigned I = 1; I < NumOps; ++I) {
+        MachineOperand &Op = MI.getOperand(I);
+        if (Op.isReg() && Op.isKill())
+          LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
+      }
+    }
+
+    return MIB;
+  }
+  }
+
+  return nullptr;
+}
+
+#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
+#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
+#undef CASE_WIDEOP_OPCODE_LMULS
+#undef CASE_WIDEOP_OPCODE_COMMON
+
 Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
                                                MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator II,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 74dc278..905fe69 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -143,6 +143,10 @@ public:
                                        unsigned OpIdx1,
                                        unsigned OpIdx2) const override;
 
+  MachineInstr
*convertToThreeAddress(MachineFunction::iterator &MBB, + MachineInstr &MI, + LiveVariables *LV) const override; + Register getVLENFactoredAmount(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, const DebugLoc &DL, int64_t Amount) const; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 9a1c322..deb3cc9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -909,6 +909,24 @@ class VPseudoBinaryNoMask<VReg RetClass, let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); } +class VPseudoTiedBinaryNoMask<VReg RetClass, + DAGOperand Op2Class, + string Constraint> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $rs2"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let ForceTailAgnostic = 1; + let isConvertibleToThreeAddress = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL, bit Ordered>: Pseudo<(outs), @@ -1511,6 +1529,8 @@ multiclass VPseudoTiedBinary<VReg RetClass, LMULInfo MInfo, string Constraint = ""> { let VLMul = MInfo.value in { + def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask<RetClass, Op2Class, + Constraint>; def "_" # MInfo.MX # "_MASK_TIED" : VPseudoTiedBinaryMask<RetClass, Op2Class, Constraint>; } @@ -2296,6 +2316,22 @@ class VPatBinaryMaskSwapped<string intrinsic_name, (op2_type op2_kind:$rs2), (mask_type V0), GPR:$vl, sew)>; +class VPatTiedBinaryNoMask<string intrinsic_name, + string inst, + ValueType result_type, + ValueType op2_type, + int sew, + VReg result_reg_class, + DAGOperand op2_kind> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name) + (result_type result_reg_class:$rs1), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast<Instruction>(inst#"_TIED") + (result_type result_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + class VPatTiedBinaryMask<string intrinsic_name, string inst, ValueType result_type, @@ -2697,14 +2733,17 @@ multiclass VPatBinaryW_WV<string intrinsic, string instruction, foreach VtiToWti = vtilist in { defvar Vti = VtiToWti.Vti; defvar Wti = VtiToWti.Wti; + def : VPatTiedBinaryNoMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX, + Wti.Vector, Vti.Vector, + Vti.Log2SEW, Wti.RegClass, Vti.RegClass>; let AddedComplexity = 1 in def : VPatTiedBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX, Wti.Vector, Vti.Vector, Vti.Mask, Vti.Log2SEW, Wti.RegClass, Vti.RegClass>; - defm : VPatBinary<intrinsic, instruction # "_WV_" # Vti.LMul.MX, - Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask, - Vti.Log2SEW, Wti.RegClass, - Wti.RegClass, Vti.RegClass>; + def : VPatBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX, + Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask, + Vti.Log2SEW, Wti.RegClass, + Wti.RegClass, Vti.RegClass>; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll index bcbd306..7bd331c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll @@ -10,8 +10,7 @@ define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16(<vsca ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli 
zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( @@ -55,8 +54,7 @@ define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16(<vsca ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( @@ -100,8 +98,7 @@ define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16(<vsca ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vfwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( @@ -145,8 +142,7 @@ define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16(<vsca ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vfwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( @@ -190,8 +186,7 @@ define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16(< ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vfwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16( @@ -236,8 +231,7 @@ define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32(<vsc ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( @@ -281,8 +275,7 @@ define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32(<vsc ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vfwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( @@ -326,8 +319,7 @@ define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32(<vsc ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vfwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( @@ -371,8 +363,7 @@ define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32(<vsc ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vfwadd.wv v24, v8, v16 -; 
CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( @@ -1136,3 +1127,131 @@ entry: ret <vscale x 8 x double> %a } + +define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( + <vscale x 1 x float> %1, + <vscale x 1 x half> %0, + i32 %2) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( + <vscale x 2 x float> %1, + <vscale x 2 x half> %0, + i32 %2) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( + <vscale x 4 x float> %1, + <vscale x 4 x half> %0, + i32 %2) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( + <vscale x 8 x float> %1, + <vscale x 8 x half> %0, + i32 %2) + + ret <vscale x 8 x float> %a +} + +define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( + <vscale x 1 x double> %1, + <vscale x 1 x float> %0, + i32 %2) + + ret <vscale x 1 x double> %a +} + +define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( + <vscale x 2 x double> %1, + <vscale x 2 x float> %0, + 
i32 %2) + + ret <vscale x 2 x double> %a +} + +define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( + <vscale x 4 x double> %1, + <vscale x 4 x float> %0, + i32 %2) + + ret <vscale x 4 x double> %a +} + +define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vfwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( + <vscale x 8 x double> %1, + <vscale x 8 x float> %0, + i32 %2) + + ret <vscale x 8 x double> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll index a4a4995..e73980c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll @@ -10,8 +10,7 @@ define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16(<vsca ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( @@ -55,8 +54,7 @@ define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16(<vsca ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( @@ -100,8 +98,7 @@ define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16(<vsca ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vfwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( @@ -145,8 +142,7 @@ define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16(<vsca ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vfwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( @@ -190,8 +186,7 @@ define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16(< ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vfwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x float> 
@llvm.riscv.vfwadd.w.nxv16f32.nxv16f16( @@ -236,8 +231,7 @@ define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32(<vsc ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( @@ -281,8 +275,7 @@ define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32(<vsc ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vfwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( @@ -326,8 +319,7 @@ define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32(<vsc ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vfwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( @@ -371,8 +363,7 @@ define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32(<vsc ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vfwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( @@ -1136,3 +1127,131 @@ entry: ret <vscale x 8 x double> %a } + +define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( + <vscale x 1 x float> %1, + <vscale x 1 x half> %0, + i64 %2) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( + <vscale x 2 x float> %1, + <vscale x 2 x half> %0, + i64 %2) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( + <vscale x 4 x float> %1, + <vscale x 4 x half> %0, + i64 %2) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x 
float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( + <vscale x 8 x float> %1, + <vscale x 8 x half> %0, + i64 %2) + + ret <vscale x 8 x float> %a +} + +define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( + <vscale x 1 x double> %1, + <vscale x 1 x float> %0, + i64 %2) + + ret <vscale x 1 x double> %a +} + +define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( + <vscale x 2 x double> %1, + <vscale x 2 x float> %0, + i64 %2) + + ret <vscale x 2 x double> %a +} + +define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( + <vscale x 4 x double> %1, + <vscale x 4 x float> %0, + i64 %2) + + ret <vscale x 4 x double> %a +} + +define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vfwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( + <vscale x 8 x double> %1, + <vscale x 8 x float> %0, + i64 %2) + + ret <vscale x 8 x double> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll index 69b64ab..b291f73 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll @@ -10,8 +10,7 @@ define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16(<vsca ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( @@ -55,8 +54,7 @@ define <vscale x 2 x float> 
@intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16(<vsca ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( @@ -100,8 +98,7 @@ define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16(<vsca ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vfwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( @@ -145,8 +142,7 @@ define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16(<vsca ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vfwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( @@ -190,8 +186,7 @@ define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16(< ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vfwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( @@ -236,8 +231,7 @@ define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32(<vsc ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( @@ -281,8 +275,7 @@ define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32(<vsc ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vfwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( @@ -326,8 +319,7 @@ define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32(<vsc ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vfwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( @@ -371,8 +363,7 @@ define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32(<vsc ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vfwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( @@ -1136,3 +1127,131 @@ entry: ret <vscale x 8 x double> %a } + +define <vscale x 1 x float> 
@intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( + <vscale x 1 x float> %1, + <vscale x 1 x half> %0, + i32 %2) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( + <vscale x 2 x float> %1, + <vscale x 2 x half> %0, + i32 %2) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( + <vscale x 4 x float> %1, + <vscale x 4 x half> %0, + i32 %2) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( + <vscale x 8 x float> %1, + <vscale x 8 x half> %0, + i32 %2) + + ret <vscale x 8 x float> %a +} + +define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( + <vscale x 1 x double> %1, + <vscale x 1 x float> %0, + i32 %2) + + ret <vscale x 1 x double> %a +} + +define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( + <vscale x 2 x double> %1, + <vscale x 2 x float> %0, + i32 %2) + + ret <vscale x 2 x double> %a +} + +define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i32 %2) nounwind { +; CHECK-LABEL: 
intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( + <vscale x 4 x double> %1, + <vscale x 4 x float> %0, + i32 %2) + + ret <vscale x 4 x double> %a +} + +define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vfwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( + <vscale x 8 x double> %1, + <vscale x 8 x float> %0, + i32 %2) + + ret <vscale x 8 x double> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll index f3782ca..eb4c0ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll @@ -10,8 +10,7 @@ define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16(<vsca ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( @@ -55,8 +54,7 @@ define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16(<vsca ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( @@ -100,8 +98,7 @@ define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16(<vsca ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vfwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( @@ -145,8 +142,7 @@ define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16(<vsca ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vfwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( @@ -190,8 +186,7 @@ define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16(< ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vfwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( @@ -236,8 +231,7 @@ define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32(<vsc ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( @@ -281,8 +275,7 @@ define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32(<vsc ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vfwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( @@ -326,8 +319,7 @@ define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32(<vsc ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vfwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( @@ -371,8 +363,7 @@ define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32(<vsc ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vfwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( @@ -1136,3 +1127,131 @@ entry: ret <vscale x 8 x double> %a } + +define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( + <vscale x 1 x float> %1, + <vscale x 1 x half> %0, + i64 %2) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( + <vscale x 2 x float> %1, + <vscale x 2 x half> %0, + i64 %2) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( + <vscale x 4 x float> %1, + <vscale x 4 x half> %0, + i64 %2) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: +; CHECK: # %bb.0: # %entry 
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( + <vscale x 8 x float> %1, + <vscale x 8 x half> %0, + i64 %2) + + ret <vscale x 8 x float> %a +} + +define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( + <vscale x 1 x double> %1, + <vscale x 1 x float> %0, + i64 %2) + + ret <vscale x 1 x double> %a +} + +define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( + <vscale x 2 x double> %1, + <vscale x 2 x float> %0, + i64 %2) + + ret <vscale x 2 x double> %a +} + +define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( + <vscale x 4 x double> %1, + <vscale x 4 x float> %0, + i64 %2) + + ret <vscale x 4 x double> %a +} + +define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vfwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( + <vscale x 8 x double> %1, + <vscale x 8 x float> %0, + i64 %2) + + ret <vscale x 8 x double> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll index c56f4f4..b0f48cb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll @@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale x ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: 
vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale x ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale x ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsca ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsca ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ 
define <vscale x 16 x i32> @intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16(<vsc ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vwadd.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ entry: ret <vscale x 8 x i64> %a } + +define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( + <vscale x 1 x i16> %1, + <vscale x 1 x i8> %0, + i32 %2) + + ret <vscale x 1 x i16> %a +} + +define <vscale x 2 x i16> @intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8( + <vscale x 2 x i16> %1, + <vscale x 2 x i8> %0, + i32 %2) + + ret <vscale x 2 x i16> %a +} + +define <vscale x 4 x i16> @intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; 
CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8( + <vscale x 4 x i16> %1, + <vscale x 4 x i8> %0, + i32 %2) + + ret <vscale x 4 x i16> %a +} + +define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8( + <vscale x 8 x i16> %1, + <vscale x 8 x i8> %0, + i32 %2) + + ret <vscale x 8 x i16> %a +} + +define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8( + <vscale x 16 x i16> %1, + <vscale x 16 x i8> %0, + i32 %2) + + ret <vscale x 16 x i16> %a +} + +define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8( + <vscale x 32 x i16> %1, + <vscale x 32 x i8> %0, + i32 %2) + + ret <vscale x 32 x i16> %a +} + +define <vscale x 1 x i32> @intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16( + <vscale x 1 x i32> %1, + <vscale x 1 x i16> %0, + i32 %2) + + ret <vscale x 1 x i32> %a +} + +define <vscale x 2 x i32> @intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16( + <vscale x 2 x i32> %1, + <vscale x 2 x i16> %0, + i32 %2) + + ret <vscale x 2 x i32> %a +} + +define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16( + <vscale x 4 x i32> %1, + <vscale x 4 x i16> %0, + i32 %2) + + ret <vscale x 4 x i32> %a +} + +define <vscale x 8 x i32> 
@intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16( + <vscale x 8 x i32> %1, + <vscale x 8 x i16> %0, + i32 %2) + + ret <vscale x 8 x i32> %a +} + +define <vscale x 1 x i64> @intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32( + <vscale x 1 x i64> %1, + <vscale x 1 x i32> %0, + i32 %2) + + ret <vscale x 1 x i64> %a +} + +define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32( + <vscale x 2 x i64> %1, + <vscale x 2 x i32> %0, + i32 %2) + + ret <vscale x 2 x i64> %a +} + +define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32( + <vscale x 4 x i64> %1, + <vscale x 4 x i32> %0, + i32 %2) + + ret <vscale x 4 x i64> %a +} + +define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32( + <vscale x 8 x i64> %1, + <vscale x 8 x i32> %0, + i32 %2) + + ret <vscale x 8 x i64> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll index fc03c38..ec0bb0e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll @@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale x ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli 
zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale x ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale x ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsca ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsca ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; 
CHECK-NEXT: ret entry: %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16(<vsc ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vwadd.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ entry: ret <vscale x 8 x i64> %a } + +define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( + <vscale x 1 x i16> %1, + <vscale x 1 x i8> %0, + i64 %2) + + ret <vscale x 1 x i16> %a +} + +define <vscale x 2 x i16> @intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8( + <vscale x 2 x i16> %1, + <vscale x 2 x i8> %0, + i64 %2) + + ret <vscale x 2 x i16> %a +} + +define <vscale x 4 x i16> @intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: 
intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8( + <vscale x 4 x i16> %1, + <vscale x 4 x i8> %0, + i64 %2) + + ret <vscale x 4 x i16> %a +} + +define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8( + <vscale x 8 x i16> %1, + <vscale x 8 x i8> %0, + i64 %2) + + ret <vscale x 8 x i16> %a +} + +define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8( + <vscale x 16 x i16> %1, + <vscale x 16 x i8> %0, + i64 %2) + + ret <vscale x 16 x i16> %a +} + +define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8( + <vscale x 32 x i16> %1, + <vscale x 32 x i8> %0, + i64 %2) + + ret <vscale x 32 x i16> %a +} + +define <vscale x 1 x i32> @intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16( + <vscale x 1 x i32> %1, + <vscale x 1 x i16> %0, + i64 %2) + + ret <vscale x 1 x i32> %a +} + +define <vscale x 2 x i32> @intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16( + <vscale x 2 x i32> %1, + <vscale x 2 x i16> %0, + i64 %2) + + ret <vscale x 2 x i32> %a +} + +define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i32> 
@llvm.riscv.vwadd.w.nxv4i32.nxv4i16( + <vscale x 4 x i32> %1, + <vscale x 4 x i16> %0, + i64 %2) + + ret <vscale x 4 x i32> %a +} + +define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16( + <vscale x 8 x i32> %1, + <vscale x 8 x i16> %0, + i64 %2) + + ret <vscale x 8 x i32> %a +} + +define <vscale x 1 x i64> @intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32( + <vscale x 1 x i64> %1, + <vscale x 1 x i32> %0, + i64 %2) + + ret <vscale x 1 x i64> %a +} + +define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32( + <vscale x 2 x i64> %1, + <vscale x 2 x i32> %0, + i64 %2) + + ret <vscale x 2 x i64> %a +} + +define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32( + <vscale x 4 x i64> %1, + <vscale x 4 x i32> %0, + i64 %2) + + ret <vscale x 4 x i64> %a +} + +define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32( + <vscale x 8 x i64> %1, + <vscale x 8 x i32> %0, + i64 %2) + + ret <vscale x 8 x i64> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll index dcbe238..f405406 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll @@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ define <vscale x 2 x 
i16> @intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsc ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsc ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale ; CHECK-LABEL: 
intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16(<vs ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vwaddu.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ entry: ret <vscale x 8 x i64> %a } + +define <vscale x 1 x i16> @intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( + <vscale x 1 x i16> %1, + <vscale x 1 x i8> %0, + i32 %2) + + ret <vscale x 1 x i16> %a +} + +define <vscale x 2 x i16> @intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8( + <vscale x 2 x 
i16> %1, + <vscale x 2 x i8> %0, + i32 %2) + + ret <vscale x 2 x i16> %a +} + +define <vscale x 4 x i16> @intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8( + <vscale x 4 x i16> %1, + <vscale x 4 x i8> %0, + i32 %2) + + ret <vscale x 4 x i16> %a +} + +define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8( + <vscale x 8 x i16> %1, + <vscale x 8 x i8> %0, + i32 %2) + + ret <vscale x 8 x i16> %a +} + +define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8( + <vscale x 16 x i16> %1, + <vscale x 16 x i8> %0, + i32 %2) + + ret <vscale x 16 x i16> %a +} + +define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwaddu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8( + <vscale x 32 x i16> %1, + <vscale x 32 x i8> %0, + i32 %2) + + ret <vscale x 32 x i16> %a +} + +define <vscale x 1 x i32> @intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16( + <vscale x 1 x i32> %1, + <vscale x 1 x i16> %0, + i32 %2) + + ret <vscale x 1 x i32> %a +} + +define <vscale x 2 x i32> @intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16( + <vscale x 2 x i32> %1, + <vscale x 2 x i16> %0, + i32 %2) + + ret <vscale x 2 x i32> %a +} + +define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: 
intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16( + <vscale x 4 x i32> %1, + <vscale x 4 x i16> %0, + i32 %2) + + ret <vscale x 4 x i32> %a +} + +define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16( + <vscale x 8 x i32> %1, + <vscale x 8 x i16> %0, + i32 %2) + + ret <vscale x 8 x i32> %a +} + +define <vscale x 1 x i64> @intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32( + <vscale x 1 x i64> %1, + <vscale x 1 x i32> %0, + i32 %2) + + ret <vscale x 1 x i64> %a +} + +define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32( + <vscale x 2 x i64> %1, + <vscale x 2 x i32> %0, + i32 %2) + + ret <vscale x 2 x i64> %a +} + +define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32( + <vscale x 4 x i64> %1, + <vscale x 4 x i32> %0, + i32 %2) + + ret <vscale x 4 x i64> %a +} + +define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwaddu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32( + <vscale x 8 x i64> %1, + <vscale x 8 x i32> %0, + i32 %2) + + ret <vscale x 8 x i64> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll index e5bb4a3..2df24fa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll @@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsc ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsc ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v 
v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16(<vs ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vwaddu.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ entry: ret <vscale x 8 x i64> %a } + +define <vscale x 1 x i16> @intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( + <vscale x 1 x i16> %1, + <vscale x 1 x i8> %0, + i64 %2) + + ret <vscale x 1 x i16> %a +} + +define <vscale x 2 x i16> @intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8( + <vscale x 2 x i16> %1, + <vscale x 2 x i8> %0, + i64 %2) + + ret <vscale x 2 x i16> %a +} + +define <vscale x 4 x i16> @intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8( + <vscale x 4 x i16> %1, + <vscale x 4 x i8> %0, + i64 %2) + + ret <vscale x 4 x i16> %a +} + +define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8( + <vscale x 8 x i16> %1, + <vscale x 8 x i8> %0, + i64 %2) + + ret <vscale x 8 x i16> %a +} + +define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8( + <vscale x 16 x i16> %1, + <vscale x 16 x i8> %0, + i64 %2) + + ret <vscale x 16 x i16> %a +} + +define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwaddu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8( + <vscale x 32 x i16> %1, + <vscale x 32 x i8> %0, + i64 %2) + + ret <vscale x 32 x i16> %a +} + +define <vscale x 1 x i32> @intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16( + <vscale x 1 x i32> %1, + <vscale x 1 x i16> %0, + i64 %2) + + ret <vscale x 1 x i32> %a +} + +define <vscale x 2 x i32> @intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16( + <vscale x 2 x i32> %1, 
+ <vscale x 2 x i16> %0, + i64 %2) + + ret <vscale x 2 x i32> %a +} + +define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16( + <vscale x 4 x i32> %1, + <vscale x 4 x i16> %0, + i64 %2) + + ret <vscale x 4 x i32> %a +} + +define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16( + <vscale x 8 x i32> %1, + <vscale x 8 x i16> %0, + i64 %2) + + ret <vscale x 8 x i32> %a +} + +define <vscale x 1 x i64> @intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32( + <vscale x 1 x i64> %1, + <vscale x 1 x i32> %0, + i64 %2) + + ret <vscale x 1 x i64> %a +} + +define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32( + <vscale x 2 x i64> %1, + <vscale x 2 x i32> %0, + i64 %2) + + ret <vscale x 2 x i64> %a +} + +define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32( + <vscale x 4 x i64> %1, + <vscale x 4 x i32> %0, + i64 %2) + + ret <vscale x 4 x i64> %a +} + +define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwaddu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32( + <vscale x 8 x i64> %1, + <vscale x 8 x i32> %0, + i64 %2) + + ret <vscale x 8 x i64> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll index 8f58e8c0..f5cef80 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll @@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale x ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale x ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale x ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsca ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsca ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ define <vscale x 4 x i32> 
@intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16(<vsc ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vwsub.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ entry: ret <vscale x 8 x i64> %a } + +define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( + <vscale x 1 x i16> %1, + <vscale x 1 x i8> %0, + i32 %2) + + ret <vscale x 1 x i16> %a +} + 
+define <vscale x 2 x i16> @intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8( + <vscale x 2 x i16> %1, + <vscale x 2 x i8> %0, + i32 %2) + + ret <vscale x 2 x i16> %a +} + +define <vscale x 4 x i16> @intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8( + <vscale x 4 x i16> %1, + <vscale x 4 x i8> %0, + i32 %2) + + ret <vscale x 4 x i16> %a +} + +define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8( + <vscale x 8 x i16> %1, + <vscale x 8 x i8> %0, + i32 %2) + + ret <vscale x 8 x i16> %a +} + +define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8( + <vscale x 16 x i16> %1, + <vscale x 16 x i8> %0, + i32 %2) + + ret <vscale x 16 x i16> %a +} + +define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8( + <vscale x 32 x i16> %1, + <vscale x 32 x i8> %0, + i32 %2) + + ret <vscale x 32 x i16> %a +} + +define <vscale x 1 x i32> @intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16( + <vscale x 1 x i32> %1, + <vscale x 1 x i16> %0, + i32 %2) + + ret <vscale x 1 x i32> %a +} + +define <vscale x 2 x i32> @intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; 
CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16( + <vscale x 2 x i32> %1, + <vscale x 2 x i16> %0, + i32 %2) + + ret <vscale x 2 x i32> %a +} + +define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16( + <vscale x 4 x i32> %1, + <vscale x 4 x i16> %0, + i32 %2) + + ret <vscale x 4 x i32> %a +} + +define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16( + <vscale x 8 x i32> %1, + <vscale x 8 x i16> %0, + i32 %2) + + ret <vscale x 8 x i32> %a +} + +define <vscale x 1 x i64> @intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32( + <vscale x 1 x i64> %1, + <vscale x 1 x i32> %0, + i32 %2) + + ret <vscale x 1 x i64> %a +} + +define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32( + <vscale x 2 x i64> %1, + <vscale x 2 x i32> %0, + i32 %2) + + ret <vscale x 2 x i64> %a +} + +define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32( + <vscale x 4 x i64> %1, + <vscale x 4 x i32> %0, + i32 %2) + + ret <vscale x 4 x i64> %a +} + +define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32( + <vscale x 8 x i64> %1, + <vscale x 8 x i32> %0, + i32 %2) + + ret <vscale x 8 x i64> %a +} diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll index 76dcdeb..5359916 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll @@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale x ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale x ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale x ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsca ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsca ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; 
CHECK-NEXT: ret entry: %a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16(<vsc ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vwsub.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ entry: ret <vscale x 8 x i64> %a } + +define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x 
i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( + <vscale x 1 x i16> %1, + <vscale x 1 x i8> %0, + i64 %2) + + ret <vscale x 1 x i16> %a +} + +define <vscale x 2 x i16> @intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8( + <vscale x 2 x i16> %1, + <vscale x 2 x i8> %0, + i64 %2) + + ret <vscale x 2 x i16> %a +} + +define <vscale x 4 x i16> @intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8( + <vscale x 4 x i16> %1, + <vscale x 4 x i8> %0, + i64 %2) + + ret <vscale x 4 x i16> %a +} + +define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8( + <vscale x 8 x i16> %1, + <vscale x 8 x i8> %0, + i64 %2) + + ret <vscale x 8 x i16> %a +} + +define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8( + <vscale x 16 x i16> %1, + <vscale x 16 x i8> %0, + i64 %2) + + ret <vscale x 16 x i16> %a +} + +define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8( + <vscale x 32 x i16> %1, + <vscale x 32 x i8> %0, + i64 %2) + + ret <vscale x 32 x i16> %a +} + +define <vscale x 1 x i32> @intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16( + <vscale x 1 x i32> %1, + <vscale x 1 x i16> %0, + i64 %2) + + ret <vscale x 1 x i32> %a +} + +define <vscale x 2 x i32> @intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind { +; 
CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16( + <vscale x 2 x i32> %1, + <vscale x 2 x i16> %0, + i64 %2) + + ret <vscale x 2 x i32> %a +} + +define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16( + <vscale x 4 x i32> %1, + <vscale x 4 x i16> %0, + i64 %2) + + ret <vscale x 4 x i32> %a +} + +define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16( + <vscale x 8 x i32> %1, + <vscale x 8 x i16> %0, + i64 %2) + + ret <vscale x 8 x i32> %a +} + +define <vscale x 1 x i64> @intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32( + <vscale x 1 x i64> %1, + <vscale x 1 x i32> %0, + i64 %2) + + ret <vscale x 1 x i64> %a +} + +define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32( + <vscale x 2 x i64> %1, + <vscale x 2 x i32> %0, + i64 %2) + + ret <vscale x 2 x i64> %a +} + +define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32( + <vscale x 4 x i64> %1, + <vscale x 4 x i32> %0, + i64 %2) + + ret <vscale x 4 x i64> %a +} + +define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i64> 
@llvm.riscv.vwsub.w.nxv8i64.nxv8i32( + <vscale x 8 x i64> %1, + <vscale x 8 x i32> %0, + i64 %2) + + ret <vscale x 8 x i64> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll index 79b858d..1b3e96e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll @@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsc ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsc ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16(<vs ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vwsubu.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ entry: ret <vscale x 8 x i64> %a } + +define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: 
intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8( + <vscale x 1 x i16> %1, + <vscale x 1 x i8> %0, + i32 %2) + + ret <vscale x 1 x i16> %a +} + +define <vscale x 2 x i16> @intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8( + <vscale x 2 x i16> %1, + <vscale x 2 x i8> %0, + i32 %2) + + ret <vscale x 2 x i16> %a +} + +define <vscale x 4 x i16> @intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8( + <vscale x 4 x i16> %1, + <vscale x 4 x i8> %0, + i32 %2) + + ret <vscale x 4 x i16> %a +} + +define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8( + <vscale x 8 x i16> %1, + <vscale x 8 x i8> %0, + i32 %2) + + ret <vscale x 8 x i16> %a +} + +define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8( + <vscale x 16 x i16> %1, + <vscale x 16 x i8> %0, + i32 %2) + + ret <vscale x 16 x i16> %a +} + +define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwsubu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8( + <vscale x 32 x i16> %1, + <vscale x 32 x i8> %0, + i32 %2) + + ret <vscale x 32 x i16> %a +} + +define <vscale x 1 x i32> @intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i32> 
@llvm.riscv.vwsubu.w.nxv1i32.nxv1i16( + <vscale x 1 x i32> %1, + <vscale x 1 x i16> %0, + i32 %2) + + ret <vscale x 1 x i32> %a +} + +define <vscale x 2 x i32> @intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16( + <vscale x 2 x i32> %1, + <vscale x 2 x i16> %0, + i32 %2) + + ret <vscale x 2 x i32> %a +} + +define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16( + <vscale x 4 x i32> %1, + <vscale x 4 x i16> %0, + i32 %2) + + ret <vscale x 4 x i32> %a +} + +define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16( + <vscale x 8 x i32> %1, + <vscale x 8 x i16> %0, + i32 %2) + + ret <vscale x 8 x i32> %a +} + +define <vscale x 1 x i64> @intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32( + <vscale x 1 x i64> %1, + <vscale x 1 x i32> %0, + i32 %2) + + ret <vscale x 1 x i64> %a +} + +define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32( + <vscale x 2 x i64> %1, + <vscale x 2 x i32> %0, + i32 %2) + + ret <vscale x 2 x i64> %a +} + +define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32( + <vscale x 4 x i64> %1, + <vscale x 4 x i32> %0, + i32 %2) + + ret <vscale x 4 x i64> %a +} + +define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) 
nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwsubu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32( + <vscale x 8 x i64> %1, + <vscale x 8 x i32> %0, + i32 %2) + + ret <vscale x 8 x i64> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll index 2152b4d..8205429 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll @@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsc ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsc ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: 
vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16(<vs ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vwsubu.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> 
@llvm.riscv.vwsubu.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ entry: ret <vscale x 8 x i64> %a } + +define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8( + <vscale x 1 x i16> %1, + <vscale x 1 x i8> %0, + i64 %2) + + ret <vscale x 1 x i16> %a +} + +define <vscale x 2 x i16> @intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8( + <vscale x 2 x i16> %1, + <vscale x 2 x i8> %0, + i64 %2) + + ret <vscale x 2 x i16> %a +} + +define <vscale x 4 x i16> @intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8( + <vscale x 4 x i16> %1, + <vscale x 4 x i8> %0, + i64 %2) + + ret <vscale x 4 x i16> %a +} + +define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8( + <vscale x 8 x i16> %1, + <vscale x 8 x i8> %0, + i64 %2) + + ret <vscale x 8 x i16> %a +} + +define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8( + <vscale x 16 x i16> %1, + <vscale x 16 x i8> %0, + i64 %2) + + ret <vscale x 16 x i16> %a +} + +define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwsubu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8( + <vscale x 32 x i16> %1, + <vscale x 32 x i8> %0, + i64 %2) + + ret <vscale x 32 x i16> %a +} + +define <vscale x 1 x i32> @intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: 
intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16( + <vscale x 1 x i32> %1, + <vscale x 1 x i16> %0, + i64 %2) + + ret <vscale x 1 x i32> %a +} + +define <vscale x 2 x i32> @intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16( + <vscale x 2 x i32> %1, + <vscale x 2 x i16> %0, + i64 %2) + + ret <vscale x 2 x i32> %a +} + +define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16( + <vscale x 4 x i32> %1, + <vscale x 4 x i16> %0, + i64 %2) + + ret <vscale x 4 x i32> %a +} + +define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16( + <vscale x 8 x i32> %1, + <vscale x 8 x i16> %0, + i64 %2) + + ret <vscale x 8 x i32> %a +} + +define <vscale x 1 x i64> @intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32( + <vscale x 1 x i64> %1, + <vscale x 1 x i32> %0, + i64 %2) + + ret <vscale x 1 x i64> %a +} + +define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32( + <vscale x 2 x i64> %1, + <vscale x 2 x i32> %0, + i64 %2) + + ret <vscale x 2 x i64> %a +} + +define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i64> 
@llvm.riscv.vwsubu.w.nxv4i64.nxv4i32( + <vscale x 4 x i64> %1, + <vscale x 4 x i32> %0, + i64 %2) + + ret <vscale x 4 x i64> %a +} + +define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwsubu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32( + <vscale x 8 x i64> %1, + <vscale x 8 x i32> %0, + i64 %2) + + ret <vscale x 8 x i64> %a +}
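To reproduce the behaviour these checks encode outside the checked-in files, the sketch below is a minimal standalone .ll input in the same shape as the wv tests above (wide operand first, so the result can stay tied to the first source). It is only an illustration: the function and value names are hypothetical, and the three-operand intrinsic signature is inferred from the call sites in this diff. Feeding it to llc with the same RUN flags these test files use should show whether the instruction writes its tied register directly or goes through a temporary followed by a vmv1r.v copy.

; Minimal sketch, assuming the unmasked three-operand intrinsic form shown in
; the calls above (wide vector, narrow vector, vl). Names are hypothetical.
declare <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16(
  <vscale x 1 x i32>, <vscale x 1 x i16>, i64)

define <vscale x 1 x i32> @sketch_vwsub_w_wv(<vscale x 1 x i32> %w, <vscale x 1 x i16> %v, i64 %vl) nounwind {
entry:
  ; The wide operand %w comes first, matching the non-untie tests, so with the
  ; TIED pseudo the expected lowering is a single vwsub.wv whose destination
  ; reuses %w's register, with no extra vmv copy.
  %a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16(
    <vscale x 1 x i32> %w,
    <vscale x 1 x i16> %v,
    i64 %vl)
  ret <vscale x 1 x i32> %a
}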