author     Him188 <tguan@nvidia.com>      2024-06-18 09:25:47 +0100
committer  GitHub <noreply@github.com>    2024-06-18 09:25:47 +0100
commit     0e21f125c69f0e3204ea76d931717c88493e5cb3 (patch)
tree       a826773abeee966a26ffc30c3f55ce226e76344e
parent     8b0d38be9ece414dbc7a91dbd8fa506b0e3287e7 (diff)
[AArch64][GISel] Translate legal SVE formal arguments and select COPY for SVE (#95236)
This patch adds support for legal SVE formal arguments in IRTranslator,
and support for selecting COPY with SVE registers.
SVE arguments are allowed only if the hidden option
`-aarch64-enable-gisel-sve` is enabled. Illegal types and predicates
like `nxv8i1` are not supported yet.
10 files changed, 1103 insertions, 46 deletions
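A minimal sketch of how the gated path is exercised, distilled from the tests added below (the function and llc flags mirror llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll; nothing beyond what this diff adds is assumed):

; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -global-isel -global-isel-abort=1 \
; RUN:     -aarch64-enable-gisel-sve=1 %s -o -

; The scalable argument arrives in z0. With the hidden flag enabled,
; the IRTranslator lowers it directly instead of falling back to
; SelectionDAG, and the store is selected as an SVE st1b of z0.
define void @formal_argument_nxv16i8(<vscale x 16 x i8> %0, ptr %p) {
  store <vscale x 16 x i8> %0, ptr %p, align 16
  ret void
}

Without -aarch64-enable-gisel-sve=1, fallBackToDAGISel still rejects any function whose return type or arguments are scalable, so default behaviour is unchanged.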
diff --git a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def index b87421e..d243ca5 100644 --- a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def +++ b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def @@ -137,7 +137,9 @@ bool AArch64GenRegisterBankInfo::checkValueMapImpl(unsigned Idx, unsigned Offset) { unsigned PartialMapBaseIdx = Idx - PartialMappingIdx::PMI_Min; const ValueMapping &Map = - AArch64GenRegisterBankInfo::getValueMapping((PartialMappingIdx)FirstInBank, Size)[Offset]; + AArch64GenRegisterBankInfo::getValueMapping( + (PartialMappingIdx)FirstInBank, + TypeSize::getFixed(Size))[Offset]; return Map.BreakDown == &PartMappings[PartialMapBaseIdx] && Map.NumBreakDowns == 1; } @@ -167,7 +169,7 @@ bool AArch64GenRegisterBankInfo::checkPartialMappingIdx( } unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx, - unsigned Size) { + TypeSize Size) { if (RBIdx == PMI_FirstGPR) { if (Size <= 32) return 0; @@ -178,17 +180,20 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx, return -1; } if (RBIdx == PMI_FirstFPR) { - if (Size <= 16) + const unsigned MinSize = Size.getKnownMinValue(); + assert(!Size.isScalable() || MinSize >= 128 + && "Scalable vector types should have size of at least 128 bits"); + if (MinSize <= 16) return 0; - if (Size <= 32) + if (MinSize <= 32) return 1; - if (Size <= 64) + if (MinSize <= 64) return 2; - if (Size <= 128) + if (MinSize <= 128) return 3; - if (Size <= 256) + if (MinSize <= 256) return 4; - if (Size <= 512) + if (MinSize <= 512) return 5; return -1; } @@ -197,7 +202,7 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx, const RegisterBankInfo::ValueMapping * AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx, - unsigned Size) { + const TypeSize Size) { assert(RBIdx != PartialMappingIdx::PMI_None && "No mapping needed for that"); unsigned BaseIdxOffset = getRegBankBaseIdxOffset(RBIdx, Size); if (BaseIdxOffset == -1u) @@ -221,7 +226,8 @@ const AArch64GenRegisterBankInfo::PartialMappingIdx const RegisterBankInfo::ValueMapping * AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID, - unsigned SrcBankID, unsigned Size) { + unsigned SrcBankID, + const TypeSize Size) { assert(DstBankID < AArch64::NumRegisterBanks && "Invalid bank ID"); assert(SrcBankID < AArch64::NumRegisterBanks && "Invalid bank ID"); PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID]; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 35871cc..0f06bfb 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -149,7 +149,7 @@ static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden, // scalable vector types for all instruction, even if SVE is not yet supported // with some instructions. // See [AArch64TargetLowering::fallbackToDAGISel] for implementation details. 
-static cl::opt<bool> EnableSVEGISel( +cl::opt<bool> EnableSVEGISel( "aarch64-enable-gisel-sve", cl::Hidden, cl::desc("Enable / disable SVE scalable vectors in Global ISel"), cl::init(false)); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 322bde3..a93e74f 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -53,6 +53,8 @@ using namespace llvm; using namespace AArch64GISelUtils; +extern cl::opt<bool> EnableSVEGISel; + AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) : CallLowering(&TLI) {} @@ -525,10 +527,10 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const { auto &F = MF.getFunction(); - if (F.getReturnType()->isScalableTy() || - llvm::any_of(F.args(), [](const Argument &A) { - return A.getType()->isScalableTy(); - })) + if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() || + llvm::any_of(F.args(), [](const Argument &A) { + return A.getType()->isScalableTy(); + }))) return true; const auto &ST = MF.getSubtarget<AArch64Subtarget>(); if (!ST.hasNEON() || !ST.hasFPARMv8()) { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 4a7c82b..d32007e 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -597,8 +597,14 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, /// Given a register bank, and size in bits, return the smallest register class /// that can represent that combination. static const TargetRegisterClass * -getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, +getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet = false) { + if (SizeInBits.isScalable()) { + assert(RB.getID() == AArch64::FPRRegBankID && + "Expected FPR regbank for scalable type size"); + return &AArch64::ZPRRegClass; + } + unsigned RegBankID = RB.getID(); if (RegBankID == AArch64::GPRRegBankID) { @@ -939,8 +945,9 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, Register SrcReg = I.getOperand(1).getReg(); const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); - unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); - unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); + + TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); + TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); // Special casing for cross-bank copies of s1s. We can technically represent // a 1-bit value with any size of register. The minimum size for a GPR is 32 @@ -951,7 +958,7 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, // register bank. Or make a new helper that carries along some constraint // information. 
if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) - SrcSize = DstSize = 32; + SrcSize = DstSize = TypeSize::getFixed(32); return {getMinClassForRegBank(SrcRegBank, SrcSize, true), getMinClassForRegBank(DstRegBank, DstSize, true)}; @@ -1016,8 +1023,8 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return false; } - unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); - unsigned DstSize = TRI.getRegSizeInBits(*DstRC); + const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC); + const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC); unsigned SubReg; // If the source bank doesn't support a subregister copy small enough, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index f63e29e..5616d06 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -163,17 +163,18 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo( unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \ (void)PartialMapDstIdx; \ (void)PartialMapSrcIdx; \ - const ValueMapping *Map = getCopyMapping( \ - AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size); \ + const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID, \ + AArch64::RBNameSrc##RegBankID, \ + TypeSize::getFixed(Size)); \ (void)Map; \ assert(Map[0].BreakDown == \ &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \ - Map[0].NumBreakDowns == 1 && #RBNameDst #Size \ - " Dst is incorrectly initialized"); \ + Map[0].NumBreakDowns == 1 && \ + #RBNameDst #Size " Dst is incorrectly initialized"); \ assert(Map[1].BreakDown == \ &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \ - Map[1].NumBreakDowns == 1 && #RBNameSrc #Size \ - " Src is incorrectly initialized"); \ + Map[1].NumBreakDowns == 1 && \ + #RBNameSrc #Size " Src is incorrectly initialized"); \ \ } while (false) @@ -218,7 +219,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo( unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A, const RegisterBank &B, - TypeSize Size) const { + const TypeSize Size) const { // What do we do with different size? // copy are same size. // Will introduce other hooks for different size: @@ -258,6 +259,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, case AArch64::QQQRegClassID: case AArch64::QQQQRegClassID: case AArch64::ZPRRegClassID: + case AArch64::ZPR_3bRegClassID: return getRegBank(AArch64::FPRRegBankID); case AArch64::GPR32commonRegClassID: case AArch64::GPR32RegClassID: @@ -304,7 +306,7 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings( case TargetOpcode::G_OR: { // 32 and 64-bit or can be mapped on either FPR or // GPR for the same cost. 
- unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); + TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); if (Size != 32 && Size != 64) break; @@ -325,7 +327,7 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings( return AltMappings; } case TargetOpcode::G_BITCAST: { - unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); + TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); if (Size != 32 && Size != 64) break; @@ -365,7 +367,7 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings( return AltMappings; } case TargetOpcode::G_LOAD: { - unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); + TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); if (Size != 64) break; @@ -377,15 +379,17 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings( InstructionMappings AltMappings; const InstructionMapping &GPRMapping = getInstructionMapping( /*ID*/ 1, /*Cost*/ 1, - getOperandsMapping({getValueMapping(PMI_FirstGPR, Size), - // Addresses are GPR 64-bit. - getValueMapping(PMI_FirstGPR, 64)}), + getOperandsMapping( + {getValueMapping(PMI_FirstGPR, Size), + // Addresses are GPR 64-bit. + getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}), /*NumOperands*/ 2); const InstructionMapping &FPRMapping = getInstructionMapping( /*ID*/ 2, /*Cost*/ 1, - getOperandsMapping({getValueMapping(PMI_FirstFPR, Size), - // Addresses are GPR 64-bit. - getValueMapping(PMI_FirstGPR, 64)}), + getOperandsMapping( + {getValueMapping(PMI_FirstFPR, Size), + // Addresses are GPR 64-bit. + getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}), /*NumOperands*/ 2); AltMappings.push_back(&GPRMapping); @@ -437,7 +441,7 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping( "This code is for instructions with 3 or less operands"); LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - unsigned Size = Ty.getSizeInBits(); + TypeSize Size = Ty.getSizeInBits(); bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc); PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR; @@ -714,9 +718,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // If both RB are null that means both registers are generic. // We shouldn't be here. assert(DstRB && SrcRB && "Both RegBank were nullptr"); - unsigned Size = getSizeInBits(DstReg, MRI, TRI); + TypeSize Size = getSizeInBits(DstReg, MRI, TRI); return getInstructionMapping( - DefaultMappingID, copyCost(*DstRB, *SrcRB, TypeSize::getFixed(Size)), + DefaultMappingID, copyCost(*DstRB, *SrcRB, Size), getCopyMapping(DstRB->getID(), SrcRB->getID(), Size), // We only care about the mapping of the destination. /*NumOperands*/ 1); @@ -727,7 +731,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_BITCAST: { LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - unsigned Size = DstTy.getSizeInBits(); + TypeSize Size = DstTy.getSizeInBits(); bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64; bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64; const RegisterBank &DstRB = @@ -735,7 +739,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const RegisterBank &SrcRB = SrcIsGPR ? 
AArch64::GPRRegBank : AArch64::FPRRegBank; return getInstructionMapping( - DefaultMappingID, copyCost(DstRB, SrcRB, TypeSize::getFixed(Size)), + DefaultMappingID, copyCost(DstRB, SrcRB, Size), getCopyMapping(DstRB.getID(), SrcRB.getID(), Size), // We only care about the mapping of the destination for COPY. /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1); @@ -1126,7 +1130,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLT Ty = MRI.getType(MI.getOperand(Idx).getReg()); if (!Ty.isValid()) continue; - auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]); + auto Mapping = + getValueMapping(OpRegBankIdx[Idx], TypeSize::getFixed(OpSize[Idx])); if (!Mapping->isValid()) return getInvalidInstructionMapping(); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h index 9d23f84..0d89f54 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -70,7 +70,7 @@ protected: PartialMappingIdx LastAlias, ArrayRef<PartialMappingIdx> Order); - static unsigned getRegBankBaseIdxOffset(unsigned RBIdx, unsigned Size); + static unsigned getRegBankBaseIdxOffset(unsigned RBIdx, TypeSize Size); /// Get the pointer to the ValueMapping representing the RegisterBank /// at \p RBIdx with a size of \p Size. @@ -80,13 +80,13 @@ protected: /// /// \pre \p RBIdx != PartialMappingIdx::None static const RegisterBankInfo::ValueMapping * - getValueMapping(PartialMappingIdx RBIdx, unsigned Size); + getValueMapping(PartialMappingIdx RBIdx, TypeSize Size); /// Get the pointer to the ValueMapping of the operands of a copy /// instruction from the \p SrcBankID register bank to the \p DstBankID /// register bank with a size of \p Size. static const RegisterBankInfo::ValueMapping * - getCopyMapping(unsigned DstBankID, unsigned SrcBankID, unsigned Size); + getCopyMapping(unsigned DstBankID, unsigned SrcBankID, TypeSize Size); /// Get the instruction mapping for G_FPEXT. /// diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument-multiple.ll b/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument-multiple.ll new file mode 100644 index 0000000..621f620 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument-multiple.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 %s -o - | FileCheck %s + +;; Test the correct usage of the Z registers with multiple SVE arguments. 
+ +define void @formal_argument_nxv16i8_2(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, ptr %p) { +; CHECK-LABEL: formal_argument_nxv16i8_2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: st1b { z0.b }, p0, [x0] +; CHECK-NEXT: st1b { z1.b }, p0, [x0] +; CHECK-NEXT: ret + store <vscale x 16 x i8> %0, ptr %p, align 16 + store <vscale x 16 x i8> %1, ptr %p, align 16 + ret void +} + +define void @formal_argument_nxv16i8_8( +; CHECK-LABEL: formal_argument_nxv16i8_8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: st1b { z0.b }, p0, [x0] +; CHECK-NEXT: st1b { z1.b }, p0, [x0] +; CHECK-NEXT: st1b { z2.b }, p0, [x0] +; CHECK-NEXT: st1b { z3.b }, p0, [x0] +; CHECK-NEXT: st1b { z4.b }, p0, [x0] +; CHECK-NEXT: st1b { z5.b }, p0, [x0] +; CHECK-NEXT: st1b { z6.b }, p0, [x0] +; CHECK-NEXT: st1b { z7.b }, p0, [x0] +; CHECK-NEXT: ret + <vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, + <vscale x 16 x i8> %4, <vscale x 16 x i8> %5, <vscale x 16 x i8> %6, <vscale x 16 x i8> %7, + ptr %p) { + + store <vscale x 16 x i8> %0, ptr %p, align 16 + store <vscale x 16 x i8> %1, ptr %p, align 16 + store <vscale x 16 x i8> %2, ptr %p, align 16 + store <vscale x 16 x i8> %3, ptr %p, align 16 + store <vscale x 16 x i8> %4, ptr %p, align 16 + store <vscale x 16 x i8> %5, ptr %p, align 16 + store <vscale x 16 x i8> %6, ptr %p, align 16 + store <vscale x 16 x i8> %7, ptr %p, align 16 + ret void +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll b/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll new file mode 100644 index 0000000..aa9a671 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 %s -o - | FileCheck %s +;; vscale x 128-bit + +define void @formal_argument_nxv16i8(<vscale x 16 x i8> %0, ptr %p) { +; CHECK-LABEL: formal_argument_nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: st1b { z0.b }, p0, [x0] +; CHECK-NEXT: ret + store <vscale x 16 x i8> %0, ptr %p, align 16 + ret void +} + +define void @formal_argument_nxv8i16(<vscale x 8 x i16> %0, ptr %p) { +; CHECK-LABEL: formal_argument_nxv8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: ret + store <vscale x 8 x i16> %0, ptr %p, align 16 + ret void +} + +define void @formal_argument_nxv4i32(<vscale x 4 x i32> %0, ptr %p) { +; CHECK-LABEL: formal_argument_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + store <vscale x 4 x i32> %0, ptr %p, align 16 + ret void +} + +define void @formal_argument_nxv2i64(<vscale x 2 x i64> %0, ptr %p) { +; CHECK-LABEL: formal_argument_nxv2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + store <vscale x 2 x i64> %0, ptr %p, align 16 + ret void +} + +;; TODO: Add tests for other types when store is supported for them. 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument-multiple.ll b/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument-multiple.ll new file mode 100644 index 0000000..28d53da --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument-multiple.ll @@ -0,0 +1,559 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 \ +; RUN: -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s + +;; Test the correct usage of the Z registers with multiple SVE arguments. + +;; Mixing SVE types + +define void @formal_argument_mix_sve( + ; CHECK-LABEL: name: formal_argument_mix_sve + ; CHECK: bb.1 (%ir-block.4): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 8 x p0>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>), [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + <vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 16 x i8> %2, <vscale x 8 x ptr> %3 +) { + ret void +} + +;; Mixing SVE and non-SVE types + +define void @formal_argument_mix_sve_double( + ; CHECK-LABEL: name: formal_argument_mix_sve_double + ; CHECK: bb.1 (%ir-block.2): + ; CHECK-NEXT: liveins: $d1, $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK-NEXT: RET_ReallyLR + <vscale x 8 x i16> %0, double %1 +) { + ret void +} + +define void @formal_argument_mix_sve_int_double( + ; CHECK-LABEL: name: formal_argument_mix_sve_int_double + ; CHECK: bb.1 (%ir-block.4): + ; CHECK-NEXT: liveins: $d0, $w0, $z1, $z2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z2 + ; CHECK-NEXT: RET_ReallyLR + double %0, <vscale x 8 x i16> %1, i32 %2, <vscale x 8 x i16> %3 +) { + ret void +} + +;; 1024-bit cases which fit into Z0-Z7 +;; TODO: Add tests for arguments that do not fit into Z0-Z7, when we support them. 
(They should be passed as memory addresses) + +;; nxv2___ 1024-bit +define void @formal_argument_nxv2i64_8( + ; CHECK-LABEL: name: formal_argument_nxv2i64_8 + ; CHECK: bb.1 (%ir-block.8): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: RET_ReallyLR + <vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, + <vscale x 2 x i64> %4, <vscale x 2 x i64> %5, <vscale x 2 x i64> %6, <vscale x 2 x i64> %7 +) { + ret void +} + +define void @formal_argument_nxv2f64_8( + ; CHECK-LABEL: name: formal_argument_nxv2f64_8 + ; CHECK: bb.1 (%ir-block.8): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: RET_ReallyLR + <vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, + <vscale x 2 x double> %4, <vscale x 2 x double> %5, <vscale x 2 x double> %6, <vscale x 2 x double> %7 +) { + ret void +} + +define void @formal_argument_nxv2p0_8( + ; CHECK-LABEL: name: formal_argument_nxv2p0_8 + ; CHECK: bb.1 (%ir-block.8): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z7 + ; CHECK-NEXT: RET_ReallyLR + <vscale x 2 x ptr> %0, <vscale x 2 x ptr> %1, <vscale x 2 x ptr> %2, <vscale x 2 x ptr> %3, + <vscale x 2 x ptr> %4, <vscale x 2 x ptr> %5, <vscale x 2 x ptr> %6, <vscale x 2 x ptr> %7 +) { + ret void +} + +;; nxv4___ 1024-bit +define void @formal_argument_nxv4i32_8( + ; CHECK-LABEL: name: formal_argument_nxv4i32_8 + ; CHECK: bb.1 (%ir-block.8): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z7 + ; CHECK-NEXT: RET_ReallyLR + <vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, + <vscale x 4 x i32> %4, <vscale x 4 x i32> %5, <vscale x 4 x i32> %6, <vscale x 4 x i32> %7 +) { + ret void +} + +define void @formal_argument_nxv4f32_8( + ; CHECK-LABEL: name: formal_argument_nxv4f32_8 + ; CHECK: bb.1 (%ir-block.8): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z7 + ; CHECK-NEXT: RET_ReallyLR + <vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, + <vscale x 4 x float> %4, <vscale x 4 x float> %5, <vscale x 4 x float> %6, <vscale x 4 x float> %7 +) { + ret void +} + +define void @formal_argument_nxv4i64_4(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3) { + ; CHECK-LABEL: name: formal_argument_nxv4i64_4 + ; CHECK: bb.1 (%ir-block.4): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_CONCAT_VECTORS [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<vscale x 4 x s64>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<vscale x 4 x s64>) = G_CONCAT_VECTORS [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv4p0_4(<vscale x 4 x ptr> %0, <vscale x 4 x ptr> %1, <vscale x 4 x ptr> %2, <vscale x 4 x ptr> %3) { + ; CHECK-LABEL: name: formal_argument_nxv4p0_4 + ; CHECK: bb.1 (%ir-block.4): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x p0>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>) + ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 4 x p0>) = G_CONCAT_VECTORS [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<vscale x 4 x p0>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<vscale x 4 x p0>) = G_CONCAT_VECTORS [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +;; nxv8___ 1024-bit +define void @formal_argument_nxv8i16_8( + ; CHECK-LABEL: name: formal_argument_nxv8i16_8 + ; CHECK: bb.1 (%ir-block.8): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z7 + ; CHECK-NEXT: RET_ReallyLR + <vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, + <vscale x 8 x i16> %4, <vscale x 8 x i16> %5, <vscale x 8 x i16> %6, <vscale x 8 x i16> %7 +) { + ret void +} + +define void @formal_argument_nxv8i32_4(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3) { + ; CHECK-LABEL: name: formal_argument_nxv8i32_4 + ; CHECK: bb.1 (%ir-block.4): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z5 + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 4 x s32>), [[COPY5]](<vscale x 4 x s32>) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY6]](<vscale x 4 x s32>), [[COPY7]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv8f32_4(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3) { + ; CHECK-LABEL: name: formal_argument_nxv8f32_4 + ; CHECK: bb.1 (%ir-block.4): + 
; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z5 + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 4 x s32>), [[COPY5]](<vscale x 4 x s32>) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY6]](<vscale x 4 x s32>), [[COPY7]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv8i64_2(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1) { + ; CHECK-LABEL: name: formal_argument_nxv8i64_2 + ; CHECK: bb.1 (%ir-block.2): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>), [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv8f64_2(<vscale x 8 x double> %0, <vscale x 8 x double> %1) { + ; CHECK-LABEL: name: formal_argument_nxv8f64_2 + ; CHECK: bb.1 (%ir-block.2): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 
8 x s64>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>), [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv8p0_2(<vscale x 8 x ptr> %0, <vscale x 8 x ptr> %1) { + ; CHECK-LABEL: name: formal_argument_nxv8p0_2 + ; CHECK: bb.1 (%ir-block.2): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x p0>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 8 x p0>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>), [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +;; nxv16___ 1024-bit + +define void @formal_argument_nxv16i8_8( + ; CHECK-LABEL: name: formal_argument_nxv16i8_8 + ; CHECK: bb.1 (%ir-block.8): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z7 + ; CHECK-NEXT: RET_ReallyLR + <vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, + <vscale x 16 x i8> %4, <vscale x 16 x i8> %5, <vscale x 16 x i8> %6, <vscale x 16 x i8> %7 +) { + ret void +} + +define void @formal_argument_nxv16i16_4(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3) { + ; CHECK-LABEL: name: formal_argument_nxv16i16_4 + ; CHECK: bb.1 (%ir-block.4): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s16>) = G_CONCAT_VECTORS [[COPY]](<vscale x 8 x s16>), [[COPY1]](<vscale x 8 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_CONCAT_VECTORS [[COPY2]](<vscale x 8 x s16>), [[COPY3]](<vscale x 8 x s16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z5 + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<vscale x 16 x s16>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 8 
x s16>), [[COPY5]](<vscale x 8 x s16>) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<vscale x 16 x s16>) = G_CONCAT_VECTORS [[COPY6]](<vscale x 8 x s16>), [[COPY7]](<vscale x 8 x s16>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv16i32_2(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1) { + ; CHECK-LABEL: name: formal_argument_nxv16i32_2 + ; CHECK: bb.1 (%ir-block.2): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 4 x s32>), [[COPY5]](<vscale x 4 x s32>), [[COPY6]](<vscale x 4 x s32>), [[COPY7]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv16f32_2(<vscale x 16 x float> %0, <vscale x 16 x float> %1) { + ; CHECK-LABEL: name: formal_argument_nxv16f32_2 + ; CHECK: bb.1 (%ir-block.2): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 4 x s32>), [[COPY5]](<vscale x 4 x s32>), [[COPY6]](<vscale x 4 x s32>), [[COPY7]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv16i64_1(<vscale x 16 x i64> %0) { + ; CHECK-LABEL: name: formal_argument_nxv16i64_1 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>), [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>), [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv16p0_1(<vscale x 16 x ptr> %0) { + ; CHECK-LABEL: name: formal_argument_nxv16p0_1 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x p0>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>), [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>), [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +;; nxv32___ 1024-bit + +define void @formal_argument_nxv32i8_4(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3) { + ; CHECK-LABEL: name: formal_argument_nxv32i8_4 + ; CHECK: bb.1 (%ir-block.4): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s8>) = G_CONCAT_VECTORS [[COPY]](<vscale x 16 x s8>), [[COPY1]](<vscale x 16 x s8>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_CONCAT_VECTORS [[COPY2]](<vscale x 16 x s8>), [[COPY3]](<vscale x 16 x s8>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z5 + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<vscale x 32 x s8>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 16 x s8>), [[COPY5]](<vscale x 16 x s8>) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<vscale x 32 x s8>) = G_CONCAT_VECTORS [[COPY6]](<vscale x 16 x s8>), [[COPY7]](<vscale x 16 x s8>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv32i16_2(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1) { + ; CHECK-LABEL: name: formal_argument_nxv32i16_2 + ; CHECK: bb.1 (%ir-block.2): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1 + ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s16>) = G_CONCAT_VECTORS [[COPY]](<vscale x 8 x s16>), [[COPY1]](<vscale x 8 x s16>), [[COPY2]](<vscale x 8 x s16>), [[COPY3]](<vscale x 8 x s16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 8 x s16>), [[COPY5]](<vscale x 8 x s16>), [[COPY6]](<vscale x 8 x s16>), [[COPY7]](<vscale x 8 x s16>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv32i32_1(<vscale x 32 x i32> %0) { + ; CHECK-LABEL: name: formal_argument_nxv32i32_1 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>), [[COPY4]](<vscale x 4 x s32>), [[COPY5]](<vscale x 4 x s32>), [[COPY6]](<vscale x 4 x s32>), [[COPY7]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv32f32_1(<vscale x 32 x float> %0) { + ; CHECK-LABEL: name: formal_argument_nxv32f32_1 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>), [[COPY4]](<vscale x 4 x s32>), [[COPY5]](<vscale x 4 x s32>), [[COPY6]](<vscale x 4 x s32>), [[COPY7]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +;; nxv64___ 1024-bit + +define void @formal_argument_nxv64i8_2(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1) { + ; CHECK-LABEL: name: formal_argument_nxv64i8_2 + ; CHECK: bb.1 (%ir-block.2): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1 + 
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 64 x s8>) = G_CONCAT_VECTORS [[COPY]](<vscale x 16 x s8>), [[COPY1]](<vscale x 16 x s8>), [[COPY2]](<vscale x 16 x s8>), [[COPY3]](<vscale x 16 x s8>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_CONCAT_VECTORS [[COPY4]](<vscale x 16 x s8>), [[COPY5]](<vscale x 16 x s8>), [[COPY6]](<vscale x 16 x s8>), [[COPY7]](<vscale x 16 x s8>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv64i16_1(<vscale x 64 x i16> %0) { + ; CHECK-LABEL: name: formal_argument_nxv64i16_1 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 64 x s16>) = G_CONCAT_VECTORS [[COPY]](<vscale x 8 x s16>), [[COPY1]](<vscale x 8 x s16>), [[COPY2]](<vscale x 8 x s16>), [[COPY3]](<vscale x 8 x s16>), [[COPY4]](<vscale x 8 x s16>), [[COPY5]](<vscale x 8 x s16>), [[COPY6]](<vscale x 8 x s16>), [[COPY7]](<vscale x 8 x s16>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument.ll b/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument.ll new file mode 100644 index 0000000..ec89da8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument.ll @@ -0,0 +1,389 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 \ +; RUN: -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s + +;; vscale x 128-bit + +define void @formal_argument_nxv16i8(<vscale x 16 x i8> %0) { + ; CHECK-LABEL: name: formal_argument_nxv16i8 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0 + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv8i16(<vscale x 8 x i16> %0) { + ; CHECK-LABEL: name: formal_argument_nxv8i16 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0 + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv4i32(<vscale x 4 x i32> %0) { + ; CHECK-LABEL: name: formal_argument_nxv4i32 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define 
void @formal_argument_nxv2i64(<vscale x 2 x i64> %0) { + ; CHECK-LABEL: name: formal_argument_nxv2i64 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv4f32(<vscale x 4 x float> %0) { + ; CHECK-LABEL: name: formal_argument_nxv4f32 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv2f64(<vscale x 2 x double> %0) { + ; CHECK-LABEL: name: formal_argument_nxv2f64 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv2p0(<vscale x 2 x ptr> %0) { + ; CHECK-LABEL: name: formal_argument_nxv2p0 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z0 + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +;; vscale x 256-bit + +define void @formal_argument_nxv32i8(<vscale x 32 x i8> %0) { + ; CHECK-LABEL: name: formal_argument_nxv32i8 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s8>) = G_CONCAT_VECTORS [[COPY]](<vscale x 16 x s8>), [[COPY1]](<vscale x 16 x s8>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv16i16(<vscale x 16 x i16> %0) { + ; CHECK-LABEL: name: formal_argument_nxv16i16 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s16>) = G_CONCAT_VECTORS [[COPY]](<vscale x 8 x s16>), [[COPY1]](<vscale x 8 x s16>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv8i32(<vscale x 8 x i32> %0) { + ; CHECK-LABEL: name: formal_argument_nxv8i32 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv4i64(<vscale x 4 x i64> %0) { + ; CHECK-LABEL: name: formal_argument_nxv4i64 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv8f32(<vscale x 8 x float> %0) { + ; CHECK-LABEL: name: formal_argument_nxv8f32 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv4f64(<vscale x 4 x double> %0) { + ; CHECK-LABEL: name: formal_argument_nxv4f64 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv4p0(<vscale x 4 x ptr> %0) { + ; CHECK-LABEL: name: formal_argument_nxv4p0 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x p0>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +;; vscale x 512-bit + +define void @formal_argument_nxv64i8(<vscale x 64 x i8> %0) { + ; CHECK-LABEL: name: formal_argument_nxv64i8 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 64 x s8>) = G_CONCAT_VECTORS [[COPY]](<vscale x 16 x s8>), [[COPY1]](<vscale x 16 x s8>), [[COPY2]](<vscale x 16 x s8>), [[COPY3]](<vscale x 16 x s8>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv32i16(<vscale x 32 x i16> %0) { + ; CHECK-LABEL: name: formal_argument_nxv32i16 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s16>) = G_CONCAT_VECTORS [[COPY]](<vscale x 8 x s16>), [[COPY1]](<vscale x 8 x s16>), [[COPY2]](<vscale x 8 x s16>), [[COPY3]](<vscale x 8 x s16>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv16i32(<vscale x 16 x i32> %0) { + ; CHECK-LABEL: name: formal_argument_nxv16i32 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + 
+define void @formal_argument_nxv8i64(<vscale x 8 x i64> %0) { + ; CHECK-LABEL: name: formal_argument_nxv8i64 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv16f32(<vscale x 16 x float> %0) { + ; CHECK-LABEL: name: formal_argument_nxv16f32 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv8f64(<vscale x 8 x double> %0) { + ; CHECK-LABEL: name: formal_argument_nxv8f64 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv8p0(<vscale x 8 x ptr> %0) { + ; CHECK-LABEL: name: formal_argument_nxv8p0 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x p0>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +;; vscale x 1024-bit + +define void @formal_argument_nxv128i8(<vscale x 128 x i8> %0) { + ; CHECK-LABEL: name: formal_argument_nxv128i8 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 16 x s8>) 
= COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 128 x s8>) = G_CONCAT_VECTORS [[COPY]](<vscale x 16 x s8>), [[COPY1]](<vscale x 16 x s8>), [[COPY2]](<vscale x 16 x s8>), [[COPY3]](<vscale x 16 x s8>), [[COPY4]](<vscale x 16 x s8>), [[COPY5]](<vscale x 16 x s8>), [[COPY6]](<vscale x 16 x s8>), [[COPY7]](<vscale x 16 x s8>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv64i16(<vscale x 64 x i16> %0) { + ; CHECK-LABEL: name: formal_argument_nxv64i16 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 64 x s16>) = G_CONCAT_VECTORS [[COPY]](<vscale x 8 x s16>), [[COPY1]](<vscale x 8 x s16>), [[COPY2]](<vscale x 8 x s16>), [[COPY3]](<vscale x 8 x s16>), [[COPY4]](<vscale x 8 x s16>), [[COPY5]](<vscale x 8 x s16>), [[COPY6]](<vscale x 8 x s16>), [[COPY7]](<vscale x 8 x s16>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv32i32(<vscale x 32 x i32> %0) { + ; CHECK-LABEL: name: formal_argument_nxv32i32 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>), [[COPY4]](<vscale x 4 x s32>), [[COPY5]](<vscale x 4 x s32>), [[COPY6]](<vscale x 4 x s32>), [[COPY7]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv16i64(<vscale x 16 x i64> %0) { + ; CHECK-LABEL: name: formal_argument_nxv16i64 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x 
s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>), [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>), [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv32f32(<vscale x 32 x float> %0) { + ; CHECK-LABEL: name: formal_argument_nxv32f32 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>), [[COPY4]](<vscale x 4 x s32>), [[COPY5]](<vscale x 4 x s32>), [[COPY6]](<vscale x 4 x s32>), [[COPY7]](<vscale x 4 x s32>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv16f64(<vscale x 16 x double> %0) { + ; CHECK-LABEL: name: formal_argument_nxv16f64 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>), [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>), [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +} + +define void @formal_argument_nxv16p0(<vscale x 16 x ptr> %0) { + ; CHECK-LABEL: name: formal_argument_nxv16p0 + ; CHECK: bb.1 (%ir-block.1): + ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x p0>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x 
s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>), [[COPY4]](<vscale x 2 x s64>), [[COPY5]](<vscale x 2 x s64>), [[COPY6]](<vscale x 2 x s64>), [[COPY7]](<vscale x 2 x s64>) + ; CHECK-NEXT: RET_ReallyLR + ret void +}