Diffstat (limited to 'llvm/lib/Target')
60 files changed, 471 insertions, 145 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 4b40733..66e4949 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -91,7 +91,7 @@ static cl::opt<unsigned> GatherOptSearchLimit( "machine-combiner gather pattern optimization")); AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) - : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN, + : AArch64GenInstrInfo(STI, RI, AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, AArch64::CATCHRET), RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {} @@ -1780,6 +1780,16 @@ static unsigned sForm(MachineInstr &Instr) { case AArch64::SUBSWri: case AArch64::SUBSXrr: case AArch64::SUBSXri: + case AArch64::ANDSWri: + case AArch64::ANDSWrr: + case AArch64::ANDSWrs: + case AArch64::ANDSXri: + case AArch64::ANDSXrr: + case AArch64::ANDSXrs: + case AArch64::BICSWrr: + case AArch64::BICSXrr: + case AArch64::BICSWrs: + case AArch64::BICSXrs: return Instr.getOpcode(); case AArch64::ADDWrr: @@ -1810,6 +1820,22 @@ static unsigned sForm(MachineInstr &Instr) { return AArch64::ANDSWri; case AArch64::ANDXri: return AArch64::ANDSXri; + case AArch64::ANDWrr: + return AArch64::ANDSWrr; + case AArch64::ANDWrs: + return AArch64::ANDSWrs; + case AArch64::ANDXrr: + return AArch64::ANDSXrr; + case AArch64::ANDXrs: + return AArch64::ANDSXrs; + case AArch64::BICWrr: + return AArch64::BICSWrr; + case AArch64::BICXrr: + return AArch64::BICSXrr; + case AArch64::BICWrs: + return AArch64::BICSWrs; + case AArch64::BICXrs: + return AArch64::BICSXrs; } } @@ -1947,6 +1973,25 @@ static bool isSUBSRegImm(unsigned Opcode) { return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri; } +static bool isANDOpcode(MachineInstr &MI) { + unsigned Opc = sForm(MI); + switch (Opc) { + case AArch64::ANDSWri: + case AArch64::ANDSWrr: + case AArch64::ANDSWrs: + case AArch64::ANDSXri: + case AArch64::ANDSXrr: + case AArch64::ANDSXrs: + case AArch64::BICSWrr: + case AArch64::BICSXrr: + case AArch64::BICSWrs: + case AArch64::BICSXrs: + return true; + default: + return false; + } +} + /// Check if CmpInstr can be substituted by MI. /// /// CmpInstr can be substituted: @@ -1984,7 +2029,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, // 1) MI and CmpInstr set N and V to the same value. // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when // signed overflow occurs, so CmpInstr could still be simplified away. - if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap)) + // Note that ANDS and BICS instructions always clear the V flag. + if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI)) return false; AccessKind AccessToCheck = AK_Write; @@ -9590,6 +9636,27 @@ AArch64InstrInfo::getOutliningCandidateInfo( unsigned NumBytesToCreateFrame = 0; + // Avoid splitting an ADRP+ADD/LDR pair into outlined functions. + // These instructions are fused together by the scheduler. + // Any candidate where ADRP is the last instruction should be rejected, + // as that will lead to splitting the ADRP pair.
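The sForm()/isANDOpcode() hunks above let the compare-elimination peephole fire even when a later instruction reads the V flag, provided the flag-setting def is a logical op. A minimal standalone sketch of the rule (illustrative only, not the in-tree helper; names here are invented):

// Before                        After
//   and  w8, w0, w1               ands w8, w0, w1
//   cmp  w8, #0          ==>      b.ge done
//   b.ge done
//
// b.ge reads N and V. ANDS/BICS architecturally clear V, which matches what
// "cmp w8, #0" produces (x - 0 can never signed-overflow), so a V reader no
// longer blocks the substitution; C readers are still rejected elsewhere.
bool canDropZeroCompare(bool DefIsAndsOrBics, bool ReaderUsesV,
                        bool DefHasNoSignedWrap) {
  return !ReaderUsesV || DefIsAndsOrBics || DefHasNoSignedWrap;
}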
+ MachineInstr &LastMI = RepeatedSequenceLocs[0].back(); + MachineInstr &FirstMI = RepeatedSequenceLocs[0].front(); + if (LastMI.getOpcode() == AArch64::ADRP && + (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 && + (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) { + return std::nullopt; + } + + // Similarly, any candidate where the first instruction is ADD/LDR with a + // page offset should be rejected to avoid ADRP splitting. + if ((FirstMI.getOpcode() == AArch64::ADDXri || + FirstMI.getOpcode() == AArch64::LDRXui) && + (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 && + (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) { + return std::nullopt; + } + // We only allow outlining for functions having exactly matching return // address signing attributes, i.e., all share the same value for the // attribute "sign-return-address" and all share the same type of key they diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index a5048b9..eaf8723 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -1123,24 +1123,85 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -// FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register allocation -// where a consecutive multi-vector tuple is constructed from the same indices -// of multiple strided loads. This may still result in unnecessary copies -// between the loads and the tuple. Here we try to return a hint to assign the -// contiguous ZPRMulReg starting at the same register as the first operand of -// the pseudo, which should be a subregister of the first strided load. +// We add regalloc hints for different cases: +// * Choosing a better destination operand for predicated SVE instructions +// where the inactive lanes are undef, by choosing a register that is not +// unique to the other operands of the instruction. // -// For example, if the first strided load has been assigned $z16_z20_z24_z28 -// and the operands of the pseudo are each accessing subregister zsub2, we -// should look through through Order to find a contiguous register which -// begins with $z24 (i.e. $z24_z25_z26_z27). +// * Improve register allocation for SME multi-vector instructions where we can +// benefit from the strided- and contiguous register multi-vector tuples. +// +// Here FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register +// allocation where a consecutive multi-vector tuple is constructed from the +// same indices of multiple strided loads. This may still result in +// unnecessary copies between the loads and the tuple. Here we try to return a +// hint to assign the contiguous ZPRMulReg starting at the same register as +// the first operand of the pseudo, which should be a subregister of the first +// strided load. +// +// For example, if the first strided load has been assigned $z16_z20_z24_z28 +// and the operands of the pseudo are each accessing subregister zsub2, we +// should look through Order to find a contiguous register which +// begins with $z24 (i.e. $z24_z25_z26_z27).
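The two outliner rejections just added guard the fused GOT-access idiom. A hedged illustration of the boundary they forbid (symbol and registers invented for the example):

//   adrp x8, :got:sym              <- a candidate must not END here
//   ldr  x8, [x8, :got_lo12:sym]   <- ... and must not START here
// If the outliner cut between the two, the ADRP and its dependent LDR would
// land in different functions, separated by a call, and the scheduler could
// no longer fuse the pair.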
bool AArch64RegisterInfo::getRegAllocationHints( Register VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { - auto &ST = MF.getSubtarget<AArch64Subtarget>(); + const auto &ST = MF.getSubtarget<AArch64Subtarget>(); + const AArch64InstrInfo *TII = ST.getInstrInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + // For predicated SVE instructions where the inactive lanes are undef, + // pick a destination register that is not unique to avoid introducing + // a movprfx. + const TargetRegisterClass *RegRC = MRI.getRegClass(VirtReg); + if (AArch64::ZPRRegClass.hasSubClassEq(RegRC)) { + for (const MachineOperand &DefOp : MRI.def_operands(VirtReg)) { + const MachineInstr &Def = *DefOp.getParent(); + if (DefOp.isImplicit() || + (TII->get(Def.getOpcode()).TSFlags & AArch64::FalseLanesMask) != + AArch64::FalseLanesUndef) + continue; + + unsigned InstFlags = + TII->get(AArch64::getSVEPseudoMap(Def.getOpcode())).TSFlags; + + for (MCPhysReg R : Order) { + auto AddHintIfSuitable = [&](MCPhysReg R, const MachineOperand &MO) { + // R is a suitable register hint if there exists an operand for the + // instruction that is not yet allocated a register or if R matches + // one of the other source operands. + if (!VRM->hasPhys(MO.getReg()) || VRM->getPhys(MO.getReg()) == R) + Hints.push_back(R); + }; + + switch (InstFlags & AArch64::DestructiveInstTypeMask) { + default: + break; + case AArch64::DestructiveTernaryCommWithRev: + AddHintIfSuitable(R, Def.getOperand(2)); + AddHintIfSuitable(R, Def.getOperand(3)); + AddHintIfSuitable(R, Def.getOperand(4)); + break; + case AArch64::DestructiveBinaryComm: + case AArch64::DestructiveBinaryCommWithRev: + AddHintIfSuitable(R, Def.getOperand(2)); + AddHintIfSuitable(R, Def.getOperand(3)); + break; + case AArch64::DestructiveBinary: + case AArch64::DestructiveBinaryImm: + AddHintIfSuitable(R, Def.getOperand(2)); + break; + } + } + } + + if (Hints.size()) + return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, + MF, VRM); + } + if (!ST.hasSME() || !ST.isStreaming()) return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); @@ -1153,8 +1214,7 @@ bool AArch64RegisterInfo::getRegAllocationHints( // FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy // instructions over reducing the number of clobbered callee-save registers, // so we add the strided registers as a hint.
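For the first bullet of the rewritten comment, the payoff of hinting a "non-unique" destination is one fewer MOVPRFX. An illustrative SVE sequence (hypothetical registers; legal only because the inactive lanes are undef, so reusing a source is safe):

// dst unique to the instruction:     dst == first source:
//   movprfx z0, z1                     fadd z1.s, p0/m, z1.s, z2.s
//   fadd    z0.s, p0/m, z0.s, z2.s
// Destructive SVE ops overwrite one source operand; allocating the
// destination on top of that source makes the prefix copy unnecessary.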
- const MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned RegID = MRI.getRegClass(VirtReg)->getID(); + unsigned RegID = RegRC->getID(); if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID || RegID == AArch64::ZPR4StridedOrContiguousRegClassID) { diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 8974965..ab4004e 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -157,7 +157,7 @@ public: bool enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { return usePostRAScheduler(); } bool enableSubRegLiveness() const override { return EnableSubregLiveness; } - + bool enableTerminalRule() const override { return true; } bool enableMachinePipeliner() const override; bool useDFAforSMS() const override { return false; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 54ba2f8..dbca5af 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -5081,17 +5081,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { unsigned MinNumRegsRequired = DstSize / 32; const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + bool UseAGPRForm = Info->selectAGPRFormMFMA(MinNumRegsRequired); + OpdsMapping[0] = - Info->getMinNumAGPRs() >= MinNumRegsRequired - ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) - : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + UseAGPRForm ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = - Info->getMinNumAGPRs() >= MinNumRegsRequired - ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) - : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); OpdsMapping[8] = getVGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI); OpdsMapping[10] = getVGPROpMapping(MI.getOperand(10).getReg(), MRI, *TRI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp index 0ea9add..b03d50f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -261,13 +261,6 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage( const Function *Callee = getCalleeFunction(*CalleeOp); - // Avoid crashing on undefined behavior with an illegal call to a - // kernel. If a callsite's calling convention doesn't match the - // function's, it's undefined behavior. If the callsite calling - // convention does match, that would have errored earlier. 
- if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv())) - report_fatal_error("invalid call to entry function"); - auto isSameFunction = [](const MachineFunction &MF, const Function *F) { return F == &MF.getFunction(); }; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f377b8a..da4bd87 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1040,6 +1040,8 @@ public: return true; } + bool enableTerminalRule() const override { return true; } + bool useAA() const override; bool enableSubRegLiveness() const override { diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index 3e256cc..0104085 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -29,7 +29,7 @@ using namespace llvm; #include "R600GenInstrInfo.inc" R600InstrInfo::R600InstrInfo(const R600Subtarget &ST) - : R600GenInstrInfo(ST, -1, -1), RI(), ST(ST) {} + : R600GenInstrInfo(ST, RI, -1, -1), RI(), ST(ST) {} bool R600InstrInfo::isVector(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h index 22e56b6..efd99db 100644 --- a/llvm/lib/Target/AMDGPU/R600Subtarget.h +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h @@ -126,6 +126,8 @@ public: return true; } + bool enableTerminalRule() const override { return true; } + bool enableSubRegLiveness() const override { return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9c74c65..6b397e0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -63,7 +63,8 @@ static cl::opt<bool> Fix16BitCopies( cl::ReallyHidden); SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST) - : AMDGPUGenInstrInfo(ST, AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), + : AMDGPUGenInstrInfo(ST, RI, AMDGPU::ADJCALLSTACKUP, + AMDGPU::ADJCALLSTACKDOWN), RI(ST), ST(ST) { SchedModel.init(&ST); } diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index f0d1117..00aae2c9 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -233,10 +233,11 @@ private: void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore, - AMDGPU::OpName OpName, Register DestReg) const; + const DebugLoc &DL, AMDGPU::OpName OpName, + Register DestReg) const; Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore, - AMDGPU::OpName OpName) const; + const DebugLoc &DL, AMDGPU::OpName OpName) const; unsigned read2Opcode(unsigned EltSize) const; unsigned read2ST64Opcode(unsigned EltSize) const; @@ -1367,10 +1368,9 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, // Paired. 
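Throughout the SILoadStoreOptimizer hunks below, the DebugLoc of a merged access switches from CI.I's location alone to a location merged across both paired instructions, so the second instruction's line info is not silently dropped. A small sketch of the pattern (assuming the usual llvm/CodeGen headers; not the in-tree code):

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// When two memory ops collapse into one, a merged location stays correct for
// both originals: getMergedLocation falls back to a common scope, or to an
// empty location, when the source lines disagree.
static DebugLoc mergedLocForPair(const MachineInstr &A, const MachineInstr &B) {
  return DebugLoc::getMergedLocation(A.getDebugLoc(), B.getDebugLoc());
}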
void SILoadStoreOptimizer::copyToDestRegs( CombineInfo &CI, CombineInfo &Paired, - MachineBasicBlock::iterator InsertBefore, AMDGPU::OpName OpName, - Register DestReg) const { + MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, + AMDGPU::OpName OpName, Register DestReg) const { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired); @@ -1398,9 +1398,9 @@ void SILoadStoreOptimizer::copyToDestRegs( Register SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore, + const DebugLoc &DL, AMDGPU::OpName OpName) const { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired); @@ -1456,7 +1456,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); Register DestReg = MRI->createVirtualRegister(SuperRC); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); Register BaseReg = AddrReg->getReg(); unsigned BaseSubReg = AddrReg->getSubReg(); @@ -1484,7 +1485,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, .addImm(0) // gds .cloneMergedMemRefs({&*CI.I, &*Paired.I}); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1541,7 +1542,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( (NewOffset0 != NewOffset1) && "Computed offset doesn't fit"); const MCInstrDesc &Write2Desc = TII->get(Opc); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); Register BaseReg = AddrReg->getReg(); unsigned BaseSubReg = AddrReg->getSubReg(); @@ -1582,7 +1584,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); + const unsigned Opcode = getNewOpcode(CI, Paired); const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); @@ -1607,7 +1611,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1618,7 +1622,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); + const unsigned Opcode = getNewOpcode(CI, Paired); const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); @@ -1639,7 +1645,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair( New.addImm(MergedOffset); New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - 
copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::sdst, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1650,7 +1656,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1680,7 +1688,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( .addImm(0) // swz .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1691,7 +1699,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1731,7 +1741,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( .addImm(0) // swz .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1742,12 +1752,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); Register SrcReg = - copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata); + copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata); auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .addReg(SrcReg, RegState::Kill); @@ -1789,7 +1800,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1807,7 +1820,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair( .addImm(CI.CPol) .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1818,12 +1831,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned 
Opcode = getNewOpcode(CI, Paired); Register SrcReg = - copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata); + copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata); auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)) @@ -2094,12 +2109,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); Register SrcReg = - copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata); + copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata); auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .addReg(SrcReg, RegState::Kill); diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 54f57e0..85adcab 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -513,6 +513,13 @@ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16", defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>; + +let HasClamp = 0, HasOMod = 0 in { +def V_TRANS_BF16_Profile : VOPProfile <[bf16, bf16, untyped, untyped]>; +def V_TRANS_BF16_t16_Profile : VOPProfile_True16 <VOP_BF16_BF16>; +def V_TRANS_BF16_fake16_Profile : VOPProfile_Fake16 <VOP_BF16_BF16>; +} + let TRANS = 1, SchedRW = [WriteTrans32] in { defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; @@ -527,14 +534,30 @@ defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>; } let SubtargetPredicate = HasBF16TransInsts in { -defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>; -defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>; -defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>; -defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>; -defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>; -defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>; -defm V_SIN_BF16 : VOP1Inst_t16 <"v_sin_bf16", VOP_BF16_BF16, AMDGPUsin>; -defm V_COS_BF16 : VOP1Inst_t16 <"v_cos_bf16", VOP_BF16_BF16, AMDGPUcos>; +defm V_TANH_BF16 : VOP1Inst_t16_with_profiles<"v_tanh_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + int_amdgcn_tanh>; +defm V_RCP_BF16 : VOP1Inst_t16_with_profiles<"v_rcp_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUrcp>; +defm V_SQRT_BF16 : VOP1Inst_t16_with_profiles<"v_sqrt_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + any_amdgcn_sqrt>; +defm V_RSQ_BF16 : VOP1Inst_t16_with_profiles<"v_rsq_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUrsq>; +defm V_LOG_BF16 : VOP1Inst_t16_with_profiles<"v_log_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUlogf16>; +defm V_EXP_BF16 : VOP1Inst_t16_with_profiles<"v_exp_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, 
V_TRANS_BF16_fake16_Profile, + AMDGPUexpf16>; +defm V_SIN_BF16 : VOP1Inst_t16_with_profiles<"v_sin_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUsin>; +defm V_COS_BF16 : VOP1Inst_t16_with_profiles<"v_cos_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUcos>; } } // End TRANS = 1, SchedRW = [WriteTrans32] defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 8325c62..6df91a1 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1357,8 +1357,12 @@ class VOPBinOpClampPat<SDPatternOperator node, Instruction inst, ValueType vt> : class getVOP3ModPat<VOPProfile P, SDPatternOperator node> { dag src0 = !if(P.HasOMod, - (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), - (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); + !if(P.HasClamp, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i32:$omod)), + !if(P.HasClamp, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers))); list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret (P.Src0VT src0), diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp index 05bcb35..2dec6ff 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp +++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp @@ -44,7 +44,8 @@ enum TSFlagsConstants { void ARCInstrInfo::anchor() {} ARCInstrInfo::ARCInstrInfo(const ARCSubtarget &ST) - : ARCGenInstrInfo(ST, ARC::ADJCALLSTACKDOWN, ARC::ADJCALLSTACKUP), RI(ST) {} + : ARCGenInstrInfo(ST, RI, ARC::ADJCALLSTACKDOWN, ARC::ADJCALLSTACKUP), + RI(ST) {} static bool isZeroImm(const MachineOperand &Op) { return Op.isImm() && Op.getImm() == 0; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 22769db..b466ca6f 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -107,8 +107,9 @@ static const ARM_MLxEntry ARM_MLxTable[] = { { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, }; -ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI) - : ARMGenInstrInfo(STI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), +ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI, + const ARMBaseRegisterInfo &TRI) + : ARMGenInstrInfo(STI, TRI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), Subtarget(STI) { for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) { if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 2869e7f..27f8e3b 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -44,7 +44,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { protected: // Can be only subclassed. - explicit ARMBaseInstrInfo(const ARMSubtarget &STI); + explicit ARMBaseInstrInfo(const ARMSubtarget &STI, + const ARMBaseRegisterInfo &TRI); void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, unsigned LoadImmOpc, unsigned LoadOpc) const; @@ -125,7 +126,11 @@ public: // if there is not such an opcode. 
virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0; - virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0; + const ARMBaseRegisterInfo &getRegisterInfo() const { + return static_cast<const ARMBaseRegisterInfo &>( + TargetInstrInfo::getRegisterInfo()); + } + const ARMSubtarget &getSubtarget() const { return Subtarget; } ScheduleHazardRecognizer * diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp index c684de7..f370547 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -25,7 +25,8 @@ #include "llvm/MC/MCInst.h" using namespace llvm; -ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI) {} +ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI, RI) {} /// Return the noop instruction to use for a noop. MCInst ARMInstrInfo::getNop() const { diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.h b/llvm/lib/Target/ARM/ARMInstrInfo.h index 178d7a2..9feaf14 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMInstrInfo.h @@ -35,7 +35,7 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). /// - const ARMRegisterInfo &getRegisterInfo() const override { return RI; } + const ARMRegisterInfo &getRegisterInfo() const { return RI; } private: void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 4a0883c..34baa31 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -377,6 +377,7 @@ public: bool isRWPI() const; bool useMachineScheduler() const { return UseMISched; } + bool enableTerminalRule() const override { return true; } bool useMachinePipeliner() const { return UseMIPipeliner; } bool hasMinSize() const { return OptMinSize; } bool isThumb1Only() const { return isThumb() && !hasThumb2(); } diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 4b8c2fd..f95ba6a4 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -24,7 +24,7 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(STI) {} + : ARMBaseInstrInfo(STI, RI), RI(STI) {} /// Return the noop instruction to use for a noop. MCInst Thumb1InstrInfo::getNop() const { diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/llvm/lib/Target/ARM/Thumb1InstrInfo.h index 68b326c..16350a6 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.h @@ -35,7 +35,7 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
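The ARM getRegisterInfo() change above is one instance of the pattern repeated across most files in this diff: the generated *GenInstrInfo base now receives the register info at construction, the virtual accessor goes away, and subclasses narrow the type with a static_cast. A freestanding sketch of the idiom with stand-in names (FooInstrInfo/FooRegInfo are not real LLVM classes):

struct RegInfoBase { virtual ~RegInfoBase() = default; };
struct FooRegInfo : RegInfoBase { /* target-specific state */ };

struct InstrInfoBase {
  explicit InstrInfoBase(const RegInfoBase &RI) : RI(RI) {}
  const RegInfoBase &getRegisterInfo() const { return RI; }

private:
  const RegInfoBase &RI;
};

struct FooInstrInfo : InstrInfoBase {
  // Handing RI to the base before RI is constructed is well-defined: only
  // its address is stored, and the reference is not used until later.
  FooInstrInfo() : InstrInfoBase(RI) {}

  // Non-virtual shadowing returns the derived type, as the ARM and Mips
  // hunks do with static_cast.
  const FooRegInfo &getRegisterInfo() const {
    return static_cast<const FooRegInfo &>(InstrInfoBase::getRegisterInfo());
  }

private:
  FooRegInfo RI;
};

This also lets subtargets drop their own RegInfo member and forward to InstrInfo.getRegisterInfo(), as the Hexagon, LoongArch, and RISC-V hunks do.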
/// - const ThumbRegisterInfo &getRegisterInfo() const override { return RI; } + const ThumbRegisterInfo &getRegisterInfo() const { return RI; } void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index f5653d4..b66e407 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -46,7 +46,7 @@ PreferNoCSEL("prefer-no-csel", cl::Hidden, cl::init(false)); Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(STI) {} + : ARMBaseInstrInfo(STI, RI), RI(STI) {} /// Return the noop instruction to use for a noop. MCInst Thumb2InstrInfo::getNop() const { diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index 1b0bf2d..59ef39d 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -58,7 +58,7 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). /// - const ThumbRegisterInfo &getRegisterInfo() const override { return RI; } + const ThumbRegisterInfo &getRegisterInfo() const { return RI; } MachineInstr *optimizeSelect(MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &SeenMIs, diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index ce99085..5e247cb 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -30,8 +30,8 @@ namespace llvm { AVRInstrInfo::AVRInstrInfo(const AVRSubtarget &STI) - : AVRGenInstrInfo(STI, AVR::ADJCALLSTACKDOWN, AVR::ADJCALLSTACKUP), RI(), - STI(STI) {} + : AVRGenInstrInfo(STI, RI, AVR::ADJCALLSTACKDOWN, AVR::ADJCALLSTACKUP), + RI(), STI(STI) {} void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp index 409f8b4..0e56e65 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp +++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; BPFInstrInfo::BPFInstrInfo(const BPFSubtarget &STI) - : BPFGenInstrInfo(STI, BPF::ADJCALLSTACKDOWN, BPF::ADJCALLSTACKUP) {} + : BPFGenInstrInfo(STI, RI, BPF::ADJCALLSTACKDOWN, BPF::ADJCALLSTACKUP) {} void BPFInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp index 619a797..34a7de8 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp @@ -25,7 +25,7 @@ using namespace llvm; #include "CSKYGenInstrInfo.inc" CSKYInstrInfo::CSKYInstrInfo(const CSKYSubtarget &STI) - : CSKYGenInstrInfo(STI, CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP), + : CSKYGenInstrInfo(STI, RI, CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP), STI(STI) { v2sf = STI.hasFPUv2SingleFloat(); v2df = STI.hasFPUv2DoubleFloat(); diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp index 6579d34..057d87b 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp @@ -10,6 +10,7 @@ #include "DirectX.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/DXILResource.h" +#include "llvm/Frontend/HLSL/HLSLResource.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include 
"llvm/IR/IRBuilder.h" @@ -20,6 +21,7 @@ #include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/User.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Transforms/Utils/ValueMapper.h" #define DEBUG_TYPE "dxil-resource-access" @@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset, APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (GEP->accumulateConstantOffset(DL, ConstantOffset)) { APInt Scaled = ConstantOffset.udiv(ScalarSize); - return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled); + return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled); } - auto IndexIt = GEP->idx_begin(); - assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 && - "GEP is not indexing through pointer"); - ++IndexIt; - Value *Offset = *IndexIt; - assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP"); - return Offset; + unsigned NumIndices = GEP->getNumIndices(); + + // If we have a single index we're indexing into a top level array. This + // generally only happens with cbuffers. + if (NumIndices == 1) + return *GEP->idx_begin(); + + // If we have two indices, this should be a simple access through a pointer. + if (NumIndices == 2) { + auto IndexIt = GEP->idx_begin(); + assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 && + "GEP is not indexing through pointer"); + ++IndexIt; + Value *Offset = *IndexIt; + assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP"); + return Offset; + } + + llvm_unreachable("Unhandled GEP structure for resource access"); } static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI, @@ -171,6 +185,127 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) { LI->replaceAllUsesWith(V); } +namespace { +/// Helper for building a `load.cbufferrow` intrinsic given a simple type. +struct CBufferRowIntrin { + Intrinsic::ID IID; + Type *RetTy; + unsigned int EltSize; + unsigned int NumElts; + + CBufferRowIntrin(const DataLayout &DL, Type *Ty) { + assert(Ty == Ty->getScalarType() && "Expected scalar type"); + + switch (DL.getTypeSizeInBits(Ty)) { + case 16: + IID = Intrinsic::dx_resource_load_cbufferrow_8; + RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty); + EltSize = 2; + NumElts = 8; + break; + case 32: + IID = Intrinsic::dx_resource_load_cbufferrow_4; + RetTy = StructType::get(Ty, Ty, Ty, Ty); + EltSize = 4; + NumElts = 4; + break; + case 64: + IID = Intrinsic::dx_resource_load_cbufferrow_2; + RetTy = StructType::get(Ty, Ty); + EltSize = 8; + NumElts = 2; + break; + default: + llvm_unreachable("Only 16, 32, and 64 bit types supported"); + } + } +}; +} // namespace + +static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset, + dxil::ResourceTypeInfo &RTI) { + const DataLayout &DL = LI->getDataLayout(); + + Type *Ty = LI->getType(); + assert(!isa<StructType>(Ty) && "Structs not handled yet"); + CBufferRowIntrin Intrin(DL, Ty->getScalarType()); + + StringRef Name = LI->getName(); + Value *Handle = II->getOperand(0); + + IRBuilder<> Builder(LI); + + ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1)); + assert(GlobalOffset && "CBuffer getpointer index must be constant"); + + unsigned int FixedOffset = GlobalOffset->getZExtValue(); + // If we have a further constant offset we can just fold it in to the fixed + // offset. 
+ if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) { + FixedOffset += ConstOffset->getZExtValue(); + Offset = nullptr; + } + + Value *CurrentRow = ConstantInt::get( + Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes); + unsigned int CurrentIndex = + (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize; + + assert(!(CurrentIndex && Offset) && + "Dynamic indexing into elements of cbuffer rows is not supported"); + // At this point if we have a non-constant offset it has to be an array + // offset, so we can assume that it's a multiple of the row size. + if (Offset) + CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset; + + auto *CBufLoad = Builder.CreateIntrinsic( + Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load"); + auto *Elt = + Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract"); + + // At this point we've loaded the first scalar of our result, but our original + // type may have been a vector. + unsigned int Remaining = + ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; + if (Remaining == 0) { + // We only have a single element, so we're done. + Value *Result = Elt; + + // However, if we loaded a <1 x T>, then we need to adjust the type. + if (auto *VT = dyn_cast<FixedVectorType>(Ty)) { + assert(VT->getNumElements() == 1 && "Can't have multiple elements here"); + Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result, + Builder.getInt32(0), Name); + } + LI->replaceAllUsesWith(Result); + return; + } + + // Walk each element and extract it, wrapping to new rows as needed. + SmallVector<Value *> Extracts{Elt}; + while (Remaining--) { + CurrentIndex %= Intrin.NumElts; + + if (CurrentIndex == 0) { + CurrentRow = Builder.CreateAdd(CurrentRow, + ConstantInt::get(Builder.getInt32Ty(), 1)); + CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID, + {Handle, CurrentRow}, nullptr, + Name + ".load"); + } + + Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, + Name + ".extract")); + } + + // Finally, we build up the original loaded value. 
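One concrete trace of the wrap-around loop above and the rebuild that follows (hypothetical layout, same 16-byte-row assumption):

// Example: a float4 whose FixedOffset lands at row 1, index 2. The first
// load.cbufferrow.4 supplies result elements 0..1 from indices 2..3;
// CurrentIndex then wraps to 0, a second load of row 2 is emitted, and its
// indices 0..1 supply elements 2..3. The insertelement chain below stitches
// the four extracted scalars back into the vector type of the original load.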
+ Value *Result = PoisonValue::get(Ty); + for (int I = 0, E = Extracts.size(); I < E; ++I) + Result = Builder.CreateInsertElement( + Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I)); + LI->replaceAllUsesWith(Result); +} + static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, dxil::ResourceTypeInfo &RTI) { switch (RTI.getResourceKind()) { @@ -179,6 +314,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, case dxil::ResourceKind::RawBuffer: case dxil::ResourceKind::StructuredBuffer: return createRawLoad(II, LI, Offset); + case dxil::ResourceKind::CBuffer: + return createCBufferLoad(II, LI, Offset, RTI); case dxil::ResourceKind::Texture1D: case dxil::ResourceKind::Texture2D: case dxil::ResourceKind::Texture2DMS: @@ -190,9 +327,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, case dxil::ResourceKind::TextureCubeArray: case dxil::ResourceKind::FeedbackTexture2D: case dxil::ResourceKind::FeedbackTexture2DArray: - case dxil::ResourceKind::CBuffer: case dxil::ResourceKind::TBuffer: - // TODO: handle these + reportFatalUsageError("Load not yet implemented for resource type"); return; case dxil::ResourceKind::Sampler: case dxil::ResourceKind::RTAccelerationStructure: diff --git a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp index bb2efa4..401881d 100644 --- a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp @@ -19,6 +19,6 @@ using namespace llvm; DirectXInstrInfo::DirectXInstrInfo(const DirectXSubtarget &STI) - : DirectXGenInstrInfo(STI) {} + : DirectXGenInstrInfo(STI, RI) {} DirectXInstrInfo::~DirectXInstrInfo() {} diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 55bafde..dd9f2fa 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -118,9 +118,9 @@ const int Hexagon_ADDI_OFFSET_MIN = -32768; void HexagonInstrInfo::anchor() {} HexagonInstrInfo::HexagonInstrInfo(const HexagonSubtarget &ST) - : HexagonGenInstrInfo(ST, Hexagon::ADJCALLSTACKDOWN, + : HexagonGenInstrInfo(ST, RegInfo, Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), - Subtarget(ST) {} + RegInfo(ST.getHwMode()), Subtarget(ST) {} namespace llvm { namespace HexagonFUnits { diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 48adf82..7a0c77c 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -23,6 +23,8 @@ #include <cstdint> #include <vector> +#include "HexagonRegisterInfo.h" + #define GET_INSTRINFO_HEADER #include "HexagonGenInstrInfo.inc" @@ -36,6 +38,7 @@ class MachineOperand; class TargetRegisterInfo; class HexagonInstrInfo : public HexagonGenInstrInfo { + const HexagonRegisterInfo RegInfo; const HexagonSubtarget &Subtarget; enum BundleAttribute { @@ -47,6 +50,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { public: explicit HexagonInstrInfo(const HexagonSubtarget &ST); + const HexagonRegisterInfo &getRegisterInfo() const { return RegInfo; } + /// TargetInstrInfo overrides. 
/// If the specified machine instruction is a direct diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index a3c8a88..66c8b0a 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -76,8 +76,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, OptLevel(TM.getOptLevel()), CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), - RegInfo(getHwMode()), TLInfo(TM, *this), - InstrItins(getInstrItineraryForCPU(CPUString)) { + TLInfo(TM, *this), InstrItins(getInstrItineraryForCPU(CPUString)) { Hexagon_MC::addArchSubtarget(this, FS); // Beware of the default constructor of InstrItineraryData: it will // reset all members to 0. diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 995f66d..30794f6 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -100,7 +100,6 @@ private: // The following objects can use the TargetTriple, so they must be // declared after it. HexagonInstrInfo InstrInfo; - HexagonRegisterInfo RegInfo; HexagonTargetLowering TLInfo; HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; @@ -122,7 +121,7 @@ public: } const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; } const HexagonRegisterInfo *getRegisterInfo() const override { - return &RegInfo; + return &InstrInfo.getRegisterInfo(); } const HexagonTargetLowering *getTargetLowering() const override { return &TLInfo; diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp index 02ed1001..b3d2856 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -27,7 +27,8 @@ using namespace llvm; #include "LanaiGenInstrInfo.inc" LanaiInstrInfo::LanaiInstrInfo(const LanaiSubtarget &STI) - : LanaiGenInstrInfo(STI, Lanai::ADJCALLSTACKDOWN, Lanai::ADJCALLSTACKUP), + : LanaiGenInstrInfo(STI, RegisterInfo, Lanai::ADJCALLSTACKDOWN, + Lanai::ADJCALLSTACKUP), RegisterInfo() {} void LanaiInstrInfo::copyPhysReg(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index 9a35df2..5eb3bd6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -26,9 +26,9 @@ using namespace llvm; #include "LoongArchGenInstrInfo.inc" LoongArchInstrInfo::LoongArchInstrInfo(const LoongArchSubtarget &STI) - : LoongArchGenInstrInfo(STI, LoongArch::ADJCALLSTACKDOWN, + : LoongArchGenInstrInfo(STI, RegInfo, LoongArch::ADJCALLSTACKDOWN, LoongArch::ADJCALLSTACKUP), - STI(STI) {} + RegInfo(STI.getHwMode()), STI(STI) {} MCInst LoongArchInstrInfo::getNop() const { return MCInstBuilder(LoongArch::ANDI) diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index e61314c..d43d229 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -24,9 +24,13 @@ namespace llvm { class LoongArchSubtarget; class LoongArchInstrInfo : public LoongArchGenInstrInfo { + const LoongArchRegisterInfo RegInfo; + public: explicit LoongArchInstrInfo(const LoongArchSubtarget &STI); + const LoongArchRegisterInfo &getRegisterInfo() const { return RegInfo; } + MCInst getNop() const override; void copyPhysReg(MachineBasicBlock 
&MBB, MachineBasicBlock::iterator MBBI, diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp index 3acbe49..76a8ba1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp @@ -95,4 +95,4 @@ LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU, : LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS), FrameLowering( initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)), - InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {} + InstrInfo(*this), TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h index 5e12baf..2beff07 100644 --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h @@ -45,7 +45,6 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; LoongArchFrameLowering FrameLowering; LoongArchInstrInfo InstrInfo; - LoongArchRegisterInfo RegInfo; LoongArchTargetLowering TLInfo; SelectionDAGTargetInfo TSInfo; @@ -78,7 +77,7 @@ public: } const LoongArchInstrInfo *getInstrInfo() const override { return &InstrInfo; } const LoongArchRegisterInfo *getRegisterInfo() const override { - return &RegInfo; + return &InstrInfo.getRegisterInfo(); } const LoongArchTargetLowering *getTargetLowering() const override { return &TLInfo; diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp index 65b4820..af053b8 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -26,7 +26,8 @@ using namespace llvm; void MSP430InstrInfo::anchor() {} MSP430InstrInfo::MSP430InstrInfo(const MSP430Subtarget &STI) - : MSP430GenInstrInfo(STI, MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP), + : MSP430GenInstrInfo(STI, RI, MSP430::ADJCALLSTACKDOWN, + MSP430::ADJCALLSTACKUP), RI() {} void MSP430InstrInfo::storeRegToStackSlot( diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp index aa94f54..69b96cf 100644 --- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp @@ -37,11 +37,7 @@ using namespace llvm; #define DEBUG_TYPE "mips16-instrinfo" Mips16InstrInfo::Mips16InstrInfo(const MipsSubtarget &STI) - : MipsInstrInfo(STI, Mips::Bimm16), RI(STI) {} - -const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const { - return RI; -} + : MipsInstrInfo(STI, RI, Mips::Bimm16), RI(STI) {} /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.h b/llvm/lib/Target/Mips/Mips16InstrInfo.h index 1058e8c..2834fd3 100644 --- a/llvm/lib/Target/Mips/Mips16InstrInfo.h +++ b/llvm/lib/Target/Mips/Mips16InstrInfo.h @@ -30,7 +30,7 @@ class Mips16InstrInfo : public MipsInstrInfo { public: explicit Mips16InstrInfo(const MipsSubtarget &STI); - const MipsRegisterInfo &getRegisterInfo() const override; + const Mips16RegisterInfo &getRegisterInfo() const { return RI; } /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp index bffdffa..c879c46 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ 
b/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -39,8 +39,9 @@ using namespace llvm; // Pin the vtable to this file. void MipsInstrInfo::anchor() {} -MipsInstrInfo::MipsInstrInfo(const MipsSubtarget &STI, unsigned UncondBr) - : MipsGenInstrInfo(STI, Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), +MipsInstrInfo::MipsInstrInfo(const MipsSubtarget &STI, + const MipsRegisterInfo &RI, unsigned UncondBr) + : MipsGenInstrInfo(STI, RI, Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), Subtarget(STI), UncondBrOpc(UncondBr) {} const MipsInstrInfo *MipsInstrInfo::create(MipsSubtarget &STI) { diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h index 2337ae7..fc94248 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.h +++ b/llvm/lib/Target/Mips/MipsInstrInfo.h @@ -55,7 +55,8 @@ public: BT_Indirect // One indirct branch. }; - explicit MipsInstrInfo(const MipsSubtarget &STI, unsigned UncondBrOpc); + explicit MipsInstrInfo(const MipsSubtarget &STI, const MipsRegisterInfo &RI, + unsigned UncondBrOpc); MCInst getNop() const override; @@ -130,7 +131,10 @@ public: /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). - virtual const MipsRegisterInfo &getRegisterInfo() const = 0; + const MipsRegisterInfo &getRegisterInfo() const { + return static_cast<const MipsRegisterInfo &>( + TargetInstrInfo::getRegisterInfo()); + } virtual unsigned getOppositeBranchOpc(unsigned Opc) const = 0; diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index dbdbb17..517f489 100644 --- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -28,11 +28,7 @@ static unsigned getUnconditionalBranch(const MipsSubtarget &STI) { } MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI) - : MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI(STI) {} - -const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { - return RI; -} + : MipsInstrInfo(STI, RI, getUnconditionalBranch(STI)), RI(STI) {} /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/llvm/lib/Target/Mips/MipsSEInstrInfo.h index 2b4f55d..0a7a0e5 100644 --- a/llvm/lib/Target/Mips/MipsSEInstrInfo.h +++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.h @@ -24,7 +24,7 @@ class MipsSEInstrInfo : public MipsInstrInfo { public: explicit MipsSEInstrInfo(const MipsSubtarget &STI); - const MipsRegisterInfo &getRegisterInfo() const override; + const MipsSERegisterInfo &getRegisterInfo() const { return RI; } /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 6840c7a..db2d96f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; void NVPTXInstrInfo::anchor() {} NVPTXInstrInfo::NVPTXInstrInfo(const NVPTXSubtarget &STI) - : NVPTXGenInstrInfo(STI), RegInfo() {} + : NVPTXGenInstrInfo(STI, RegInfo), RegInfo() {} void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp 
b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 3014aa6..8d9d4c7 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -89,7 +89,7 @@ static cl::opt<bool> EnableFMARegPressureReduction( void PPCInstrInfo::anchor() {} PPCInstrInfo::PPCInstrInfo(const PPCSubtarget &STI) - : PPCGenInstrInfo(STI, PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP, + : PPCGenInstrInfo(STI, RI, PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP, /* CatchRetOpcode */ -1, STI.isPPC64() ? PPC::BLR8 : PPC::BLR), Subtarget(STI), RI(STI.getTargetMachine()) {} diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index b05956b..ce8dd3b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -82,8 +82,9 @@ namespace llvm::RISCV { } // end namespace llvm::RISCV RISCVInstrInfo::RISCVInstrInfo(const RISCVSubtarget &STI) - : RISCVGenInstrInfo(STI, RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP), - STI(STI) {} + : RISCVGenInstrInfo(STI, RegInfo, RISCV::ADJCALLSTACKDOWN, + RISCV::ADJCALLSTACKUP), + RegInfo(STI.getHwMode()), STI(STI) {} #define GET_INSTRINFO_HELPERS #include "RISCVGenInstrInfo.inc" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index c5eddb9..800af26 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -79,10 +79,13 @@ enum RISCVMachineCombinerPattern : unsigned { }; class RISCVInstrInfo : public RISCVGenInstrInfo { + const RISCVRegisterInfo RegInfo; public: explicit RISCVInstrInfo(const RISCVSubtarget &STI); + const RISCVRegisterInfo &getRegisterInfo() const { return RegInfo; } + MCInst getNop() const override; Register isLoadFromStackSlot(const MachineInstr &MI, diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index 715ac4c..3b43be3 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -104,7 +104,7 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax), FrameLowering( initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)), - InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) { + InstrInfo(*this), TLInfo(TM, *this) { TSInfo = std::make_unique<RISCVSelectionDAGInfo>(); } diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 4b4fc8f..d5ffa6c 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -112,7 +112,6 @@ private: RISCVFrameLowering FrameLowering; RISCVInstrInfo InstrInfo; - RISCVRegisterInfo RegInfo; RISCVTargetLowering TLInfo; /// Initializes using the passed in CPU and feature strings so that we can @@ -140,7 +139,7 @@ public: } const RISCVInstrInfo *getInstrInfo() const override { return &InstrInfo; } const RISCVRegisterInfo *getRegisterInfo() const override { - return &RegInfo; + return &InstrInfo.getRegisterInfo(); } const RISCVTargetLowering *getTargetLowering() const override { return &TLInfo; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp index ba95ad8..4f8bf43 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp @@ -24,7 +24,7 @@ using namespace llvm; SPIRVInstrInfo::SPIRVInstrInfo(const SPIRVSubtarget &STI) - : SPIRVGenInstrInfo(STI) {} + : SPIRVGenInstrInfo(STI, RI) {} bool 
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
index ba95ad8..4f8bf43 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
 
 SPIRVInstrInfo::SPIRVInstrInfo(const SPIRVSubtarget &STI)
-    : SPIRVGenInstrInfo(STI) {}
+    : SPIRVGenInstrInfo(STI, RI) {}
 
 bool SPIRVInstrInfo::isConstantInstr(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index fbb127d..b8cd9c1 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -249,17 +249,18 @@ static InstrSignature instrToSignature(const MachineInstr &MI,
   InstrSignature Signature{MI.getOpcode()};
   for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
     // The only decorations that can be applied more than once to a given <id>
-    // or structure member are UserSemantic(5635), CacheControlLoadINTEL (6442),
-    // and CacheControlStoreINTEL (6443). For all the rest of decorations, we
-    // will only add to the signature the Opcode, the id to which it applies,
-    // and the decoration id, disregarding any decoration flags. This will
-    // ensure that any subsequent decoration with the same id will be deemed as
-    // a duplicate. Then, at the call site, we will be able to handle duplicates
-    // in the best way.
+    // or structure member are FuncParamAttr (38), UserSemantic (5635),
+    // CacheControlLoadINTEL (6442), and CacheControlStoreINTEL (6443). For all
+    // the rest of decorations, we will only add to the signature the Opcode,
+    // the id to which it applies, and the decoration id, disregarding any
+    // decoration flags. This will ensure that any subsequent decoration with
+    // the same id will be deemed as a duplicate. Then, at the call site, we
+    // will be able to handle duplicates in the best way.
     unsigned Opcode = MI.getOpcode();
     if ((Opcode == SPIRV::OpDecorate) && i >= 2) {
       unsigned DecorationID = MI.getOperand(1).getImm();
-      if (DecorationID != SPIRV::Decoration::UserSemantic &&
+      if (DecorationID != SPIRV::Decoration::FuncParamAttr &&
+          DecorationID != SPIRV::Decoration::UserSemantic &&
           DecorationID != SPIRV::Decoration::CacheControlLoadINTEL &&
           DecorationID != SPIRV::Decoration::CacheControlStoreINTEL)
         continue;
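The SPIRVModuleAnalysis hunk widens the set of decorations allowed to repeat on one <id>. A self-contained sketch of the dedup idea it relies on: the decoration numbers quoted in the comment above come from the patch itself, while the OpDecorate opcode value and the sample operands below are assumed for illustration only.

    #include <cstdint>
    #include <set>
    #include <vector>

    using InstrSig = std::vector<uint64_t>;

    constexpr uint64_t kOpDecorate = 71; // illustrative opcode value

    static bool isRepeatableDecoration(uint64_t ID) {
      return ID == 38 /*FuncParamAttr*/ || ID == 5635 /*UserSemantic*/ ||
             ID == 6442 /*CacheControlLoadINTEL*/ ||
             ID == 6443 /*CacheControlStoreINTEL*/;
    }

    // Non-repeatable decorations collapse to (opcode, target, decoration),
    // so a second decoration of the same kind on the same <id> yields an
    // identical signature and is rejected by the set as a duplicate;
    // repeatable ones keep their operands, so distinct flags survive.
    static InstrSig signDecoration(uint64_t Target, uint64_t Decoration,
                                   const std::vector<uint64_t> &Flags) {
      InstrSig Sig{kOpDecorate, Target, Decoration};
      if (isRepeatableDecoration(Decoration))
        Sig.insert(Sig.end(), Flags.begin(), Flags.end());
      return Sig;
    }

    int main() {
      std::set<InstrSig> Seen;
      // Two FuncParamAttr decorations with different operands both survive.
      Seen.insert(signDecoration(/*id*/ 7, 38, {0}));
      Seen.insert(signDecoration(/*id*/ 7, 38, {1}));
      // A repeated non-repeatable decoration (ID illustrative) is deduped.
      Seen.insert(signDecoration(/*id*/ 7, 19, {}));
      Seen.insert(signDecoration(/*id*/ 7, 19, {}));
      return Seen.size() == 3 ? 0 : 1;
    }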
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index f66eb9d..fcd6cd7 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -38,8 +38,8 @@ static cl::opt<unsigned>
 void SparcInstrInfo::anchor() {}
 
 SparcInstrInfo::SparcInstrInfo(const SparcSubtarget &ST)
-    : SparcGenInstrInfo(ST, SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(ST),
-      Subtarget(ST) {}
+    : SparcGenInstrInfo(ST, RI, SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
+      RI(ST), Subtarget(ST) {}
 
 /// isLoadFromStackSlot - If the specified machine instruction is a direct
 /// load from a stack slot, return the virtual or physical register number of
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2e21f27..23e7e7e 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -60,7 +60,7 @@ static uint64_t allOnes(unsigned int Count) {
 void SystemZInstrInfo::anchor() {}
 
 SystemZInstrInfo::SystemZInstrInfo(const SystemZSubtarget &sti)
-    : SystemZGenInstrInfo(sti, -1, -1),
+    : SystemZGenInstrInfo(sti, RI, -1, -1),
       RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister(),
          sti.getHwMode()),
       STI(sti) {}
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index d5e804a..bae703b 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
 void VEInstrInfo::anchor() {}
 
 VEInstrInfo::VEInstrInfo(const VESubtarget &ST)
-    : VEGenInstrInfo(ST, VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI() {}
+    : VEGenInstrInfo(ST, RI, VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI() {}
 
 static bool IsIntegerCC(unsigned CC) { return (CC < VECC::CC_AF); }
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 343d90e..8b4e4fb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
 #include "WebAssemblyGenInstrInfo.inc"
 
 WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
-    : WebAssemblyGenInstrInfo(STI, WebAssembly::ADJCALLSTACKDOWN,
+    : WebAssemblyGenInstrInfo(STI, RI, WebAssembly::ADJCALLSTACKDOWN,
                               WebAssembly::ADJCALLSTACKUP,
                               WebAssembly::CATCHRET),
       RI(STI.getTargetTriple()) {}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 05a854a..5bce539 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -635,6 +635,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FROUNDEVEN, VT, Action);
     setOperationAction(ISD::FTRUNC, VT, Action);
     setOperationAction(ISD::FLDEXP, VT, Action);
+    setOperationAction(ISD::FSINCOSPI, VT, Action);
   };
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 6b2a7a4..2c6d1af 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -85,7 +85,7 @@ static cl::opt<unsigned> UndefRegClearance(
 void X86InstrInfo::anchor() {}
 
 X86InstrInfo::X86InstrInfo(const X86Subtarget &STI)
-    : X86GenInstrInfo(STI,
+    : X86GenInstrInfo(STI, RI,
                       (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
                                                : X86::ADJCALLSTACKDOWN32),
                       (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
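Aside from the constructor threading, the X86ISelLowering hunk above adds FSINCOSPI to an existing grouping lambda rather than to each value type individually. A sketch of that idiom with stand-in types (the real code uses ISD opcodes, MVTs, and legalize actions from llvm/CodeGen; the names below are placeholders):

    #include <utility>
    #include <vector>

    enum class ISDOp { FROUNDEVEN, FTRUNC, FLDEXP, FSINCOSPI };
    enum class LegalizeAction { Legal, Expand, Custom };

    struct OpActionTable {
      std::vector<std::pair<ISDOp, LegalizeAction>> Actions;
      void set(ISDOp Op, LegalizeAction A) { Actions.push_back({Op, A}); }
    };

    int main() {
      OpActionTable Table;
      // Grouping lambda: one call site per value type applies the same
      // action to the whole family, so FSINCOSPI is a one-line addition.
      auto setGroup = [&](LegalizeAction Action) {
        Table.set(ISDOp::FROUNDEVEN, Action);
        Table.set(ISDOp::FTRUNC, Action);
        Table.set(ISDOp::FLDEXP, Action);
        Table.set(ISDOp::FSINCOSPI, Action); // new in this patch
      };
      setGroup(LegalizeAction::Expand);
      return Table.Actions.size() == 4 ? 0 : 1;
    }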
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 868f413..4f5aadc 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -419,6 +419,8 @@ public:
   /// Enable the MachineScheduler pass for all X86 subtargets.
   bool enableMachineScheduler() const override { return true; }
 
+  bool enableTerminalRule() const override { return true; }
+
   bool enableEarlyIfConversion() const override;
 
   void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index 1a9133a..80fda34 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -43,7 +43,7 @@ namespace XCore {
 void XCoreInstrInfo::anchor() {}
 
 XCoreInstrInfo::XCoreInstrInfo(const XCoreSubtarget &ST)
-    : XCoreGenInstrInfo(ST, XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
+    : XCoreGenInstrInfo(ST, RI, XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
      RI() {}
 
 static bool isZeroImm(const MachineOperand &op) {
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
index be69cef..6bbebde 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
@@ -48,7 +48,8 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {
 }
 
 XtensaInstrInfo::XtensaInstrInfo(const XtensaSubtarget &STI)
-    : XtensaGenInstrInfo(STI, Xtensa::ADJCALLSTACKDOWN, Xtensa::ADJCALLSTACKUP),
+    : XtensaGenInstrInfo(STI, RI, Xtensa::ADJCALLSTACKDOWN,
+                         Xtensa::ADJCALLSTACKUP),
      RI(STI), STI(STI) {}
 
 Register XtensaInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
