Diffstat (limited to 'llvm/lib/Target')
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 71
 llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 86
 llvm/lib/Target/AArch64/AArch64Subtarget.h | 2
 llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 12
 llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp | 7
 llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2
 llvm/lib/Target/AMDGPU/R600InstrInfo.cpp | 2
 llvm/lib/Target/AMDGPU/R600Subtarget.h | 2
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3
 llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 66
 llvm/lib/Target/AMDGPU/VOP1Instructions.td | 39
 llvm/lib/Target/AMDGPU/VOPInstructions.td | 8
 llvm/lib/Target/ARC/ARCInstrInfo.cpp | 3
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 5
 llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 9
 llvm/lib/Target/ARM/ARMInstrInfo.cpp | 3
 llvm/lib/Target/ARM/ARMInstrInfo.h | 2
 llvm/lib/Target/ARM/ARMSubtarget.h | 1
 llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 2
 llvm/lib/Target/ARM/Thumb1InstrInfo.h | 2
 llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 2
 llvm/lib/Target/ARM/Thumb2InstrInfo.h | 2
 llvm/lib/Target/AVR/AVRInstrInfo.cpp | 4
 llvm/lib/Target/BPF/BPFInstrInfo.cpp | 2
 llvm/lib/Target/CSKY/CSKYInstrInfo.cpp | 2
 llvm/lib/Target/DirectX/DXILResourceAccess.cpp | 156
 llvm/lib/Target/DirectX/DirectXInstrInfo.cpp | 2
 llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 4
 llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 5
 llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 3
 llvm/lib/Target/Hexagon/HexagonSubtarget.h | 3
 llvm/lib/Target/Lanai/LanaiInstrInfo.cpp | 3
 llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp | 4
 llvm/lib/Target/LoongArch/LoongArchInstrInfo.h | 4
 llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp | 2
 llvm/lib/Target/LoongArch/LoongArchSubtarget.h | 3
 llvm/lib/Target/MSP430/MSP430InstrInfo.cpp | 3
 llvm/lib/Target/Mips/Mips16InstrInfo.cpp | 6
 llvm/lib/Target/Mips/Mips16InstrInfo.h | 2
 llvm/lib/Target/Mips/MipsInstrInfo.cpp | 5
 llvm/lib/Target/Mips/MipsInstrInfo.h | 8
 llvm/lib/Target/Mips/MipsSEInstrInfo.cpp | 6
 llvm/lib/Target/Mips/MipsSEInstrInfo.h | 2
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 2
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 2
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 5
 llvm/lib/Target/RISCV/RISCVInstrInfo.h | 3
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 2
 llvm/lib/Target/RISCV/RISCVSubtarget.h | 3
 llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp | 2
 llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 17
 llvm/lib/Target/Sparc/SparcInstrInfo.cpp | 4
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 2
 llvm/lib/Target/VE/VEInstrInfo.cpp | 2
 llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp | 2
 llvm/lib/Target/X86/X86ISelLowering.cpp | 1
 llvm/lib/Target/X86/X86InstrInfo.cpp | 2
 llvm/lib/Target/X86/X86Subtarget.h | 2
 llvm/lib/Target/XCore/XCoreInstrInfo.cpp | 2
 llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp | 3
60 files changed, 471 insertions, 145 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 4b40733..66e4949 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -91,7 +91,7 @@ static cl::opt<unsigned> GatherOptSearchLimit(
"machine-combiner gather pattern optimization"));
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
- : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN,
+ : AArch64GenInstrInfo(STI, RI, AArch64::ADJCALLSTACKDOWN,
AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
@@ -1780,6 +1780,16 @@ static unsigned sForm(MachineInstr &Instr) {
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::ANDSXrs:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
return Instr.getOpcode();
case AArch64::ADDWrr:
@@ -1810,6 +1820,22 @@ static unsigned sForm(MachineInstr &Instr) {
return AArch64::ANDSWri;
case AArch64::ANDXri:
return AArch64::ANDSXri;
+ case AArch64::ANDWrr:
+ return AArch64::ANDSWrr;
+ case AArch64::ANDWrs:
+ return AArch64::ANDSWrs;
+ case AArch64::ANDXrr:
+ return AArch64::ANDSXrr;
+ case AArch64::ANDXrs:
+ return AArch64::ANDSXrs;
+ case AArch64::BICWrr:
+ return AArch64::BICSWrr;
+ case AArch64::BICXrr:
+ return AArch64::BICSXrr;
+ case AArch64::BICWrs:
+ return AArch64::BICSWrs;
+ case AArch64::BICXrs:
+ return AArch64::BICSXrs;
}
}
@@ -1947,6 +1973,25 @@ static bool isSUBSRegImm(unsigned Opcode) {
return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}
+static bool isANDOpcode(MachineInstr &MI) {
+ unsigned Opc = sForm(MI);
+ switch (Opc) {
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::ANDSXrs:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
@@ -1984,7 +2029,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
// 1) MI and CmpInstr set N and V to the same value.
// 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
// signed overflow occurs, so CmpInstr could still be simplified away.
- if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
+ // Note that Ands and Bics instructions always clear the V flag.
+ if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
return false;
AccessKind AccessToCheck = AK_Write;
@@ -9590,6 +9636,27 @@ AArch64InstrInfo::getOutliningCandidateInfo(
unsigned NumBytesToCreateFrame = 0;
+ // Avoid splitting ADRP ADD/LDR pair into outlined functions.
+ // These instructions are fused together by the scheduler.
+ // Any candidate where ADRP is the last instruction should be rejected
+ // as that will lead to splitting ADRP pair.
+ MachineInstr &LastMI = RepeatedSequenceLocs[0].back();
+ MachineInstr &FirstMI = RepeatedSequenceLocs[0].front();
+ if (LastMI.getOpcode() == AArch64::ADRP &&
+ (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 &&
+ (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) {
+ return std::nullopt;
+ }
+
+ // Similarly any candidate where the first instruction is ADD/LDR with a
+ // page offset should be rejected to avoid ADRP splitting.
+ if ((FirstMI.getOpcode() == AArch64::ADDXri ||
+ FirstMI.getOpcode() == AArch64::LDRXui) &&
+ (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 &&
+ (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) {
+ return std::nullopt;
+ }
+
// We only allow outlining for functions having exactly matching return
// address signing attributes, i.e., all share the same value for the
// attribute "sign-return-address" and all share the same type of key they
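A minimal sketch of what the extended sForm() table enables (illustrative assembly, not from the patch): optimizeCompareInstr can now fold a zero-compare into the flag-setting form of a preceding AND/BIC, and since ANDS/BICS always clear V, the fold stays legal even if a later NZCV user reads the V flag.

    and  w8, w0, w1          =>    ands w8, w0, w1
    cmp  w8, #0                    b.ne .LBB0_2
    b.ne .LBB0_2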
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index a5048b9..eaf8723 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -1123,24 +1123,85 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-// FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register allocation
-// where a consecutive multi-vector tuple is constructed from the same indices
-// of multiple strided loads. This may still result in unnecessary copies
-// between the loads and the tuple. Here we try to return a hint to assign the
-// contiguous ZPRMulReg starting at the same register as the first operand of
-// the pseudo, which should be a subregister of the first strided load.
+// We add regalloc hints for different cases:
+// * Choosing a better destination operand for predicated SVE instructions
+// where the inactive lanes are undef, by choosing a register that is not
+// unique to the other operands of the instruction.
//
-// For example, if the first strided load has been assigned $z16_z20_z24_z28
-// and the operands of the pseudo are each accessing subregister zsub2, we
-// should look through through Order to find a contiguous register which
-// begins with $z24 (i.e. $z24_z25_z26_z27).
+// * Improve register allocation for SME multi-vector instructions where we can
+// benefit from the strided- and contiguous register multi-vector tuples.
//
+// Here FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register
+// allocation where a consecutive multi-vector tuple is constructed from the
+// same indices of multiple strided loads. This may still result in
+// unnecessary copies between the loads and the tuple. Here we try to return a
+// hint to assign the contiguous ZPRMulReg starting at the same register as
+// the first operand of the pseudo, which should be a subregister of the first
+// strided load.
+//
+// For example, if the first strided load has been assigned $z16_z20_z24_z28
+// and the operands of the pseudo are each accessing subregister zsub2, we
+// should look through Order to find a contiguous register which
+// begins with $z24 (i.e. $z24_z25_z26_z27).
bool AArch64RegisterInfo::getRegAllocationHints(
Register VirtReg, ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
-
auto &ST = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // For predicated SVE instructions where the inactive lanes are undef,
+ // pick a destination register that is not unique to avoid introducing
+ // a movprfx.
+ const TargetRegisterClass *RegRC = MRI.getRegClass(VirtReg);
+ if (AArch64::ZPRRegClass.hasSubClassEq(RegRC)) {
+ for (const MachineOperand &DefOp : MRI.def_operands(VirtReg)) {
+ const MachineInstr &Def = *DefOp.getParent();
+ if (DefOp.isImplicit() ||
+ (TII->get(Def.getOpcode()).TSFlags & AArch64::FalseLanesMask) !=
+ AArch64::FalseLanesUndef)
+ continue;
+
+ unsigned InstFlags =
+ TII->get(AArch64::getSVEPseudoMap(Def.getOpcode())).TSFlags;
+
+ for (MCPhysReg R : Order) {
+ auto AddHintIfSuitable = [&](MCPhysReg R, const MachineOperand &MO) {
+ // R is a suitable register hint if there exists an operand for the
+ // instruction that is not yet allocated a register or if R matches
+ // one of the other source operands.
+ if (!VRM->hasPhys(MO.getReg()) || VRM->getPhys(MO.getReg()) == R)
+ Hints.push_back(R);
+ };
+
+ switch (InstFlags & AArch64::DestructiveInstTypeMask) {
+ default:
+ break;
+ case AArch64::DestructiveTernaryCommWithRev:
+ AddHintIfSuitable(R, Def.getOperand(2));
+ AddHintIfSuitable(R, Def.getOperand(3));
+ AddHintIfSuitable(R, Def.getOperand(4));
+ break;
+ case AArch64::DestructiveBinaryComm:
+ case AArch64::DestructiveBinaryCommWithRev:
+ AddHintIfSuitable(R, Def.getOperand(2));
+ AddHintIfSuitable(R, Def.getOperand(3));
+ break;
+ case AArch64::DestructiveBinary:
+ case AArch64::DestructiveBinaryImm:
+ AddHintIfSuitable(R, Def.getOperand(2));
+ break;
+ }
+ }
+ }
+
+ if (Hints.size())
+ return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
+ MF, VRM);
+ }
+
if (!ST.hasSME() || !ST.isStreaming())
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
VRM);
@@ -1153,8 +1214,7 @@ bool AArch64RegisterInfo::getRegAllocationHints(
// FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy
// instructions over reducing the number of clobbered callee-save registers,
// so we add the strided registers as a hint.
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned RegID = MRI.getRegClass(VirtReg)->getID();
+ unsigned RegID = RegRC->getID();
if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
RegID == AArch64::ZPR4StridedOrContiguousRegClassID) {
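To make the first hinting case concrete, a hypothetical allocation for a destructive SVE add whose inactive lanes are undef (illustrative assembly, not from the patch): if the destination lands in a register distinct from every source, a movprfx must be inserted; hinting it onto a source register avoids that.

    movprfx z2, z0                     =>    add z0.s, p0/m, z0.s, z1.s
    add     z2.s, p0/m, z2.s, z1.s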
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 8974965..ab4004e 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -157,7 +157,7 @@ public:
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
bool enableSubRegLiveness() const override { return EnableSubregLiveness; }
-
+ bool enableTerminalRule() const override { return true; }
bool enableMachinePipeliner() const override;
bool useDFAforSMS() const override { return false; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 54ba2f8..dbca5af 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -5081,17 +5081,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned MinNumRegsRequired = DstSize / 32;
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ bool UseAGPRForm = Info->selectAGPRFormMFMA(MinNumRegsRequired);
+
OpdsMapping[0] =
- Info->getMinNumAGPRs() >= MinNumRegsRequired
- ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
- : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ UseAGPRForm ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
+ : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] =
- Info->getMinNumAGPRs() >= MinNumRegsRequired
- ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
- : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
+ : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
OpdsMapping[8] = getVGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI);
OpdsMapping[10] = getVGPROpMapping(MI.getOperand(10).getReg(), MRI, *TRI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 0ea9add..b03d50f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -261,13 +261,6 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
const Function *Callee = getCalleeFunction(*CalleeOp);
- // Avoid crashing on undefined behavior with an illegal call to a
- // kernel. If a callsite's calling convention doesn't match the
- // function's, it's undefined behavior. If the callsite calling
- // convention does match, that would have errored earlier.
- if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
- report_fatal_error("invalid call to entry function");
-
auto isSameFunction = [](const MachineFunction &MF, const Function *F) {
return F == &MF.getFunction();
};
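For context on the deleted guard, a hypothetical IR example of the case it used to abort on; per the removed comment, such a call is undefined behavior because a callsite's calling convention can never legally match an entry function's.

    define amdgpu_kernel void @k() { ret void }
    define void @f() {
      call void @k()   ; invalid call to an entry function
      ret void
    }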
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f377b8a..da4bd87 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1040,6 +1040,8 @@ public:
return true;
}
+ bool enableTerminalRule() const override { return true; }
+
bool useAA() const override;
bool enableSubRegLiveness() const override {
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 3e256cc..0104085 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
#include "R600GenInstrInfo.inc"
R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
- : R600GenInstrInfo(ST, -1, -1), RI(), ST(ST) {}
+ : R600GenInstrInfo(ST, RI, -1, -1), RI(), ST(ST) {}
bool R600InstrInfo::isVector(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h
index 22e56b6..efd99db 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.h
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -126,6 +126,8 @@ public:
return true;
}
+ bool enableTerminalRule() const override { return true; }
+
bool enableSubRegLiveness() const override {
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9c74c65..6b397e0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -63,7 +63,8 @@ static cl::opt<bool> Fix16BitCopies(
cl::ReallyHidden);
SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
- : AMDGPUGenInstrInfo(ST, AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
+ : AMDGPUGenInstrInfo(ST, RI, AMDGPU::ADJCALLSTACKUP,
+ AMDGPU::ADJCALLSTACKDOWN),
RI(ST), ST(ST) {
SchedModel.init(&ST);
}
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index f0d1117..00aae2c9 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -233,10 +233,11 @@ private:
void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore,
- AMDGPU::OpName OpName, Register DestReg) const;
+ const DebugLoc &DL, AMDGPU::OpName OpName,
+ Register DestReg) const;
Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore,
- AMDGPU::OpName OpName) const;
+ const DebugLoc &DL, AMDGPU::OpName OpName) const;
unsigned read2Opcode(unsigned EltSize) const;
unsigned read2ST64Opcode(unsigned EltSize) const;
@@ -1367,10 +1368,9 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
// Paired.
void SILoadStoreOptimizer::copyToDestRegs(
CombineInfo &CI, CombineInfo &Paired,
- MachineBasicBlock::iterator InsertBefore, AMDGPU::OpName OpName,
- Register DestReg) const {
+ MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL,
+ AMDGPU::OpName OpName, Register DestReg) const {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
@@ -1398,9 +1398,9 @@ void SILoadStoreOptimizer::copyToDestRegs(
Register
SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore,
+ const DebugLoc &DL,
AMDGPU::OpName OpName) const {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
@@ -1456,7 +1456,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
Register DestReg = MRI->createVirtualRegister(SuperRC);
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL =
+ DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
Register BaseReg = AddrReg->getReg();
unsigned BaseSubReg = AddrReg->getSubReg();
@@ -1484,7 +1485,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
.addImm(0) // gds
.cloneMergedMemRefs({&*CI.I, &*Paired.I});
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1541,7 +1542,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
(NewOffset0 != NewOffset1) && "Computed offset doesn't fit");
const MCInstrDesc &Write2Desc = TII->get(Opc);
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL =
+ DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
Register BaseReg = AddrReg->getReg();
unsigned BaseSubReg = AddrReg->getSubReg();
@@ -1582,7 +1584,9 @@ MachineBasicBlock::iterator
SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL =
+ DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
+
const unsigned Opcode = getNewOpcode(CI, Paired);
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
@@ -1607,7 +1611,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1618,7 +1622,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL =
+ DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
+
const unsigned Opcode = getNewOpcode(CI, Paired);
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
@@ -1639,7 +1645,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair(
New.addImm(MergedOffset);
New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::sdst, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1650,7 +1656,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+
+ DebugLoc DL =
+ DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1680,7 +1688,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1691,7 +1699,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+
+ DebugLoc DL =
+ DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1731,7 +1741,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1742,12 +1752,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL =
+ DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
Register SrcReg =
- copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
+ copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
@@ -1789,7 +1800,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+
+ DebugLoc DL =
+ DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1807,7 +1820,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair(
.addImm(CI.CPol)
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
+ copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1818,12 +1831,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+
+ DebugLoc DL =
+ DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
Register SrcReg =
- copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
+ copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
@@ -2094,12 +2109,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
+ DebugLoc DL =
+ DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
const unsigned Opcode = getNewOpcode(CI, Paired);
Register SrcReg =
- copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
+ copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
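The DebugLoc changes above all follow one pattern; a distilled sketch using the same API as the patch:

    // Taking only CI's location would arbitrarily attribute the merged
    // access to one of the two source lines; getMergedLocation instead
    // keeps whatever the two locations agree on.
    DebugLoc DL =
        DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());
    BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg);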
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 54f57e0..85adcab 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -513,6 +513,13 @@ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;
+
+let HasClamp = 0, HasOMod = 0 in {
+def V_TRANS_BF16_Profile : VOPProfile <[bf16, bf16, untyped, untyped]>;
+def V_TRANS_BF16_t16_Profile : VOPProfile_True16 <VOP_BF16_BF16>;
+def V_TRANS_BF16_fake16_Profile : VOPProfile_Fake16 <VOP_BF16_BF16>;
+}
+
let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -527,14 +534,30 @@ defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>;
}
let SubtargetPredicate = HasBF16TransInsts in {
-defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
-defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
-defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>;
-defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>;
-defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>;
-defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>;
-defm V_SIN_BF16 : VOP1Inst_t16 <"v_sin_bf16", VOP_BF16_BF16, AMDGPUsin>;
-defm V_COS_BF16 : VOP1Inst_t16 <"v_cos_bf16", VOP_BF16_BF16, AMDGPUcos>;
+defm V_TANH_BF16 : VOP1Inst_t16_with_profiles<"v_tanh_bf16", V_TRANS_BF16_Profile,
+ V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
+ int_amdgcn_tanh>;
+defm V_RCP_BF16 : VOP1Inst_t16_with_profiles<"v_rcp_bf16", V_TRANS_BF16_Profile,
+ V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
+ AMDGPUrcp>;
+defm V_SQRT_BF16 : VOP1Inst_t16_with_profiles<"v_sqrt_bf16", V_TRANS_BF16_Profile,
+ V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
+ any_amdgcn_sqrt>;
+defm V_RSQ_BF16 : VOP1Inst_t16_with_profiles<"v_rsq_bf16", V_TRANS_BF16_Profile,
+ V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
+ AMDGPUrsq>;
+defm V_LOG_BF16 : VOP1Inst_t16_with_profiles<"v_log_bf16", V_TRANS_BF16_Profile,
+ V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
+ AMDGPUlogf16>;
+defm V_EXP_BF16 : VOP1Inst_t16_with_profiles<"v_exp_bf16", V_TRANS_BF16_Profile,
+ V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
+ AMDGPUexpf16>;
+defm V_SIN_BF16 : VOP1Inst_t16_with_profiles<"v_sin_bf16", V_TRANS_BF16_Profile,
+ V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
+ AMDGPUsin>;
+defm V_COS_BF16 : VOP1Inst_t16_with_profiles<"v_cos_bf16", V_TRANS_BF16_Profile,
+ V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile,
+ AMDGPUcos>;
}
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 8325c62..6df91a1 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1357,8 +1357,12 @@ class VOPBinOpClampPat<SDPatternOperator node, Instruction inst, ValueType vt> :
class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
dag src0 = !if(P.HasOMod,
- (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
- (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
+ !if(P.HasClamp,
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i32:$omod)),
+ !if(P.HasClamp,
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers)));
list<dag> ret3 = [(set P.DstVT:$vdst,
(DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
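Spelled out, the nested !if selects one of four source dags; the last case is the new one, needed by profiles such as the BF16 transcendental profiles above that set neither HasClamp nor HasOMod:

    HasOMod and HasClamp: (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)
    HasOMod only:         (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i32:$omod)
    HasClamp only:        (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)
    neither:              (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers)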
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
index 05bcb35..2dec6ff 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
@@ -44,7 +44,8 @@ enum TSFlagsConstants {
void ARCInstrInfo::anchor() {}
ARCInstrInfo::ARCInstrInfo(const ARCSubtarget &ST)
- : ARCGenInstrInfo(ST, ARC::ADJCALLSTACKDOWN, ARC::ADJCALLSTACKUP), RI(ST) {}
+ : ARCGenInstrInfo(ST, RI, ARC::ADJCALLSTACKDOWN, ARC::ADJCALLSTACKUP),
+ RI(ST) {}
static bool isZeroImm(const MachineOperand &Op) {
return Op.isImm() && Op.getImm() == 0;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 22769db..b466ca6f 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -107,8 +107,9 @@ static const ARM_MLxEntry ARM_MLxTable[] = {
{ ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
};
-ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
- : ARMGenInstrInfo(STI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI,
+ const ARMBaseRegisterInfo &TRI)
+ : ARMGenInstrInfo(STI, TRI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
Subtarget(STI) {
for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 2869e7f..27f8e3b 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -44,7 +44,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
protected:
// Can be only subclassed.
- explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+ explicit ARMBaseInstrInfo(const ARMSubtarget &STI,
+ const ARMBaseRegisterInfo &TRI);
void expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
unsigned LoadImmOpc, unsigned LoadOpc) const;
@@ -125,7 +126,11 @@ public:
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;
- virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
+ const ARMBaseRegisterInfo &getRegisterInfo() const {
+ return static_cast<const ARMBaseRegisterInfo &>(
+ TargetInstrInfo::getRegisterInfo());
+ }
+
const ARMSubtarget &getSubtarget() const { return Subtarget; }
ScheduleHazardRecognizer *
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index c684de7..f370547 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -25,7 +25,8 @@
#include "llvm/MC/MCInst.h"
using namespace llvm;
-ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI) {}
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI, RI) {}
/// Return the noop instruction to use for a noop.
MCInst ARMInstrInfo::getNop() const {
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.h b/llvm/lib/Target/ARM/ARMInstrInfo.h
index 178d7a2..9feaf14 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -35,7 +35,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const ARMRegisterInfo &getRegisterInfo() const override { return RI; }
+ const ARMRegisterInfo &getRegisterInfo() const { return RI; }
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 4a0883c..34baa31 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -377,6 +377,7 @@ public:
bool isRWPI() const;
bool useMachineScheduler() const { return UseMISched; }
+ bool enableTerminalRule() const override { return true; }
bool useMachinePipeliner() const { return UseMIPipeliner; }
bool hasMinSize() const { return OptMinSize; }
bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 4b8c2fd..f95ba6a4 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -24,7 +24,7 @@
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {}
+ : ARMBaseInstrInfo(STI, RI), RI(STI) {}
/// Return the noop instruction to use for a noop.
MCInst Thumb1InstrInfo::getNop() const {
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index 68b326c..16350a6 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -35,7 +35,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const { return RI; }
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, Register DestReg, Register SrcReg,
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index f5653d4..b66e407 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -46,7 +46,7 @@ PreferNoCSEL("prefer-no-csel", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {}
+ : ARMBaseInstrInfo(STI, RI), RI(STI) {}
/// Return the noop instruction to use for a noop.
MCInst Thumb2InstrInfo::getNop() const {
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 1b0bf2d..59ef39d 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -58,7 +58,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const { return RI; }
MachineInstr *optimizeSelect(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &SeenMIs,
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index ce99085..5e247cb 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -30,8 +30,8 @@
namespace llvm {
AVRInstrInfo::AVRInstrInfo(const AVRSubtarget &STI)
- : AVRGenInstrInfo(STI, AVR::ADJCALLSTACKDOWN, AVR::ADJCALLSTACKUP), RI(),
- STI(STI) {}
+ : AVRGenInstrInfo(STI, RI, AVR::ADJCALLSTACKDOWN, AVR::ADJCALLSTACKUP),
+ RI(), STI(STI) {}
void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 409f8b4..0e56e65 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -27,7 +27,7 @@
using namespace llvm;
BPFInstrInfo::BPFInstrInfo(const BPFSubtarget &STI)
- : BPFGenInstrInfo(STI, BPF::ADJCALLSTACKDOWN, BPF::ADJCALLSTACKUP) {}
+ : BPFGenInstrInfo(STI, RI, BPF::ADJCALLSTACKDOWN, BPF::ADJCALLSTACKUP) {}
void BPFInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
index 619a797..34a7de8 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
#include "CSKYGenInstrInfo.inc"
CSKYInstrInfo::CSKYInstrInfo(const CSKYSubtarget &STI)
- : CSKYGenInstrInfo(STI, CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP),
+ : CSKYGenInstrInfo(STI, RI, CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP),
STI(STI) {
v2sf = STI.hasFPUv2SingleFloat();
v2df = STI.hasFPUv2DoubleFloat();
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 6579d34..057d87b 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -10,6 +10,7 @@
#include "DirectX.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DXILResource.h"
+#include "llvm/Frontend/HLSL/HLSLResource.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +21,7 @@
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#define DEBUG_TYPE "dxil-resource-access"
@@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset,
APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
APInt Scaled = ConstantOffset.udiv(ScalarSize);
- return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled);
+ return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled);
}
- auto IndexIt = GEP->idx_begin();
- assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
- "GEP is not indexing through pointer");
- ++IndexIt;
- Value *Offset = *IndexIt;
- assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
- return Offset;
+ unsigned NumIndices = GEP->getNumIndices();
+
+ // If we have a single index we're indexing into a top level array. This
+ // generally only happens with cbuffers.
+ if (NumIndices == 1)
+ return *GEP->idx_begin();
+
+ // If we have two indices, this should be a simple access through a pointer.
+ if (NumIndices == 2) {
+ auto IndexIt = GEP->idx_begin();
+ assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
+ "GEP is not indexing through pointer");
+ ++IndexIt;
+ Value *Offset = *IndexIt;
+ assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
+ return Offset;
+ }
+
+ llvm_unreachable("Unhandled GEP structure for resource access");
}
static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
@@ -171,6 +185,127 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) {
LI->replaceAllUsesWith(V);
}
+namespace {
+/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
+struct CBufferRowIntrin {
+ Intrinsic::ID IID;
+ Type *RetTy;
+ unsigned int EltSize;
+ unsigned int NumElts;
+
+ CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
+ assert(Ty == Ty->getScalarType() && "Expected scalar type");
+
+ switch (DL.getTypeSizeInBits(Ty)) {
+ case 16:
+ IID = Intrinsic::dx_resource_load_cbufferrow_8;
+ RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
+ EltSize = 2;
+ NumElts = 8;
+ break;
+ case 32:
+ IID = Intrinsic::dx_resource_load_cbufferrow_4;
+ RetTy = StructType::get(Ty, Ty, Ty, Ty);
+ EltSize = 4;
+ NumElts = 4;
+ break;
+ case 64:
+ IID = Intrinsic::dx_resource_load_cbufferrow_2;
+ RetTy = StructType::get(Ty, Ty);
+ EltSize = 8;
+ NumElts = 2;
+ break;
+ default:
+ llvm_unreachable("Only 16, 32, and 64 bit types supported");
+ }
+ }
+};
+} // namespace
+
+static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
+ dxil::ResourceTypeInfo &RTI) {
+ const DataLayout &DL = LI->getDataLayout();
+
+ Type *Ty = LI->getType();
+ assert(!isa<StructType>(Ty) && "Structs not handled yet");
+ CBufferRowIntrin Intrin(DL, Ty->getScalarType());
+
+ StringRef Name = LI->getName();
+ Value *Handle = II->getOperand(0);
+
+ IRBuilder<> Builder(LI);
+
+ ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1));
+ assert(GlobalOffset && "CBuffer getpointer index must be constant");
+
+ unsigned int FixedOffset = GlobalOffset->getZExtValue();
+ // If we have a further constant offset we can just fold it in to the fixed
+ // offset.
+ if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
+ FixedOffset += ConstOffset->getZExtValue();
+ Offset = nullptr;
+ }
+
+ Value *CurrentRow = ConstantInt::get(
+ Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes);
+ unsigned int CurrentIndex =
+ (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
+
+ assert(!(CurrentIndex && Offset) &&
+ "Dynamic indexing into elements of cbuffer rows is not supported");
+ // At this point if we have a non-constant offset it has to be an array
+ // offset, so we can assume that it's a multiple of the row size.
+ if (Offset)
+ CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset;
+
+ auto *CBufLoad = Builder.CreateIntrinsic(
+ Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load");
+ auto *Elt =
+ Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract");
+
+ // At this point we've loaded the first scalar of our result, but our original
+ // type may have been a vector.
+ unsigned int Remaining =
+ ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
+ if (Remaining == 0) {
+ // We only have a single element, so we're done.
+ Value *Result = Elt;
+
+ // However, if we loaded a <1 x T>, then we need to adjust the type.
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
+ Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
+ Builder.getInt32(0), Name);
+ }
+ LI->replaceAllUsesWith(Result);
+ return;
+ }
+
+ // Walk each element and extract it, wrapping to new rows as needed.
+ SmallVector<Value *> Extracts{Elt};
+ while (Remaining--) {
+ CurrentIndex %= Intrin.NumElts;
+
+ if (CurrentIndex == 0) {
+ CurrentRow = Builder.CreateAdd(CurrentRow,
+ ConstantInt::get(Builder.getInt32Ty(), 1));
+ CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID,
+ {Handle, CurrentRow}, nullptr,
+ Name + ".load");
+ }
+
+ Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
+ Name + ".extract"));
+ }
+
+ // Finally, we build up the original loaded value.
+ Value *Result = PoisonValue::get(Ty);
+ for (int I = 0, E = Extracts.size(); I < E; ++I)
+ Result = Builder.CreateInsertElement(
+ Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I));
+ LI->replaceAllUsesWith(Result);
+}
+
static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
dxil::ResourceTypeInfo &RTI) {
switch (RTI.getResourceKind()) {
@@ -179,6 +314,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
case dxil::ResourceKind::RawBuffer:
case dxil::ResourceKind::StructuredBuffer:
return createRawLoad(II, LI, Offset);
+ case dxil::ResourceKind::CBuffer:
+ return createCBufferLoad(II, LI, Offset, RTI);
case dxil::ResourceKind::Texture1D:
case dxil::ResourceKind::Texture2D:
case dxil::ResourceKind::Texture2DMS:
@@ -190,9 +327,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
case dxil::ResourceKind::TextureCubeArray:
case dxil::ResourceKind::FeedbackTexture2D:
case dxil::ResourceKind::FeedbackTexture2DArray:
- case dxil::ResourceKind::CBuffer:
case dxil::ResourceKind::TBuffer:
- // TODO: handle these
+ reportFatalUsageError("Load not yet implemented for resource type");
return;
case dxil::ResourceKind::Sampler:
case dxil::ResourceKind::RTAccelerationStructure:
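As a worked example of the row arithmetic in createCBufferLoad (hypothetical offsets; IR shape inferred from the intrinsic and struct types in the patch): loading a <2 x float> at byte offset 24 lands in row 24/16 = 1 at element (24 % 16)/4 = 2, so a single row load and two extracts suffice:

    %r = call { float, float, float, float }
        @llvm.dx.resource.load.cbufferrow.4(target("dx.CBuffer", ...) %handle, i32 1)
    %x = extractvalue { float, float, float, float } %r, 2
    %y = extractvalue { float, float, float, float } %r, 3
    %v0 = insertelement <2 x float> poison, float %x, i32 0
    %v1 = insertelement <2 x float> %v0, float %y, i32 1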
diff --git a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
index bb2efa4..401881d 100644
--- a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
@@ -19,6 +19,6 @@
using namespace llvm;
DirectXInstrInfo::DirectXInstrInfo(const DirectXSubtarget &STI)
- : DirectXGenInstrInfo(STI) {}
+ : DirectXGenInstrInfo(STI, RI) {}
DirectXInstrInfo::~DirectXInstrInfo() {}
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 55bafde..dd9f2fa 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -118,9 +118,9 @@ const int Hexagon_ADDI_OFFSET_MIN = -32768;
void HexagonInstrInfo::anchor() {}
HexagonInstrInfo::HexagonInstrInfo(const HexagonSubtarget &ST)
- : HexagonGenInstrInfo(ST, Hexagon::ADJCALLSTACKDOWN,
+ : HexagonGenInstrInfo(ST, RegInfo, Hexagon::ADJCALLSTACKDOWN,
Hexagon::ADJCALLSTACKUP),
- Subtarget(ST) {}
+ RegInfo(ST.getHwMode()), Subtarget(ST) {}
namespace llvm {
namespace HexagonFUnits {
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 48adf82..7a0c77c 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -23,6 +23,8 @@
#include <cstdint>
#include <vector>
+#include "HexagonRegisterInfo.h"
+
#define GET_INSTRINFO_HEADER
#include "HexagonGenInstrInfo.inc"
@@ -36,6 +38,7 @@ class MachineOperand;
class TargetRegisterInfo;
class HexagonInstrInfo : public HexagonGenInstrInfo {
+ const HexagonRegisterInfo RegInfo;
const HexagonSubtarget &Subtarget;
enum BundleAttribute {
@@ -47,6 +50,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
public:
explicit HexagonInstrInfo(const HexagonSubtarget &ST);
+ const HexagonRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
/// TargetInstrInfo overrides.
/// If the specified machine instruction is a direct
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index a3c8a88..66c8b0a 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -76,8 +76,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
OptLevel(TM.getOptLevel()),
CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- RegInfo(getHwMode()), TLInfo(TM, *this),
- InstrItins(getInstrItineraryForCPU(CPUString)) {
+ TLInfo(TM, *this), InstrItins(getInstrItineraryForCPU(CPUString)) {
Hexagon_MC::addArchSubtarget(this, FS);
// Beware of the default constructor of InstrItineraryData: it will
// reset all members to 0.
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 995f66d..30794f6 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -100,7 +100,6 @@ private:
// The following objects can use the TargetTriple, so they must be
// declared after it.
HexagonInstrInfo InstrInfo;
- HexagonRegisterInfo RegInfo;
HexagonTargetLowering TLInfo;
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
@@ -122,7 +121,7 @@ public:
}
const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const HexagonRegisterInfo *getRegisterInfo() const override {
- return &RegInfo;
+ return &InstrInfo.getRegisterInfo();
}
const HexagonTargetLowering *getTargetLowering() const override {
return &TLInfo;
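Hexagon, LoongArch, and RISC-V (below) apply the same cross-cutting refactor: the RegisterInfo object moves out of the subtarget and into the InstrInfo, which passes a reference up to the generated TargetInstrInfo base. A distilled sketch for a hypothetical Foo target:

    class FooInstrInfo : public FooGenInstrInfo {
      const FooRegisterInfo RegInfo; // now owned here, not by the subtarget
    public:
      explicit FooInstrInfo(const FooSubtarget &ST)
          // Passing RegInfo before it is constructed is fine: the base
          // class only binds a reference to it.
          : FooGenInstrInfo(ST, RegInfo, Foo::ADJCALLSTACKDOWN,
                            Foo::ADJCALLSTACKUP),
            RegInfo(ST.getHwMode()) {}
      const FooRegisterInfo &getRegisterInfo() const { return RegInfo; }
    };

    // The subtarget forwards instead of owning a second copy:
    const FooRegisterInfo *FooSubtarget::getRegisterInfo() const {
      return &InstrInfo.getRegisterInfo();
    }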
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 02ed1001..b3d2856 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -27,7 +27,8 @@ using namespace llvm;
#include "LanaiGenInstrInfo.inc"
LanaiInstrInfo::LanaiInstrInfo(const LanaiSubtarget &STI)
- : LanaiGenInstrInfo(STI, Lanai::ADJCALLSTACKDOWN, Lanai::ADJCALLSTACKUP),
+ : LanaiGenInstrInfo(STI, RegisterInfo, Lanai::ADJCALLSTACKDOWN,
+ Lanai::ADJCALLSTACKUP),
RegisterInfo() {}
void LanaiInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 9a35df2..5eb3bd6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -26,9 +26,9 @@ using namespace llvm;
#include "LoongArchGenInstrInfo.inc"
LoongArchInstrInfo::LoongArchInstrInfo(const LoongArchSubtarget &STI)
- : LoongArchGenInstrInfo(STI, LoongArch::ADJCALLSTACKDOWN,
+ : LoongArchGenInstrInfo(STI, RegInfo, LoongArch::ADJCALLSTACKDOWN,
LoongArch::ADJCALLSTACKUP),
- STI(STI) {}
+ RegInfo(STI.getHwMode()), STI(STI) {}
MCInst LoongArchInstrInfo::getNop() const {
return MCInstBuilder(LoongArch::ANDI)
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index e61314c..d43d229 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -24,9 +24,13 @@ namespace llvm {
class LoongArchSubtarget;
class LoongArchInstrInfo : public LoongArchGenInstrInfo {
+ const LoongArchRegisterInfo RegInfo;
+
public:
explicit LoongArchInstrInfo(const LoongArchSubtarget &STI);
+ const LoongArchRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
MCInst getNop() const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
index 3acbe49..76a8ba1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
@@ -95,4 +95,4 @@ LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU,
: LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS),
FrameLowering(
initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
- InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {}
+ InstrInfo(*this), TLInfo(TM, *this) {}
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 5e12baf..2beff07 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -45,7 +45,6 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
LoongArchFrameLowering FrameLowering;
LoongArchInstrInfo InstrInfo;
- LoongArchRegisterInfo RegInfo;
LoongArchTargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
@@ -78,7 +77,7 @@ public:
}
const LoongArchInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const LoongArchRegisterInfo *getRegisterInfo() const override {
- return &RegInfo;
+ return &InstrInfo.getRegisterInfo();
}
const LoongArchTargetLowering *getTargetLowering() const override {
return &TLInfo;
diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 65b4820..af053b8 100644
--- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -26,7 +26,8 @@ using namespace llvm;
void MSP430InstrInfo::anchor() {}
MSP430InstrInfo::MSP430InstrInfo(const MSP430Subtarget &STI)
- : MSP430GenInstrInfo(STI, MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
+ : MSP430GenInstrInfo(STI, RI, MSP430::ADJCALLSTACKDOWN,
+ MSP430::ADJCALLSTACKUP),
RI() {}
void MSP430InstrInfo::storeRegToStackSlot(
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index aa94f54..69b96cf 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -37,11 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-instrinfo"
Mips16InstrInfo::Mips16InstrInfo(const MipsSubtarget &STI)
- : MipsInstrInfo(STI, Mips::Bimm16), RI(STI) {}
-
-const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
- return RI;
-}
+ : MipsInstrInfo(STI, RI, Mips::Bimm16), RI(STI) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.h b/llvm/lib/Target/Mips/Mips16InstrInfo.h
index 1058e8c..2834fd3 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.h
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.h
@@ -30,7 +30,7 @@ class Mips16InstrInfo : public MipsInstrInfo {
public:
explicit Mips16InstrInfo(const MipsSubtarget &STI);
- const MipsRegisterInfo &getRegisterInfo() const override;
+ const Mips16RegisterInfo &getRegisterInfo() const { return RI; }
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index bffdffa..c879c46 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -39,8 +39,9 @@ using namespace llvm;
// Pin the vtable to this file.
void MipsInstrInfo::anchor() {}
-MipsInstrInfo::MipsInstrInfo(const MipsSubtarget &STI, unsigned UncondBr)
- : MipsGenInstrInfo(STI, Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
+MipsInstrInfo::MipsInstrInfo(const MipsSubtarget &STI,
+ const MipsRegisterInfo &RI, unsigned UncondBr)
+ : MipsGenInstrInfo(STI, RI, Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
Subtarget(STI), UncondBrOpc(UncondBr) {}
const MipsInstrInfo *MipsInstrInfo::create(MipsSubtarget &STI) {
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h
index 2337ae7..fc94248 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -55,7 +55,8 @@ public:
BT_Indirect // One indirect branch.
};
- explicit MipsInstrInfo(const MipsSubtarget &STI, unsigned UncondBrOpc);
+ explicit MipsInstrInfo(const MipsSubtarget &STI, const MipsRegisterInfo &RI,
+ unsigned UncondBrOpc);
MCInst getNop() const override;
@@ -130,7 +131,10 @@ public:
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
- virtual const MipsRegisterInfo &getRegisterInfo() const = 0;
+ const MipsRegisterInfo &getRegisterInfo() const {
+ return static_cast<const MipsRegisterInfo &>(
+ TargetInstrInfo::getRegisterInfo());
+ }
virtual unsigned getOppositeBranchOpc(unsigned Opc) const = 0;
diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index dbdbb17..517f489 100644
--- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -28,11 +28,7 @@ static unsigned getUnconditionalBranch(const MipsSubtarget &STI) {
}
MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI)
- : MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI(STI) {}
-
-const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
- return RI;
-}
+ : MipsInstrInfo(STI, RI, getUnconditionalBranch(STI)), RI(STI) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index 2b4f55d..0a7a0e5 100644
--- a/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -24,7 +24,7 @@ class MipsSEInstrInfo : public MipsInstrInfo {
public:
explicit MipsSEInstrInfo(const MipsSubtarget &STI);
- const MipsRegisterInfo &getRegisterInfo() const override;
+ const MipsSERegisterInfo &getRegisterInfo() const { return RI; }
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 6840c7a..db2d96f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
void NVPTXInstrInfo::anchor() {}
NVPTXInstrInfo::NVPTXInstrInfo(const NVPTXSubtarget &STI)
- : NVPTXGenInstrInfo(STI), RegInfo() {}
+ : NVPTXGenInstrInfo(STI, RegInfo), RegInfo() {}
void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 3014aa6..8d9d4c7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -89,7 +89,7 @@ static cl::opt<bool> EnableFMARegPressureReduction(
void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(const PPCSubtarget &STI)
- : PPCGenInstrInfo(STI, PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
+ : PPCGenInstrInfo(STI, RI, PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
/* CatchRetOpcode */ -1,
STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
Subtarget(STI), RI(STI.getTargetMachine()) {}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index b05956b..ce8dd3b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -82,8 +82,9 @@ namespace llvm::RISCV {
} // end namespace llvm::RISCV
RISCVInstrInfo::RISCVInstrInfo(const RISCVSubtarget &STI)
- : RISCVGenInstrInfo(STI, RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
- STI(STI) {}
+ : RISCVGenInstrInfo(STI, RegInfo, RISCV::ADJCALLSTACKDOWN,
+ RISCV::ADJCALLSTACKUP),
+ RegInfo(STI.getHwMode()), STI(STI) {}
#define GET_INSTRINFO_HELPERS
#include "RISCVGenInstrInfo.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index c5eddb9..800af26 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -79,10 +79,13 @@ enum RISCVMachineCombinerPattern : unsigned {
};
class RISCVInstrInfo : public RISCVGenInstrInfo {
+ const RISCVRegisterInfo RegInfo;
public:
explicit RISCVInstrInfo(const RISCVSubtarget &STI);
+ const RISCVRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
MCInst getNop() const override;
Register isLoadFromStackSlot(const MachineInstr &MI,
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 715ac4c..3b43be3 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -104,7 +104,7 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax),
FrameLowering(
initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
- InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {
+ InstrInfo(*this), TLInfo(TM, *this) {
TSInfo = std::make_unique<RISCVSelectionDAGInfo>();
}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 4b4fc8f..d5ffa6c 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -112,7 +112,6 @@ private:
RISCVFrameLowering FrameLowering;
RISCVInstrInfo InstrInfo;
- RISCVRegisterInfo RegInfo;
RISCVTargetLowering TLInfo;
/// Initializes using the passed in CPU and feature strings so that we can
@@ -140,7 +139,7 @@ public:
}
const RISCVInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const RISCVRegisterInfo *getRegisterInfo() const override {
- return &RegInfo;
+ return &InstrInfo.getRegisterInfo();
}
const RISCVTargetLowering *getTargetLowering() const override {
return &TLInfo;
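
For RISC-V the commit goes a step further: RISCVRegisterInfo is no longer a subtarget member at all; RISCVInstrInfo owns it, and the subtarget forwards its query. A reduced mirror of the resulting ownership, under the assumption (which the hunks above bear out) that both accessors must keep returning the same object:

  // Hypothetical, simplified mirror of the RISCV ownership move; RegInfo,
  // InstrInfo, and Subtarget stand in for the real classes.
  struct RegInfo {
    unsigned HwMode;
  };

  struct InstrInfo {
    RegInfo RI;
    explicit InstrInfo(unsigned HwMode) : RI{HwMode} {}
    const RegInfo &getRegisterInfo() const { return RI; }
  };

  struct Subtarget {
    InstrInfo II; // the RegInfo member is gone; II owns it now
    explicit Subtarget(unsigned HwMode) : II(HwMode) {}
    // Forwards instead of returning a field of its own, exactly as the
    // RISCVSubtarget.h hunk does.
    const RegInfo *getRegisterInfo() const { return &II.getRegisterInfo(); }
  };
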
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
index ba95ad8..4f8bf43 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
@@ -24,7 +24,7 @@
using namespace llvm;
SPIRVInstrInfo::SPIRVInstrInfo(const SPIRVSubtarget &STI)
- : SPIRVGenInstrInfo(STI) {}
+ : SPIRVGenInstrInfo(STI, RI) {}
bool SPIRVInstrInfo::isConstantInstr(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index fbb127d..b8cd9c1 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -249,17 +249,18 @@ static InstrSignature instrToSignature(const MachineInstr &MI,
InstrSignature Signature{MI.getOpcode()};
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
// The only decorations that can be applied more than once to a given <id>
- // or structure member are UserSemantic(5635), CacheControlLoadINTEL (6442),
- // and CacheControlStoreINTEL (6443). For all the rest of decorations, we
- // will only add to the signature the Opcode, the id to which it applies,
- // and the decoration id, disregarding any decoration flags. This will
- // ensure that any subsequent decoration with the same id will be deemed as
- // a duplicate. Then, at the call site, we will be able to handle duplicates
- // in the best way.
+ // or structure member are FuncParamAttr (38), UserSemantic (5635),
+ // CacheControlLoadINTEL (6442), and CacheControlStoreINTEL (6443). For all
+ // other decorations we add to the signature only the opcode, the id the
+ // decoration applies to, and the decoration id, disregarding any decoration
+ // flags. This ensures that any later decoration with the same id is deemed
+ // a duplicate, which the call site can then handle appropriately.
unsigned Opcode = MI.getOpcode();
if ((Opcode == SPIRV::OpDecorate) && i >= 2) {
unsigned DecorationID = MI.getOperand(1).getImm();
- if (DecorationID != SPIRV::Decoration::UserSemantic &&
+ if (DecorationID != SPIRV::Decoration::FuncParamAttr &&
+ DecorationID != SPIRV::Decoration::UserSemantic &&
DecorationID != SPIRV::Decoration::CacheControlLoadINTEL &&
DecorationID != SPIRV::Decoration::CacheControlStoreINTEL)
continue;
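
The net effect of adding FuncParamAttr to the allow-list: for an OpDecorate, operands beyond the decoration id are folded out of the signature unless the decoration may legally repeat on a single <id>, so repeated FuncParamAttr applications with different attributes no longer collide as false duplicates. A standalone sketch of just that decision (decoration values taken from the comment above; the opcode flag is a stand-in for comparing against the generated SPIRV::OpDecorate enum):

  // Returns true if operand i of the instruction should contribute to the
  // dedup signature. Sketch only; the real loop lives in instrToSignature.
  bool keepOperand(bool IsOpDecorate, unsigned DecorationID, unsigned i) {
    const bool MayRepeat = DecorationID == 38 ||   // FuncParamAttr
                           DecorationID == 5635 || // UserSemantic
                           DecorationID == 6442 || // CacheControlLoadINTEL
                           DecorationID == 6443;   // CacheControlStoreINTEL
    // For non-repeatable decorations, drop everything past the decoration
    // id (operand indices >= 2) so later duplicates hash identically.
    if (IsOpDecorate && i >= 2 && !MayRepeat)
      return false;
    return true;
  }
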
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index f66eb9d..fcd6cd7 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -38,8 +38,8 @@ static cl::opt<unsigned>
void SparcInstrInfo::anchor() {}
SparcInstrInfo::SparcInstrInfo(const SparcSubtarget &ST)
- : SparcGenInstrInfo(ST, SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(ST),
- Subtarget(ST) {}
+ : SparcGenInstrInfo(ST, RI, SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
+ RI(ST), Subtarget(ST) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2e21f27..23e7e7e 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -60,7 +60,7 @@ static uint64_t allOnes(unsigned int Count) {
void SystemZInstrInfo::anchor() {}
SystemZInstrInfo::SystemZInstrInfo(const SystemZSubtarget &sti)
- : SystemZGenInstrInfo(sti, -1, -1),
+ : SystemZGenInstrInfo(sti, RI, -1, -1),
RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister(),
sti.getHwMode()),
STI(sti) {}
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index d5e804a..bae703b 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
void VEInstrInfo::anchor() {}
VEInstrInfo::VEInstrInfo(const VESubtarget &ST)
- : VEGenInstrInfo(ST, VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI() {}
+ : VEGenInstrInfo(ST, RI, VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI() {}
static bool IsIntegerCC(unsigned CC) { return (CC < VECC::CC_AF); }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 343d90e..8b4e4fb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
#include "WebAssemblyGenInstrInfo.inc"
WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
- : WebAssemblyGenInstrInfo(STI, WebAssembly::ADJCALLSTACKDOWN,
+ : WebAssemblyGenInstrInfo(STI, RI, WebAssembly::ADJCALLSTACKDOWN,
WebAssembly::ADJCALLSTACKUP,
WebAssembly::CATCHRET),
RI(STI.getTargetTriple()) {}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 05a854a..5bce539 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -635,6 +635,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FROUNDEVEN, VT, Action);
setOperationAction(ISD::FTRUNC, VT, Action);
setOperationAction(ISD::FLDEXP, VT, Action);
+ setOperationAction(ISD::FSINCOSPI, VT, Action);
};
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
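
The new line gives ISD::FSINCOSPI the same legalization Action as the neighboring FP opcodes. For intuition, a sincospi-style operation produces sin and cos of pi times the input together; a scalar reference model of the semantics (not the lowering, just the math):

  // Reference semantics: both results of (sin(pi*x), cos(pi*x)) from one
  // call, which is what the combined node exists to express.
  #include <cmath>
  #include <utility>

  std::pair<double, double> sincospi(double X) {
    const double Pi = 3.14159265358979323846;
    return {std::sin(Pi * X), std::cos(Pi * X)};
  }
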
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 6b2a7a4..2c6d1af 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -85,7 +85,7 @@ static cl::opt<unsigned> UndefRegClearance(
void X86InstrInfo::anchor() {}
X86InstrInfo::X86InstrInfo(const X86Subtarget &STI)
- : X86GenInstrInfo(STI,
+ : X86GenInstrInfo(STI, RI,
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
: X86::ADJCALLSTACKDOWN32),
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 868f413..4f5aadc 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -419,6 +419,8 @@ public:
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
+ bool enableTerminalRule() const override { return true; }
+
bool enableEarlyIfConversion() const override;
void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index 1a9133a..80fda34 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -43,7 +43,7 @@ namespace XCore {
void XCoreInstrInfo::anchor() {}
XCoreInstrInfo::XCoreInstrInfo(const XCoreSubtarget &ST)
- : XCoreGenInstrInfo(ST, XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
+ : XCoreGenInstrInfo(ST, RI, XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
RI() {}
static bool isZeroImm(const MachineOperand &op) {
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
index be69cef..6bbebde 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
@@ -48,7 +48,8 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {
}
XtensaInstrInfo::XtensaInstrInfo(const XtensaSubtarget &STI)
- : XtensaGenInstrInfo(STI, Xtensa::ADJCALLSTACKDOWN, Xtensa::ADJCALLSTACKUP),
+ : XtensaGenInstrInfo(STI, RI, Xtensa::ADJCALLSTACKDOWN,
+ Xtensa::ADJCALLSTACKUP),
RI(STI), STI(STI) {}
Register XtensaInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,