Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp | 14
-rw-r--r-- llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp | 150
-rw-r--r-- llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h | 6
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 175
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 16
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.h | 6
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp | 90
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 28
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 90
-rw-r--r-- llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 2
-rw-r--r-- llvm/lib/Target/BPF/BPFAsmPrinter.cpp | 24
-rw-r--r-- llvm/lib/Target/BPF/BPFAsmPrinter.h | 1
-rw-r--r-- llvm/lib/Target/CSKY/CSKYISelLowering.cpp | 74
-rw-r--r-- llvm/lib/Target/DirectX/DXContainerGlobals.cpp | 7
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepIICHVX.td | 132
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td | 391
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td | 116
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 2
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 12
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 10
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 9
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp | 2
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 2
-rw-r--r-- llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp | 6
-rw-r--r-- llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp | 2
-rw-r--r-- llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 2
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h | 2
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 22
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 13
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 2
-rw-r--r-- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2
-rw-r--r-- llvm/lib/Target/PowerPC/PPCInstrFuture.td | 8
-rw-r--r-- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 2
-rw-r--r-- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 2
-rw-r--r-- llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 125
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 2
-rw-r--r-- llvm/lib/Target/RISCV/RISCVFeatures.td | 5
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 16
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 165
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp | 2
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 1
-rw-r--r-- llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp | 2
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 30
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 2
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 2
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h | 2
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h | 4
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h | 3
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h | 2
-rw-r--r-- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 74
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 65
-rw-r--r-- llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp | 19
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 2
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.h | 2
67 files changed, 1472 insertions, 508 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 1169f26..97298f9 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -655,16 +655,10 @@ Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
BasicBlock *BB = BasicBlock::Create(M->getContext(), "", GuestExit);
IRBuilder<> B(BB);
- // Load the global symbol as a pointer to the check function.
- Value *GuardFn;
- if (cfguard_module_flag == 2 && !F->hasFnAttribute("guard_nocf"))
- GuardFn = GuardFnCFGlobal;
- else
- GuardFn = GuardFnGlobal;
- LoadInst *GuardCheckLoad = B.CreateLoad(PtrTy, GuardFn);
-
- // Create new call instruction. The CFGuard check should always be a call,
- // even if the original CallBase is an Invoke or CallBr instruction.
+ // Create new call instruction. The call check should always be a call,
+ // even if the original CallBase is an Invoke or CallBr instruction.
+ // This is treated as a direct call, so do not use GuardFnCFGlobal.
+ LoadInst *GuardCheckLoad = B.CreateLoad(PtrTy, GuardFnGlobal);
Function *Thunk = buildExitThunk(F->getFunctionType(), F->getAttributes());
CallInst *GuardCheck = B.CreateCall(
GuardFnType, GuardCheckLoad, {F, Thunk});
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 7e03b97..45b7120 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -370,6 +370,22 @@ SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {
{ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
}
+SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations(
+ SVEFrameSizes const &SVE) {
+ StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
+ StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
+ StackOffset AfterPPRs = {};
+ if (SVELayout == SVEStackLayout::Split) {
+ BeforePPRs = SVE.PPR.CalleeSavesSize;
+ // If there are no ZPR CSRs, place all local allocations after the ZPRs.
+ if (SVE.ZPR.CalleeSavesSize)
+ AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
+ else
+ AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
+ }
+ return {BeforePPRs, AfterPPRs, AfterZPRs};
+}
+
struct SVEPartitions {
struct {
MachineBasicBlock::iterator Begin, End;
@@ -687,16 +703,19 @@ void AArch64PrologueEmitter::emitPrologue() {
// All of the remaining stack allocations are for locals.
determineLocalsStackSize(NumBytes, PrologueSaveSize);
+ auto [PPR, ZPR] = getSVEStackFrameSizes();
+ SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
+
MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
+ assert(!SVEAllocs.AfterPPRs &&
+ "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
// If we're doing SVE saves first, we need to immediately allocate space
// for fixed objects, then space for the SVE callee saves.
//
// Windows unwind requires that the scalable size is a multiple of 16;
// that's handled when the callee-saved size is computed.
- auto SaveSize =
- StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
- StackOffset::getFixed(FixedObject);
+ auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
/*FollowupAllocs=*/true);
NumBytes -= FixedObject;
@@ -764,12 +783,11 @@ void AArch64PrologueEmitter::emitPrologue() {
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
- auto [PPR, ZPR] = getSVEStackFrameSizes();
- StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
+ SVEAllocs.AfterZPRs += NonSVELocalsSize;
+
StackOffset CFAOffset =
StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
-
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
// Allocate space for the callee saves and PPR locals (if any).
if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
@@ -780,31 +798,23 @@ void AArch64PrologueEmitter::emitPrologue() {
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(AfterSVESavesI);
- StackOffset AllocateBeforePPRs = SVECalleeSavesSize;
- StackOffset AllocateAfterPPRs = PPR.LocalsSize;
- if (SVELayout == SVEStackLayout::Split) {
- AllocateBeforePPRs = PPR.CalleeSavesSize;
- AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize;
- }
- allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs,
+ allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || AllocateAfterPPRs ||
- ZPR.LocalsSize || NonSVELocalsSize);
- CFAOffset += AllocateBeforePPRs;
+ MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
+ SVEAllocs.AfterZPRs);
+ CFAOffset += SVEAllocs.BeforePPRs;
assert(PPRRange.End == ZPRRange.Begin &&
"Expected ZPR callee saves after PPR locals");
- allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs,
+ allocateStackSpace(PPRRange.End, RealignmentPadding, SVEAllocs.AfterPPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || ZPR.LocalsSize ||
- NonSVELocalsSize);
- CFAOffset += AllocateAfterPPRs;
+ MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
+ CFAOffset += SVEAllocs.AfterPPRs;
} else {
assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord);
- // Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been
- // allocated (and separate PPR locals are not supported, all SVE locals,
- // both PPR and ZPR, are within the ZPR locals area).
- assert(!PPR.LocalsSize && "Unexpected PPR locals!");
- CFAOffset += SVECalleeSavesSize;
+ // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
+ // already been allocated. PPR locals (included in AfterPPRs) are not
+ // supported (note: this is asserted above).
+ CFAOffset += SVEAllocs.BeforePPRs;
}
// Allocate space for the rest of the frame including ZPR locals. Align the
@@ -815,9 +825,9 @@ void AArch64PrologueEmitter::emitPrologue() {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
// correct value here, as NumBytes also includes padding bytes, which
// shouldn't be counted here.
- allocateStackSpace(
- AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize,
- EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects());
+ allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects());
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -1472,27 +1482,26 @@ void AArch64EpilogueEmitter::emitEpilogue() {
assert(NumBytes >= 0 && "Negative stack allocation size!?");
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
- StackOffset SVEStackSize =
- SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
+ SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin;
- MachineBasicBlock::iterator RestoreEnd = PPRRange.End;
// Deallocate the SVE area.
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
- StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize;
+ assert(!SVEAllocs.AfterPPRs &&
+ "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
// If the callee-save area is before FP, restoring the FP implicitly
- // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
+ // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
// explicitly.
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
- SVELocalsSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI);
}
// Deallocate callee-save SVE registers.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI);
} else if (AFI->hasSVEStackSize()) {
// If we have stack realignment or variable-sized objects we must use the FP
// to restore SVE callee saves (as there is an unknown amount of
@@ -1524,46 +1533,33 @@ void AArch64EpilogueEmitter::emitEpilogue() {
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
-SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);
} else if (BaseForSVEDealloc == AArch64::SP) {
- auto CFAOffset =
- SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
-
- if (SVECalleeSavesSize) {
- // Deallocate the non-SVE locals first before we can deallocate (and
- // restore callee saves) from the SVE area.
- auto NonSVELocals = StackOffset::getFixed(NumBytes);
- emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
- NonSVELocals, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
- CFAOffset -= NonSVELocals;
- NumBytes = 0;
- }
-
- if (ZPR.LocalsSize) {
- emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
- ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
- CFAOffset -= ZPR.LocalsSize;
+ auto NonSVELocals = StackOffset::getFixed(NumBytes);
+ auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +
+ SVEAllocs.totalSize();
+
+ if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
+ // Deallocate non-SVE locals now. This is needed to reach the SVE callee
+ // saves, but may also allow combining stack hazard bumps for split SVE.
+ SVEAllocs.AfterZPRs += NonSVELocals;
+ NumBytes -= NonSVELocals.getFixed();
}
-
- StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize;
- if (SVELayout == SVEStackLayout::Split &&
- (PPR.LocalsSize || ZPR.CalleeSavesSize)) {
- assert(PPRRange.Begin == ZPRRange.End &&
- "Expected PPR restores after ZPR");
- emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
- PPR.LocalsSize + ZPR.CalleeSavesSize, TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI,
- &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
- CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize;
- SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize;
- }
-
- // If split SVE is on, this dealloc PPRs, otherwise, deallocs ZPRs + PPRs:
- if (SVECalleeSavesToDealloc)
- emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
- SVECalleeSavesToDealloc, TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI,
- &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
+ // To deallocate the SVE stack, adjust by the allocations in reverse.
+ emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ CFAOffset);
+ CFAOffset -= SVEAllocs.AfterZPRs;
+ assert(PPRRange.Begin == ZPRRange.End &&
+ "Expected PPR restores after ZPR");
+ emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.AfterPPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ CFAOffset);
+ CFAOffset -= SVEAllocs.AfterPPRs;
+ emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ CFAOffset);
}
if (EmitCFI)
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
index bccadda..6e0e283 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
@@ -33,6 +33,11 @@ struct SVEFrameSizes {
} PPR, ZPR;
};
+struct SVEStackAllocations {
+ StackOffset BeforePPRs, AfterPPRs, AfterZPRs;
+ StackOffset totalSize() const { return BeforePPRs + AfterPPRs + AfterZPRs; }
+};
+
class AArch64PrologueEpilogueCommon {
public:
AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -66,6 +71,7 @@ protected:
bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;
SVEFrameSizes getSVEStackFrameSizes() const;
+ SVEStackAllocations getSVEStackAllocations(SVEFrameSizes const &);
MachineFunction &MF;
MachineBasicBlock &MBB;
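
As an aside, a minimal standalone sketch (not part of the patch) of how the three SVEStackAllocations buckets are computed and composed, using plain byte counts in place of StackOffset and a boolean in place of SVEStackLayout::Split; the prologue allocates BeforePPRs, then AfterPPRs, then AfterZPRs, and the epilogue walks the same buckets in reverse:

    #include <cassert>

    // Simplified model: sizes are plain byte counts; the fixed/scalable split
    // tracked by StackOffset is ignored.
    struct SVEFrameSizesModel {
      struct { int CalleeSavesSize, LocalsSize; } PPR, ZPR;
    };

    struct SVEStackAllocationsModel {
      int BeforePPRs, AfterPPRs, AfterZPRs;
      int totalSize() const { return BeforePPRs + AfterPPRs + AfterZPRs; }
    };

    SVEStackAllocationsModel getAllocations(const SVEFrameSizesModel &SVE,
                                            bool SplitSVE) {
      int AfterZPRs = SVE.ZPR.LocalsSize;
      int BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
      int AfterPPRs = 0;
      if (SplitSVE) {
        BeforePPRs = SVE.PPR.CalleeSavesSize;
        if (SVE.ZPR.CalleeSavesSize)
          AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
        else
          AfterZPRs += SVE.PPR.LocalsSize; // No ZPR CSRs: group the locals.
      }
      return {BeforePPRs, AfterPPRs, AfterZPRs};
    }

    int main() {
      SVEFrameSizesModel SVE = {{16, 32}, {64, 128}}; // {PPR, ZPR} sizes.
      SVEStackAllocationsModel A = getAllocations(SVE, /*SplitSVE=*/true);
      // 16 (PPR CS) before PPRs, 32 + 64 after PPRs, 128 after ZPRs.
      assert(A.BeforePPRs == 16 && A.AfterPPRs == 96 && A.AfterZPRs == 128);
      assert(A.totalSize() == 16 + 32 + 64 + 128);
      return 0;
    }
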
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 47c1ac4..10f2c80 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -308,9 +308,9 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
}
-bool AArch64TTIImpl::areTypesABICompatible(
- const Function *Caller, const Function *Callee,
- const ArrayRef<Type *> &Types) const {
+bool AArch64TTIImpl::areTypesABICompatible(const Function *Caller,
+ const Function *Callee,
+ ArrayRef<Type *> Types) const {
if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
return false;
@@ -2227,7 +2227,7 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
return std::nullopt;
}
-template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
+template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,
bool MergeIntoAddendOp) {
@@ -3007,9 +3007,9 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
llvm_unreachable("Unsupported register kind");
}
-bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
- ArrayRef<const Value *> Args,
- Type *SrcOverrideTy) const {
+bool AArch64TTIImpl::isSingleExtWideningInstruction(
+ unsigned Opcode, Type *DstTy, ArrayRef<const Value *> Args,
+ Type *SrcOverrideTy) const {
// A helper that returns a vector type from the given type. The number of
// elements in type Ty determines the vector width.
auto toVectorTy = [&](Type *ArgTy) {
@@ -3027,48 +3027,29 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
(DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
return false;
- // Determine if the operation has a widening variant. We consider both the
- // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
- // instructions.
- //
- // TODO: Add additional widening operations (e.g., shl, etc.) once we
- // verify that their extending operands are eliminated during code
- // generation.
Type *SrcTy = SrcOverrideTy;
switch (Opcode) {
- case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
- case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
+ case Instruction::Add: // UADDW(2), SADDW(2).
+ case Instruction::Sub: { // USUBW(2), SSUBW(2).
// The second operand needs to be an extend
if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {
if (!SrcTy)
SrcTy =
toVectorTy(cast<Instruction>(Args[1])->getOperand(0)->getType());
- } else
+ break;
+ }
+
+ if (Opcode == Instruction::Sub)
return false;
- break;
- case Instruction::Mul: { // SMULL(2), UMULL(2)
- // Both operands need to be extends of the same type.
- if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
- (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
+
+ // UADDW(2), SADDW(2) can be commuted.
+ if (isa<SExtInst>(Args[0]) || isa<ZExtInst>(Args[0])) {
if (!SrcTy)
SrcTy =
toVectorTy(cast<Instruction>(Args[0])->getOperand(0)->getType());
- } else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {
- // If one of the operands is a Zext and the other has enough zero bits to
- // be treated as unsigned, we can still general a umull, meaning the zext
- // is free.
- KnownBits Known =
- computeKnownBits(isa<ZExtInst>(Args[0]) ? Args[1] : Args[0], DL);
- if (Args[0]->getType()->getScalarSizeInBits() -
- Known.Zero.countLeadingOnes() >
- DstTy->getScalarSizeInBits() / 2)
- return false;
- if (!SrcTy)
- SrcTy = toVectorTy(Type::getIntNTy(DstTy->getContext(),
- DstTy->getScalarSizeInBits() / 2));
- } else
- return false;
- break;
+ break;
+ }
+ return false;
}
default:
return false;
@@ -3099,6 +3080,73 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
}
+Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
+ ArrayRef<const Value *> Args,
+ Type *SrcOverrideTy) const {
+ if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
+ Opcode != Instruction::Mul)
+ return nullptr;
+
+ // Exit early if DstTy is not a vector type whose elements are one of [i16,
+ // i32, i64]. SVE doesn't generally have the same set of instructions to
+ // perform an extend with the add/sub/mul. There are SMULLB style
+ // instructions, but they operate on top/bottom, requiring some sort of lane
+ // interleaving to be used with zext/sext.
+ unsigned DstEltSize = DstTy->getScalarSizeInBits();
+ if (!useNeonVector(DstTy) || Args.size() != 2 ||
+ (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
+ return nullptr;
+
+ auto getScalarSizeWithOverride = [&](const Value *V) {
+ if (SrcOverrideTy)
+ return SrcOverrideTy->getScalarSizeInBits();
+ return cast<Instruction>(V)
+ ->getOperand(0)
+ ->getType()
+ ->getScalarSizeInBits();
+ };
+
+ unsigned MaxEltSize = 0;
+ if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
+ (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
+ unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
+ unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
+ MaxEltSize = std::max(EltSize0, EltSize1);
+ } else if (isa<SExtInst, ZExtInst>(Args[0]) &&
+ isa<SExtInst, ZExtInst>(Args[1])) {
+ unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
+ unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
+ // mul(sext, zext) will become smull(sext, zext) if the extends are large
+ // enough.
+ if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
+ return nullptr;
+ MaxEltSize = DstEltSize / 2;
+ } else if (Opcode == Instruction::Mul &&
+ (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1]))) {
+ // If one of the operands is a Zext and the other has enough zero bits
+ // to be treated as unsigned, we can still generate a umull, meaning the
+ // zext is free.
+ KnownBits Known =
+ computeKnownBits(isa<ZExtInst>(Args[0]) ? Args[1] : Args[0], DL);
+ if (Args[0]->getType()->getScalarSizeInBits() -
+ Known.Zero.countLeadingOnes() >
+ DstTy->getScalarSizeInBits() / 2)
+ return nullptr;
+
+ MaxEltSize =
+ getScalarSizeWithOverride(isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
+ } else
+ return nullptr;
+
+ if (MaxEltSize * 2 > DstEltSize)
+ return nullptr;
+
+ Type *ExtTy = DstTy->getWithNewBitWidth(MaxEltSize * 2);
+ if (ExtTy->getPrimitiveSizeInBits() <= 64)
+ return nullptr;
+ return ExtTy;
+}
+
// s/urhadd instructions implement the following pattern, making the
// extends free:
// %x = add ((zext i8 -> i16), 1)
@@ -3159,7 +3207,24 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
if (I && I->hasOneUser()) {
auto *SingleUser = cast<Instruction>(*I->user_begin());
SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
- if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
+ if (Type *ExtTy = isBinExtWideningInstruction(
+ SingleUser->getOpcode(), Dst, Operands,
+ Src != I->getOperand(0)->getType() ? Src : nullptr)) {
+ // The cost from Src->Src*2 needs to be added if required, the cost from
+ // Src*2->ExtTy is free.
+ if (ExtTy->getScalarSizeInBits() > Src->getScalarSizeInBits() * 2) {
+ Type *DoubleSrcTy =
+ Src->getWithNewBitWidth(Src->getScalarSizeInBits() * 2);
+ return getCastInstrCost(Opcode, DoubleSrcTy, Src,
+ TTI::CastContextHint::None, CostKind);
+ }
+
+ return 0;
+ }
+
+ if (isSingleExtWideningInstruction(
+ SingleUser->getOpcode(), Dst, Operands,
+ Src != I->getOperand(0)->getType() ? Src : nullptr)) {
// For adds only count the second operand as free if both operands are
// extends but not the same operation. (i.e both operands are not free in
// add(sext, zext)).
@@ -3168,8 +3233,11 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
(isa<CastInst>(SingleUser->getOperand(1)) &&
cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
return 0;
- } else // Others are free so long as isWideningInstruction returned true.
+ } else {
+ // Others are free so long as isSingleExtWideningInstruction
+ // returned true.
return 0;
+ }
}
// The cast will be free for the s/urhadd instructions
@@ -4148,6 +4216,18 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
}))
return *PromotedCost;
+ // If the operation is a widening instruction (smull or umull) and both
+ // operands are extends, the cost can be cheaper by considering that the
+ // operation will operate on the narrowest type size possible (double the
+ // largest input size) and a further extend.
+ if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
+ if (ExtTy != Ty)
+ return getArithmeticInstrCost(Opcode, ExtTy, CostKind) +
+ getCastInstrCost(Instruction::ZExt, Ty, ExtTy,
+ TTI::CastContextHint::None, CostKind);
+ return LT.first;
+ }
+
switch (ISD) {
default:
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
@@ -4381,10 +4461,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
// - two 2-cost i64 inserts, and
// - two 1-cost muls.
// So, for a v2i64 with LT.First = 1 the cost is 14, and for a v4i64 with
- // LT.first = 2 the cost is 28. If both operands are extensions it will not
- // need to scalarize so the cost can be cheaper (smull or umull).
- // so the cost can be cheaper (smull or umull).
- if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
+ // LT.first = 2 the cost is 28.
+ if (LT.second != MVT::v2i64)
return LT.first;
return cast<VectorType>(Ty)->getElementCount().getKnownMinValue() *
(getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind) +
@@ -6657,10 +6735,15 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
Ops.push_back(&Ext->getOperandUse(0));
Ops.push_back(&Op);
- if (isa<SExtInst>(Ext))
+ if (isa<SExtInst>(Ext)) {
NumSExts++;
- else
+ } else {
NumZExts++;
+ // A zext(a) is also a sext(zext(a)), if we take more than 2 steps.
+ if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
+ I->getType()->getScalarSizeInBits())
+ NumSExts++;
+ }
continue;
}
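
A worked example of the new cost reasoning (a sketch; only element widths are modelled, vector-length and legalization details are left out): for a mul of two sign-extended v4i16 operands producing v4i64, the widest pre-extend element is 16 bits, so the operation only needs a 32-bit element type and is costed as a v4i32 multiply plus a single zext to v4i64, rather than scalarizing the v4i64 multiply.

    #include <algorithm>
    #include <cassert>

    // Simplified model of isBinExtWideningInstruction for the case where both
    // operands are extends of the same signedness: returns the element width
    // the widened operation really needs, or 0 if the extends cannot be folded.
    unsigned widenedEltSize(unsigned DstEltSize, unsigned SrcEltSize0,
                            unsigned SrcEltSize1) {
      unsigned MaxEltSize = std::max(SrcEltSize0, SrcEltSize1);
      if (MaxEltSize * 2 > DstEltSize)
        return 0; // Too wide for a single smull/umull-style step.
      return MaxEltSize * 2;
    }

    int main() {
      // mul <4 x i64>(sext <4 x i16>, sext <4 x i16>): compute in <4 x i32>,
      // then widen the result once, instead of scalarizing the v4i64 multiply.
      assert(widenedEltSize(/*Dst=*/64, /*Src0=*/16, /*Src1=*/16) == 32);
      // mul <4 x i64>(sext <4 x i32>, sext <4 x i32>) already needs the full
      // destination width, so the cost stays at the legalized v4i64 cost.
      assert(widenedEltSize(64, 32, 32) == 64);
      return 0;
    }
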
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index fe2e849..e3b0a1b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -59,9 +59,17 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
VECTOR_LDST_FOUR_ELEMENTS
};
- bool isWideningInstruction(Type *DstTy, unsigned Opcode,
- ArrayRef<const Value *> Args,
- Type *SrcOverrideTy = nullptr) const;
+ /// Given an add/sub/mul operation, detect a widening addl/subl/mull pattern
+ /// where both operands can be treated like extends. Returns the minimal type
+ /// needed to compute the operation.
+ Type *isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
+ ArrayRef<const Value *> Args,
+ Type *SrcOverrideTy = nullptr) const;
+ /// Given an add/sub operation with a single extend operand, detect a
+ /// widening addw/subw pattern.
+ bool isSingleExtWideningInstruction(unsigned Opcode, Type *DstTy,
+ ArrayRef<const Value *> Args,
+ Type *SrcOverrideTy = nullptr) const;
// A helper function called by 'getVectorInstrCost'.
//
@@ -84,7 +92,7 @@ public:
const Function *Callee) const override;
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
- const ArrayRef<Type *> &Types) const override;
+ ArrayRef<Type *> Types) const override;
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
unsigned DefaultCallPenalty) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index cd8b249..67042b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -69,7 +69,7 @@ FunctionPass *createAMDGPUPreloadKernArgPrologLegacyPass();
ModulePass *createAMDGPUPreloadKernelArgumentsLegacyPass(const TargetMachine *);
struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
- AMDGPUSimplifyLibCallsPass() {}
+ AMDGPUSimplifyLibCallsPass() = default;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
@@ -371,13 +371,13 @@ public:
class AMDGPUAnnotateUniformValuesPass
: public PassInfoMixin<AMDGPUAnnotateUniformValuesPass> {
public:
- AMDGPUAnnotateUniformValuesPass() {}
+ AMDGPUAnnotateUniformValuesPass() = default;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
public:
- SIModeRegisterPass() {}
+ SIModeRegisterPass() = default;
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 1064e57..dad94b8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -96,7 +96,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {
}
struct KernArgPreloadDescriptor : public ArgDescriptor {
- KernArgPreloadDescriptor() {}
+ KernArgPreloadDescriptor() = default;
SmallVector<MCRegister> Regs;
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 9907c88f..8669978 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1555,7 +1555,7 @@ private:
AMDGPU::ClusterDimsAttr Attr;
- static constexpr const char AttrName[] = "amdgpu-cluster-dims";
+ static constexpr char AttrName[] = "amdgpu-cluster-dims";
};
AAAMDGPUClusterDims &
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index cf2ab825..a3be0f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -48,7 +48,7 @@ private:
FuncInfoMap FIM;
public:
- AMDGPUPerfHintAnalysis() {}
+ AMDGPUPerfHintAnalysis() = default;
// OldPM
bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 103cdec..1e5885a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -202,7 +202,7 @@ bool PredicateMapping::match(const MachineInstr &MI,
return true;
}
-SetOfRulesForOpcode::SetOfRulesForOpcode() {}
+SetOfRulesForOpcode::SetOfRulesForOpcode() = default;
SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
: FastTypes(FastTypes) {}
@@ -913,6 +913,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
+ addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
+
addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
.Uni(S64, {{Sgpr64}, {}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b28c50e..b87b54f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -816,7 +816,7 @@ parseAMDGPUAtomicOptimizerStrategy(StringRef Params) {
Params.consume_front("strategy=");
auto Result = StringSwitch<std::optional<ScanOptions>>(Params)
.Case("dpp", ScanOptions::DPP)
- .Cases("iterative", "", ScanOptions::Iterative)
+ .Cases({"iterative", ""}, ScanOptions::Iterative)
.Case("none", ScanOptions::None)
.Default(std::nullopt);
if (Result)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 733c5d5..fe81a5e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
return NewRetBlock;
}
+static BasicBlock *
+createDummyReturnBlock(Function &F,
+ SmallVector<BasicBlock *, 4> &ReturningBlocks) {
+ BasicBlock *DummyReturnBB =
+ BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
+ Type *RetTy = F.getReturnType();
+ Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
+ ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
+ ReturningBlocks.push_back(DummyReturnBB);
+ return DummyReturnBB;
+}
+
+/// Handle conditional branch instructions (-> 2 targets) and callbr
+/// instructions with N targets.
+static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
+ BasicBlock *DummyReturnBB,
+ std::vector<DominatorTree::UpdateType> &Updates) {
+ SmallVector<BasicBlock *, 2> Successors(successors(BB));
+
+ // Create a new transition block to hold the conditional branch.
+ BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
+
+ Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
+
+ // 'Successors' become successors of TransitionBB instead of BB,
+ // and TransitionBB becomes a single successor of BB.
+ Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
+ for (BasicBlock *Successor : Successors) {
+ Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
+ Updates.emplace_back(DominatorTree::Delete, BB, Successor);
+ }
+
+ // Create a branch that will always branch to the transition block and
+ // references DummyReturnBB.
+ BB->getTerminator()->eraseFromParent();
+ BranchInst::Create(TransitionBB, DummyReturnBB,
+ ConstantInt::getTrue(F.getContext()), BB);
+ Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
+}
+
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
const PostDominatorTree &PDT,
const UniformityInfo &UA) {
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
-
if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
- !isa<BranchInst>(PDT.getRoot()->getTerminator())))
+ !isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
return false;
// Loop over all of the blocks in a function, tracking all of the blocks that
@@ -222,46 +260,28 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
if (HasDivergentExitBlock)
UnreachableBlocks.push_back(BB);
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
-
- ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
- if (DummyReturnBB == nullptr) {
- DummyReturnBB = BasicBlock::Create(F.getContext(),
- "DummyReturnBlock", &F);
- Type *RetTy = F.getReturnType();
- Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
- ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
- ReturningBlocks.push_back(DummyReturnBB);
- }
+ if (!DummyReturnBB)
+ DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
if (BI->isUnconditional()) {
BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
BI->eraseFromParent(); // Delete the unconditional branch.
// Add a new conditional branch with a dummy edge to the return block.
- BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
- Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
- } else { // Conditional branch.
- SmallVector<BasicBlock *, 2> Successors(successors(BB));
-
- // Create a new transition block to hold the conditional branch.
- BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
-
- Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
-
- // 'Successors' become successors of TransitionBB instead of BB,
- // and TransitionBB becomes a single successor of BB.
- Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
- for (BasicBlock *Successor : Successors) {
- Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
- Updates.emplace_back(DominatorTree::Delete, BB, Successor);
- }
-
- // Create a branch that will always branch to the transition block and
- // references DummyReturnBB.
- BB->getTerminator()->eraseFromParent();
- BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
+ BranchInst::Create(LoopHeaderBB, DummyReturnBB,
+ ConstantInt::getTrue(F.getContext()), BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
+ } else {
+ handleNBranch(F, BB, BI, DummyReturnBB, Updates);
}
Changed = true;
+ } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
+ if (!DummyReturnBB)
+ DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
+
+ handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
+ Changed = true;
+ } else {
+ llvm_unreachable("unsupported block terminator");
}
}
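
For context, a small standalone sketch (simplified; block names stand in for BasicBlock pointers) of the dominator-tree bookkeeping that handleNBranch performs when BB is split: one inserted edge to the new TransitionBlock, one inserted/deleted pair per original successor, and one inserted dummy edge to DummyReturnBlock, which is why the update list reserves 2 * Successors.size() + 2 entries.

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    enum class Kind { Insert, Delete };
    using Edge = std::pair<std::string, std::string>;
    using Update = std::pair<Kind, Edge>;

    // Build the update records for splitting BB in front of its N-way branch.
    std::vector<Update> buildUpdates(const std::vector<std::string> &Successors) {
      std::vector<Update> Updates;
      Updates.reserve(2 * Successors.size() + 2);
      Updates.push_back({Kind::Insert, {"BB", "TransitionBlock"}});
      for (const std::string &S : Successors) {
        Updates.push_back({Kind::Insert, {"TransitionBlock", S}});
        Updates.push_back({Kind::Delete, {"BB", S}});
      }
      Updates.push_back({Kind::Insert, {"BB", "DummyReturnBlock"}});
      return Updates;
    }

    int main() {
      // A conditional branch (2 targets) and a callbr with 3 targets.
      assert(buildUpdates({"S1", "S2"}).size() == 6);
      assert(buildUpdates({"D1", "D2", "D3"}).size() == 8);
      return 0;
    }
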
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 61c5dcd..ded2f5a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -54,7 +54,7 @@ public:
bool CullSGPRHazardsAtMemWait;
unsigned CullSGPRHazardsMemWaitThreshold;
- AMDGPUWaitSGPRHazards() {}
+ AMDGPUWaitSGPRHazards() = default;
// Return the numeric ID 0-127 for a given SGPR.
static std::optional<unsigned> sgprNumber(Register Reg,
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 975781f..f357981 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -183,7 +183,7 @@ class ScheduleMetrics {
unsigned BubbleCycles;
public:
- ScheduleMetrics() {}
+ ScheduleMetrics() = default;
ScheduleMetrics(unsigned L, unsigned BC)
: ScheduleLength(L), BubbleCycles(BC) {}
unsigned getLength() const { return ScheduleLength; }
@@ -217,7 +217,7 @@ class RegionPressureMap {
bool IsLiveOut;
public:
- RegionPressureMap() {}
+ RegionPressureMap() = default;
RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
: DAG(GCNDAG), IsLiveOut(LiveOut) {}
// Build the Instr->LiveReg and RegionIdx->Instr maps
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index d950131..65dce74 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -2116,8 +2116,10 @@ class VIMAGE_TENSOR_Real <bits<8> op, VIMAGE_TENSOR_Pseudo ps, string opName = p
let vaddr2 = !if(ps.UpTo2D, !cast<int>(SGPR_NULL_gfx11plus.HWEncoding), ?);
let vaddr3 = !if(ps.UpTo2D, !cast<int>(SGPR_NULL_gfx11plus.HWEncoding), ?);
+ // Set VADDR4 to NULL
+ let vaddr4 = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+
// set to 0 based on SPG.
- let vaddr4 = 0;
let rsrc = 0;
let vdata = 0;
let d16 = 0;
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 5c39f7a..aa5ea77 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -2170,7 +2170,9 @@ bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
return MFI.getStackSize() != 0;
}
- return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
+ return (frameTriviallyRequiresSP(MFI) &&
+ !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) ||
+ MFI.isFrameAddressTaken() ||
MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
MF) ||
mayReserveScratchForCWSR(MF) ||
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 6dcbced..b7fa899 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1288,18 +1288,38 @@ void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
}
void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) {
+ // On entry to a block with multiple predecessors, there may
+ // be pending SMEM and VMEM events active at the same time.
+ // In such cases, only clear one active event at a time.
+ auto applyPendingXcntGroup = [this](unsigned E) {
+ unsigned LowerBound = getScoreLB(X_CNT);
+ applyWaitcnt(X_CNT, 0);
+ PendingEvents |= (1 << E);
+ setScoreLB(X_CNT, LowerBound);
+ };
+
// Wait on XCNT is redundant if we are already waiting for a load to complete.
// SMEM can return out of order, so only omit XCNT wait if we are waiting till
// zero.
- if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP))
- return applyWaitcnt(X_CNT, 0);
+ if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) {
+ if (hasPendingEvent(VMEM_GROUP))
+ applyPendingXcntGroup(VMEM_GROUP);
+ else
+ applyWaitcnt(X_CNT, 0);
+ return;
+ }
// If we have pending store we cannot optimize XCnt because we do not wait for
// stores. VMEM loads return in order, so if we only have loads XCnt is
// decremented to the same number as LOADCnt.
if (Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) &&
- !hasPendingEvent(STORE_CNT))
- return applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt));
+ !hasPendingEvent(STORE_CNT)) {
+ if (hasPendingEvent(SMEM_GROUP))
+ applyPendingXcntGroup(SMEM_GROUP);
+ else
+ applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt));
+ return;
+ }
applyWaitcnt(X_CNT, Wait.XCnt);
}
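
To make the new applyXcnt behaviour concrete, a simplified standalone model (a bitmask and a single lower-bound score stand in for WaitcntBrackets; the numbers are illustrative only): when both the SMEM and VMEM groups are pending, a wait only retires one group, and the surviving group keeps the lower bound it had before the wait.

    #include <cassert>

    enum Group : unsigned { SMEM_GROUP = 1u << 0, VMEM_GROUP = 1u << 1 };

    struct XcntModel {
      unsigned PendingEvents = 0;
      unsigned ScoreLB = 0;

      // Models applyWaitcnt(X_CNT, 0): everything retires and the lower bound
      // advances (an arbitrary value stands in for the current upper bound).
      void applyWaitZero() {
        PendingEvents = 0;
        ScoreLB = 100;
      }

      // Models applyPendingXcntGroup(E): retire all groups, then re-arm group
      // E and restore the previous lower bound so it keeps its original age.
      void applyPendingGroup(unsigned E) {
        unsigned LowerBound = ScoreLB;
        applyWaitZero();
        PendingEvents |= E;
        ScoreLB = LowerBound;
      }
    };

    int main() {
      XcntModel S;
      S.PendingEvents = SMEM_GROUP | VMEM_GROUP;
      S.ScoreLB = 7;
      // A kmcnt(0) wait retires the SMEM group, but VMEM must stay pending
      // with its original lower bound.
      S.applyPendingGroup(VMEM_GROUP);
      assert(S.PendingEvents == VMEM_GROUP && S.ScoreLB == 7);
      return 0;
    }
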
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d930a21..45f5919 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6153,7 +6153,7 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
// information.
if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&
MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) {
- constexpr const AMDGPU::OpName OpNames[] = {
+ constexpr AMDGPU::OpName OpNames[] = {
AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
for (auto [I, OpName] : enumerate(OpNames)) {
@@ -6215,8 +6215,8 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(
const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
const MachineOperand *MO) const {
- constexpr const unsigned NumOps = 3;
- constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
+ constexpr unsigned NumOps = 3;
+ constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
AMDGPU::OpName::src0, AMDGPU::OpName::src1,
AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
@@ -10618,6 +10618,42 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
return false;
}
+// SCC is already valid after SCCValid.
+// SCCRedefine will redefine SCC to the same value already available after
+// SCCValid. If there are no intervening SCC conflicts, delete SCCRedefine and
+// update kill/dead flags if necessary.
+static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
+ const SIRegisterInfo &RI) {
+ MachineInstr *KillsSCC = nullptr;
+ for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
+ SCCRedefine->getIterator())) {
+ if (MI.modifiesRegister(AMDGPU::SCC, &RI))
+ return false;
+ if (MI.killsRegister(AMDGPU::SCC, &RI))
+ KillsSCC = &MI;
+ }
+ if (MachineOperand *SccDef =
+ SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
+ SccDef->setIsDead(false);
+ if (KillsSCC)
+ KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
+ SCCRedefine->eraseFromParent();
+ return true;
+}
+
+static bool foldableSelect(const MachineInstr &Def) {
+ if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
+ Def.getOpcode() != AMDGPU::S_CSELECT_B64)
+ return false;
+ bool Op1IsNonZeroImm =
+ Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
+ bool Op2IsZeroImm =
+ Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
+ if (!Op1IsNonZeroImm || !Op2IsZeroImm)
+ return false;
+ return true;
+}
+
bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
Register SrcReg2, int64_t CmpMask,
int64_t CmpValue,
@@ -10637,19 +10673,6 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (!Def || Def->getParent() != CmpInstr.getParent())
return false;
- const auto foldableSelect = [](MachineInstr *Def) -> bool {
- if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
- Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
- bool Op1IsNonZeroImm =
- Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
- bool Op2IsZeroImm =
- Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
- if (Op1IsNonZeroImm && Op2IsZeroImm)
- return true;
- }
- return false;
- };
-
// For S_OP that set SCC = DST!=0, do the transformation
//
// s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
@@ -10660,24 +10683,12 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
//
// s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
// imm), 0)
- if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
+ if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(*Def))
return false;
- MachineInstr *KillsSCC = nullptr;
- for (MachineInstr &MI :
- make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
- if (MI.modifiesRegister(AMDGPU::SCC, &RI))
- return false;
- if (MI.killsRegister(AMDGPU::SCC, &RI))
- KillsSCC = &MI;
- }
+ if (!optimizeSCC(Def, &CmpInstr, RI))
+ return false;
- if (MachineOperand *SccDef =
- Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
- SccDef->setIsDead(false);
- if (KillsSCC)
- KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
- CmpInstr.eraseFromParent();
return true;
};
@@ -10755,21 +10766,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
return false;
- MachineInstr *KillsSCC = nullptr;
- for (MachineInstr &MI :
- make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
- if (MI.modifiesRegister(AMDGPU::SCC, &RI))
- return false;
- if (MI.killsRegister(AMDGPU::SCC, &RI))
- KillsSCC = &MI;
- }
-
- MachineOperand *SccDef =
- Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
- SccDef->setIsDead(false);
- if (KillsSCC)
- KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
- CmpInstr.eraseFromParent();
+ if (!optimizeSCC(Def, &CmpInstr, RI))
+ return false;
if (!MRI->use_nodbg_empty(DefReg)) {
assert(!IsReversedCC);
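
A minimal sketch (toy data structures, not MachineInstr) of the scan that the new optimizeSCC helper shares between the two call sites: walk the instructions strictly between the SCC-defining instruction and the redundant redefinition, give up on any intervening SCC def, and clear the kill flag on the last SCC reader once the redefinition is deleted.

    #include <cassert>
    #include <vector>

    struct InstrModel {
      bool ModifiesSCC = false;
      bool KillsSCC = false;
    };

    // Returns true if the redundant SCC redefinition can be removed; as a side
    // effect, clears the kill flag on the last intervening SCC reader so SCC
    // stays live up to the point where the redefinition used to be.
    bool canRemoveRedefine(std::vector<InstrModel> &Between) {
      InstrModel *LastKill = nullptr;
      for (InstrModel &MI : Between) {
        if (MI.ModifiesSCC)
          return false; // An intervening SCC def invalidates the folding.
        if (MI.KillsSCC)
          LastKill = &MI;
      }
      if (LastKill)
        LastKill->KillsSCC = false;
      return true;
    }

    int main() {
      std::vector<InstrModel> Between = {{false, true}, {false, false}};
      assert(canRemoveRedefine(Between) && !Between[0].KillsSCC);
      Between = {{true, false}};
      assert(!canRemoveRedefine(Between));
      return 0;
    }
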
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 7431e11..abefa32 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -296,7 +296,7 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
for (MachineBasicBlock::instr_iterator I = std::next(First.getIterator()),
E = MI.getIterator();
I != E; ++I) {
- if (I->isBundle())
+ if (I->isBundle() || I->isDebugInstr())
continue;
switch (I->getOpcode()) {
case AMDGPU::S_SET_GPR_IDX_MODE:
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index 77dc4a7..b2a8204 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -88,6 +88,16 @@ bool BPFAsmPrinter::doFinalization(Module &M) {
}
}
+ for (GlobalObject &GO : M.global_objects()) {
+ if (!GO.hasExternalWeakLinkage())
+ continue;
+
+ if (!SawTrapCall && GO.getName() == BPF_TRAP) {
+ GO.eraseFromParent();
+ break;
+ }
+ }
+
return AsmPrinter::doFinalization(M);
}
@@ -160,6 +170,20 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ if (MI->isCall()) {
+ for (const MachineOperand &Op : MI->operands()) {
+ if (Op.isGlobal()) {
+ if (const GlobalValue *GV = Op.getGlobal())
+ if (GV->getName() == BPF_TRAP)
+ SawTrapCall = true;
+ } else if (Op.isSymbol()) {
+ if (const MCSymbol *Sym = Op.getMCSymbol())
+ if (Sym->getName() == BPF_TRAP)
+ SawTrapCall = true;
+ }
+ }
+ }
+
BPF_MC::verifyInstructionPredicates(MI->getOpcode(),
getSubtargetInfo().getFeatureBits());
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.h b/llvm/lib/Target/BPF/BPFAsmPrinter.h
index 90ef207..75a1d7e 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.h
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.h
@@ -39,6 +39,7 @@ public:
private:
BTFDebug *BTF;
TargetMachine &TM;
+ bool SawTrapCall = false;
const BPFTargetMachine &getBTM() const;
};
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index e5b4f6e..08f196b 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -884,13 +884,13 @@ CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
.Case("{t4}", CSKY::R20)
.Case("{t5}", CSKY::R21)
.Case("{t6}", CSKY::R22)
- .Cases("{t7}", "{fp}", CSKY::R23)
- .Cases("{t8}", "{top}", CSKY::R24)
- .Cases("{t9}", "{bsp}", CSKY::R25)
+ .Cases({"{t7}", "{fp}"}, CSKY::R23)
+ .Cases({"{t8}", "{top}"}, CSKY::R24)
+ .Cases({"{t9}", "{bsp}"}, CSKY::R25)
.Case("{r26}", CSKY::R26)
.Case("{r27}", CSKY::R27)
- .Cases("{gb}", "{rgb}", "{rdb}", CSKY::R28)
- .Cases("{tb}", "{rtb}", CSKY::R29)
+ .Cases({"{gb}", "{rgb}", "{rdb}"}, CSKY::R28)
+ .Cases({"{tb}", "{rtb}"}, CSKY::R29)
.Case("{svbr}", CSKY::R30)
.Case("{tls}", CSKY::R31)
.Default(CSKY::NoRegister);
@@ -907,38 +907,38 @@ CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// use the ABI names in register constraint lists.
if (Subtarget.useHardFloat()) {
unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
- .Cases("{fr0}", "{vr0}", CSKY::F0_32)
- .Cases("{fr1}", "{vr1}", CSKY::F1_32)
- .Cases("{fr2}", "{vr2}", CSKY::F2_32)
- .Cases("{fr3}", "{vr3}", CSKY::F3_32)
- .Cases("{fr4}", "{vr4}", CSKY::F4_32)
- .Cases("{fr5}", "{vr5}", CSKY::F5_32)
- .Cases("{fr6}", "{vr6}", CSKY::F6_32)
- .Cases("{fr7}", "{vr7}", CSKY::F7_32)
- .Cases("{fr8}", "{vr8}", CSKY::F8_32)
- .Cases("{fr9}", "{vr9}", CSKY::F9_32)
- .Cases("{fr10}", "{vr10}", CSKY::F10_32)
- .Cases("{fr11}", "{vr11}", CSKY::F11_32)
- .Cases("{fr12}", "{vr12}", CSKY::F12_32)
- .Cases("{fr13}", "{vr13}", CSKY::F13_32)
- .Cases("{fr14}", "{vr14}", CSKY::F14_32)
- .Cases("{fr15}", "{vr15}", CSKY::F15_32)
- .Cases("{fr16}", "{vr16}", CSKY::F16_32)
- .Cases("{fr17}", "{vr17}", CSKY::F17_32)
- .Cases("{fr18}", "{vr18}", CSKY::F18_32)
- .Cases("{fr19}", "{vr19}", CSKY::F19_32)
- .Cases("{fr20}", "{vr20}", CSKY::F20_32)
- .Cases("{fr21}", "{vr21}", CSKY::F21_32)
- .Cases("{fr22}", "{vr22}", CSKY::F22_32)
- .Cases("{fr23}", "{vr23}", CSKY::F23_32)
- .Cases("{fr24}", "{vr24}", CSKY::F24_32)
- .Cases("{fr25}", "{vr25}", CSKY::F25_32)
- .Cases("{fr26}", "{vr26}", CSKY::F26_32)
- .Cases("{fr27}", "{vr27}", CSKY::F27_32)
- .Cases("{fr28}", "{vr28}", CSKY::F28_32)
- .Cases("{fr29}", "{vr29}", CSKY::F29_32)
- .Cases("{fr30}", "{vr30}", CSKY::F30_32)
- .Cases("{fr31}", "{vr31}", CSKY::F31_32)
+ .Cases({"{fr0}", "{vr0}"}, CSKY::F0_32)
+ .Cases({"{fr1}", "{vr1}"}, CSKY::F1_32)
+ .Cases({"{fr2}", "{vr2}"}, CSKY::F2_32)
+ .Cases({"{fr3}", "{vr3}"}, CSKY::F3_32)
+ .Cases({"{fr4}", "{vr4}"}, CSKY::F4_32)
+ .Cases({"{fr5}", "{vr5}"}, CSKY::F5_32)
+ .Cases({"{fr6}", "{vr6}"}, CSKY::F6_32)
+ .Cases({"{fr7}", "{vr7}"}, CSKY::F7_32)
+ .Cases({"{fr8}", "{vr8}"}, CSKY::F8_32)
+ .Cases({"{fr9}", "{vr9}"}, CSKY::F9_32)
+ .Cases({"{fr10}", "{vr10}"}, CSKY::F10_32)
+ .Cases({"{fr11}", "{vr11}"}, CSKY::F11_32)
+ .Cases({"{fr12}", "{vr12}"}, CSKY::F12_32)
+ .Cases({"{fr13}", "{vr13}"}, CSKY::F13_32)
+ .Cases({"{fr14}", "{vr14}"}, CSKY::F14_32)
+ .Cases({"{fr15}", "{vr15}"}, CSKY::F15_32)
+ .Cases({"{fr16}", "{vr16}"}, CSKY::F16_32)
+ .Cases({"{fr17}", "{vr17}"}, CSKY::F17_32)
+ .Cases({"{fr18}", "{vr18}"}, CSKY::F18_32)
+ .Cases({"{fr19}", "{vr19}"}, CSKY::F19_32)
+ .Cases({"{fr20}", "{vr20}"}, CSKY::F20_32)
+ .Cases({"{fr21}", "{vr21}"}, CSKY::F21_32)
+ .Cases({"{fr22}", "{vr22}"}, CSKY::F22_32)
+ .Cases({"{fr23}", "{vr23}"}, CSKY::F23_32)
+ .Cases({"{fr24}", "{vr24}"}, CSKY::F24_32)
+ .Cases({"{fr25}", "{vr25}"}, CSKY::F25_32)
+ .Cases({"{fr26}", "{vr26}"}, CSKY::F26_32)
+ .Cases({"{fr27}", "{vr27}"}, CSKY::F27_32)
+ .Cases({"{fr28}", "{vr28}"}, CSKY::F28_32)
+ .Cases({"{fr29}", "{vr29}"}, CSKY::F29_32)
+ .Cases({"{fr30}", "{vr30}"}, CSKY::F30_32)
+ .Cases({"{fr31}", "{vr31}"}, CSKY::F31_32)
.Default(CSKY::NoRegister);
if (FReg != CSKY::NoRegister) {
assert(CSKY::F0_32 <= FReg && FReg <= CSKY::F31_32 && "Unknown fp-reg");
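
These hunks (and the AMDGPUTargetMachine.cpp one above) migrate from the variadic StringSwitch::Cases overloads to the form that takes an initializer list of case strings. A minimal usage sketch, assuming the initializer-list Cases overload in current llvm/ADT/StringSwitch.h; the constraint names and encodings are illustrative only:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    // Map an inline-asm register-constraint string to an encoding, accepting
    // more than one alias per register; 0 stands in for "no register".
    unsigned lookupReg(llvm::StringRef Constraint) {
      return llvm::StringSwitch<unsigned>(Constraint)
          .Cases({"{t7}", "{fp}"}, 23) // one value for several case strings
          .Cases({"{t8}", "{top}"}, 24)
          .Case("{svbr}", 30)
          .Default(0);
    }
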
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index 8ace2d2..eb4c884 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -194,9 +194,10 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {
dxbc::PSV::v2::ResourceBindInfo BindInfo;
BindInfo.Type = Type;
BindInfo.LowerBound = Binding.LowerBound;
- assert(Binding.Size == UINT32_MAX ||
- (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX &&
- "Resource range is too large");
+ assert(
+ (Binding.Size == UINT32_MAX ||
+ (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX) &&
+ "Resource range is too large");
BindInfo.UpperBound = (Binding.Size == UINT32_MAX)
? UINT32_MAX
: Binding.LowerBound + Binding.Size - 1;
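
The DXContainerGlobals change is purely about grouping: && binds tighter than ||, so the original assert parsed as Binding.Size == UINT32_MAX || (range-check && "message"). Because a string literal is a non-null pointer, the evaluated condition was unchanged, but compilers typically warn (-Wparentheses) and the intent is obscured. A tiny illustration of the pitfall:

    #include <cassert>

    int main() {
      bool A = true, B = false;
      // Parses as A || (B && "message"): the literal takes part in the
      // condition expression (it is always truthy), which draws a
      // -Wparentheses warning and hides where the message starts.
      assert(A || B && "message");
      // The patch's form: the whole condition is grouped, and the literal is
      // clearly just the failure message.
      assert((A || B) && "message");
      return 0;
    }
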
diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
index f4e36fa7..e661c94 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -26,6 +26,7 @@ def tc_20a4bbec : InstrItinClass;
def tc_227864f7 : InstrItinClass;
def tc_257f6f7c : InstrItinClass;
def tc_26a377fe : InstrItinClass;
+def tc_2a698a03 : InstrItinClass;
def tc_2b4c548e : InstrItinClass;
def tc_2c745bb8 : InstrItinClass;
def tc_2d4051cd : InstrItinClass;
@@ -52,6 +53,7 @@ def tc_561aaa58 : InstrItinClass;
def tc_56c4f9fe : InstrItinClass;
def tc_56e64202 : InstrItinClass;
def tc_58d21193 : InstrItinClass;
+def tc_57a4709c : InstrItinClass;
def tc_5bf8afbb : InstrItinClass;
def tc_5cdf8c84 : InstrItinClass;
def tc_61bf7c03 : InstrItinClass;
@@ -220,6 +222,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -356,6 +363,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -812,6 +824,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -948,6 +965,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -1404,6 +1426,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -1540,6 +1567,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -1996,6 +2028,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -2132,6 +2169,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -2588,6 +2630,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -2724,6 +2771,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -3180,6 +3232,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -3316,6 +3373,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -3772,6 +3834,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -3908,6 +3975,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -4364,6 +4436,11 @@ class DepHVXItinV69 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -4500,6 +4577,11 @@ class DepHVXItinV69 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -4956,6 +5038,11 @@ class DepHVXItinV71 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -5092,6 +5179,11 @@ class DepHVXItinV71 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -5548,6 +5640,11 @@ class DepHVXItinV73 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -5684,6 +5781,11 @@ class DepHVXItinV73 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -6140,6 +6242,11 @@ class DepHVXItinV75 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -6276,6 +6383,11 @@ class DepHVXItinV75 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -6732,6 +6844,11 @@ class DepHVXItinV79 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -6868,6 +6985,11 @@ class DepHVXItinV79 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -7324,6 +7446,11 @@ class DepHVXItinV81 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -7460,6 +7587,11 @@ class DepHVXItinV81 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index f8f1c2a..b188134 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -29939,6 +29939,58 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vabs_qf16_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = vabs($Vu32.hf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabs_qf16_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = vabs($Vu32.qf16)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabs_qf32_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = vabs($Vu32.qf32)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabs_qf32_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = vabs($Vu32.sf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vabs_sf : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
@@ -31302,6 +31354,21 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_valign4 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = valign4($Vu32,$Vv32,$Rt8)",
+tc_57a4709c, TypeCVI_VA>, Enc_a30110, Requires<[UseHVXV81]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_valignb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
@@ -32583,6 +32650,32 @@ let isCVI = 1;
let hasHvxTmp = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vconv_bf_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxWR:$Vuu32),
+"$Vd32.bf = $Vuu32.qf32",
+tc_2a698a03, TypeCVI_VS>, Enc_a33d04, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_f8_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.f8 = $Vu32.qf16",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vconv_h_hf : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
@@ -32596,6 +32689,19 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vconv_h_hf_rnd : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.h = $Vu32.hf:rnd",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vconv_hf_h : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
@@ -32635,6 +32741,71 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vconv_qf16_f8 : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32),
+"$Vdd32.qf16 = $Vu32.f8",
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_qf16_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = $Vu32.hf",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_qf16_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = $Vu32.qf16",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_qf32_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = $Vu32.qf32",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_qf32_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = $Vu32.sf",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vconv_sf_qf32 : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
@@ -33720,6 +33891,122 @@ let isHVXALU2SRC = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_veqhf : HInst<
+(outs HvxQR:$Qd4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.hf,$Vv32.hf)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b000111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqhf_and : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.hf,$Vv32.hf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b000111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqhf_or : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.hf,$Vv32.hf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b010111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isAccumulator = 1;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqhf_xor : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.hf,$Vv32.hf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b100111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqsf : HInst<
+(outs HvxQR:$Qd4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.sf,$Vv32.sf)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b000011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqsf_and : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.sf,$Vv32.sf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b000011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqsf_or : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.sf,$Vv32.sf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b010011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isAccumulator = 1;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqsf_xor : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.sf,$Vv32.sf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b100011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
def V6_veqw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -34538,6 +34825,58 @@ let Inst{31-24} = 0b00011110;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vilog2_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.w = vilog2($Vu32.hf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vilog2_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.w = vilog2($Vu32.qf16)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vilog2_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.w = vilog2($Vu32.qf32)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vilog2_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.w = vilog2($Vu32.sf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vinsertwr : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, IntRegs:$Rt32),
@@ -37170,6 +37509,58 @@ let isCVI = 1;
let isHVXALU = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vneg_qf16_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = vneg($Vu32.hf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vneg_qf16_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = vneg($Vu32.qf16)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vneg_qf32_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = vneg($Vu32.qf32)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vneg_qf32_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = vneg($Vu32.sf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vnormamth : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index 23f4b3a..c11483b 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -3830,6 +3830,122 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2),
// V81 HVX Instructions.
+def: Pat<(int_hexagon_V6_vabs_qf16_hf HvxVR:$src1),
+ (V6_vabs_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf16_hf_128B HvxVR:$src1),
+ (V6_vabs_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf16_qf16 HvxVR:$src1),
+ (V6_vabs_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf16_qf16_128B HvxVR:$src1),
+ (V6_vabs_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf32_qf32 HvxVR:$src1),
+ (V6_vabs_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf32_qf32_128B HvxVR:$src1),
+ (V6_vabs_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf32_sf HvxVR:$src1),
+ (V6_vabs_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf32_sf_128B HvxVR:$src1),
+ (V6_vabs_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[UseHVXV81, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valign4_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[UseHVXV81, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vconv_bf_qf32 HvxWR:$src1),
+ (V6_vconv_bf_qf32 HvxWR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_bf_qf32_128B HvxWR:$src1),
+ (V6_vconv_bf_qf32 HvxWR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_f8_qf16 HvxVR:$src1),
+ (V6_vconv_f8_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_f8_qf16_128B HvxVR:$src1),
+ (V6_vconv_f8_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_h_hf_rnd HvxVR:$src1),
+ (V6_vconv_h_hf_rnd HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vconv_h_hf_rnd_128B HvxVR:$src1),
+ (V6_vconv_h_hf_rnd HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_f8 HvxVR:$src1),
+ (V6_vconv_qf16_f8 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_f8_128B HvxVR:$src1),
+ (V6_vconv_qf16_f8 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_hf HvxVR:$src1),
+ (V6_vconv_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_hf_128B HvxVR:$src1),
+ (V6_vconv_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_qf16 HvxVR:$src1),
+ (V6_vconv_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_qf16_128B HvxVR:$src1),
+ (V6_vconv_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf32_qf32 HvxVR:$src1),
+ (V6_vconv_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf32_qf32_128B HvxVR:$src1),
+ (V6_vconv_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf32_sf HvxVR:$src1),
+ (V6_vconv_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf32_sf_128B HvxVR:$src1),
+ (V6_vconv_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf HvxVR:$src1, HvxVR:$src2),
+ (V6_veqhf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqhf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf HvxVR:$src1, HvxVR:$src2),
+ (V6_veqsf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqsf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_hf HvxVR:$src1),
+ (V6_vilog2_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_hf_128B HvxVR:$src1),
+ (V6_vilog2_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_qf16 HvxVR:$src1),
+ (V6_vilog2_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_qf16_128B HvxVR:$src1),
+ (V6_vilog2_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_qf32 HvxVR:$src1),
+ (V6_vilog2_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_qf32_128B HvxVR:$src1),
+ (V6_vilog2_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_sf HvxVR:$src1),
+ (V6_vilog2_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_sf_128B HvxVR:$src1),
+ (V6_vilog2_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf16_hf HvxVR:$src1),
+ (V6_vneg_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf16_hf_128B HvxVR:$src1),
+ (V6_vneg_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf16_qf16 HvxVR:$src1),
+ (V6_vneg_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf16_qf16_128B HvxVR:$src1),
+ (V6_vneg_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf32_qf32 HvxVR:$src1),
+ (V6_vneg_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf32_qf32_128B HvxVR:$src1),
+ (V6_vneg_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf32_sf HvxVR:$src1),
+ (V6_vneg_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf32_sf_128B HvxVR:$src1),
+ (V6_vneg_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2),
(V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2),
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 7ee280d..eadf020 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -1815,7 +1815,7 @@ struct WeightedLeaf {
int Weight;
int InsertionOrder;
- WeightedLeaf() {}
+ WeightedLeaf() = default;
WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index a6de839..fe700e1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -371,6 +371,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
}
setOperationAction(ISD::CTPOP, GRLenVT, Legal);
setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
@@ -453,6 +459,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index ca4ee5f..b502b056 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1558,6 +1558,10 @@ defm : PatXrXrF<fmul, "XVFMUL">;
// XVFDIV_{S/D}
defm : PatXrXrF<fdiv, "XVFDIV">;
+// XVFMAX_{S/D}, XVFMIN_{S/D}
+defm : PatXrXrF<fmaxnum, "XVFMAX">;
+defm : PatXrXrF<fminnum, "XVFMIN">;
+
// XVFMADD_{S/D}
def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
(XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
@@ -2424,6 +2428,12 @@ def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm),
(XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>;
+// Vector floating-point conversion
+defm : PatXrF<fceil, "XVFRINTRP">;
+defm : PatXrF<ffloor, "XVFRINTRM">;
+defm : PatXrF<ftrunc, "XVFRINTRZ">;
+defm : PatXrF<froundeven, "XVFRINTRNE">;
+
// load
def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm),
(XVLD GPR:$rj, (to_valid_timm timm:$imm))>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 92402ba..6b74a4b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1760,6 +1760,10 @@ defm : PatVrVrF<fmul, "VFMUL">;
// VFDIV_{S/D}
defm : PatVrVrF<fdiv, "VFDIV">;
+// VFMAX_{S/D}, VFMIN_{S/D}
+defm : PatVrVrF<fmaxnum, "VFMAX">;
+defm : PatVrVrF<fminnum, "VFMIN">;
+
// VFMADD_{S/D}
def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),
(VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
@@ -2552,6 +2556,11 @@ def : Pat<(f64 (froundeven FPR64:$fj)),
(f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
+defm : PatVrF<fceil, "VFRINTRP">;
+defm : PatVrF<ffloor, "VFRINTRM">;
+defm : PatVrF<ftrunc, "VFRINTRZ">;
+defm : PatVrF<froundeven, "VFRINTRNE">;
+
// load
def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm),
(VLD GPR:$rj, (to_valid_timm timm:$imm))>;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
index 7d54565..6d69af5 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -39,7 +39,7 @@ LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)
: MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,
/*HasRelocationAddend=*/true) {}
-LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {}
+LoongArchELFObjectWriter::~LoongArchELFObjectWriter() = default;
unsigned LoongArchELFObjectWriter::getRelocType(const MCFixup &Fixup,
const MCValue &Target,
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index f0e2bc4..08fa51d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -38,7 +38,7 @@ public:
LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
: Ctx(ctx), MCII(MCII) {}
- ~LoongArchMCCodeEmitter() override {}
+ ~LoongArchMCCodeEmitter() override = default;
void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index e37f3a66..fb5cd5c2 100644
--- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -690,9 +690,9 @@ bool M68kAsmParser::parseRegisterName(MCRegister &RegNo, SMLoc Loc,
} else {
// Floating point control register.
RegNo = StringSwitch<unsigned>(RegisterNameLower)
- .Cases("fpc", "fpcr", M68k::FPC)
- .Cases("fps", "fpsr", M68k::FPS)
- .Cases("fpi", "fpiar", M68k::FPIAR)
+ .Cases({"fpc", "fpcr"}, M68k::FPC)
+ .Cases({"fps", "fpsr"}, M68k::FPS)
+ .Cases({"fpi", "fpiar"}, M68k::FPIAR)
.Default(M68k::NoRegister);
assert(RegNo != M68k::NoRegister &&
"Unrecognized FP control register name");
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
index fe83dc6..51bafe4 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
@@ -49,7 +49,7 @@ public:
M68kAsmBackend(const Target &T, const MCSubtargetInfo &STI)
: MCAsmBackend(llvm::endianness::big),
Allows32BitBranch(llvm::StringSwitch<bool>(STI.getCPU())
- .CasesLower("m68020", "m68030", "m68040", true)
+ .CasesLower({"m68020", "m68030", "m68040"}, true)
.Default(false)) {}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &,
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 97379d7..f588e56 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -6176,7 +6176,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
CC = StringSwitch<unsigned>(Name)
.Case("zero", 0)
- .Cases("at", "AT", 1)
+ .Cases({"at", "AT"}, 1)
.Case("a0", 4)
.Case("a1", 5)
.Case("a2", 6)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
index caef8fe7..b832b82 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
@@ -20,7 +20,7 @@ class MemoryLocation;
class NVPTXAAResult : public AAResultBase {
public:
- NVPTXAAResult() {}
+ NVPTXAAResult() = default;
NVPTXAAResult(NVPTXAAResult &&Arg) : AAResultBase(std::move(Arg)) {}
/// Handle invalidation events from the new pass manager.
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index c667a09..996d653 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1836,7 +1836,7 @@ bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {
return true;
}
-NVPTXScopes::NVPTXScopes(LLVMContext &C) {
+NVPTXScopes::NVPTXScopes(LLVMContext &C) : Context(&C) {
Scopes[C.getOrInsertSyncScopeID("singlethread")] = NVPTX::Scope::Thread;
Scopes[C.getOrInsertSyncScopeID("")] = NVPTX::Scope::System;
Scopes[C.getOrInsertSyncScopeID("block")] = NVPTX::Scope::Block;
@@ -1851,11 +1851,21 @@ NVPTX::Scope NVPTXScopes::operator[](SyncScope::ID ID) const {
auto S = Scopes.find(ID);
if (S == Scopes.end()) {
- // TODO:
- // - Add API to LLVMContext to get the name of a single scope.
- // - Use that API here to print an error containing the name
- // of this Unknown ID.
- report_fatal_error(formatv("Could not find scope ID={}.", int(ID)));
+ auto scopeName = Context->getSyncScopeName(ID);
+ assert(scopeName.has_value() && "Scope name must exist.");
+
+ // Build list of supported syncscopes programmatically
+ SmallVector<StringRef> supportedScopes;
+ for (const auto &Entry : Scopes) {
+ if (auto name = Context->getSyncScopeName(Entry.first))
+ supportedScopes.push_back(name->empty() ? "<empty string>" : *name);
+ }
+
+ reportFatalUsageError(
+ formatv("NVPTX backend does not support syncscope \"{0}\" (ID={1}).\n"
+ "Supported syncscopes are: {2}.",
+ scopeName.value(), int(ID),
+ make_range(supportedScopes.begin(), supportedScopes.end())));
}
return S->second;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 1cb579b..d525531 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -35,6 +35,7 @@ struct NVPTXScopes {
private:
SmallMapVector<SyncScope::ID, NVPTX::Scope, 8> Scopes{};
+ LLVMContext *Context = nullptr;
};
class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index e8758aa..50827bd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1562,12 +1562,17 @@ def : Pat<(int_nvvm_saturate_d f64:$a), (CVT_f64_f64 $a, CvtSAT)>;
// Exp2 Log2
//
-def : Pat<(int_nvvm_ex2_approx_ftz_f f32:$a), (EX2_APPROX_f32 $a, FTZ)>;
-def : Pat<(int_nvvm_ex2_approx_f f32:$a), (EX2_APPROX_f32 $a, NoFTZ)>;
+def : Pat<(f32 (int_nvvm_ex2_approx_ftz f32:$a)), (EX2_APPROX_f32 $a, FTZ)>;
+def : Pat<(f32 (int_nvvm_ex2_approx f32:$a)), (EX2_APPROX_f32 $a, NoFTZ)>;
let Predicates = [hasPTX<70>, hasSM<75>] in {
- def : Pat<(int_nvvm_ex2_approx_f16 f16:$a), (EX2_APPROX_f16 $a)>;
- def : Pat<(int_nvvm_ex2_approx_f16x2 v2f16:$a), (EX2_APPROX_f16x2 $a)>;
+ def : Pat<(f16 (int_nvvm_ex2_approx f16:$a)), (EX2_APPROX_f16 $a)>;
+ def : Pat<(v2f16 (int_nvvm_ex2_approx v2f16:$a)), (EX2_APPROX_f16x2 $a)>;
+}
+
+let Predicates = [hasPTX<78>, hasSM<90>] in {
+ def : Pat<(bf16 (int_nvvm_ex2_approx_ftz bf16:$a)), (EX2_APPROX_bf16 $a)>;
+ def : Pat<(v2bf16 (int_nvvm_ex2_approx_ftz v2bf16:$a)), (EX2_APPROX_bf16x2 $a)>;
}
def LG2_APPROX_f32 :
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 729c077..64593e6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -318,7 +318,7 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
// answer. These include:
//
// - nvvm_cos_approx_{f,ftz_f}
- // - nvvm_ex2_approx_{d,f,ftz_f}
+ // - nvvm_ex2_approx(_ftz)
// - nvvm_lg2_approx_{d,f,ftz_f}
// - nvvm_sin_approx_{f,ftz_f}
// - nvvm_sqrt_approx_{f,ftz_f}
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index bcb3f50..780e124 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2702,7 +2702,7 @@ static bool isSpecialLLVMGlobalArrayToSkip(const GlobalVariable *GV) {
static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {
return StringSwitch<bool>(GV->getName())
- .Cases("llvm.global_ctors", "llvm.global_dtors", true)
+ .Cases({"llvm.global_ctors", "llvm.global_dtors"}, true)
.Default(false);
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index da3efdc..0c2e44e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -360,6 +360,10 @@ let Predicates = [HasVSX, IsISAFuture] in {
def LXVPRLL : XForm_XTp5_RAB5<31, 621, (outs vsrprc:$XTp),
(ins (memr $RA):$addr, g8rc:$RB),
"lxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>;
+ def LXVPB32X
+ : XForm_XTp5_RAB5<31, 877, (outs vsrprc:$XTp),
+ (ins (memr $RA):$addr, g8rc:$RB),
+ "lxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;
}
let mayStore = 1 in {
@@ -376,6 +380,10 @@ let Predicates = [HasVSX, IsISAFuture] in {
: XForm_XTp5_RAB5<31, 749, (outs),
(ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB),
"stxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>;
+ def STXVPB32X
+ : XForm_XTp5_RAB5<31, 1005, (outs),
+ (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB),
+ "stxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;
}
def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB),
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2fba090..b04e887 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -912,7 +912,7 @@ bool PPCTTIImpl::areInlineCompatible(const Function *Caller,
bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
const Function *Callee,
- const ArrayRef<Type *> &Types) const {
+ ArrayRef<Type *> Types) const {
// We need to ensure that argument promotion does not
// attempt to promote pointers to MMA types (__vector_pair
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 475472a..8d7f255 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -147,7 +147,7 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const override;
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
- const ArrayRef<Type *> &Types) const override;
+ ArrayRef<Type *> Types) const override;
bool supportsTailCallFor(const CallBase *CB) const override;
private:
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 282cf5d..3d5a55c 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -95,7 +95,8 @@ private:
void addVectorLoadStoreOperands(MachineInstr &I,
SmallVectorImpl<SrcOp> &SrcOps,
unsigned &CurOp, bool IsMasked,
- bool IsStrided) const;
+ bool IsStridedOrIndexed,
+ LLT *IndexVT = nullptr) const;
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineIRBuilder &MIB) const;
@@ -722,15 +723,17 @@ static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {
void RISCVInstructionSelector::addVectorLoadStoreOperands(
MachineInstr &I, SmallVectorImpl<SrcOp> &SrcOps, unsigned &CurOp,
- bool IsMasked, bool IsStrided) const {
+ bool IsMasked, bool IsStridedOrIndexed, LLT *IndexVT) const {
// Base Pointer
auto PtrReg = I.getOperand(CurOp++).getReg();
SrcOps.push_back(PtrReg);
- // Stride
- if (IsStrided) {
+ // Stride or Index
+ if (IsStridedOrIndexed) {
auto StrideReg = I.getOperand(CurOp++).getReg();
SrcOps.push_back(StrideReg);
+ if (IndexVT)
+ *IndexVT = MRI->getType(StrideReg);
}
// Mask
@@ -805,6 +808,70 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
I.eraseFromParent();
return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
}
+ case Intrinsic::riscv_vloxei:
+ case Intrinsic::riscv_vloxei_mask:
+ case Intrinsic::riscv_vluxei:
+ case Intrinsic::riscv_vluxei_mask: {
+ bool IsMasked = IntrinID == Intrinsic::riscv_vloxei_mask ||
+ IntrinID == Intrinsic::riscv_vluxei_mask;
+ bool IsOrdered = IntrinID == Intrinsic::riscv_vloxei ||
+ IntrinID == Intrinsic::riscv_vloxei_mask;
+ LLT VT = MRI->getType(I.getOperand(0).getReg());
+ unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
+
+ // Result vector
+ const Register DstReg = I.getOperand(0).getReg();
+
+ // Sources
+ bool HasPassthruOperand = IntrinID != Intrinsic::riscv_vlm;
+ unsigned CurOp = 2;
+ SmallVector<SrcOp, 4> SrcOps; // Source registers.
+
+ // Passthru
+ if (HasPassthruOperand) {
+ auto PassthruReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PassthruReg);
+ } else {
+ // Use NoRegister if there is no specified passthru.
+ SrcOps.push_back(Register());
+ }
+ LLT IndexVT;
+ addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, true, &IndexVT);
+
+ RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
+ RISCVVType::VLMUL IndexLMUL =
+ RISCVTargetLowering::getLMUL(getMVTForLLT(IndexVT));
+ unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
+ if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
+ reportFatalUsageError("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
+ }
+ const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
+ IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(IndexLMUL));
+
+ auto PseudoMI = MIB.buildInstr(P->Pseudo, {DstReg}, SrcOps);
+
+ // Select VL
+ auto VLOpFn = renderVLOp(I.getOperand(CurOp++));
+ for (auto &RenderFn : *VLOpFn)
+ RenderFn(PseudoMI);
+
+ // SEW
+ PseudoMI.addImm(Log2SEW);
+
+ // Policy
+ uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
+ if (IsMasked)
+ Policy = I.getOperand(CurOp++).getImm();
+ PseudoMI.addImm(Policy);
+
+ // Memref
+ PseudoMI.cloneMemRefs(I);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
+ }
case Intrinsic::riscv_vsm:
case Intrinsic::riscv_vse:
case Intrinsic::riscv_vse_mask:
@@ -847,6 +914,56 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
I.eraseFromParent();
return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
}
+ case Intrinsic::riscv_vsoxei:
+ case Intrinsic::riscv_vsoxei_mask:
+ case Intrinsic::riscv_vsuxei:
+ case Intrinsic::riscv_vsuxei_mask: {
+ bool IsMasked = IntrinID == Intrinsic::riscv_vsoxei_mask ||
+ IntrinID == Intrinsic::riscv_vsuxei_mask;
+ bool IsOrdered = IntrinID == Intrinsic::riscv_vsoxei ||
+ IntrinID == Intrinsic::riscv_vsoxei_mask;
+ LLT VT = MRI->getType(I.getOperand(1).getReg());
+ unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
+
+ // Sources
+ unsigned CurOp = 1;
+ SmallVector<SrcOp, 4> SrcOps; // Source registers.
+
+ // Store value
+ auto PassthruReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PassthruReg);
+
+ LLT IndexVT;
+ addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, true, &IndexVT);
+
+ RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
+ RISCVVType::VLMUL IndexLMUL =
+ RISCVTargetLowering::getLMUL(getMVTForLLT(IndexVT));
+ unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
+ if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
+ reportFatalUsageError("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
+ }
+ const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
+ IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(IndexLMUL));
+
+ auto PseudoMI = MIB.buildInstr(P->Pseudo, {}, SrcOps);
+
+ // Select VL
+ auto VLOpFn = renderVLOp(I.getOperand(CurOp++));
+ for (auto &RenderFn : *VLOpFn)
+ RenderFn(PseudoMI);
+
+ // SEW
+ PseudoMI.addImm(Log2SEW);
+
+ // Memref
+ PseudoMI.cloneMemRefs(I);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
+ }
}
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index e75dfe3..5b8cfb2 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -407,7 +407,6 @@ enum OperandType : unsigned {
OPERAND_SIMM5_PLUS1,
OPERAND_SIMM6,
OPERAND_SIMM6_NONZERO,
- OPERAND_SIMM8,
OPERAND_SIMM8_UNSIGNED,
OPERAND_SIMM10,
OPERAND_SIMM10_LSB0000_NONZERO,
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 526675a..b0453fc 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -131,6 +131,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCMAXU:
case RISCV::PseudoCCMIN:
case RISCV::PseudoCCMINU:
+ case RISCV::PseudoCCMUL:
case RISCV::PseudoCCADDW:
case RISCV::PseudoCCSUBW:
case RISCV::PseudoCCSLL:
@@ -237,6 +238,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCMIN: NewOpc = RISCV::MIN; break;
case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break;
case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break;
+ case RISCV::PseudoCCMUL: NewOpc = RISCV::MUL; break;
case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break;
case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break;
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index cfee6ab..5b72334 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1856,6 +1856,11 @@ def TuneShortForwardBranchIMinMax
"true", "Enable short forward branch optimization for min,max instructions in Zbb",
[TuneShortForwardBranchOpt]>;
+def TuneShortForwardBranchIMul
+    : SubtargetFeature<"short-forward-branch-i-mul", "HasShortForwardBranchIMul",
+                       "true", "Enable short forward branch optimization for the mul instruction",
+                       [TuneShortForwardBranchOpt]>;
+
// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
def TuneNoSinkSplatOperands
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index b25a054..9078335 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -371,8 +371,8 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
- report_fatal_error("The V extension does not support EEW=64 for index "
- "values when XLEN=32");
+ reportFatalUsageError("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
}
const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
@@ -444,8 +444,8 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
- report_fatal_error("The V extension does not support EEW=64 for index "
- "values when XLEN=32");
+ reportFatalUsageError("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
}
const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
@@ -2223,8 +2223,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
- report_fatal_error("The V extension does not support EEW=64 for index "
- "values when XLEN=32");
+ reportFatalUsageError("The V extension does not support EEW=64 for "
+ "index values when XLEN=32");
}
const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
@@ -2457,8 +2457,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
- report_fatal_error("The V extension does not support EEW=64 for index "
- "values when XLEN=32");
+ reportFatalUsageError("The V extension does not support EEW=64 for "
+ "index values when XLEN=32");
}
const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
IsMasked, IsOrdered, IndexLog2EEW,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c6a8b84..c56ce3f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16495,6 +16495,35 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Op, DL, VT, Shift1, Shift2);
}
+static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
+ unsigned ShY) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue X = N->getOperand(0);
+ SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getConstant(ShY, DL, VT), X);
+ return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
+ DAG.getConstant(ShX, DL, VT), Mul359);
+}
+
+static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
+ uint64_t MulAmt) {
+ switch (MulAmt) {
+ case 5 * 3:
+ return getShlAddShlAdd(N, DAG, 2, 1);
+ case 9 * 3:
+ return getShlAddShlAdd(N, DAG, 3, 1);
+ case 5 * 5:
+ return getShlAddShlAdd(N, DAG, 2, 2);
+ case 9 * 5:
+ return getShlAddShlAdd(N, DAG, 3, 2);
+ case 9 * 9:
+ return getShlAddShlAdd(N, DAG, 3, 3);
+ default:
+ return SDValue();
+ }
+}
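
For reference, RISCVISD::SHL_ADD with shift amount N computes (X << N) + X, i.e. X * (2^N + 1), so the two chained nodes built above yield X * (2^ShY + 1) * (2^ShX + 1). A standalone C++ sanity check of the case table (helper name and test value are illustrative only):

  #include <cassert>
  #include <cstdint>

  // Models shNadd: (X << Sh) + X == X * ((1 << Sh) + 1).
  static uint64_t shlAdd(uint64_t X, unsigned Sh) { return (X << Sh) + X; }

  int main() {
    // (ShX, ShY) pairs from expandMulToShlAddShlAdd and the products they cover.
    assert(shlAdd(shlAdd(7, 1), 2) == 7 * 15); // 3 * 5
    assert(shlAdd(shlAdd(7, 1), 3) == 7 * 27); // 3 * 9
    assert(shlAdd(shlAdd(7, 2), 2) == 7 * 25); // 5 * 5
    assert(shlAdd(shlAdd(7, 2), 3) == 7 * 45); // 5 * 9
    assert(shlAdd(shlAdd(7, 3), 3) == 7 * 81); // 9 * 9
    return 0;
  }
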
+
// Try to expand a scalar multiply to a faster sequence.
static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -16524,18 +16553,17 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
return SDValue();
- // WARNING: The code below is knowingly incorrect with regards to undef semantics.
- // We're adding additional uses of X here, and in principle, we should be freezing
- // X before doing so. However, adding freeze here causes real regressions, and no
- // other target properly freezes X in these cases either.
- SDValue X = N->getOperand(0);
-
+ // WARNING: The code below is knowingly incorrect with regards to undef
+ // semantics. We're adding additional uses of X here, and in principle, we
+ // should be freezing X before doing so. However, adding freeze here causes
+ // real regressions, and no other target properly freezes X in these cases
+ // either.
if (Subtarget.hasShlAdd(3)) {
+ SDValue X = N->getOperand(0);
int Shift;
if (int ShXAmount = isShifted359(MulAmt, Shift)) {
// 3/5/9 * 2^N -> shl (shXadd X, X), N
SDLoc DL(N);
- SDValue X = N->getOperand(0);
// Put the shift first if we can fold a zext into the shift forming
// a slli.uw.
if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
@@ -16554,38 +16582,8 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
}
// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
- int ShX;
- int ShY;
- switch (MulAmt) {
- case 3 * 5:
- ShY = 1;
- ShX = 2;
- break;
- case 3 * 9:
- ShY = 1;
- ShX = 3;
- break;
- case 5 * 5:
- ShX = ShY = 2;
- break;
- case 5 * 9:
- ShY = 2;
- ShX = 3;
- break;
- case 9 * 9:
- ShX = ShY = 3;
- break;
- default:
- ShX = ShY = 0;
- break;
- }
- if (ShX) {
- SDLoc DL(N);
- SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(ShY, DL, VT), X);
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getConstant(ShX, DL, VT), Mul359);
- }
+ if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt))
+ return V;
// If this is a power of 2 + 2/4/8, we can use a shift followed by a single
// shXadd. First check if this is a sum of two powers of 2 because that's
@@ -16648,23 +16646,12 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
}
}
- for (uint64_t Divisor : {3, 5, 9}) {
- if (MulAmt % Divisor != 0)
- continue;
- uint64_t MulAmt2 = MulAmt / Divisor;
- // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
- // of 25 which happen to be quite common.
- if (int ShBAmount = isShifted359(MulAmt2, Shift)) {
- SDLoc DL(N);
- SDValue Mul359A =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
- SDValue Mul359B =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359A,
- DAG.getConstant(ShBAmount, DL, VT), Mul359A);
- return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
- DAG.getConstant(Shift, DL, VT));
- }
+ // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
+ // of 25 which happen to be quite common.
+ Shift = llvm::countr_zero(MulAmt);
+ if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
}
}
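
The restructured tail factors the multiplier into odd part times power of two instead of probing divisors 3/5/9 in a loop. With an illustrative MulAmt of 200, countr_zero gives Shift = 3, MulAmt >> Shift = 25 is matched by expandMulToShlAddShlAdd as two chained sh2add nodes (X*5, then *5 again), and the final ISD::SHL by 3 restores X*200.
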
@@ -23946,7 +23933,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
.Case("{t0}", RISCV::X5)
.Case("{t1}", RISCV::X6)
.Case("{t2}", RISCV::X7)
- .Cases("{s0}", "{fp}", RISCV::X8)
+ .Cases({"{s0}", "{fp}"}, RISCV::X8)
.Case("{s1}", RISCV::X9)
.Case("{a0}", RISCV::X10)
.Case("{a1}", RISCV::X11)
@@ -23983,38 +23970,38 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// use the ABI names in register constraint lists.
if (Subtarget.hasStdExtF()) {
unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
- .Cases("{f0}", "{ft0}", RISCV::F0_F)
- .Cases("{f1}", "{ft1}", RISCV::F1_F)
- .Cases("{f2}", "{ft2}", RISCV::F2_F)
- .Cases("{f3}", "{ft3}", RISCV::F3_F)
- .Cases("{f4}", "{ft4}", RISCV::F4_F)
- .Cases("{f5}", "{ft5}", RISCV::F5_F)
- .Cases("{f6}", "{ft6}", RISCV::F6_F)
- .Cases("{f7}", "{ft7}", RISCV::F7_F)
- .Cases("{f8}", "{fs0}", RISCV::F8_F)
- .Cases("{f9}", "{fs1}", RISCV::F9_F)
- .Cases("{f10}", "{fa0}", RISCV::F10_F)
- .Cases("{f11}", "{fa1}", RISCV::F11_F)
- .Cases("{f12}", "{fa2}", RISCV::F12_F)
- .Cases("{f13}", "{fa3}", RISCV::F13_F)
- .Cases("{f14}", "{fa4}", RISCV::F14_F)
- .Cases("{f15}", "{fa5}", RISCV::F15_F)
- .Cases("{f16}", "{fa6}", RISCV::F16_F)
- .Cases("{f17}", "{fa7}", RISCV::F17_F)
- .Cases("{f18}", "{fs2}", RISCV::F18_F)
- .Cases("{f19}", "{fs3}", RISCV::F19_F)
- .Cases("{f20}", "{fs4}", RISCV::F20_F)
- .Cases("{f21}", "{fs5}", RISCV::F21_F)
- .Cases("{f22}", "{fs6}", RISCV::F22_F)
- .Cases("{f23}", "{fs7}", RISCV::F23_F)
- .Cases("{f24}", "{fs8}", RISCV::F24_F)
- .Cases("{f25}", "{fs9}", RISCV::F25_F)
- .Cases("{f26}", "{fs10}", RISCV::F26_F)
- .Cases("{f27}", "{fs11}", RISCV::F27_F)
- .Cases("{f28}", "{ft8}", RISCV::F28_F)
- .Cases("{f29}", "{ft9}", RISCV::F29_F)
- .Cases("{f30}", "{ft10}", RISCV::F30_F)
- .Cases("{f31}", "{ft11}", RISCV::F31_F)
+ .Cases({"{f0}", "{ft0}"}, RISCV::F0_F)
+ .Cases({"{f1}", "{ft1}"}, RISCV::F1_F)
+ .Cases({"{f2}", "{ft2}"}, RISCV::F2_F)
+ .Cases({"{f3}", "{ft3}"}, RISCV::F3_F)
+ .Cases({"{f4}", "{ft4}"}, RISCV::F4_F)
+ .Cases({"{f5}", "{ft5}"}, RISCV::F5_F)
+ .Cases({"{f6}", "{ft6}"}, RISCV::F6_F)
+ .Cases({"{f7}", "{ft7}"}, RISCV::F7_F)
+ .Cases({"{f8}", "{fs0}"}, RISCV::F8_F)
+ .Cases({"{f9}", "{fs1}"}, RISCV::F9_F)
+ .Cases({"{f10}", "{fa0}"}, RISCV::F10_F)
+ .Cases({"{f11}", "{fa1}"}, RISCV::F11_F)
+ .Cases({"{f12}", "{fa2}"}, RISCV::F12_F)
+ .Cases({"{f13}", "{fa3}"}, RISCV::F13_F)
+ .Cases({"{f14}", "{fa4}"}, RISCV::F14_F)
+ .Cases({"{f15}", "{fa5}"}, RISCV::F15_F)
+ .Cases({"{f16}", "{fa6}"}, RISCV::F16_F)
+ .Cases({"{f17}", "{fa7}"}, RISCV::F17_F)
+ .Cases({"{f18}", "{fs2}"}, RISCV::F18_F)
+ .Cases({"{f19}", "{fs3}"}, RISCV::F19_F)
+ .Cases({"{f20}", "{fs4}"}, RISCV::F20_F)
+ .Cases({"{f21}", "{fs5}"}, RISCV::F21_F)
+ .Cases({"{f22}", "{fs6}"}, RISCV::F22_F)
+ .Cases({"{f23}", "{fs7}"}, RISCV::F23_F)
+ .Cases({"{f24}", "{fs8}"}, RISCV::F24_F)
+ .Cases({"{f25}", "{fs9}"}, RISCV::F25_F)
+ .Cases({"{f26}", "{fs10}"}, RISCV::F26_F)
+ .Cases({"{f27}", "{fs11}"}, RISCV::F27_F)
+ .Cases({"{f28}", "{ft8}"}, RISCV::F28_F)
+ .Cases({"{f29}", "{ft9}"}, RISCV::F29_F)
+ .Cases({"{f30}", "{ft10}"}, RISCV::F30_F)
+ .Cases({"{f31}", "{ft11}"}, RISCV::F31_F)
.Default(RISCV::NoRegister);
if (FReg != RISCV::NoRegister) {
assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
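
The .Cases rewrites throughout this patch track a StringSwitch API change in which the alternative spellings are grouped into a braced list ahead of the value instead of being passed as separate leading arguments. A self-contained sketch of the new form (names and values illustrative):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"

  static int classifyReg(llvm::StringRef Name) {
    return llvm::StringSwitch<int>(Name)
        .Case("{zero}", 0)
        .Cases({"{s0}", "{fp}"}, 8) // both spellings map to the same value
        .Default(-1);
  }
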
diff --git a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
index a1c8e23..c58a5c0 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
@@ -48,7 +48,7 @@ class VXRMInfo {
} State = Uninitialized;
public:
- VXRMInfo() {}
+ VXRMInfo() = default;
static VXRMInfo getUnknown() {
VXRMInfo Info;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index c9df787..b8ab70b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1703,6 +1703,7 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
case RISCV::MAXU: return RISCV::PseudoCCMAXU;
case RISCV::MIN: return RISCV::PseudoCCMIN;
case RISCV::MINU: return RISCV::PseudoCCMINU;
+ case RISCV::MUL: return RISCV::PseudoCCMUL;
case RISCV::ADDI: return RISCV::PseudoCCADDI;
case RISCV::SLLI: return RISCV::PseudoCCSLLI;
@@ -1754,6 +1755,9 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
return nullptr;
+ if (!STI.hasShortForwardBranchIMul() && MI->getOpcode() == RISCV::MUL)
+ return nullptr;
+
// Check if MI can be predicated and folded into the CCMOV.
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
return nullptr;
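
With the new hook wired up, canFoldAsPredicatedOp only folds a MUL into the conditional-move pseudo when the subtarget advertises HasShortForwardBranchIMul; the resulting PseudoCCMUL is later expanded back into a compare-and-branch over the single mul (roughly "beq cond, zero, skip; mul dst, src1, src2; skip:"), a shape that short-forward-branch cores execute as if the multiply were predicated.
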
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index 5a67a5a..494b1c9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -110,6 +110,7 @@ def PseudoCCMAX : SFBALU_rr;
def PseudoCCMIN : SFBALU_rr;
def PseudoCCMAXU : SFBALU_rr;
def PseudoCCMINU : SFBALU_rr;
+def PseudoCCMUL : SFBALU_rr;
def PseudoCCADDI : SFBALU_ri;
def PseudoCCANDI : SFBALU_ri;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index c31713e..1c6a5af 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -90,6 +90,7 @@ defvar ZfhminDExts = [ZfhminDExt, ZhinxminZdinxExt, ZhinxminZdinx32Ext];
//===----------------------------------------------------------------------===//
let Predicates = [HasHalfFPLoadStoreMove] in {
+let canFoldAsLoad = 1 in
def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;
// Operands for stores are in the order srcreg, base, offset rather than
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
index 0a318e0..ed6d355 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
@@ -15,4 +15,4 @@
using namespace llvm;
SPIRVTargetStreamer::SPIRVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
-SPIRVTargetStreamer::~SPIRVTargetStreamer() {}
+SPIRVTargetStreamer::~SPIRVTargetStreamer() = default;
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 9e11c3a..dd57b74 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -149,23 +149,23 @@ static FunctionType *getOriginalFunctionType(const Function &F) {
return isa<MDString>(N->getOperand(0)) &&
cast<MDString>(N->getOperand(0))->getString() == F.getName();
});
- // TODO: probably one function can have numerous type mutations,
- // so we should support this.
if (ThisFuncMDIt != NamedMD->op_end()) {
auto *ThisFuncMD = *ThisFuncMDIt;
- MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(1));
- assert(MD && "MDNode operand is expected");
- ConstantInt *Const = getConstInt(MD, 0);
- if (Const) {
- auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1));
- assert(CMeta && "ConstantAsMetadata operand is expected");
- assert(Const->getSExtValue() >= -1);
- // Currently -1 indicates return value, greater values mean
- // argument numbers.
- if (Const->getSExtValue() == -1)
- RetTy = CMeta->getType();
- else
- ArgTypes[Const->getSExtValue()] = CMeta->getType();
+ for (unsigned I = 1; I != ThisFuncMD->getNumOperands(); ++I) {
+ MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(I));
+ assert(MD && "MDNode operand is expected");
+ ConstantInt *Const = getConstInt(MD, 0);
+ if (Const) {
+ auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1));
+ assert(CMeta && "ConstantAsMetadata operand is expected");
+ assert(Const->getSExtValue() >= -1);
+ // Currently -1 indicates return value, greater values mean
+ // argument numbers.
+ if (Const->getSExtValue() == -1)
+ RetTy = CMeta->getType();
+ else
+ ArgTypes[Const->getSExtValue()] = CMeta->getType();
+ }
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index 2d19f6de..44b6c66 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -81,7 +81,7 @@ private:
void initAvailableCapabilitiesForVulkan(const SPIRVSubtarget &ST);
public:
- RequirementHandler() {}
+ RequirementHandler() = default;
void clear() {
MinimalCaps.clear();
AllCaps.clear();
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index 7dd0b95..5ba0356 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -69,7 +69,7 @@ static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
}
// Pin SPIRVTargetObjectFile's vtables to this file.
-SPIRVTargetObjectFile::~SPIRVTargetObjectFile() {}
+SPIRVTargetObjectFile::~SPIRVTargetObjectFile() = default;
SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h
index 9d0adbb..87ec256 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h
@@ -16,7 +16,7 @@ namespace llvm {
/// This implementation is used for SystemZ ELF targets.
class SystemZELFTargetObjectFile : public TargetLoweringObjectFileELF {
public:
- SystemZELFTargetObjectFile() {}
+ SystemZELFTargetObjectFile() = default;
/// Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
index 7845cdf..1bfc61f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
@@ -76,7 +76,7 @@ public:
BlockSet.insert(MBB);
}
ArrayRef<MachineBasicBlock *> getBlocks() const { return Blocks; }
- using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
+ using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator;
block_iterator block_begin() const { return getBlocks().begin(); }
block_iterator block_end() const { return getBlocks().end(); }
inline iterator_range<block_iterator> blocks() const {
@@ -96,7 +96,7 @@ public:
void addSubException(std::unique_ptr<WebAssemblyException> E) {
SubExceptions.push_back(std::move(E));
}
- using iterator = typename decltype(SubExceptions)::const_iterator;
+ using iterator = decltype(SubExceptions)::const_iterator;
iterator begin() const { return SubExceptions.begin(); }
iterator end() const { return SubExceptions.end(); }
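
These typename removals (here and in the X86 LVI pass below) rely on the standard rule that the keyword is only needed when the nested name depends on a template parameter; with concrete template arguments such as ArrayRef<MachineBasicBlock *>, the member alias is already known to name a type. A minimal illustration outside LLVM (placeholder types):

  #include <vector>

  template <typename T> struct DependentHolder {
    // Dependent on T: typename is required.
    using iterator = typename std::vector<T>::iterator;
  };

  struct ConcreteHolder {
    // Fully specified type: typename would be redundant.
    using iterator = std::vector<int>::iterator;
  };
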
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index ff4d6469..ee575e3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -207,8 +207,7 @@ template <> struct MappingTraits<WebAssemblyFunctionInfo> {
template <> struct CustomMappingTraits<BBNumberMap> {
static void inputOne(IO &YamlIO, StringRef Key,
BBNumberMap &SrcToUnwindDest) {
- YamlIO.mapRequired(Key.str().c_str(),
- SrcToUnwindDest[std::atoi(Key.str().c_str())]);
+ YamlIO.mapRequired(Key, SrcToUnwindDest[std::atoi(Key.str().c_str())]);
}
static void output(IO &YamlIO, BBNumberMap &SrcToUnwindDest) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h
index e92bf17..96b8a4e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h
@@ -35,7 +35,7 @@ public:
virtual MachineBasicBlock *getHeader() const = 0;
virtual bool contains(const MachineBasicBlock *MBB) const = 0;
virtual unsigned getNumBlocks() const = 0;
- using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
+ using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator;
virtual iterator_range<block_iterator> blocks() const = 0;
virtual bool isLoop() const = 0;
};
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b7ea672..bac3692 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2470,10 +2470,10 @@ bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
// Report back its kind, or IOK_INVALID if it does not evaluate to a known one
unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
return StringSwitch<unsigned>(Name)
- .Cases("TYPE","type",IOK_TYPE)
- .Cases("SIZE","size",IOK_SIZE)
- .Cases("LENGTH","length",IOK_LENGTH)
- .Default(IOK_INVALID);
+ .Cases({"TYPE", "type"}, IOK_TYPE)
+ .Cases({"SIZE", "size"}, IOK_SIZE)
+ .Cases({"LENGTH", "length"}, IOK_LENGTH)
+ .Default(IOK_INVALID);
}
/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
@@ -2516,8 +2516,8 @@ unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
return StringSwitch<unsigned>(Name.lower())
.Case("type", MOK_TYPE)
- .Cases("size", "sizeof", MOK_SIZEOF)
- .Cases("length", "lengthof", MOK_LENGTHOF)
+ .Cases({"size", "sizeof"}, MOK_SIZEOF)
+ .Cases({"length", "lengthof"}, MOK_LENGTHOF)
.Default(MOK_INVALID);
}
@@ -2581,21 +2581,21 @@ bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size,
StringRef *SizeStr) {
Size = StringSwitch<unsigned>(getTok().getString())
- .Cases("BYTE", "byte", 8)
- .Cases("WORD", "word", 16)
- .Cases("DWORD", "dword", 32)
- .Cases("FLOAT", "float", 32)
- .Cases("LONG", "long", 32)
- .Cases("FWORD", "fword", 48)
- .Cases("DOUBLE", "double", 64)
- .Cases("QWORD", "qword", 64)
- .Cases("MMWORD","mmword", 64)
- .Cases("XWORD", "xword", 80)
- .Cases("TBYTE", "tbyte", 80)
- .Cases("XMMWORD", "xmmword", 128)
- .Cases("YMMWORD", "ymmword", 256)
- .Cases("ZMMWORD", "zmmword", 512)
- .Default(0);
+ .Cases({"BYTE", "byte"}, 8)
+ .Cases({"WORD", "word"}, 16)
+ .Cases({"DWORD", "dword"}, 32)
+ .Cases({"FLOAT", "float"}, 32)
+ .Cases({"LONG", "long"}, 32)
+ .Cases({"FWORD", "fword"}, 48)
+ .Cases({"DOUBLE", "double"}, 64)
+ .Cases({"QWORD", "qword"}, 64)
+ .Cases({"MMWORD", "mmword"}, 64)
+ .Cases({"XWORD", "xword"}, 80)
+ .Cases({"TBYTE", "tbyte"}, 80)
+ .Cases({"XMMWORD", "xmmword"}, 128)
+ .Cases({"YMMWORD", "ymmword"}, 256)
+ .Cases({"ZMMWORD", "zmmword"}, 512)
+ .Default(0);
if (Size) {
if (SizeStr)
*SizeStr = getTok().getString();
@@ -2886,22 +2886,22 @@ bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
// otherwise the EFLAGS Condition Code enumerator.
X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
return StringSwitch<X86::CondCode>(CC)
- .Case("o", X86::COND_O) // Overflow
- .Case("no", X86::COND_NO) // No Overflow
- .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
- .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
- .Cases("e", "z", X86::COND_E) // Equal/Zero
- .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
- .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
- .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
- .Case("s", X86::COND_S) // Sign
- .Case("ns", X86::COND_NS) // No Sign
- .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
- .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
- .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
- .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
- .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
- .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
+ .Case("o", X86::COND_O) // Overflow
+ .Case("no", X86::COND_NO) // No Overflow
+ .Cases({"b", "nae"}, X86::COND_B) // Below/Neither Above nor Equal
+ .Cases({"ae", "nb"}, X86::COND_AE) // Above or Equal/Not Below
+ .Cases({"e", "z"}, X86::COND_E) // Equal/Zero
+ .Cases({"ne", "nz"}, X86::COND_NE) // Not Equal/Not Zero
+ .Cases({"be", "na"}, X86::COND_BE) // Below or Equal/Not Above
+ .Cases({"a", "nbe"}, X86::COND_A) // Above/Neither Below nor Equal
+ .Case("s", X86::COND_S) // Sign
+ .Case("ns", X86::COND_NS) // No Sign
+ .Cases({"p", "pe"}, X86::COND_P) // Parity/Parity Even
+ .Cases({"np", "po"}, X86::COND_NP) // No Parity/Parity Odd
+ .Cases({"l", "nge"}, X86::COND_L) // Less/Neither Greater nor Equal
+ .Cases({"ge", "nl"}, X86::COND_GE) // Greater or Equal/Not Less
+ .Cases({"le", "ng"}, X86::COND_LE) // Less or Equal/Not Greater
+ .Cases({"g", "nle"}, X86::COND_G) // Greater/Neither Less nor Equal
.Default(X86::COND_INVALID);
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 007074c..6edf018 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22861,6 +22861,13 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
if (!OpVT.isScalarInteger() || OpSize < 128)
return SDValue();
+ // Don't do this if we're not supposed to use the FPU.
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (Subtarget.useSoftFloat() || NoImplicitFloatOps)
+ return SDValue();
+
// Ignore a comparison with zero because that gets special treatment in
// EmitTest(). But make an exception for the special case of a pair of
// logically-combined vector-sized operands compared to zero. This pattern may
@@ -22883,13 +22890,9 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
// Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
// Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
// Otherwise use PCMPEQ (plus AND) and mask testing.
- bool NoImplicitFloatOps =
- DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
- if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
- ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX()) ||
- (OpSize == 512 && Subtarget.useAVX512Regs()))) {
+ if ((OpSize == 128 && Subtarget.hasSSE2()) ||
+ (OpSize == 256 && Subtarget.hasAVX()) ||
+ (OpSize == 512 && Subtarget.useAVX512Regs())) {
bool HasPT = Subtarget.hasSSE41();
// PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened
@@ -33031,12 +33034,13 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
- : (Type *)FixedVectorType::get(ArgTy, 4);
+ : (Type *)FixedVectorType::get(ArgTy, 2);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
- .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
+ .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
+ .setIsPostTypeLegalization();
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -53351,21 +53355,11 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
using namespace SDPatternMatch;
-
- // Only handle normal stores and its chain was a matching normal load.
- auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
- if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld ||
- !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
- Ld->getBasePtr() != St->getBasePtr() ||
- Ld->getOffset() != St->getOffset())
- return SDValue();
-
- SDValue LoadVal(Ld, 0);
SDValue StoredVal = St->getValue();
EVT VT = StoredVal.getValueType();
- // Only narrow larger than legal scalar integers.
- if (!VT.isScalarInteger() ||
+ // Only narrow normal stores of larger than legal scalar integers.
+ if (!ISD::isNormalStore(St) || !St->isSimple() || !VT.isScalarInteger() ||
VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32))
return SDValue();
@@ -53374,18 +53368,26 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
// BTC: X ^ (1 << ShAmt)
//
// BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt)
- SDValue InsertBit, ShAmt;
+ SDValue SrcVal, InsertBit, ShAmt;
if (!StoredVal.hasOneUse() ||
- !(sd_match(StoredVal, m_And(m_Specific(LoadVal),
+ !(sd_match(StoredVal, m_And(m_Value(SrcVal),
m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
sd_match(StoredVal,
- m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
+ m_Or(m_Value(SrcVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
sd_match(StoredVal,
- m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
- sd_match(StoredVal,
- m_Or(m_And(m_Specific(LoadVal),
- m_Not(m_Shl(m_One(), m_Value(ShAmt)))),
- m_Shl(m_Value(InsertBit), m_Deferred(ShAmt))))))
+ m_Xor(m_Value(SrcVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
+ sd_match(
+ StoredVal,
+ m_Or(m_And(m_Value(SrcVal), m_Not(m_Shl(m_One(), m_Value(ShAmt)))),
+ m_Shl(m_Value(InsertBit), m_Deferred(ShAmt))))))
+ return SDValue();
+
+ // SrcVal must be a matching normal load further up the chain.
+ auto *Ld = dyn_cast<LoadSDNode>(SrcVal);
+ if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
+ Ld->getBasePtr() != St->getBasePtr() ||
+ Ld->getOffset() != St->getOffset() ||
+ !St->getChain().reachesChainWithoutSideEffects(SDValue(Ld, 1)))
return SDValue();
// Ensure the shift amount is in bounds.
@@ -53419,7 +53421,7 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SDNodeFlags::NoUnsignedWrap);
// Reconstruct the BTC/BTR/BTS pattern for the i32 block and store.
- SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
+ SDValue X = DAG.getNode(ISD::SRL, DL, VT, SrcVal, AlignAmt);
X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32,
@@ -54629,8 +54631,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue NewLoad =
DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),
Align(), Ld->getMemOperand()->getFlags());
- DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1),
- NewLoad.getValue(1));
+ DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return NewLoad;
}
}
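
For context on the narrowBitOpRMW change above: the three single-bit read-modify-write forms it matches on the stored value are ordinary integer identities, now discovered through m_Value(SrcVal) and only afterwards required to trace back to a matching simple load on the store's chain. A standalone C++ check of the three forms (values arbitrary):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t X = 0x00F0;
    unsigned ShAmt = 3;
    uint64_t BTS = X | (uint64_t{1} << ShAmt);  // set bit ShAmt
    uint64_t BTR = X & ~(uint64_t{1} << ShAmt); // reset bit ShAmt
    uint64_t BTC = X ^ (uint64_t{1} << ShAmt);  // complement bit ShAmt
    assert(BTS == 0x00F8 && BTR == 0x00F0 && BTC == 0x00F8);
    return 0;
  }
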
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index 090060e..3b96e70 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -115,9 +115,9 @@ struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {
static constexpr MachineInstr *const ArgNodeSentinel = nullptr;
using GraphT = ImmutableGraph<MachineInstr *, int>;
- using Node = typename GraphT::Node;
- using Edge = typename GraphT::Edge;
- using size_type = typename GraphT::size_type;
+ using Node = GraphT::Node;
+ using Edge = GraphT::Edge;
+ using size_type = GraphT::size_type;
MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,
std::unique_ptr<Edge[]> Edges, size_type NodesSize,
size_type EdgesSize, int NumFences = 0, int NumGadgets = 0)
@@ -191,10 +191,10 @@ template <>
struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
using GraphType = MachineGadgetGraph;
using Traits = llvm::GraphTraits<GraphType *>;
- using NodeRef = typename Traits::NodeRef;
- using EdgeRef = typename Traits::EdgeRef;
- using ChildIteratorType = typename Traits::ChildIteratorType;
- using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType;
+ using NodeRef = Traits::NodeRef;
+ using EdgeRef = Traits::EdgeRef;
+ using ChildIteratorType = Traits::ChildIteratorType;
+ using ChildEdgeIteratorType = Traits::ChildEdgeIteratorType;
DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
@@ -227,9 +227,6 @@ struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
} // end namespace llvm
-constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel;
-constexpr int MachineGadgetGraph::GadgetEdgeSentinel;
-
char X86LoadValueInjectionLoadHardeningPass::ID = 0;
void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
@@ -335,7 +332,7 @@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
L.computePhiInfo();
GraphBuilder Builder;
- using GraphIter = typename GraphBuilder::BuilderNodeRef;
+ using GraphIter = GraphBuilder::BuilderNodeRef;
DenseMap<MachineInstr *, GraphIter> NodeMap;
int FenceCount = 0, GadgetCount = 0;
auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 3d8d0a23..0b1430e 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6562,7 +6562,7 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
const Function *Callee,
- const ArrayRef<Type *> &Types) const {
+ ArrayRef<Type *> Types) const {
if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
return false;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 133b366..de5e1c2 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -296,7 +296,7 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const override;
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
- const ArrayRef<Type *> &Type) const override;
+ ArrayRef<Type *> Type) const override;
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
return ST->getMaxInlineSizeThreshold();