Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/ConstantFolding.cpp | 11
-rw-r--r--  llvm/lib/Analysis/DXILResource.cpp | 7
-rw-r--r--  llvm/lib/Analysis/DependenceAnalysis.cpp | 17
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 2
-rw-r--r--  llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 44
-rw-r--r--  llvm/lib/Analysis/ProfileSummaryInfo.cpp | 14
-rw-r--r--  llvm/lib/Analysis/TargetLibraryInfo.cpp | 28
-rw-r--r--  llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AIXException.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/BranchFolding.cpp | 47
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/CommandFlags.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/ExpandFp.cpp | 33
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/InterleavedAccessPass.cpp | 123
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MILexer.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MILexer.h | 1
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIParser.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 14
-rw-r--r--  llvm/lib/CodeGen/MIRPrinter.cpp | 15
-rw-r--r--  llvm/lib/CodeGen/MachineFunction.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/MachineOperand.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 54
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/StackProtector.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp | 34
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 21
-rw-r--r--  llvm/lib/CodeGen/WindowsSecureHotPatching.cpp | 13
-rw-r--r--  llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 21
-rw-r--r--  llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp | 474
-rw-r--r--  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 26
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp | 7
-rw-r--r--  llvm/lib/IR/DiagnosticInfo.cpp | 4
-rw-r--r--  llvm/lib/IR/Metadata.cpp | 2
-rw-r--r--  llvm/lib/MC/CMakeLists.txt | 6
-rw-r--r--  llvm/lib/MC/ELFObjectWriter.cpp | 29
-rw-r--r--  llvm/lib/MC/GOFFObjectWriter.cpp | 2
-rw-r--r--  llvm/lib/MC/MCAsmInfoCOFF.cpp | 108
-rw-r--r--  llvm/lib/MC/MCAsmInfoDarwin.cpp | 5
-rw-r--r--  llvm/lib/MC/MCAsmInfoELF.cpp | 196
-rw-r--r--  llvm/lib/MC/MCAsmInfoGOFF.cpp | 138
-rw-r--r--  llvm/lib/MC/MCAsmInfoWasm.cpp | 83
-rw-r--r--  llvm/lib/MC/MCAsmInfoXCOFF.cpp | 124
-rw-r--r--  llvm/lib/MC/MCAsmStreamer.cpp | 31
-rw-r--r--  llvm/lib/MC/MCAssembler.cpp | 1
-rw-r--r--  llvm/lib/MC/MCContext.cpp | 54
-rw-r--r--  llvm/lib/MC/MCELFStreamer.cpp | 4
-rw-r--r--  llvm/lib/MC/MCExpr.cpp | 25
-rw-r--r--  llvm/lib/MC/MCFragment.cpp | 2
-rw-r--r--  llvm/lib/MC/MCGOFFStreamer.cpp | 20
-rw-r--r--  llvm/lib/MC/MCMachOStreamer.cpp | 12
-rw-r--r--  llvm/lib/MC/MCObjectStreamer.cpp | 66
-rw-r--r--  llvm/lib/MC/MCParser/AsmParser.cpp | 11
-rw-r--r--  llvm/lib/MC/MCParser/ELFAsmParser.cpp | 4
-rw-r--r--  llvm/lib/MC/MCParser/MasmParser.cpp | 3
-rw-r--r--  llvm/lib/MC/MCParser/WasmAsmParser.cpp | 2
-rw-r--r--  llvm/lib/MC/MCSection.cpp | 8
-rw-r--r--  llvm/lib/MC/MCSectionCOFF.cpp | 117
-rw-r--r--  llvm/lib/MC/MCSectionDXContainer.cpp | 15
-rw-r--r--  llvm/lib/MC/MCSectionELF.cpp | 217
-rw-r--r--  llvm/lib/MC/MCSectionGOFF.cpp | 143
-rw-r--r--  llvm/lib/MC/MCSectionMachO.cpp | 28
-rw-r--r--  llvm/lib/MC/MCSectionWasm.cpp | 101
-rw-r--r--  llvm/lib/MC/MCSectionXCOFF.cpp | 134
-rw-r--r--  llvm/lib/MC/MCStreamer.cpp | 38
-rw-r--r--  llvm/lib/MC/MCTargetOptions.cpp | 3
-rw-r--r--  llvm/lib/MC/MCTargetOptionsCommandFlags.cpp | 7
-rw-r--r--  llvm/lib/MC/MCWasmStreamer.cpp | 2
-rw-r--r--  llvm/lib/MC/MCWinCOFFStreamer.cpp | 3
-rw-r--r--  llvm/lib/MC/MCXCOFFStreamer.cpp | 14
-rw-r--r--  llvm/lib/MC/MachObjectWriter.cpp | 34
-rw-r--r--  llvm/lib/MC/WasmObjectWriter.cpp | 2
-rw-r--r--  llvm/lib/MC/WinCOFFObjectWriter.cpp | 5
-rw-r--r--  llvm/lib/MC/XCOFFObjectWriter.cpp | 8
-rw-r--r--  llvm/lib/Object/IRSymtab.cpp | 11
-rw-r--r--  llvm/lib/Passes/PassRegistry.def | 2
-rw-r--r--  llvm/lib/ProfileData/InstrProfReader.cpp | 2
-rw-r--r--  llvm/lib/Support/AArch64AttributeParser.cpp | 27
-rw-r--r--  llvm/lib/Support/CommandLine.cpp | 27
-rw-r--r--  llvm/lib/Support/Debug.cpp | 69
-rw-r--r--  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 38
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 163
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h | 7
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 270
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.h | 4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td | 69
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 23
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td | 26
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 151
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 12
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 93
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 21
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 22
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/FLATInstructions.td | 249
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNRegPressure.h | 40
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 11
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.cpp | 57
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.h | 15
-rw-r--r--  llvm/lib/Target/AMDGPU/SIDefines.h | 12
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 119
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 37
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 83
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h | 9
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 71
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 61
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 43
-rw-r--r--  llvm/lib/Target/AMDGPU/SMInstructions.td | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 9
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 147
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 3
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 23
-rw-r--r--  llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 4
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 14
-rw-r--r--  llvm/lib/Target/AVR/AVRAsmPrinter.cpp | 2
-rw-r--r--  llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h | 4
-rw-r--r--  llvm/lib/Target/BPF/BTFDebug.cpp | 80
-rw-r--r--  llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp | 7
-rw-r--r--  llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h | 5
-rw-r--r--  llvm/lib/Target/DirectX/DXILFlattenArrays.cpp | 9
-rw-r--r--  llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 97
-rw-r--r--  llvm/lib/Target/DirectX/DXILResourceAccess.cpp | 3
-rw-r--r--  llvm/lib/Target/DirectX/DXILRootSignature.cpp | 471
-rw-r--r--  llvm/lib/Target/DirectX/DXILRootSignature.h | 11
-rw-r--r--  llvm/lib/Target/DirectX/DXILShaderFlags.cpp | 15
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp | 2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonMask.cpp | 2
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 62
-rw-r--r--  llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp | 1
-rw-r--r--  llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 2
-rw-r--r--  llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 8
-rw-r--r--  llvm/lib/Target/Mips/MipsISelLowering.cpp | 29
-rw-r--r--  llvm/lib/Target/Mips/MipsISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h | 2
-rw-r--r--  llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp | 5
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 273
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 2
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 636
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 10
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 139
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 2
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 32
-rw-r--r--  llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 9
-rw-r--r--  llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 149
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 1
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 85
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 121
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 95
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td | 33
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZc.td | 14
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp | 17
-rw-r--r--  llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 200
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 8
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 35
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVAPI.cpp | 4
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 6
-rw-r--r--  llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp | 4
-rw-r--r--  llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp | 6
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssembly.td | 15
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td | 3
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td | 11
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 2
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp | 6
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp | 5
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h | 2
-rw-r--r--  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 30
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 40
-rw-r--r--  llvm/lib/Target/X86/X86PassRegistry.def | 44
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 131
-rw-r--r--  llvm/lib/TargetParser/RISCVISAInfo.cpp | 19
-rw-r--r--  llvm/lib/TargetParser/TargetParser.cpp | 1
-rw-r--r--  llvm/lib/TargetParser/Triple.cpp | 6
-rw-r--r--  llvm/lib/TextAPI/SymbolSet.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Coroutines/Coroutines.cpp | 3
-rw-r--r--  llvm/lib/Transforms/HipStdPar/HipStdPar.cpp | 118
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 143
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 6
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 8
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 17
-rw-r--r--  llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp | 193
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 2
-rw-r--r--  llvm/lib/Transforms/ObjCARC/CMakeLists.txt | 1
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 156
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopFuse.cpp | 25
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 114
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalarizer.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 49
-rw-r--r--  llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 130
-rw-r--r--  llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 36
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 69
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 112
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 27
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 44
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp | 10
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 40
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 71
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 10
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 7
-rw-r--r--  llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 124
242 files changed, 5315 insertions(+), 4335 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index ec78386..759c553 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -929,12 +929,11 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
if (!AllConstantInt)
break;
- // TODO: Try to intersect two inrange attributes?
- if (!InRange) {
- InRange = GEP->getInRange();
- if (InRange)
- // Adjust inrange by offset until now.
- InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset);
+ // Adjust inrange offset and intersect inrange attributes
+ if (auto GEPRange = GEP->getInRange()) {
+ auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset);
+ InRange =
+ InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange;
}
Ptr = cast<Constant>(GEP->getOperand(0));
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 2da6468..1959ab6 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -1079,15 +1079,16 @@ void DXILResourceBindingInfo::populate(Module &M, DXILResourceTypeMap &DRTM) {
// add new space
S = &BS->Spaces.emplace_back(B.Space);
- // the space is full - set flag to report overlapping binding later
- if (S->FreeRanges.empty()) {
+ // The space is full - there are no free slots left, or the rest of the
+ // slots are taken by an unbounded array. Set flag to report overlapping
+ // binding later.
+ if (S->FreeRanges.empty() || S->FreeRanges.back().UpperBound < UINT32_MAX) {
OverlappingBinding = true;
continue;
}
// adjust the last free range lower bound, split it in two, or remove it
BindingRange &LastFreeRange = S->FreeRanges.back();
- assert(LastFreeRange.UpperBound == UINT32_MAX);
if (LastFreeRange.LowerBound == B.LowerBound) {
if (B.UpperBound < UINT32_MAX)
LastFreeRange.LowerBound = B.UpperBound + 1;
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index dd9a44b..f1473b2 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
SrcSubscripts, DstSubscripts))
return false;
+ assert(isLoopInvariant(SrcBase, SrcLoop) &&
+ isLoopInvariant(DstBase, DstLoop) &&
+ "Expected SrcBase and DstBase to be loop invariant");
+
int Size = SrcSubscripts.size();
LLVM_DEBUG({
dbgs() << "\nSrcSubscripts: ";
@@ -3666,6 +3670,19 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
SCEVUnionPredicate(Assume, *SE));
}
+ // Even if the base pointers are the same, they may not be loop-invariant. It
+ // could lead to incorrect results, as we're analyzing loop-carried
+ // dependencies. Src and Dst can be in different loops, so we need to check
+ // the base pointer is invariant in both loops.
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent());
+ Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+ if (!isLoopInvariant(SrcBase, SrcLoop) ||
+ !isLoopInvariant(DstBase, DstLoop)) {
+ LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
+ return std::make_unique<Dependence>(Src, Dst,
+ SCEVUnionPredicate(Assume, *SE));
+ }
+
uint64_t EltSize = SrcLoc.Size.toRaw();
const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 82530e7..5907e21 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5366,7 +5366,7 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
Type *MidTy = CI->getType();
Type *DstTy = Ty;
if (Src->getType() == Ty) {
- auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
+ auto FirstOp = CI->getOpcode();
auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
Type *SrcIntPtrTy =
SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3aa9909..2b0f212 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -983,33 +983,37 @@ MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
static void
SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
unsigned NumSortedEntries) {
- switch (Cache.size() - NumSortedEntries) {
- case 0:
- // done, no new entries.
- break;
- case 2: {
- // Two new entries, insert the last one into place.
- NonLocalDepEntry Val = Cache.back();
- Cache.pop_back();
- MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
- Cache.insert(Entry, Val);
- [[fallthrough]];
+
+ // If only one entry, don't sort.
+ if (Cache.size() < 2)
+ return;
+
+ unsigned s = Cache.size() - NumSortedEntries;
+
+ // If the cache is already sorted, don't sort it again.
+ if (s == 0)
+ return;
+
+ // If no entry is sorted, sort the whole cache.
+ if (NumSortedEntries == 0) {
+ llvm::sort(Cache);
+ return;
}
- case 1:
- // One new entry, Just insert the new value at the appropriate position.
- if (Cache.size() != 1) {
+
+ // If the number of unsorted entries is small and the cache size is big, using
+ // insertion sort is faster. Here use Log2_32 to quickly choose the sort
+ // method.
+ if (s < Log2_32(Cache.size())) {
+ while (s > 0) {
NonLocalDepEntry Val = Cache.back();
Cache.pop_back();
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- llvm::upper_bound(Cache, Val);
+ std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val);
Cache.insert(Entry, Val);
+ s--;
}
- break;
- default:
- // Added many values, do a full scale sort.
+ } else {
llvm::sort(Cache);
- break;
}
}
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index e8d4e37..f1c3155 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -121,8 +121,18 @@ void ProfileSummaryInfo::computeThresholds() {
ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
ColdCountThreshold =
ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
- assert(ColdCountThreshold <= HotCountThreshold &&
- "Cold count threshold cannot exceed hot count threshold!");
+ // When the hot and cold thresholds are identical, we would classify
+ // a count value as both hot and cold since we are doing an inclusive check
+ // (see ::is{Hot|Cold}Count()). To avoid this undesirable overlap, ensure the
+ // thresholds are distinct.
+ if (HotCountThreshold == ColdCountThreshold) {
+ if (ColdCountThreshold > 0)
+ (*ColdCountThreshold)--;
+ else
+ (*HotCountThreshold)++;
+ }
+ assert(ColdCountThreshold < HotCountThreshold &&
+ "Cold count threshold should be less than hot count threshold!");
if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
HasHugeWorkingSetSize =
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index e475be2..6e92766 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -875,6 +875,34 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_toascii);
}
+ if (T.isOSFreeBSD()) {
+ TLI.setAvailable(LibFunc_dunder_strtok_r);
+ TLI.setAvailable(LibFunc_memalign);
+ TLI.setAvailable(LibFunc_fputc_unlocked);
+ TLI.setAvailable(LibFunc_fputs_unlocked);
+ TLI.setAvailable(LibFunc_fread_unlocked);
+ TLI.setAvailable(LibFunc_fwrite_unlocked);
+ TLI.setAvailable(LibFunc_getc_unlocked);
+ TLI.setAvailable(LibFunc_getchar_unlocked);
+ TLI.setAvailable(LibFunc_putc_unlocked);
+ TLI.setAvailable(LibFunc_putchar_unlocked);
+
+ TLI.setUnavailable(LibFunc___kmpc_alloc_shared);
+ TLI.setUnavailable(LibFunc___kmpc_free_shared);
+ TLI.setUnavailable(LibFunc_dunder_strndup);
+ TLI.setUnavailable(LibFunc_memccpy_chk);
+ TLI.setUnavailable(LibFunc_strlen_chk);
+ TLI.setUnavailable(LibFunc_fmaximum_num);
+ TLI.setUnavailable(LibFunc_fmaximum_numf);
+ TLI.setUnavailable(LibFunc_fmaximum_numl);
+ TLI.setUnavailable(LibFunc_fminimum_num);
+ TLI.setUnavailable(LibFunc_fminimum_numf);
+ TLI.setUnavailable(LibFunc_fminimum_numl);
+ TLI.setUnavailable(LibFunc_roundeven);
+ TLI.setUnavailable(LibFunc_roundevenf);
+ TLI.setUnavailable(LibFunc_roundevenl);
+ }
+
// As currently implemented in clang, NVPTX code has no standard library to
// speak of. Headers provide a standard-ish library implementation, but many
// of the signatures are wrong -- for example, many libm functions are not
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index c871070..7025b83 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -525,6 +525,8 @@ AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const {
Result.TBAAStruct = nullptr;
Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+ Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
+ NoAliasAddrSpace, Other.NoAliasAddrSpace);
return Result;
}
@@ -533,6 +535,8 @@ AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const {
Result.TBAA = Result.TBAAStruct = nullptr;
Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+ Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
+ NoAliasAddrSpace, Other.NoAliasAddrSpace);
return Result;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 5d7c97a..6356d71 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -37,8 +37,8 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
// unsigned long personality; /* Pointer to the personality routine */
// }
- auto *EHInfo =
- cast<MCSectionXCOFF>(Asm->getObjFileLowering().getCompactUnwindSection());
+ auto *EHInfo = static_cast<MCSectionXCOFF *>(
+ Asm->getObjFileLowering().getCompactUnwindSection());
if (Asm->TM.getFunctionSections()) {
// If option -ffunction-sections is on, append the function name to the
// name of EH Info Table csect so that each function has its own EH Info
diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index de6ebcf..51342c6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -39,7 +39,7 @@ void ARMException::beginFunction(const MachineFunction *MF) {
if (CFISecType == AsmPrinter::CFISection::Debug) {
if (!hasEmittedCFISections) {
if (Asm->getModuleCFISectionType() == AsmPrinter::CFISection::Debug)
- Asm->OutStreamer->emitCFISections(false, true);
+ Asm->OutStreamer->emitCFISections(false, true, false);
hasEmittedCFISections = true;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index f1d3e96..6166271 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -4221,10 +4221,11 @@ MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
SectionKind Kind = CPE.getSectionKind(&DL);
const Constant *C = CPE.Val.ConstVal;
Align Alignment = CPE.Alignment;
- if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
- getObjFileLowering().getSectionForConstant(DL, Kind, C,
- Alignment))) {
- if (MCSymbol *Sym = S->getCOMDATSymbol()) {
+ auto *S =
+ getObjFileLowering().getSectionForConstant(DL, Kind, C, Alignment);
+ if (S && TM.getTargetTriple().isOSBinFormatCOFF()) {
+ if (MCSymbol *Sym =
+ static_cast<const MCSectionCOFF *>(S)->getCOMDATSymbol()) {
if (Sym->isUndefined())
OutStreamer->emitSymbolAttribute(Sym, MCSA_Global);
return Sym;
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8abeb56..c5d6e40 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1051,10 +1051,10 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
// comdat key. A section may be comdat because of -ffunction-sections or
// because it is comdat in the IR.
MCSectionCOFF *GVSec =
- GVSym ? dyn_cast<MCSectionCOFF>(&GVSym->getSection()) : nullptr;
+ GVSym ? static_cast<MCSectionCOFF *>(&GVSym->getSection()) : nullptr;
const MCSymbol *KeySym = GVSec ? GVSec->getCOMDATSymbol() : nullptr;
- MCSectionCOFF *DebugSec = cast<MCSectionCOFF>(
+ auto *DebugSec = static_cast<MCSectionCOFF *>(
CompilerInfoAsm->getObjFileLowering().getCOFFDebugSymbolsSection());
DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 4fac4bb..6b8d08c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -109,9 +109,11 @@ void DwarfCFIException::beginBasicBlockSection(const MachineBasicBlock &MBB) {
// chose not to be verbose in that case. And with `ForceDwarfFrameSection`,
// we should always emit .debug_frame.
if (CFISecType == AsmPrinter::CFISection::Debug ||
- Asm->TM.Options.ForceDwarfFrameSection)
+ Asm->TM.Options.ForceDwarfFrameSection ||
+ Asm->TM.Options.MCOptions.EmitSFrameUnwind)
Asm->OutStreamer->emitCFISections(
- CFISecType == AsmPrinter::CFISection::EH, true);
+ CFISecType == AsmPrinter::CFISection::EH, true,
+ Asm->TM.Options.MCOptions.EmitSFrameUnwind);
hasEmittedCFISections = true;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 11b8576..7188833 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -972,10 +972,9 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
// the call graph which could lead to some target function. For tail
// calls, no return PC information is needed, unless tuning for GDB in
// DWARF4 mode in which case we fake a return PC for compatibility.
- const MCSymbol *PCAddr =
- (!IsTail || CU.useGNUAnalogForDwarf5Feature())
- ? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI))
- : nullptr;
+ const MCSymbol *PCAddr = (!IsTail || CU.useGNUAnalogForDwarf5Feature())
+ ? getLabelAfterInsn(TopLevelCallMI)
+ : nullptr;
// For tail calls, it's necessary to record the address of the branch
// instruction so that the debugger can show where the tail call occurred.
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 3b3e7a4..a7c99b1 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -2083,22 +2083,54 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (TBB == FBB) {
MBB->splice(Loc, TBB, TBB->begin(), TIB);
} else {
+ // Merge the debug locations, and hoist and kill the debug instructions from
+ // both branches. FIXME: We could probably try harder to preserve some debug
+ // instructions (but at least this isn't producing wrong locations).
+ MachineInstrBuilder MIRBuilder(*MBB->getParent(), Loc);
+ auto HoistAndKillDbgInstr = [MBB, Loc](MachineBasicBlock::iterator DI) {
+ assert(DI->isDebugInstr() && "Expected a debug instruction");
+ if (DI->isDebugRef()) {
+ const TargetInstrInfo *TII =
+ MBB->getParent()->getSubtarget().getInstrInfo();
+ const MCInstrDesc &DBGV = TII->get(TargetOpcode::DBG_VALUE);
+ DI = BuildMI(*MBB->getParent(), DI->getDebugLoc(), DBGV, false, 0,
+ DI->getDebugVariable(), DI->getDebugExpression());
+ MBB->insert(Loc, &*DI);
+ return;
+ }
+ // Deleting a DBG_PHI results in an undef at the referenced DBG_INSTR_REF.
+ if (DI->isDebugPHI()) {
+ DI->eraseFromParent();
+ return;
+ }
+
+ DI->setDebugValueUndef();
+ DI->moveBefore(&*Loc);
+ };
+
// TIB and FIB point to the end of the regions to hoist/merge in TBB and
// FBB.
MachineBasicBlock::iterator FE = FIB;
MachineBasicBlock::iterator FI = FBB->begin();
for (MachineBasicBlock::iterator TI :
make_early_inc_range(make_range(TBB->begin(), TIB))) {
- // Move debug instructions and pseudo probes without modifying them.
- // FIXME: This is the wrong thing to do for debug locations, which
- // should at least be killed (and hoisted from BOTH blocks).
- if (TI->isDebugOrPseudoInstr()) {
- TI->moveBefore(&*Loc);
+ // Hoist and kill debug instructions from FBB. After this loop FI points
+ // to the next non-debug instruction to hoist (checked in assert after the
+ // TBB debug instruction handling code).
+ while (FI != FE && FI->isDebugInstr())
+ HoistAndKillDbgInstr(FI++);
+
+ // Kill debug instructions before moving.
+ if (TI->isDebugInstr()) {
+ HoistAndKillDbgInstr(TI);
continue;
}
- // Get the next non-meta instruction in FBB.
- FI = skipDebugInstructionsForward(FI, FE, false);
+ // FI and TI now point to identical non-debug instructions.
+ assert(FI != FE && "Unexpected end of FBB range");
+ // Pseudo probes are excluded from the range when identifying foldable
+ // instructions, so we don't expect to see one now.
+ assert(!TI->isPseudoProbe() && "Unexpected pseudo probe in range");
// NOTE: The loop above checks CheckKillDead but we can't do that here as
// it modifies some kill markers after the check.
assert(TI->isIdenticalTo(*FI, MachineInstr::CheckDefs) &&
@@ -2111,6 +2143,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
++FI;
}
}
+
FBB->erase(FBB->begin(), FIB);
if (UpdateLiveIns)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index c21058c..416c56d 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2095,6 +2095,10 @@ static bool isRemOfLoopIncrementWithLoopInvariant(
if (!L->isLoopInvariant(RemAmt))
return false;
+ // Only works if the AddOffset is a loop invariant
+ if (AddOffset && !L->isLoopInvariant(AddOffset))
+ return false;
+
// Is the PHI a loop increment?
auto LoopIncrInfo = getIVIncrement(PN, LI);
if (!LoopIncrInfo)
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index 9512f79..810dc29 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -101,6 +101,7 @@ CGOPT(EABI, EABIVersion)
CGOPT(DebuggerKind, DebuggerTuningOpt)
CGOPT(bool, EnableStackSizeSection)
CGOPT(bool, EnableAddrsig)
+CGOPT(bool, EnableCallGraphSection)
CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableStaticDataPartitioning)
@@ -461,6 +462,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableAddrsig);
+ static cl::opt<bool> EnableCallGraphSection(
+ "call-graph-section", cl::desc("Emit a call graph section"),
+ cl::init(false));
+ CGBINDOPT(EnableCallGraphSection);
+
static cl::opt<bool> EmitCallSiteInfo(
"emit-call-site-info",
cl::desc(
@@ -595,6 +601,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter();
Options.EnableStaticDataPartitioning = getEnableStaticDataPartitioning();
Options.EmitAddrsig = getEnableAddrsig();
+ Options.EmitCallGraphSection = getEnableCallGraphSection();
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 714ec55..1c1047c 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -103,10 +103,10 @@ static void expandFPToI(Instruction *FPToI) {
Value *A1 = nullptr;
if (FloatVal->getType()->isHalfTy()) {
if (FPToI->getOpcode() == Instruction::FPToUI) {
- Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));
+ Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
A1 = Builder.CreateZExt(A0, IntTy);
} else { // FPToSI
- Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));
+ Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
A1 = Builder.CreateSExt(A0, IntTy);
}
FPToI->replaceAllUsesWith(A1);
@@ -425,8 +425,8 @@ static void expandIToFP(Instruction *IToFP) {
AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
AAddr0->addIncoming(Shl, SwBB);
Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
- Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
- Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
+ Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
+ Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
Value *Conv16 = Builder.CreateZExt(A2, IntTy);
Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
@@ -457,9 +457,9 @@ static void expandIToFP(Instruction *IToFP) {
Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
Value *ExtractT62 = nullptr;
if (FloatWidth > 80)
- ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
+ ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
else
- ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
+ ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
Builder.CreateBr(IfEnd26);
// if.else:
@@ -475,7 +475,7 @@ static void expandIToFP(Instruction *IToFP) {
Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
Value *ExtractT66 = nullptr;
if (FloatWidth > 80)
- ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
+ ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
else
ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
Builder.CreateBr(IfEnd26);
@@ -507,30 +507,29 @@ static void expandIToFP(Instruction *IToFP) {
Builder.getIntN(BitWidth, 63));
And29 = Builder.CreateAnd(Shr, Temp2, "and29");
} else {
- Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
+ Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
And29 = Builder.CreateAnd(
- Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));
+ Conv28, ConstantInt::getSigned(Builder.getInt32Ty(), 0x80000000));
}
unsigned TempMod = FPMantissaWidth % 32;
Value *And34 = nullptr;
Value *Shl30 = nullptr;
if (FloatWidth > 80) {
TempMod += 32;
- Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
+ Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
Shl30 = Builder.CreateAdd(
- Add,
- Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
- And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
+ Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
+ And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
} else {
- Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
+ Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
Shl30 = Builder.CreateAdd(
- Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
+ Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
- Builder.getIntN(32, (1 << TempMod) - 1));
+ Builder.getInt32((1 << TempMod) - 1));
}
Value *Or35 = nullptr;
if (FloatWidth > 80) {
- Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
+ Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
Value *Or31 = Builder.CreateOr(And29Trunc, And34);
Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index e8f513a..e84ba91 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5949,8 +5949,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
const TargetOptions &Options = MF->getTarget().Options;
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
- if (CanReassociate &&
- !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
+ if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
return false;
// Floating-point multiply-add with intermediate rounding.
@@ -5962,8 +5961,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
if (!HasFMAD && !HasFMA)
return false;
- AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath || HasFMAD;
+ AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
// If the addition is not contractable, do not combine.
if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
return false;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index ed7b07f..538a763 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8004,7 +8004,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
return UnableToLegalize;
- if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
+ if (MI.getFlag(MachineInstr::FmAfn)) {
unsigned Flags = MI.getFlags();
auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 1b69188..5e50898 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -253,6 +253,21 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
return false;
}
+static Value *getMaskOperand(IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unexpected intrinsic");
+ case Intrinsic::vp_load:
+ return II->getOperand(1);
+ case Intrinsic::masked_load:
+ return II->getOperand(2);
+ case Intrinsic::vp_store:
+ return II->getOperand(2);
+ case Intrinsic::masked_store:
+ return II->getOperand(3);
+ }
+}
+
// Return the corresponded deinterleaved mask, or nullptr if there is no valid
// mask.
static Value *getMask(Value *WideMask, unsigned Factor,
@@ -268,8 +283,12 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
if (isa<ScalableVectorType>(Load->getType()))
return false;
- if (auto *LI = dyn_cast<LoadInst>(Load);
- LI && !LI->isSimple())
+ auto *LI = dyn_cast<LoadInst>(Load);
+ auto *II = dyn_cast<IntrinsicInst>(Load);
+ if (!LI && !II)
+ return false;
+
+ if (LI && !LI->isSimple())
return false;
// Check if all users of this load are shufflevectors. If we encounter any
@@ -322,7 +341,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
// Holds the corresponding index for each DE-interleave shuffle.
SmallVector<unsigned, 4> Indices;
- Type *VecTy = FirstSVI->getType();
+ VectorType *VecTy = cast<VectorType>(FirstSVI->getType());
// Check if other shufflevectors are also DE-interleaved of the same type
// and factor as the first shufflevector.
@@ -360,13 +379,16 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);
Value *Mask = nullptr;
- if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) {
- Mask = getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy));
+ if (LI) {
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
+ } else {
+ // Check mask operand. Handle both all-true/false and interleaved mask.
+ Mask = getMask(getMaskOperand(II), Factor, VecTy);
if (!Mask)
return false;
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load: " << *Load << "\n");
- } else {
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
+
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load or masked.load: "
+ << *Load << "\n");
}
// Try to create target specific intrinsics to replace the load and
@@ -483,15 +505,16 @@ bool InterleavedAccessImpl::tryReplaceExtracts(
bool InterleavedAccessImpl::lowerInterleavedStore(
Instruction *Store, SmallSetVector<Instruction *, 32> &DeadInsts) {
Value *StoredValue;
- if (auto *SI = dyn_cast<StoreInst>(Store)) {
+ auto *SI = dyn_cast<StoreInst>(Store);
+ auto *II = dyn_cast<IntrinsicInst>(Store);
+ if (SI) {
if (!SI->isSimple())
return false;
StoredValue = SI->getValueOperand();
- } else if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
- assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
- StoredValue = VPStore->getArgOperand(0);
} else {
- llvm_unreachable("unsupported store operation");
+ assert(II->getIntrinsicID() == Intrinsic::vp_store ||
+ II->getIntrinsicID() == Intrinsic::masked_store);
+ StoredValue = II->getArgOperand(0);
}
auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
@@ -508,18 +531,18 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
"number of stored element should be a multiple of Factor");
Value *Mask = nullptr;
- if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
+ if (SI) {
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
+ } else {
+ // Check mask operand. Handle both all-true/false and interleaved mask.
unsigned LaneMaskLen = NumStoredElements / Factor;
- Mask = getMask(VPStore->getMaskParam(), Factor,
+ Mask = getMask(getMaskOperand(II), Factor,
ElementCount::getFixed(LaneMaskLen));
if (!Mask)
return false;
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store: " << *Store
- << "\n");
-
- } else {
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: "
+ << *Store << "\n");
}
// Try to create target specific intrinsics to replace the store and
@@ -564,6 +587,27 @@ static Value *getMask(Value *WideMask, unsigned Factor,
}
}
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
+ // Check that the shuffle mask is: a) an interleave, b) all of the same
+ // set of the elements, and c) contained by the first source. (c) could
+ // be relaxed if desired.
+ unsigned NumSrcElts =
+ cast<FixedVectorType>(SVI->getOperand(1)->getType())->getNumElements();
+ SmallVector<unsigned> StartIndexes;
+ if (ShuffleVectorInst::isInterleaveMask(SVI->getShuffleMask(), Factor,
+ NumSrcElts * 2, StartIndexes) &&
+ llvm::all_of(StartIndexes, [](unsigned Start) { return Start == 0; }) &&
+ llvm::all_of(SVI->getShuffleMask(), [&NumSrcElts](int Idx) {
+ return Idx < (int)NumSrcElts;
+ })) {
+ auto *LeafMaskTy =
+ VectorType::get(Type::getInt1Ty(SVI->getContext()), LeafValueEC);
+ IRBuilder<> Builder(SVI);
+ return Builder.CreateExtractVector(LeafMaskTy, SVI->getOperand(0),
+ uint64_t(0));
+ }
+ }
+
return nullptr;
}
@@ -590,21 +634,12 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
<< " and factor = " << Factor << "\n");
} else {
assert(II);
-
- // Check mask operand. Handle both all-true/false and interleaved mask.
- Value *WideMask;
- switch (II->getIntrinsicID()) {
- default:
+ if (II->getIntrinsicID() != Intrinsic::masked_load &&
+ II->getIntrinsicID() != Intrinsic::vp_load)
return false;
- case Intrinsic::vp_load:
- WideMask = II->getOperand(1);
- break;
- case Intrinsic::masked_load:
- WideMask = II->getOperand(2);
- break;
- }
- Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI));
+ // Check mask operand. Handle both all-true/false and interleaved mask.
+ Mask = getMask(getMaskOperand(II), Factor, getDeinterleavedVectorType(DI));
if (!Mask)
return false;
@@ -641,19 +676,11 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
Value *Mask = nullptr;
if (II) {
- // Check mask operand. Handle both all-true/false and interleaved mask.
- Value *WideMask;
- switch (II->getIntrinsicID()) {
- default:
+ if (II->getIntrinsicID() != Intrinsic::masked_store &&
+ II->getIntrinsicID() != Intrinsic::vp_store)
return false;
- case Intrinsic::vp_store:
- WideMask = II->getOperand(2);
- break;
- case Intrinsic::masked_store:
- WideMask = II->getOperand(3);
- break;
- }
- Mask = getMask(WideMask, Factor,
+ // Check mask operand. Handle both all-true/false and interleaved mask.
+ Mask = getMask(getMaskOperand(II), Factor,
cast<VectorType>(InterleaveValues[0]->getType()));
if (!Mask)
return false;
@@ -687,11 +714,13 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
using namespace PatternMatch;
for (auto &I : instructions(F)) {
if (match(&I, m_CombineOr(m_Load(m_Value()),
- m_Intrinsic<Intrinsic::vp_load>())))
+ m_Intrinsic<Intrinsic::vp_load>())) ||
+ match(&I, m_Intrinsic<Intrinsic::masked_load>()))
Changed |= lowerInterleavedLoad(&I, DeadInsts);
if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()),
- m_Intrinsic<Intrinsic::vp_store>())))
+ m_Intrinsic<Intrinsic::vp_store>())) ||
+ match(&I, m_Intrinsic<Intrinsic::masked_store>()))
Changed |= lowerInterleavedStore(&I, DeadInsts);
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 7153902..193df1f 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -616,6 +616,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
.Case("!range", MIToken::md_range)
.Case("!DIExpression", MIToken::md_diexpr)
.Case("!DILocation", MIToken::md_dilocation)
+ .Case("!noalias.addrspace", MIToken::md_noalias_addrspace)
.Default(MIToken::Error);
}
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index d7cd067..54142ac 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -151,6 +151,7 @@ struct MIToken {
md_tbaa,
md_alias_scope,
md_noalias,
+ md_noalias_addrspace,
md_range,
md_diexpr,
md_dilocation,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 3a364d5..807d59c 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -3482,6 +3482,11 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseMDNode(AAInfo.NoAlias))
return true;
break;
+ case MIToken::md_noalias_addrspace:
+ lex();
+ if (parseMDNode(AAInfo.NoAliasAddrSpace))
+ return true;
+ break;
case MIToken::md_range:
lex();
if (parseMDNode(Range))
@@ -3490,7 +3495,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
// TODO: Report an error on duplicate metadata nodes.
default:
return error("expected 'align' or '!tbaa' or '!alias.scope' or "
- "'!noalias' or '!range'");
+ "'!noalias' or '!range' or '!noalias.addrspace'");
}
}
if (expectAndConsume(MIToken::rparen))
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 1e9fcf3..3e99e57 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -504,13 +504,21 @@ bool MIRParserImpl::initializeCallSiteInfo(
return error(Error, ArgRegPair.Reg.SourceRange);
CSInfo.ArgRegPairs.emplace_back(Reg, ArgRegPair.ArgNo);
}
+ if (!YamlCSInfo.CalleeTypeIds.empty()) {
+ for (auto CalleeTypeId : YamlCSInfo.CalleeTypeIds) {
+ IntegerType *Int64Ty = Type::getInt64Ty(Context);
+ CSInfo.CalleeTypeIds.push_back(ConstantInt::get(Int64Ty, CalleeTypeId,
+ /*isSigned=*/false));
+ }
+ }
- if (TM.Options.EmitCallSiteInfo)
+ if (TM.Options.EmitCallSiteInfo || TM.Options.EmitCallGraphSection)
MF.addCallSiteInfo(&*CallI, std::move(CSInfo));
}
- if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo)
- return error(Twine("Call site info provided but not used"));
+ if (!YamlMF.CallSitesInfo.empty() &&
+ !(TM.Options.EmitCallSiteInfo || TM.Options.EmitCallGraphSection))
+ return error("call site info provided but not used");
return false;
}
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 7710b50..ad7835a 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -525,24 +525,30 @@ static void convertCallSiteObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF,
ModuleSlotTracker &MST) {
const auto *TRI = MF.getSubtarget().getRegisterInfo();
- for (auto CSInfo : MF.getCallSitesInfo()) {
+ for (auto [MI, CallSiteInfo] : MF.getCallSitesInfo()) {
yaml::CallSiteInfo YmlCS;
yaml::MachineInstrLoc CallLocation;
// Prepare instruction position.
- MachineBasicBlock::const_instr_iterator CallI = CSInfo.first->getIterator();
+ MachineBasicBlock::const_instr_iterator CallI = MI->getIterator();
CallLocation.BlockNum = CallI->getParent()->getNumber();
// Get call instruction offset from the beginning of block.
CallLocation.Offset =
std::distance(CallI->getParent()->instr_begin(), CallI);
YmlCS.CallLocation = CallLocation;
+
+ auto [ArgRegPairs, CalleeTypeIds] = CallSiteInfo;
// Construct call arguments and theirs forwarding register info.
- for (auto ArgReg : CSInfo.second.ArgRegPairs) {
+ for (auto ArgReg : ArgRegPairs) {
yaml::CallSiteInfo::ArgRegPair YmlArgReg;
YmlArgReg.ArgNo = ArgReg.ArgNo;
printRegMIR(ArgReg.Reg, YmlArgReg.Reg, TRI);
YmlCS.ArgForwardingRegs.emplace_back(YmlArgReg);
}
+ // Get type ids.
+ for (auto *CalleeTypeId : CalleeTypeIds) {
+ YmlCS.CalleeTypeIds.push_back(CalleeTypeId->getZExtValue());
+ }
YMF.CallSitesInfo.push_back(std::move(YmlCS));
}
@@ -815,6 +821,9 @@ static void printMI(raw_ostream &OS, MFPrintState &State,
if (MI.getFlag(MachineInstr::SameSign))
OS << "samesign ";
+ // NOTE: Please add new MIFlags also to the MI_FLAGS_STR in
+ // llvm/utils/update_mir_test_checks.py.
+
OS << TII->getName(MI.getOpcode());
LS = ListSeparator();
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 429a17a..60d42e0 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -211,8 +211,7 @@ void MachineFunction::init() {
ConstantPool = new (Allocator) MachineConstantPool(getDataLayout());
Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
- // FIXME: Use Function::hasOptSize().
- if (!F.getAlign() && !F.hasFnAttribute(Attribute::OptimizeForSize))
+ if (!F.getAlign() && !F.hasOptSize())
Alignment = std::max(Alignment,
STI->getTargetLowering()->getPrefFunctionAlignment());
@@ -920,7 +919,7 @@ MachineFunction::getCallSiteInfo(const MachineInstr *MI) {
assert(MI->isCandidateForAdditionalCallInfo() &&
"Call site info refers only to call (MI) candidates");
- if (!Target.Options.EmitCallSiteInfo)
+ if (!Target.Options.EmitCallSiteInfo && !Target.Options.EmitCallGraphSection)
return CallSitesInfo.end();
return CallSitesInfo.find(MI);
}
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 0d25169..c612f8de 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -1273,6 +1273,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << ", !noalias ";
AAInfo.NoAlias->printAsOperand(OS, MST);
}
+ if (AAInfo.NoAliasAddrSpace) {
+ OS << ", !noalias.addrspace ";
+ AAInfo.NoAliasAddrSpace->printAsOperand(OS, MST);
+ }
if (getRanges()) {
OS << ", !range ";
getRanges()->printAsOperand(OS, MST);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 74172b2..ba0ab23 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3853,7 +3853,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
case ISD::FP_TO_FP16:
LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
- if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
+ if (Node->getFlags().hasApproximateFuncs() && !TLI.useSoftFloat()) {
SDValue Op = Node->getOperand(0);
MVT SVT = Op.getSimpleValueType();
if ((SVT == MVT::f64 || SVT == MVT::f80) &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index e5704c0..583a85a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
@@ -357,6 +358,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::PATCHPOINT:
Res = PromoteIntRes_PATCHPOINT(N);
break;
+ case ISD::READ_REGISTER:
+ Res = PromoteIntRes_READ_REGISTER(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -2076,6 +2080,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::PATCHPOINT:
Res = PromoteIntOp_PATCHPOINT(N, OpNo);
break;
+ case ISD::WRITE_REGISTER:
+ Res = PromoteIntOp_WRITE_REGISTER(N, OpNo);
+ break;
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
Res = PromoteIntOp_VP_STRIDED(N, OpNo);
@@ -2853,6 +2860,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_WRITE_REGISTER(SDNode *N,
+ unsigned OpNo) {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+ Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure(
+ "cannot use llvm.write_register with illegal type", Fn,
+ N->getDebugLoc()));
+ return N->getOperand(0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) ||
(N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4));
@@ -3127,6 +3143,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VSCALE:
ExpandIntRes_VSCALE(N, Lo, Hi);
break;
+
+ case ISD::READ_REGISTER:
+ ExpandIntRes_READ_REGISTER(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -5471,6 +5491,18 @@ void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
SplitInteger(Res, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+ Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure(
+ "cannot use llvm.read_register with illegal type", Fn, N->getDebugLoc()));
+ ReplaceValueWith(SDValue(N, 1), N->getOperand(0));
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getPOISON(LoVT);
+ Hi = DAG.getPOISON(HiVT);
+}
+
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
//===----------------------------------------------------------------------===//
@@ -5537,6 +5569,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
Res = ExpandIntOp_VP_STRIDED(N, OpNo);
break;
+ case ISD::WRITE_REGISTER:
+ Res = ExpandIntOp_WRITE_REGISTER(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -5935,6 +5970,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo) {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+ Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure(
+ "cannot use llvm.write_register with illegal type", Fn,
+ N->getDebugLoc()));
+
+ return N->getOperand(0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) {
SDLoc dl(N);
@@ -6332,6 +6376,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_PATCHPOINT(SDNode *N) {
return Res.getValue(0);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_READ_REGISTER(SDNode *N) {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+ Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure(
+ "cannot use llvm.read_register with illegal type", Fn, N->getDebugLoc()));
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ReplaceValueWith(SDValue(N, 1), N->getOperand(0));
+ return DAG.getPOISON(NVT);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
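
The READ_REGISTER/WRITE_REGISTER handlers above turn what used to be a fatal legalization error into a DiagnosticInfoLegalizationFailure plus a poison result. A minimal sketch (not part of the patch) of IR-building code that reaches this path when the requested register type is illegal for the target, e.g. an i64 read on a 32-bit target; the register name "sp" and the surrounding builder state are assumptions:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Builds a call to @llvm.read_register.i64 with !{!"sp"} as the register name.
// On a 32-bit target the i64 result is illegal, so type legalization now
// reports "cannot use llvm.read_register with illegal type" and substitutes
// poison instead of hitting a fatal error.
static Value *emitReadSP64(IRBuilder<> &Builder, LLVMContext &Ctx) {
  Metadata *RegName = MDString::get(Ctx, "sp");
  Value *RegMD = MetadataAsValue::get(Ctx, MDNode::get(Ctx, RegName));
  return Builder.CreateIntrinsic(Intrinsic::read_register,
                                 {Builder.getInt64Ty()}, {RegMD});
}
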
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 9b53724..2e13b18 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -378,6 +378,7 @@ private:
SDValue PromoteIntRes_VPFunnelShift(SDNode *N);
SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
SDValue PromoteIntRes_PATCHPOINT(SDNode *N);
+ SDValue PromoteIntRes_READ_REGISTER(SDNode *N);
SDValue PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N);
SDValue PromoteIntRes_GET_ACTIVE_LANE_MASK(SDNode *N);
SDValue PromoteIntRes_PARTIAL_REDUCE_MLA(SDNode *N);
@@ -428,6 +429,7 @@ private:
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VECTOR_HISTOGRAM(SDNode *N, unsigned OpNo);
@@ -511,6 +513,7 @@ private:
void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi);
@@ -534,6 +537,7 @@ private:
SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo);
SDValue ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
SDValue ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, const SDLoc &dl);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1636465..6eca7b7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3923,11 +3923,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
+ SDNodeFlags Flags;
+ if (auto *TruncInst = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*TruncInst);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
DAG.getTargetConstant(
- 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+ 0, dl, TLI.getPointerTy(DAG.getDataLayout())),
+ Flags));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index b79911b..2a8234a 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -588,7 +588,14 @@ bool InsertStackProtectors(const TargetMachine *TM, Function *F,
continue;
Instruction *CheckLoc = dyn_cast<ReturnInst>(BB.getTerminator());
if (!CheckLoc && !DisableCheckNoReturn)
- for (auto &Inst : BB)
+ for (auto &Inst : BB) {
+ if (IntrinsicInst *IB = dyn_cast<IntrinsicInst>(&Inst);
+ IB && (IB->getIntrinsicID() == Intrinsic::eh_sjlj_callsite)) {
+          // eh_sjlj_callsite has to be in the same BB as the BB terminator.
+          // Don't insert the stack check within this range.
+ CheckLoc = IB;
+ break;
+ }
if (auto *CB = dyn_cast<CallBase>(&Inst))
// Do stack check before noreturn calls that aren't nounwind (e.g:
// __cxa_throw).
@@ -596,6 +603,7 @@ bool InsertStackProtectors(const TargetMachine *TM, Function *F,
CheckLoc = CB;
break;
}
+ }
if (!CheckLoc)
continue;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index d4a3455..68b8a00 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -806,7 +806,17 @@ void TargetLoweringBase::initActions() {
ISD::SDIVFIX, ISD::SDIVFIXSAT,
ISD::UDIVFIX, ISD::UDIVFIXSAT,
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
- ISD::IS_FPCLASS},
+ ISD::IS_FPCLASS, ISD::FCBRT,
+ ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10, ISD::FEXP,
+ ISD::FEXP2, ISD::FEXP10,
+ ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FCEIL, ISD::FRINT,
+ ISD::FTRUNC, ISD::FROUNDEVEN,
+ ISD::FTAN, ISD::FACOS,
+ ISD::FASIN, ISD::FATAN,
+ ISD::FCOSH, ISD::FSINH,
+ ISD::FTANH, ISD::FATAN2},
VT, Expand);
// Overflow operations default to expand
@@ -852,13 +862,12 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT.isVector())
- setOperationAction(
- {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
- ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
- ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND,
- ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN,
- ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2},
- VT, Expand);
+ setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG,
+ ISD::ANY_EXTEND_VECTOR_INREG,
+ ISD::SIGN_EXTEND_VECTOR_INREG,
+ ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR,
+ ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND},
+ VT, Expand);
// Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
@@ -914,15 +923,6 @@ void TargetLoweringBase::initActions() {
{MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
Expand);
- // These library functions default to expand.
- setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
- ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
- ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
- ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
- ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH,
- ISD::FATAN2},
- {MVT::f32, MVT::f64, MVT::f128}, Expand);
-
// Insert custom handling default for llvm.canonicalize.*.
setOperationAction(ISD::FCANONICALIZE,
{MVT::f16, MVT::f32, MVT::f64, MVT::f128}, Expand);
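
With the hunks above, the FP library-call style opcodes (FCBRT, FLOG*, FEXP*, FFLOOR, FRINT, the trig family, etc.) default to Expand for every type rather than only for a fixed f32/f64/f128 list, so a backend that can lower any of them natively has to opt back in. A hedged sketch of what that looks like in a target's TargetLowering constructor; MyTargetLowering and the chosen opcodes are illustrative, not taken from this patch:

// Hypothetical backend constructor fragment; assumes the usual subclass
// boilerplate (header, Subtarget wiring) exists elsewhere.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  // This imaginary target has native rounding instructions, so undo the new
  // blanket Expand default for the scalar types it can handle directly.
  setOperationAction({ISD::FFLOOR, ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN},
                     {MVT::f32, MVT::f64}, Legal);
  // exp2 is only cheap for f32 here; the f64 form keeps expanding to a libcall.
  setOperationAction(ISD::FEXP2, MVT::f32, Legal);
  // Everything not overridden keeps the default set above and is turned into a
  // libcall or an expanded node sequence during legalization.
}
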
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index a40ceaa..725e951 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -995,7 +995,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForLSDA(
if (!LSDASection || (!F.hasComdat() && !TM.getFunctionSections()))
return LSDASection;
- const auto *LSDA = cast<MCSectionELF>(LSDASection);
+ const auto *LSDA = static_cast<const MCSectionELF *>(LSDASection);
unsigned Flags = LSDA->getFlags();
const MCSymbolELF *LinkedToSym = nullptr;
StringRef Group;
@@ -1734,7 +1734,8 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
Name == getInstrProfSectionName(IPSK_covdata, Triple::COFF,
/*AddSegmentInfo=*/false) ||
Name == getInstrProfSectionName(IPSK_covname, Triple::COFF,
- /*AddSegmentInfo=*/false))
+ /*AddSegmentInfo=*/false) ||
+ Name == ".llvmbc" || Name == ".llvmcmd")
Kind = SectionKind::getMetadata();
int Selection = 0;
unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
@@ -2054,14 +2055,14 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getCOFFStaticStructorSection(
getContext(), getContext().getTargetTriple(), true, Priority, KeySym,
- cast<MCSectionCOFF>(StaticCtorSection));
+ static_cast<MCSectionCOFF *>(StaticCtorSection));
}
MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getCOFFStaticStructorSection(
getContext(), getContext().getTargetTriple(), false, Priority, KeySym,
- cast<MCSectionCOFF>(StaticDtorSection));
+ static_cast<MCSectionCOFF *>(StaticDtorSection));
}
const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
@@ -2388,23 +2389,25 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
// here.
if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) {
if (GO->isDeclarationForLinker())
- return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM))
+ return static_cast<const MCSectionXCOFF *>(
+ getSectionForExternalReference(GO, TM))
->getQualNameSymbol();
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->hasAttribute("toc-data"))
- return cast<MCSectionXCOFF>(
+ return static_cast<const MCSectionXCOFF *>(
SectionForGlobal(GVar, SectionKind::getData(), TM))
->getQualNameSymbol();
SectionKind GOKind = getKindForGlobal(GO, TM);
if (GOKind.isText())
- return cast<MCSectionXCOFF>(
+ return static_cast<const MCSectionXCOFF *>(
getSectionForFunctionDescriptor(cast<Function>(GO), TM))
->getQualNameSymbol();
if ((TM.getDataSections() && !GO->hasSection()) || GO->hasCommonLinkage() ||
GOKind.isBSSLocal() || GOKind.isThreadBSSLocal())
- return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM))
+ return static_cast<const MCSectionXCOFF *>(
+ SectionForGlobal(GO, GOKind, TM))
->getQualNameSymbol();
}
@@ -2740,7 +2743,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const {
- auto *LSDA = cast<MCSectionXCOFF>(LSDASection);
+ auto *LSDA = static_cast<MCSectionXCOFF *>(LSDASection);
if (TM.getFunctionSections()) {
// If option -ffunction-sections is on, append the function name to the
// name of the LSDA csect so that each function has its own LSDA csect.
diff --git a/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp b/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp
index 6267207..fd54190 100644
--- a/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp
+++ b/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp
@@ -369,6 +369,19 @@ static GlobalVariable *getOrCreateRefVariable(
AddrOfOldGV, Twine("__ref_").concat(GV->getName()),
nullptr, GlobalVariable::NotThreadLocal);
+ // RefGV is created with isConstant = false, but we want to place RefGV into
+ // .rdata, not .data. It is important that the GlobalVariable be mutable
+ // from the compiler's point of view, so that the optimizer does not remove
+ // the global variable entirely and replace all references to it with its
+ // initial value.
+ //
+ // When the Windows hot-patch loader applies a hot-patch, it maps the
+ // pages of .rdata as read/write so that it can set each __ref_* variable
+ // to point to the original variable in the base image. Afterward, pages in
+ // .rdata are remapped as read-only. This protects the __ref_* variables from
+ // being overwritten during execution.
+ RefGV->setSection(".rdata");
+
// Create debug info for the replacement global variable.
DataLayout Layout = M->getDataLayout();
DIType *DebugType = DebugInfo.createPointerType(
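
The new comment and setSection call place each __ref_* slot in .rdata while keeping it mutable at the IR level. A small sketch of the same pattern in isolation; createRefSlot, the linkage choice and the BaseName parameter are illustrative, not the pass's exact code:

#include "llvm/ADT/Twine.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static GlobalVariable *createRefSlot(Module &M, Constant *AddrOfOldGV,
                                     StringRef BaseName) {
  // Deliberately isConstant=false: the optimizer must not fold loads of the
  // slot to its initializer, because the hot-patch loader rewrites it.
  auto *RefGV = new GlobalVariable(M, AddrOfOldGV->getType(),
                                   /*isConstant=*/false,
                                   GlobalValue::InternalLinkage, AddrOfOldGV,
                                   "__ref_" + BaseName);
  // Still read-only at run time: the loader remaps the .rdata page writable,
  // patches the slot, then restores the read-only protection.
  RefGV->setSection(".rdata");
  return RefGV;
}
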
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index b3798f1..a8559e7 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -183,7 +183,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
std::lock_guard<sys::Mutex> locked(lock);
// Save information about our target
- Arch = (Triple::ArchType)Obj.getArch();
+ Arch = Obj.getArch();
IsTargetLittleEndian = Obj.isLittleEndian();
setMipsABI(Obj);
@@ -1361,18 +1361,17 @@ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyld::loadObject(const ObjectFile &Obj) {
if (!Dyld) {
if (Obj.isELF())
- Dyld =
- createRuntimeDyldELF(static_cast<Triple::ArchType>(Obj.getArch()),
- MemMgr, Resolver, ProcessAllSections,
- std::move(NotifyStubEmitted));
+ Dyld = createRuntimeDyldELF(Obj.getArch(), MemMgr, Resolver,
+ ProcessAllSections,
+ std::move(NotifyStubEmitted));
else if (Obj.isMachO())
- Dyld = createRuntimeDyldMachO(
- static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
- ProcessAllSections, std::move(NotifyStubEmitted));
+ Dyld = createRuntimeDyldMachO(Obj.getArch(), MemMgr, Resolver,
+ ProcessAllSections,
+ std::move(NotifyStubEmitted));
else if (Obj.isCOFF())
- Dyld = createRuntimeDyldCOFF(
- static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
- ProcessAllSections, std::move(NotifyStubEmitted));
+ Dyld = createRuntimeDyldCOFF(Obj.getArch(), MemMgr, Resolver,
+ ProcessAllSections,
+ std::move(NotifyStubEmitted));
else
report_fatal_error("Incompatible object format!");
}
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
index f7669f0..53f5934 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Frontend/HLSL/RootSignatureMetadata.h"
+#include "llvm/Frontend/HLSL/RootSignatureValidations.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -20,6 +22,42 @@ namespace llvm {
namespace hlsl {
namespace rootsig {
+static std::optional<uint32_t> extractMdIntValue(MDNode *Node,
+ unsigned int OpId) {
+ if (auto *CI =
+ mdconst::dyn_extract<ConstantInt>(Node->getOperand(OpId).get()))
+ return CI->getZExtValue();
+ return std::nullopt;
+}
+
+static std::optional<float> extractMdFloatValue(MDNode *Node,
+ unsigned int OpId) {
+ if (auto *CI = mdconst::dyn_extract<ConstantFP>(Node->getOperand(OpId).get()))
+ return CI->getValueAPF().convertToFloat();
+ return std::nullopt;
+}
+
+static std::optional<StringRef> extractMdStringValue(MDNode *Node,
+ unsigned int OpId) {
+ MDString *NodeText = dyn_cast<MDString>(Node->getOperand(OpId));
+ if (NodeText == nullptr)
+ return std::nullopt;
+ return NodeText->getString();
+}
+
+static bool reportError(LLVMContext *Ctx, Twine Message,
+ DiagnosticSeverity Severity = DS_Error) {
+ Ctx->diagnose(DiagnosticInfoGeneric(Message, Severity));
+ return true;
+}
+
+static bool reportValueError(LLVMContext *Ctx, Twine ParamName,
+ uint32_t Value) {
+ Ctx->diagnose(DiagnosticInfoGeneric(
+ "Invalid value for " + ParamName + ": " + Twine(Value), DS_Error));
+ return true;
+}
+
static const EnumEntry<dxil::ResourceClass> ResourceClassNames[] = {
{"CBV", dxil::ResourceClass::CBuffer},
{"SRV", dxil::ResourceClass::SRV},
@@ -189,6 +227,442 @@ MDNode *MetadataBuilder::BuildStaticSampler(const StaticSampler &Sampler) {
return MDNode::get(Ctx, Operands);
}
+bool MetadataParser::parseRootFlags(LLVMContext *Ctx,
+ mcdxbc::RootSignatureDesc &RSD,
+ MDNode *RootFlagNode) {
+
+ if (RootFlagNode->getNumOperands() != 2)
+ return reportError(Ctx, "Invalid format for RootFlag Element");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RootFlagNode, 1))
+ RSD.Flags = *Val;
+ else
+ return reportError(Ctx, "Invalid value for RootFlag");
+
+ return false;
+}
+
+bool MetadataParser::parseRootConstants(LLVMContext *Ctx,
+ mcdxbc::RootSignatureDesc &RSD,
+ MDNode *RootConstantNode) {
+
+ if (RootConstantNode->getNumOperands() != 5)
+ return reportError(Ctx, "Invalid format for RootConstants Element");
+
+ dxbc::RTS0::v1::RootParameterHeader Header;
+  // The parameter offset doesn't matter here - we recalculate it during
+  // serialization.
+  Header.ParameterOffset = 0;
+ Header.ParameterType =
+ llvm::to_underlying(dxbc::RootParameterType::Constants32Bit);
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 1))
+ Header.ShaderVisibility = *Val;
+ else
+ return reportError(Ctx, "Invalid value for ShaderVisibility");
+
+ dxbc::RTS0::v1::RootConstants Constants;
+ if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 2))
+ Constants.ShaderRegister = *Val;
+ else
+ return reportError(Ctx, "Invalid value for ShaderRegister");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 3))
+ Constants.RegisterSpace = *Val;
+ else
+ return reportError(Ctx, "Invalid value for RegisterSpace");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 4))
+ Constants.Num32BitValues = *Val;
+ else
+ return reportError(Ctx, "Invalid value for Num32BitValues");
+
+ RSD.ParametersContainer.addParameter(Header, Constants);
+
+ return false;
+}
+
+bool MetadataParser::parseRootDescriptors(
+ LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
+ MDNode *RootDescriptorNode, RootSignatureElementKind ElementKind) {
+  assert((ElementKind == RootSignatureElementKind::SRV ||
+          ElementKind == RootSignatureElementKind::UAV ||
+          ElementKind == RootSignatureElementKind::CBV) &&
+         "parseRootDescriptors should only be called with RootDescriptor "
+         "element kind.");
+ if (RootDescriptorNode->getNumOperands() != 5)
+ return reportError(Ctx, "Invalid format for Root Descriptor Element");
+
+ dxbc::RTS0::v1::RootParameterHeader Header;
+ switch (ElementKind) {
+ case RootSignatureElementKind::SRV:
+ Header.ParameterType = llvm::to_underlying(dxbc::RootParameterType::SRV);
+ break;
+ case RootSignatureElementKind::UAV:
+ Header.ParameterType = llvm::to_underlying(dxbc::RootParameterType::UAV);
+ break;
+ case RootSignatureElementKind::CBV:
+ Header.ParameterType = llvm::to_underlying(dxbc::RootParameterType::CBV);
+ break;
+ default:
+ llvm_unreachable("invalid Root Descriptor kind");
+ break;
+ }
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 1))
+ Header.ShaderVisibility = *Val;
+ else
+ return reportError(Ctx, "Invalid value for ShaderVisibility");
+
+ dxbc::RTS0::v2::RootDescriptor Descriptor;
+ if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 2))
+ Descriptor.ShaderRegister = *Val;
+ else
+ return reportError(Ctx, "Invalid value for ShaderRegister");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 3))
+ Descriptor.RegisterSpace = *Val;
+ else
+ return reportError(Ctx, "Invalid value for RegisterSpace");
+
+ if (RSD.Version == 1) {
+ RSD.ParametersContainer.addParameter(Header, Descriptor);
+ return false;
+ }
+ assert(RSD.Version > 1);
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 4))
+ Descriptor.Flags = *Val;
+ else
+ return reportError(Ctx, "Invalid value for Root Descriptor Flags");
+
+ RSD.ParametersContainer.addParameter(Header, Descriptor);
+ return false;
+}
+
+bool MetadataParser::parseDescriptorRange(LLVMContext *Ctx,
+ mcdxbc::DescriptorTable &Table,
+ MDNode *RangeDescriptorNode) {
+
+ if (RangeDescriptorNode->getNumOperands() != 6)
+ return reportError(Ctx, "Invalid format for Descriptor Range");
+
+ dxbc::RTS0::v2::DescriptorRange Range;
+
+ std::optional<StringRef> ElementText =
+ extractMdStringValue(RangeDescriptorNode, 0);
+
+ if (!ElementText.has_value())
+ return reportError(Ctx, "Descriptor Range, first element is not a string.");
+
+ Range.RangeType =
+ StringSwitch<uint32_t>(*ElementText)
+ .Case("CBV", llvm::to_underlying(dxbc::DescriptorRangeType::CBV))
+ .Case("SRV", llvm::to_underlying(dxbc::DescriptorRangeType::SRV))
+ .Case("UAV", llvm::to_underlying(dxbc::DescriptorRangeType::UAV))
+ .Case("Sampler",
+ llvm::to_underlying(dxbc::DescriptorRangeType::Sampler))
+ .Default(~0U);
+
+ if (Range.RangeType == ~0U)
+ return reportError(Ctx, "Invalid Descriptor Range type: " + *ElementText);
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 1))
+ Range.NumDescriptors = *Val;
+ else
+ return reportError(Ctx, "Invalid value for Number of Descriptor in Range");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 2))
+ Range.BaseShaderRegister = *Val;
+ else
+ return reportError(Ctx, "Invalid value for BaseShaderRegister");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 3))
+ Range.RegisterSpace = *Val;
+ else
+ return reportError(Ctx, "Invalid value for RegisterSpace");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 4))
+ Range.OffsetInDescriptorsFromTableStart = *Val;
+ else
+ return reportError(Ctx,
+ "Invalid value for OffsetInDescriptorsFromTableStart");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 5))
+ Range.Flags = *Val;
+ else
+ return reportError(Ctx, "Invalid value for Descriptor Range Flags");
+
+ Table.Ranges.push_back(Range);
+ return false;
+}
+
+bool MetadataParser::parseDescriptorTable(LLVMContext *Ctx,
+ mcdxbc::RootSignatureDesc &RSD,
+ MDNode *DescriptorTableNode) {
+ const unsigned int NumOperands = DescriptorTableNode->getNumOperands();
+ if (NumOperands < 2)
+ return reportError(Ctx, "Invalid format for Descriptor Table");
+
+ dxbc::RTS0::v1::RootParameterHeader Header;
+ if (std::optional<uint32_t> Val = extractMdIntValue(DescriptorTableNode, 1))
+ Header.ShaderVisibility = *Val;
+ else
+ return reportError(Ctx, "Invalid value for ShaderVisibility");
+
+ mcdxbc::DescriptorTable Table;
+ Header.ParameterType =
+ llvm::to_underlying(dxbc::RootParameterType::DescriptorTable);
+
+ for (unsigned int I = 2; I < NumOperands; I++) {
+ MDNode *Element = dyn_cast<MDNode>(DescriptorTableNode->getOperand(I));
+ if (Element == nullptr)
+ return reportError(Ctx, "Missing Root Element Metadata Node.");
+
+ if (parseDescriptorRange(Ctx, Table, Element))
+ return true;
+ }
+
+ RSD.ParametersContainer.addParameter(Header, Table);
+ return false;
+}
+
+bool MetadataParser::parseStaticSampler(LLVMContext *Ctx,
+ mcdxbc::RootSignatureDesc &RSD,
+ MDNode *StaticSamplerNode) {
+ if (StaticSamplerNode->getNumOperands() != 14)
+ return reportError(Ctx, "Invalid format for Static Sampler");
+
+ dxbc::RTS0::v1::StaticSampler Sampler;
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 1))
+ Sampler.Filter = *Val;
+ else
+ return reportError(Ctx, "Invalid value for Filter");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 2))
+ Sampler.AddressU = *Val;
+ else
+ return reportError(Ctx, "Invalid value for AddressU");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 3))
+ Sampler.AddressV = *Val;
+ else
+ return reportError(Ctx, "Invalid value for AddressV");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 4))
+ Sampler.AddressW = *Val;
+ else
+ return reportError(Ctx, "Invalid value for AddressW");
+
+ if (std::optional<float> Val = extractMdFloatValue(StaticSamplerNode, 5))
+ Sampler.MipLODBias = *Val;
+ else
+ return reportError(Ctx, "Invalid value for MipLODBias");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 6))
+ Sampler.MaxAnisotropy = *Val;
+ else
+ return reportError(Ctx, "Invalid value for MaxAnisotropy");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 7))
+ Sampler.ComparisonFunc = *Val;
+ else
+ return reportError(Ctx, "Invalid value for ComparisonFunc ");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 8))
+ Sampler.BorderColor = *Val;
+ else
+ return reportError(Ctx, "Invalid value for ComparisonFunc ");
+
+ if (std::optional<float> Val = extractMdFloatValue(StaticSamplerNode, 9))
+ Sampler.MinLOD = *Val;
+ else
+ return reportError(Ctx, "Invalid value for MinLOD");
+
+ if (std::optional<float> Val = extractMdFloatValue(StaticSamplerNode, 10))
+ Sampler.MaxLOD = *Val;
+ else
+ return reportError(Ctx, "Invalid value for MaxLOD");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 11))
+ Sampler.ShaderRegister = *Val;
+ else
+ return reportError(Ctx, "Invalid value for ShaderRegister");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 12))
+ Sampler.RegisterSpace = *Val;
+ else
+ return reportError(Ctx, "Invalid value for RegisterSpace");
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 13))
+ Sampler.ShaderVisibility = *Val;
+ else
+ return reportError(Ctx, "Invalid value for ShaderVisibility");
+
+ RSD.StaticSamplers.push_back(Sampler);
+ return false;
+}
+
+bool MetadataParser::parseRootSignatureElement(LLVMContext *Ctx,
+ mcdxbc::RootSignatureDesc &RSD,
+ MDNode *Element) {
+ std::optional<StringRef> ElementText = extractMdStringValue(Element, 0);
+ if (!ElementText.has_value())
+ return reportError(Ctx, "Invalid format for Root Element");
+
+ RootSignatureElementKind ElementKind =
+ StringSwitch<RootSignatureElementKind>(*ElementText)
+ .Case("RootFlags", RootSignatureElementKind::RootFlags)
+ .Case("RootConstants", RootSignatureElementKind::RootConstants)
+ .Case("RootCBV", RootSignatureElementKind::CBV)
+ .Case("RootSRV", RootSignatureElementKind::SRV)
+ .Case("RootUAV", RootSignatureElementKind::UAV)
+ .Case("DescriptorTable", RootSignatureElementKind::DescriptorTable)
+ .Case("StaticSampler", RootSignatureElementKind::StaticSamplers)
+ .Default(RootSignatureElementKind::Error);
+
+ switch (ElementKind) {
+
+ case RootSignatureElementKind::RootFlags:
+ return parseRootFlags(Ctx, RSD, Element);
+ case RootSignatureElementKind::RootConstants:
+ return parseRootConstants(Ctx, RSD, Element);
+ case RootSignatureElementKind::CBV:
+ case RootSignatureElementKind::SRV:
+ case RootSignatureElementKind::UAV:
+ return parseRootDescriptors(Ctx, RSD, Element, ElementKind);
+ case RootSignatureElementKind::DescriptorTable:
+ return parseDescriptorTable(Ctx, RSD, Element);
+ case RootSignatureElementKind::StaticSamplers:
+ return parseStaticSampler(Ctx, RSD, Element);
+ case RootSignatureElementKind::Error:
+ return reportError(Ctx, "Invalid Root Signature Element: " + *ElementText);
+ }
+
+ llvm_unreachable("Unhandled RootSignatureElementKind enum.");
+}
+
+bool MetadataParser::validateRootSignature(
+ LLVMContext *Ctx, const llvm::mcdxbc::RootSignatureDesc &RSD) {
+ if (!llvm::hlsl::rootsig::verifyVersion(RSD.Version)) {
+ return reportValueError(Ctx, "Version", RSD.Version);
+ }
+
+ if (!llvm::hlsl::rootsig::verifyRootFlag(RSD.Flags)) {
+ return reportValueError(Ctx, "RootFlags", RSD.Flags);
+ }
+
+ for (const mcdxbc::RootParameterInfo &Info : RSD.ParametersContainer) {
+ if (!dxbc::isValidShaderVisibility(Info.Header.ShaderVisibility))
+ return reportValueError(Ctx, "ShaderVisibility",
+ Info.Header.ShaderVisibility);
+
+ assert(dxbc::isValidParameterType(Info.Header.ParameterType) &&
+ "Invalid value for ParameterType");
+
+ switch (Info.Header.ParameterType) {
+
+ case llvm::to_underlying(dxbc::RootParameterType::CBV):
+ case llvm::to_underlying(dxbc::RootParameterType::UAV):
+ case llvm::to_underlying(dxbc::RootParameterType::SRV): {
+ const dxbc::RTS0::v2::RootDescriptor &Descriptor =
+ RSD.ParametersContainer.getRootDescriptor(Info.Location);
+ if (!llvm::hlsl::rootsig::verifyRegisterValue(Descriptor.ShaderRegister))
+ return reportValueError(Ctx, "ShaderRegister",
+ Descriptor.ShaderRegister);
+
+ if (!llvm::hlsl::rootsig::verifyRegisterSpace(Descriptor.RegisterSpace))
+ return reportValueError(Ctx, "RegisterSpace", Descriptor.RegisterSpace);
+
+ if (RSD.Version > 1) {
+ if (!llvm::hlsl::rootsig::verifyRootDescriptorFlag(RSD.Version,
+ Descriptor.Flags))
+ return reportValueError(Ctx, "RootDescriptorFlag", Descriptor.Flags);
+ }
+ break;
+ }
+ case llvm::to_underlying(dxbc::RootParameterType::DescriptorTable): {
+ const mcdxbc::DescriptorTable &Table =
+ RSD.ParametersContainer.getDescriptorTable(Info.Location);
+ for (const dxbc::RTS0::v2::DescriptorRange &Range : Table) {
+ if (!llvm::hlsl::rootsig::verifyRangeType(Range.RangeType))
+ return reportValueError(Ctx, "RangeType", Range.RangeType);
+
+ if (!llvm::hlsl::rootsig::verifyRegisterSpace(Range.RegisterSpace))
+ return reportValueError(Ctx, "RegisterSpace", Range.RegisterSpace);
+
+ if (!llvm::hlsl::rootsig::verifyNumDescriptors(Range.NumDescriptors))
+ return reportValueError(Ctx, "NumDescriptors", Range.NumDescriptors);
+
+ if (!llvm::hlsl::rootsig::verifyDescriptorRangeFlag(
+ RSD.Version, Range.RangeType, Range.Flags))
+ return reportValueError(Ctx, "DescriptorFlag", Range.Flags);
+ }
+ break;
+ }
+ }
+ }
+
+ for (const dxbc::RTS0::v1::StaticSampler &Sampler : RSD.StaticSamplers) {
+ if (!llvm::hlsl::rootsig::verifySamplerFilter(Sampler.Filter))
+ return reportValueError(Ctx, "Filter", Sampler.Filter);
+
+ if (!llvm::hlsl::rootsig::verifyAddress(Sampler.AddressU))
+ return reportValueError(Ctx, "AddressU", Sampler.AddressU);
+
+ if (!llvm::hlsl::rootsig::verifyAddress(Sampler.AddressV))
+ return reportValueError(Ctx, "AddressV", Sampler.AddressV);
+
+ if (!llvm::hlsl::rootsig::verifyAddress(Sampler.AddressW))
+ return reportValueError(Ctx, "AddressW", Sampler.AddressW);
+
+ if (!llvm::hlsl::rootsig::verifyMipLODBias(Sampler.MipLODBias))
+ return reportValueError(Ctx, "MipLODBias", Sampler.MipLODBias);
+
+ if (!llvm::hlsl::rootsig::verifyMaxAnisotropy(Sampler.MaxAnisotropy))
+ return reportValueError(Ctx, "MaxAnisotropy", Sampler.MaxAnisotropy);
+
+ if (!llvm::hlsl::rootsig::verifyComparisonFunc(Sampler.ComparisonFunc))
+ return reportValueError(Ctx, "ComparisonFunc", Sampler.ComparisonFunc);
+
+ if (!llvm::hlsl::rootsig::verifyBorderColor(Sampler.BorderColor))
+ return reportValueError(Ctx, "BorderColor", Sampler.BorderColor);
+
+ if (!llvm::hlsl::rootsig::verifyLOD(Sampler.MinLOD))
+ return reportValueError(Ctx, "MinLOD", Sampler.MinLOD);
+
+ if (!llvm::hlsl::rootsig::verifyLOD(Sampler.MaxLOD))
+ return reportValueError(Ctx, "MaxLOD", Sampler.MaxLOD);
+
+ if (!llvm::hlsl::rootsig::verifyRegisterValue(Sampler.ShaderRegister))
+ return reportValueError(Ctx, "ShaderRegister", Sampler.ShaderRegister);
+
+ if (!llvm::hlsl::rootsig::verifyRegisterSpace(Sampler.RegisterSpace))
+ return reportValueError(Ctx, "RegisterSpace", Sampler.RegisterSpace);
+
+ if (!dxbc::isValidShaderVisibility(Sampler.ShaderVisibility))
+ return reportValueError(Ctx, "ShaderVisibility",
+ Sampler.ShaderVisibility);
+ }
+
+ return false;
+}
+
+bool MetadataParser::ParseRootSignature(LLVMContext *Ctx,
+ mcdxbc::RootSignatureDesc &RSD) {
+ bool HasError = false;
+
+ // Loop through the Root Elements of the root signature.
+ for (const auto &Operand : Root->operands()) {
+ MDNode *Element = dyn_cast<MDNode>(Operand);
+ if (Element == nullptr)
+ return reportError(Ctx, "Missing Root Element Metadata Node.");
+
+ HasError = HasError || parseRootSignatureElement(Ctx, RSD, Element) ||
+ validateRootSignature(Ctx, RSD);
+ }
+
+ return HasError;
+}
} // namespace rootsig
} // namespace hlsl
} // namespace llvm
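
The parsing helpers above expect each root-signature element to be an MDNode whose first operand is an MDString tag and whose remaining operands are i32/float constants. A sketch of building the simplest such element, the RootFlags node that parseRootFlags() consumes; the helper name and how the node is attached to the module are assumptions for illustration:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static MDNode *buildRootFlagsNode(Module &M, uint32_t Flags) {
  LLVMContext &Ctx = M.getContext();
  Type *I32 = Type::getInt32Ty(Ctx);
  Metadata *Ops[] = {
      MDString::get(Ctx, "RootFlags"), // tag checked by parseRootSignatureElement
      ConstantAsMetadata::get(ConstantInt::get(I32, Flags)), // read by extractMdIntValue
  };
  return MDNode::get(Ctx, Ops); // two operands, as parseRootFlags requires
}
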
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 840ca83..7928772 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2617,7 +2617,7 @@ void OpenMPIRBuilder::emitReductionListCopy(
Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
const LocationDescription &Loc, ArrayRef<ReductionInfo> ReductionInfos,
AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
+ InsertPointTy SavedIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
@@ -2630,7 +2630,6 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
WcFunc->addParamAttr(1, Attribute::NoUndef);
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", WcFunc);
Builder.SetInsertPoint(EntryBB);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// ReduceList: thread local Reduce list.
// At the stage of the computation when this function is called, partially
@@ -2845,6 +2844,7 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
}
Builder.CreateRetVoid();
+ Builder.restoreIP(SavedIP);
return WcFunc;
}
@@ -2853,7 +2853,6 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
AttributeList FuncAttrs) {
LLVMContext &Ctx = M.getContext();
- IRBuilder<>::InsertPointGuard IPG(Builder);
FunctionType *FuncTy =
FunctionType::get(Builder.getVoidTy(),
{Builder.getPtrTy(), Builder.getInt16Ty(),
@@ -2872,7 +2871,6 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
SarFunc->addParamAttr(3, Attribute::SExt);
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", SarFunc);
Builder.SetInsertPoint(EntryBB);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Thread local Reduce list used to host the values of data to be reduced.
Argument *ReduceListArg = SarFunc->getArg(0);
@@ -3019,7 +3017,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3035,7 +3033,6 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
Builder.SetInsertPoint(EntryBlock);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3123,13 +3120,14 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
}
Builder.CreateRetVoid();
+ Builder.restoreIP(OldIP);
return LtGCFunc;
}
Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3145,7 +3143,6 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3206,13 +3203,14 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
->addFnAttr(Attribute::NoUnwind);
Builder.CreateRetVoid();
+ Builder.restoreIP(OldIP);
return LtGRFunc;
}
Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3228,7 +3226,6 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
Builder.SetInsertPoint(EntryBlock);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3314,13 +3311,14 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
}
Builder.CreateRetVoid();
+ Builder.restoreIP(OldIP);
return LtGCFunc;
}
Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
auto *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3336,7 +3334,6 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3397,6 +3394,7 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
->addFnAttr(Attribute::NoUnwind);
Builder.CreateRetVoid();
+ Builder.restoreIP(OldIP);
return LtGRFunc;
}
@@ -3409,7 +3407,6 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
Expected<Function *> OpenMPIRBuilder::createReductionFunction(
StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
{Builder.getPtrTy(), Builder.getPtrTy()},
/* IsVarArg */ false);
@@ -3422,7 +3419,6 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
BasicBlock *EntryBB =
BasicBlock::Create(M.getContext(), "entry", ReductionFunc);
Builder.SetInsertPoint(EntryBB);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Need to alloca memory here and deal with the pointers before getting
// LHS/RHS pointers out
@@ -3750,12 +3746,10 @@ static Error populateReductionFunction(
Function *ReductionFunc,
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
IRBuilder<> &Builder, ArrayRef<bool> IsByRef, bool IsGPU) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
Module *Module = ReductionFunc->getParent();
BasicBlock *ReductionFuncBlock =
BasicBlock::Create(Module->getContext(), "", ReductionFunc);
Builder.SetInsertPoint(ReductionFuncBlock);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
Value *LHSArrayPtr = nullptr;
Value *RHSArrayPtr = nullptr;
if (IsGPU) {
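
Throughout this file the InsertPointGuard instances are replaced by explicit saveIP()/restoreIP() pairs, and the calls clearing the current debug location are dropped. A minimal sketch of the resulting pattern; Builder, EntryBB and the elided body are placeholders, not code from the patch:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static void emitHelper(IRBuilder<> &Builder, BasicBlock *EntryBB) {
  IRBuilderBase::InsertPoint SavedIP = Builder.saveIP(); // remember caller's position
  Builder.SetInsertPoint(EntryBB);
  // ... emit the helper-function body at EntryBB ...
  Builder.CreateRetVoid();
  Builder.restoreIP(SavedIP); // hand the original insertion point back
}
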
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 28ed1e5..7159107 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1450,6 +1450,7 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
.Case("popc.ll", true)
.Case("h2f", true)
.Case("swap.lo.hi.b64", true)
+ .Case("tanh.approx.f32", true)
.Default(false);
if (Expand) {
@@ -2543,6 +2544,12 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
MDNode *MD = MDNode::get(Builder.getContext(), {});
LD->setMetadata(LLVMContext::MD_invariant_load, MD);
return LD;
+ } else if (Name == "tanh.approx.f32") {
+ // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
+ FastMathFlags FMF;
+ FMF.setApproxFunc();
+ Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
+ FMF);
} else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
Value *Arg =
Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index b94dcac..4f37624 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -81,6 +81,10 @@ void DiagnosticInfoInlineAsm::print(DiagnosticPrinter &DP) const {
DP << " at line " << getLocCookie();
}
+void DiagnosticInfoLegalizationFailure::print(DiagnosticPrinter &DP) const {
+ DP << getLocationStr() << ": " << getMsgStr();
+}
+
DiagnosticInfoRegAllocFailure::DiagnosticInfoRegAllocFailure(
const Twine &MsgStr, const Function &Fn, const DiagnosticLocation &DL,
DiagnosticSeverity Severity)
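
DiagnosticInfoLegalizationFailure::print is what a context's diagnostic handler ends up calling for the new SelectionDAG diagnostics added earlier in this patch. A sketch of an llc-style handler that prints such diagnostics and records whether an error was seen; MyHandler and HadError are illustrative names, not part of the patch:

#include "llvm/IR/DiagnosticHandler.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

struct MyHandler : public DiagnosticHandler {
  bool *HadError;
  explicit MyHandler(bool *HadError) : HadError(HadError) {}
  bool handleDiagnostics(const DiagnosticInfo &DI) override {
    if (DI.getSeverity() == DS_Error)
      *HadError = true;
    DiagnosticPrinterRawOStream DP(errs());
    errs() << LLVMContext::getDiagnosticMessagePrefix(DI.getSeverity()) << ": ";
    DI.print(DP); // dispatches to e.g. DiagnosticInfoLegalizationFailure::print
    errs() << '\n';
    return true; // handled; skip the default handler
  }
};
// Installed with: Ctx.setDiagnosticHandler(std::make_unique<MyHandler>(&Err));
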
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index 0dbd07f..1157cbe 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -1796,6 +1796,7 @@ AAMDNodes Instruction::getAAMetadata() const {
Result.TBAAStruct = Info.lookup(LLVMContext::MD_tbaa_struct);
Result.Scope = Info.lookup(LLVMContext::MD_alias_scope);
Result.NoAlias = Info.lookup(LLVMContext::MD_noalias);
+ Result.NoAliasAddrSpace = Info.lookup(LLVMContext::MD_noalias_addrspace);
}
return Result;
}
@@ -1805,6 +1806,7 @@ void Instruction::setAAMetadata(const AAMDNodes &N) {
setMetadata(LLVMContext::MD_tbaa_struct, N.TBAAStruct);
setMetadata(LLVMContext::MD_alias_scope, N.Scope);
setMetadata(LLVMContext::MD_noalias, N.NoAlias);
+ setMetadata(LLVMContext::MD_noalias_addrspace, N.NoAliasAddrSpace);
}
void Instruction::setNoSanitizeMetadata() {
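
With NoAliasAddrSpace wired into getAAMetadata/setAAMetadata, !noalias.addrspace now survives the usual copy-the-AA-metadata idiom used when one instruction replaces another. A small sketch of that idiom; the function and parameter names are assumptions:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

static void copyAliasInfo(Instruction *NewI, const Instruction *OldI) {
  AAMDNodes AA = OldI->getAAMetadata(); // now also carries NoAliasAddrSpace
  NewI->setAAMetadata(AA);              // re-attaches !noalias.addrspace as well
}
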
diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt
index d662c42..18a85b3 100644
--- a/llvm/lib/MC/CMakeLists.txt
+++ b/llvm/lib/MC/CMakeLists.txt
@@ -43,13 +43,7 @@ add_llvm_component_library(LLVMMC
MCRegisterInfo.cpp
MCSchedule.cpp
MCSection.cpp
- MCSectionCOFF.cpp
- MCSectionDXContainer.cpp
- MCSectionELF.cpp
- MCSectionGOFF.cpp
MCSectionMachO.cpp
- MCSectionWasm.cpp
- MCSectionXCOFF.cpp
MCStreamer.cpp
MCSPIRVStreamer.cpp
MCSubtargetInfo.cpp
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 9f52b3e..ae8dffc 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -559,20 +559,7 @@ void ELFWriter::computeSymbolTable(const RevGroupMapTy &RevGroupMap) {
} else {
const MCSectionELF &Section =
static_cast<const MCSectionELF &>(Symbol.getSection());
-
- // We may end up with a situation when section symbol is technically
- // defined, but should not be. That happens because we explicitly
- // pre-create few .debug_* sections to have accessors.
- // And if these sections were not really defined in the code, but were
- // referenced, we simply error out.
- if (!Section.isRegistered()) {
- assert(static_cast<const MCSymbolELF &>(Symbol).getType() ==
- ELF::STT_SECTION);
- Ctx.reportError(SMLoc(),
- "Undefined section reference: " + Symbol.getName());
- continue;
- }
-
+ assert(Section.isRegistered());
if (Mode == NonDwoOnly && isDwoSection(Section))
continue;
MSD.SectionIndex = Section.getOrdinal();
@@ -1100,7 +1087,8 @@ uint64_t ELFWriter::writeObject() {
// Remember the offset into the file for this section.
const uint64_t SecStart = align(RelSection->getAlign());
- writeRelocations(cast<MCSectionELF>(*RelSection->getLinkedToSection()));
+ writeRelocations(
+ static_cast<const MCSectionELF &>(*RelSection->getLinkedToSection()));
uint64_t SecEnd = W.OS.tell();
RelSection->setOffsets(SecStart, SecEnd);
@@ -1273,7 +1261,7 @@ bool ELFObjectWriter::useSectionSymbol(const MCValue &Val,
// that it pointed to another string and subtracting 42 at runtime will
// produce the wrong value.
if (Sym->isInSection()) {
- auto &Sec = cast<MCSectionELF>(Sym->getSection());
+ auto &Sec = static_cast<const MCSectionELF &>(Sym->getSection());
unsigned Flags = Sec.getFlags();
if (Flags & ELF::SHF_MERGE) {
if (C != 0)
@@ -1325,13 +1313,14 @@ bool ELFObjectWriter::checkRelocation(SMLoc Loc, const MCSectionELF *From,
void ELFObjectWriter::recordRelocation(const MCFragment &F,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
- const MCSectionELF &Section = cast<MCSectionELF>(*F.getParent());
+ auto &Section = static_cast<const MCSectionELF &>(*F.getParent());
MCContext &Ctx = getContext();
const auto *SymA = cast_or_null<MCSymbolELF>(Target.getAddSym());
- const MCSectionELF *SecA = (SymA && SymA->isInSection())
- ? cast<MCSectionELF>(&SymA->getSection())
- : nullptr;
+ const MCSectionELF *SecA =
+ (SymA && SymA->isInSection())
+ ? static_cast<const MCSectionELF *>(&SymA->getSection())
+ : nullptr;
if (DwoOS && !checkRelocation(Fixup.getLoc(), &Section, SecA))
return;
diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp
index 1871f5f..88188f3 100644
--- a/llvm/lib/MC/GOFFObjectWriter.cpp
+++ b/llvm/lib/MC/GOFFObjectWriter.cpp
@@ -336,7 +336,7 @@ void GOFFWriter::defineSymbols() {
unsigned Ordinal = 0;
// Process all sections.
for (MCSection &S : Asm) {
- auto &Section = cast<MCSectionGOFF>(S);
+ auto &Section = static_cast<MCSectionGOFF &>(S);
Section.setOrdinal(++Ordinal);
defineSectionSymbols(Section);
}
diff --git a/llvm/lib/MC/MCAsmInfoCOFF.cpp b/llvm/lib/MC/MCAsmInfoCOFF.cpp
index 0b8781c..54717df 100644
--- a/llvm/lib/MC/MCAsmInfoCOFF.cpp
+++ b/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -12,7 +12,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
@@ -49,6 +55,10 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
HasCOFFComdatConstants = true;
}
+bool MCAsmInfoCOFF::useCodeAlign(const MCSection &Sec) const {
+ return Sec.isText();
+}
+
void MCAsmInfoMicrosoft::anchor() {}
MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() = default;
@@ -64,3 +74,101 @@ MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() {
// We don't create constants in comdat sections for MinGW.
HasCOFFComdatConstants = false;
}
+
+bool MCSectionCOFF::shouldOmitSectionDirective(StringRef Name) const {
+ if (COMDATSymbol || isUnique())
+ return false;
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" || Name == ".bss")
+ return true;
+
+ return false;
+}
+
+void MCSectionCOFF::setSelection(int Selection) const {
+ assert(Selection != 0 && "invalid COMDAT selection type");
+ this->Selection = Selection;
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+}
+
+void MCAsmInfoCOFF::printSwitchToSection(const MCSection &Section, uint32_t,
+ const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec = static_cast<const MCSectionCOFF &>(Section);
+  // Standard sections don't require the '.section' directive.
+ if (Sec.shouldOmitSectionDirective(Sec.getName())) {
+ OS << '\t' << Sec.getName() << '\n';
+ return;
+ }
+
+ OS << "\t.section\t" << Sec.getName() << ",\"";
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+ OS << 'd';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ OS << 'b';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_EXECUTE)
+ OS << 'x';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_WRITE)
+ OS << 'w';
+ else if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_READ)
+ OS << 'r';
+ else
+ OS << 'y';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_LNK_REMOVE)
+ OS << 'n';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED)
+ OS << 's';
+ if ((Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) &&
+ !Sec.isImplicitlyDiscardable(Sec.getName()))
+ OS << 'D';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_LNK_INFO)
+ OS << 'i';
+ OS << '"';
+
+  // 'unique' should be at the tail of the .section directive.
+ if (Sec.isUnique() && !Sec.COMDATSymbol)
+ OS << ",unique," << Sec.UniqueID;
+
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
+ if (Sec.COMDATSymbol)
+ OS << ",";
+ else
+ OS << "\n\t.linkonce\t";
+ switch (Sec.Selection) {
+ case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES:
+ OS << "one_only";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_ANY:
+ OS << "discard";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE:
+ OS << "same_size";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ OS << "same_contents";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE:
+ OS << "associative";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_LARGEST:
+ OS << "largest";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_NEWEST:
+ OS << "newest";
+ break;
+ default:
+ assert(false && "unsupported COFF selection type");
+ break;
+ }
+ if (Sec.COMDATSymbol) {
+ OS << ",";
+ Sec.COMDATSymbol->print(OS, this);
+ }
+ }
+
+ if (Sec.isUnique() && Sec.COMDATSymbol)
+ OS << ",unique," << Sec.UniqueID;
+
+ OS << '\n';
+}
diff --git a/llvm/lib/MC/MCAsmInfoDarwin.cpp b/llvm/lib/MC/MCAsmInfoDarwin.cpp
index 9cba775..e156fa0 100644
--- a/llvm/lib/MC/MCAsmInfoDarwin.cpp
+++ b/llvm/lib/MC/MCAsmInfoDarwin.cpp
@@ -85,3 +85,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
DwarfUsesRelocationsAcrossSections = false;
SetDirectiveSuppressesReloc = true;
}
+
+bool MCAsmInfoDarwin::useCodeAlign(const MCSection &Sec) const {
+ return static_cast<const MCSectionMachO &>(Sec).hasAttribute(
+ MachO::S_ATTR_PURE_INSTRUCTIONS);
+}
diff --git a/llvm/lib/MC/MCAsmInfoELF.cpp b/llvm/lib/MC/MCAsmInfoELF.cpp
index 7eb89ef..cdae9d7 100644
--- a/llvm/lib/MC/MCAsmInfoELF.cpp
+++ b/llvm/lib/MC/MCAsmInfoELF.cpp
@@ -12,9 +12,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
+#include <cassert>
using namespace llvm;
@@ -28,9 +35,198 @@ MCSection *MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const {
return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0);
}
+bool MCAsmInfoELF::useCodeAlign(const MCSection &Sec) const {
+ return static_cast<const MCSectionELF &>(Sec).getFlags() & ELF::SHF_EXECINSTR;
+}
+
MCAsmInfoELF::MCAsmInfoELF() {
HasIdentDirective = true;
WeakRefDirective = "\t.weak\t";
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
}
+
+static void printName(raw_ostream &OS, StringRef Name) {
+ if (Name.find_first_not_of("0123456789_."
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) {
+ OS << Name;
+ return;
+ }
+ OS << '"';
+ for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) {
+ if (*B == '"') // Unquoted "
+ OS << "\\\"";
+    else if (*B != '\\') // Neither " nor backslash
+ OS << *B;
+ else if (B + 1 == E) // Trailing backslash
+ OS << "\\\\";
+ else {
+ OS << B[0] << B[1]; // Quoted character
+ ++B;
+ }
+ }
+ OS << '"';
+}
+
+void MCAsmInfoELF::printSwitchToSection(const MCSection &Section,
+ uint32_t Subsection, const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec = static_cast<const MCSectionELF &>(Section);
+ if (!Sec.isUnique() && shouldOmitSectionDirective(Sec.getName())) {
+ OS << '\t' << Sec.getName();
+ if (Subsection)
+ OS << '\t' << Subsection;
+ OS << '\n';
+ return;
+ }
+
+ OS << "\t.section\t";
+ printName(OS, Sec.getName());
+
+ // Handle the weird solaris syntax if desired.
+ if (usesSunStyleELFSectionSwitchSyntax() && !(Sec.Flags & ELF::SHF_MERGE)) {
+ if (Sec.Flags & ELF::SHF_ALLOC)
+ OS << ",#alloc";
+ if (Sec.Flags & ELF::SHF_EXECINSTR)
+ OS << ",#execinstr";
+ if (Sec.Flags & ELF::SHF_WRITE)
+ OS << ",#write";
+ if (Sec.Flags & ELF::SHF_EXCLUDE)
+ OS << ",#exclude";
+ if (Sec.Flags & ELF::SHF_TLS)
+ OS << ",#tls";
+ OS << '\n';
+ return;
+ }
+
+ OS << ",\"";
+ if (Sec.Flags & ELF::SHF_ALLOC)
+ OS << 'a';
+ if (Sec.Flags & ELF::SHF_EXCLUDE)
+ OS << 'e';
+ if (Sec.Flags & ELF::SHF_EXECINSTR)
+ OS << 'x';
+ if (Sec.Flags & ELF::SHF_WRITE)
+ OS << 'w';
+ if (Sec.Flags & ELF::SHF_MERGE)
+ OS << 'M';
+ if (Sec.Flags & ELF::SHF_STRINGS)
+ OS << 'S';
+ if (Sec.Flags & ELF::SHF_TLS)
+ OS << 'T';
+ if (Sec.Flags & ELF::SHF_LINK_ORDER)
+ OS << 'o';
+ if (Sec.Flags & ELF::SHF_GROUP)
+ OS << 'G';
+ if (Sec.Flags & ELF::SHF_GNU_RETAIN)
+ OS << 'R';
+
+  // If there are OS-specific flags, print them.
+ if (T.isOSSolaris())
+ if (Sec.Flags & ELF::SHF_SUNW_NODISCARD)
+ OS << 'R';
+
+  // If there are target-specific flags, print them.
+ Triple::ArchType Arch = T.getArch();
+ if (Arch == Triple::xcore) {
+ if (Sec.Flags & ELF::XCORE_SHF_CP_SECTION)
+ OS << 'c';
+ if (Sec.Flags & ELF::XCORE_SHF_DP_SECTION)
+ OS << 'd';
+ } else if (T.isARM() || T.isThumb()) {
+ if (Sec.Flags & ELF::SHF_ARM_PURECODE)
+ OS << 'y';
+ } else if (T.isAArch64()) {
+ if (Sec.Flags & ELF::SHF_AARCH64_PURECODE)
+ OS << 'y';
+ } else if (Arch == Triple::hexagon) {
+ if (Sec.Flags & ELF::SHF_HEX_GPREL)
+ OS << 's';
+ } else if (Arch == Triple::x86_64) {
+ if (Sec.Flags & ELF::SHF_X86_64_LARGE)
+ OS << 'l';
+ }
+
+ OS << '"';
+
+ OS << ',';
+
+  // If the comment string is '@' (e.g. on ARM), use '%' instead
+ if (getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ if (Sec.Type == ELF::SHT_INIT_ARRAY)
+ OS << "init_array";
+ else if (Sec.Type == ELF::SHT_FINI_ARRAY)
+ OS << "fini_array";
+ else if (Sec.Type == ELF::SHT_PREINIT_ARRAY)
+ OS << "preinit_array";
+ else if (Sec.Type == ELF::SHT_NOBITS)
+ OS << "nobits";
+ else if (Sec.Type == ELF::SHT_NOTE)
+ OS << "note";
+ else if (Sec.Type == ELF::SHT_PROGBITS)
+ OS << "progbits";
+ else if (Sec.Type == ELF::SHT_X86_64_UNWIND)
+ OS << "unwind";
+ else if (Sec.Type == ELF::SHT_MIPS_DWARF)
+    // Print the hex value of the section type, since it has no standard
+    // symbolic representation.
+ OS << "0x7000001e";
+ else if (Sec.Type == ELF::SHT_LLVM_ODRTAB)
+ OS << "llvm_odrtab";
+ else if (Sec.Type == ELF::SHT_LLVM_LINKER_OPTIONS)
+ OS << "llvm_linker_options";
+ else if (Sec.Type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE)
+ OS << "llvm_call_graph_profile";
+ else if (Sec.Type == ELF::SHT_LLVM_DEPENDENT_LIBRARIES)
+ OS << "llvm_dependent_libraries";
+ else if (Sec.Type == ELF::SHT_LLVM_SYMPART)
+ OS << "llvm_sympart";
+ else if (Sec.Type == ELF::SHT_LLVM_BB_ADDR_MAP)
+ OS << "llvm_bb_addr_map";
+ else if (Sec.Type == ELF::SHT_LLVM_OFFLOADING)
+ OS << "llvm_offloading";
+ else if (Sec.Type == ELF::SHT_LLVM_LTO)
+ OS << "llvm_lto";
+ else if (Sec.Type == ELF::SHT_LLVM_JT_SIZES)
+ OS << "llvm_jt_sizes";
+ else if (Sec.Type == ELF::SHT_LLVM_CFI_JUMP_TABLE)
+ OS << "llvm_cfi_jump_table";
+ else
+ OS << "0x" << Twine::utohexstr(Sec.Type);
+
+ if (Sec.EntrySize) {
+ assert((Sec.Flags & ELF::SHF_MERGE) ||
+ Sec.Type == ELF::SHT_LLVM_CFI_JUMP_TABLE);
+ OS << "," << Sec.EntrySize;
+ }
+
+ if (Sec.Flags & ELF::SHF_LINK_ORDER) {
+ OS << ",";
+ if (Sec.LinkedToSym)
+ printName(OS, Sec.LinkedToSym->getName());
+ else
+ OS << '0';
+ }
+
+ if (Sec.Flags & ELF::SHF_GROUP) {
+ OS << ",";
+ printName(OS, Sec.Group.getPointer()->getName());
+ if (Sec.isComdat())
+ OS << ",comdat";
+ }
+
+ if (Sec.isUnique())
+ OS << ",unique," << Sec.UniqueID;
+
+ OS << '\n';
+
+ if (Subsection) {
+ OS << "\t.subsection\t" << Subsection;
+ OS << '\n';
+ }
+}
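
The directive-printing logic that used to live in MCSectionELF::printSwitchToSection is now an MCAsmInfoELF method. A short sketch of the code path that exercises it, creating an ELF section and switching a textual streamer to it; Ctx and Streamer are assumed to be an already-configured MCContext and asm MCStreamer:

#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;

static void switchToMyData(MCContext &Ctx, MCStreamer &Streamer) {
  MCSectionELF *Sec = Ctx.getELFSection(".mydata", ELF::SHT_PROGBITS,
                                        ELF::SHF_ALLOC | ELF::SHF_WRITE);
  // With an asm streamer this prints:  .section  .mydata,"aw",@progbits
  Streamer.switchSection(Sec);
}
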
diff --git a/llvm/lib/MC/MCAsmInfoGOFF.cpp b/llvm/lib/MC/MCAsmInfoGOFF.cpp
index 3c81a46..0a5d1927 100644
--- a/llvm/lib/MC/MCAsmInfoGOFF.cpp
+++ b/llvm/lib/MC/MCAsmInfoGOFF.cpp
@@ -13,11 +13,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoGOFF.h"
+#include "llvm/BinaryFormat/GOFF.h"
+#include "llvm/MC/MCSectionGOFF.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-void MCAsmInfoGOFF::anchor() {}
-
MCAsmInfoGOFF::MCAsmInfoGOFF() {
Data64bitsDirective = "\t.quad\t";
HasDotTypeDotSizeDirective = false;
@@ -25,3 +26,136 @@ MCAsmInfoGOFF::MCAsmInfoGOFF() {
PrivateLabelPrefix = "L#";
ZeroDirective = "\t.space\t";
}
+
+static void emitCATTR(raw_ostream &OS, StringRef Name, GOFF::ESDRmode Rmode,
+ GOFF::ESDAlignment Alignment,
+ GOFF::ESDLoadingBehavior LoadBehavior,
+ GOFF::ESDExecutable Executable, bool IsReadOnly,
+ uint32_t SortKey, uint8_t FillByteValue,
+ StringRef PartName) {
+ OS << Name << " CATTR ";
+ OS << "ALIGN(" << static_cast<unsigned>(Alignment) << "),"
+ << "FILL(" << static_cast<unsigned>(FillByteValue) << ")";
+ switch (LoadBehavior) {
+ case GOFF::ESD_LB_Deferred:
+ OS << ",DEFLOAD";
+ break;
+ case GOFF::ESD_LB_NoLoad:
+ OS << ",NOLOAD";
+ break;
+ default:
+ break;
+ }
+ switch (Executable) {
+ case GOFF::ESD_EXE_CODE:
+ OS << ",EXECUTABLE";
+ break;
+ case GOFF::ESD_EXE_DATA:
+ OS << ",NOTEXECUTABLE";
+ break;
+ default:
+ break;
+ }
+ if (IsReadOnly)
+ OS << ",READONLY";
+ if (Rmode != GOFF::ESD_RMODE_None) {
+ OS << ',';
+ OS << "RMODE(";
+ switch (Rmode) {
+ case GOFF::ESD_RMODE_24:
+ OS << "24";
+ break;
+ case GOFF::ESD_RMODE_31:
+ OS << "31";
+ break;
+ case GOFF::ESD_RMODE_64:
+ OS << "64";
+ break;
+ case GOFF::ESD_RMODE_None:
+ break;
+ }
+ OS << ')';
+ }
+ if (SortKey)
+ OS << ",PRIORITY(" << SortKey << ")";
+ if (!PartName.empty())
+ OS << ",PART(" << PartName << ")";
+ OS << '\n';
+}
+
+static void emitXATTR(raw_ostream &OS, StringRef Name,
+ GOFF::ESDLinkageType Linkage,
+ GOFF::ESDExecutable Executable,
+ GOFF::ESDBindingScope BindingScope) {
+ OS << Name << " XATTR ";
+ OS << "LINKAGE(" << (Linkage == GOFF::ESD_LT_OS ? "OS" : "XPLINK") << "),";
+ if (Executable != GOFF::ESD_EXE_Unspecified)
+ OS << "REFERENCE(" << (Executable == GOFF::ESD_EXE_CODE ? "CODE" : "DATA")
+ << "),";
+ if (BindingScope != GOFF::ESD_BSC_Unspecified) {
+ OS << "SCOPE(";
+ switch (BindingScope) {
+ case GOFF::ESD_BSC_Section:
+ OS << "SECTION";
+ break;
+ case GOFF::ESD_BSC_Module:
+ OS << "MODULE";
+ break;
+ case GOFF::ESD_BSC_Library:
+ OS << "LIBRARY";
+ break;
+ case GOFF::ESD_BSC_ImportExport:
+ OS << "EXPORT";
+ break;
+ default:
+ break;
+ }
+ OS << ')';
+ }
+ OS << '\n';
+}
+
+void MCAsmInfoGOFF::printSwitchToSection(const MCSection &Section,
+ uint32_t Subsection, const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec =
+ const_cast<MCSectionGOFF &>(static_cast<const MCSectionGOFF &>(Section));
+ switch (Sec.SymbolType) {
+ case GOFF::ESD_ST_SectionDefinition: {
+ OS << Sec.getName() << " CSECT\n";
+ Sec.Emitted = true;
+ break;
+ }
+ case GOFF::ESD_ST_ElementDefinition: {
+ printSwitchToSection(*Sec.getParent(), Subsection, T, OS);
+ if (!Sec.Emitted) {
+ emitCATTR(OS, Sec.getName(), Sec.EDAttributes.Rmode,
+ Sec.EDAttributes.Alignment, Sec.EDAttributes.LoadBehavior,
+ GOFF::ESD_EXE_Unspecified, Sec.EDAttributes.IsReadOnly, 0,
+ Sec.EDAttributes.FillByteValue, StringRef());
+ Sec.Emitted = true;
+ } else
+ OS << Sec.getName() << " CATTR\n";
+ break;
+ }
+ case GOFF::ESD_ST_PartReference: {
+ MCSectionGOFF *ED = Sec.getParent();
+ printSwitchToSection(*ED->getParent(), Subsection, T, OS);
+ if (!Sec.Emitted) {
+ emitCATTR(OS, ED->getName(), ED->getEDAttributes().Rmode,
+ ED->EDAttributes.Alignment, ED->EDAttributes.LoadBehavior,
+ Sec.PRAttributes.Executable, ED->EDAttributes.IsReadOnly,
+ Sec.PRAttributes.SortKey, ED->EDAttributes.FillByteValue,
+ Sec.getName());
+ emitXATTR(OS, Sec.getName(), Sec.PRAttributes.Linkage,
+ Sec.PRAttributes.Executable, Sec.PRAttributes.BindingScope);
+ ED->Emitted = true;
+ Sec.Emitted = true;
+ } else
+ OS << ED->getName() << " CATTR PART(" << Sec.getName() << ")\n";
+ break;
+ }
+ default:
+ llvm_unreachable("Wrong section type");
+ }
+}
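As a rough sketch of the HLASM-style output this produces (all symbol names and attribute values here are hypothetical), switching to a not-yet-emitted part reference prints its SD and ED ancestors followed by the CATTR/XATTR pair for the part:

    root CSECT
    root#C CATTR ALIGN(3),FILL(0),NOTEXECUTABLE,RMODE(64),PART(var#S)
    var#S XATTR LINKAGE(XPLINK),REFERENCE(DATA),SCOPE(LIBRARY)

A later switch to the same part re-prints the ancestors and then selects it with `root#C CATTR PART(var#S)`.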
diff --git a/llvm/lib/MC/MCAsmInfoWasm.cpp b/llvm/lib/MC/MCAsmInfoWasm.cpp
index ce6ec7e..5e44f48 100644
--- a/llvm/lib/MC/MCAsmInfoWasm.cpp
+++ b/llvm/lib/MC/MCAsmInfoWasm.cpp
@@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoWasm.h"
-using namespace llvm;
+#include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/Support/raw_ostream.h"
-void MCAsmInfoWasm::anchor() {}
+using namespace llvm;
MCAsmInfoWasm::MCAsmInfoWasm() {
HasIdentDirective = true;
@@ -23,3 +25,80 @@ MCAsmInfoWasm::MCAsmInfoWasm() {
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
}
+
+static void printName(raw_ostream &OS, StringRef Name) {
+ if (Name.find_first_not_of("0123456789_."
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) {
+ OS << Name;
+ return;
+ }
+ OS << '"';
+ for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) {
+ if (*B == '"') // Unquoted "
+ OS << "\\\"";
+ else if (*B != '\\') // Neither " or backslash
+ OS << *B;
+ else if (B + 1 == E) // Trailing backslash
+ OS << "\\\\";
+ else {
+ OS << B[0] << B[1]; // Quoted character
+ ++B;
+ }
+ }
+ OS << '"';
+}
+
+void MCAsmInfoWasm::printSwitchToSection(const MCSection &Section,
+ uint32_t Subsection, const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec = static_cast<const MCSectionWasm &>(Section);
+ if (shouldOmitSectionDirective(Sec.getName())) {
+ OS << '\t' << Sec.getName();
+ if (Subsection)
+ OS << '\t' << Subsection;
+ OS << '\n';
+ return;
+ }
+
+ OS << "\t.section\t";
+ printName(OS, Sec.getName());
+ OS << ",\"";
+
+ if (Sec.IsPassive)
+ OS << 'p';
+ if (Sec.Group)
+ OS << 'G';
+ if (Sec.SegmentFlags & wasm::WASM_SEG_FLAG_STRINGS)
+ OS << 'S';
+ if (Sec.SegmentFlags & wasm::WASM_SEG_FLAG_TLS)
+ OS << 'T';
+ if (Sec.SegmentFlags & wasm::WASM_SEG_FLAG_RETAIN)
+ OS << 'R';
+
+ OS << '"';
+
+ OS << ',';
+
+ // If comment string is '@', e.g. as on ARM - use '%' instead
+ if (getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ // TODO: Print section type.
+
+ if (Sec.Group) {
+ OS << ",";
+ printName(OS, Sec.Group->getName());
+ OS << ",comdat";
+ }
+
+ if (Sec.isUnique())
+ OS << ",unique," << Sec.UniqueID;
+
+ OS << '\n';
+
+ if (Subsection)
+ OS << "\t.subsection\t" << Subsection << '\n';
+}
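A sketch of the resulting directive for a mergeable-strings segment (section name hypothetical; only the 'S' segment flag set) would be:

    .section	.rodata.str1.1,"S",@

with 'p' added for passive segments, 'T' for TLS, 'R' for retained segments, and a trailing `,<group>,comdat` when the section belongs to a comdat group.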
diff --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp
index 6ef11ba..0403b44 100644
--- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp
+++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp
@@ -8,7 +8,11 @@
#include "llvm/MC/MCAsmInfoXCOFF.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -16,8 +20,6 @@ namespace llvm {
extern cl::opt<cl::boolOrDefault> UseLEB128Directives;
}
-void MCAsmInfoXCOFF::anchor() {}
-
MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
IsAIX = true;
IsLittleEndian = false;
@@ -56,3 +58,121 @@ bool MCAsmInfoXCOFF::isAcceptableChar(char C) const {
// any combination of these.
return isAlnum(C) || C == '_' || C == '.';
}
+
+bool MCAsmInfoXCOFF::useCodeAlign(const MCSection &Sec) const {
+ return static_cast<const MCSectionXCOFF &>(Sec).getKind().isText();
+}
+
+MCSectionXCOFF::~MCSectionXCOFF() = default;
+
+void MCSectionXCOFF::printCsectDirective(raw_ostream &OS) const {
+ OS << "\t.csect " << QualName->getName() << "," << Log2(getAlign()) << '\n';
+}
+
+void MCAsmInfoXCOFF::printSwitchToSection(const MCSection &Section, uint32_t,
+ const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec = static_cast<const MCSectionXCOFF &>(Section);
+ if (Sec.getKind().isText()) {
+ if (Sec.getMappingClass() != XCOFF::XMC_PR)
+ report_fatal_error("Unhandled storage-mapping class for .text csect");
+
+ Sec.printCsectDirective(OS);
+ return;
+ }
+
+ if (Sec.getKind().isReadOnly()) {
+ if (Sec.getMappingClass() != XCOFF::XMC_RO &&
+ Sec.getMappingClass() != XCOFF::XMC_TD)
+ report_fatal_error("Unhandled storage-mapping class for .rodata csect.");
+ Sec.printCsectDirective(OS);
+ return;
+ }
+
+ if (Sec.getKind().isReadOnlyWithRel()) {
+ if (Sec.getMappingClass() != XCOFF::XMC_RW &&
+ Sec.getMappingClass() != XCOFF::XMC_RO &&
+ Sec.getMappingClass() != XCOFF::XMC_TD)
+ report_fatal_error(
+ "Unexepected storage-mapping class for ReadOnlyWithRel kind");
+ Sec.printCsectDirective(OS);
+ return;
+ }
+
+ // Initialized TLS data.
+ if (Sec.getKind().isThreadData()) {
+ // We only expect XMC_TL here for initialized TLS data.
+ if (Sec.getMappingClass() != XCOFF::XMC_TL)
+ report_fatal_error("Unhandled storage-mapping class for .tdata csect.");
+ Sec.printCsectDirective(OS);
+ return;
+ }
+
+ if (Sec.getKind().isData()) {
+ switch (Sec.getMappingClass()) {
+ case XCOFF::XMC_RW:
+ case XCOFF::XMC_DS:
+ case XCOFF::XMC_TD:
+ Sec.printCsectDirective(OS);
+ break;
+ case XCOFF::XMC_TC:
+ case XCOFF::XMC_TE:
+ break;
+ case XCOFF::XMC_TC0:
+ OS << "\t.toc\n";
+ break;
+ default:
+ report_fatal_error("Unhandled storage-mapping class for .data csect.");
+ }
+ return;
+ }
+
+ if (Sec.isCsect() && Sec.getMappingClass() == XCOFF::XMC_TD) {
+ // Common csect type (uninitialized storage) does not have to print
+ // csect directive for section switching unless it is local.
+ if (Sec.getKind().isCommon() && !Sec.getKind().isBSSLocal())
+ return;
+
+ assert(Sec.getKind().isBSS() && "Unexpected section kind for toc-data");
+ Sec.printCsectDirective(OS);
+ return;
+ }
+ // Common csect type (uninitialized storage) does not have to print csect
+ // directive for section switching.
+ if (Sec.isCsect() && Sec.getCSectType() == XCOFF::XTY_CM) {
+ assert((Sec.getMappingClass() == XCOFF::XMC_RW ||
+ Sec.getMappingClass() == XCOFF::XMC_BS ||
+ Sec.getMappingClass() == XCOFF::XMC_UL) &&
+ "Generated a storage-mapping class for a common/bss/tbss csect we "
+ "don't "
+ "understand how to switch to.");
+ // Common symbols and local zero-initialized symbols for TLS and Non-TLS are
+ // eligible for .bss/.tbss csect, getKind().isThreadBSS() is used to
+ // cover TLS common and zero-initialized local symbols since linkage type
+ // (in the GlobalVariable) is not accessible in this class.
+ assert((Sec.getKind().isBSSLocal() || Sec.getKind().isCommon() ||
+ Sec.getKind().isThreadBSS()) &&
+ "wrong symbol type for .bss/.tbss csect");
+ // Don't have to print a directive for switching to section for commons
+ // and zero-initialized TLS data. The '.comm' and '.lcomm' directives of the
+ // variable will create the needed csect.
+ return;
+ }
+
+ // Zero-initialized TLS data with weak or external linkage are not eligible to
+ // be put into common csect.
+ if (Sec.getKind().isThreadBSS()) {
+ Sec.printCsectDirective(OS);
+ return;
+ }
+
+ // XCOFF debug sections.
+ if (Sec.getKind().isMetadata() && Sec.isDwarfSect()) {
+ OS << "\n\t.dwsect " << format("0x%" PRIx32, *Sec.getDwarfSubtypeFlags())
+ << '\n';
+ OS << Sec.getName() << ':' << '\n';
+ return;
+ }
+
+ report_fatal_error("Printing for this SectionKind is unimplemented.");
+}
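As a hedged example of what this emits (csect names and alignments are hypothetical): a 32-byte-aligned text csect is switched to with

    .csect .foo[PR],5

an XMC_TC0 csect prints `.toc`, and a DWARF section prints a `.dwsect <subtype>` directive followed by the section name as a label.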
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 67c53e0..da51da4 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -345,7 +345,7 @@ public:
void emitIdent(StringRef IdentString) override;
void emitCFIBKeyFrame() override;
void emitCFIMTETaggedFrame() override;
- void emitCFISections(bool EH, bool Debug) override;
+ void emitCFISections(bool EH, bool Debug, bool SFrame) override;
void emitCFIDefCfa(int64_t Register, int64_t Offset, SMLoc Loc) override;
void emitCFIDefCfaOffset(int64_t Offset, SMLoc Loc) override;
void emitCFIDefCfaRegister(int64_t Register, SMLoc Loc) override;
@@ -532,8 +532,8 @@ void MCAsmStreamer::switchSection(MCSection *Section, uint32_t Subsection) {
if (MCTargetStreamer *TS = getTargetStreamer()) {
TS->changeSection(Cur.first, Section, Subsection, OS);
} else {
- Section->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS,
- Subsection);
+ MAI->printSwitchToSection(*Section, Subsection,
+ getContext().getTargetTriple(), OS);
}
}
MCStreamer::switchSection(Section, Subsection);
@@ -543,7 +543,7 @@ bool MCAsmStreamer::popSection() {
if (!MCStreamer::popSection())
return false;
auto [Sec, Subsec] = getCurrentSection();
- Sec->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS, Subsec);
+ MAI->printSwitchToSection(*Sec, Subsec, getContext().getTargetTriple(), OS);
return true;
}
@@ -1105,7 +1105,7 @@ void MCAsmStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol,
// Note: a .zerofill directive does not switch sections.
OS << ".zerofill ";
- assert(Section->getVariant() == MCSection::SV_MachO &&
+ assert(getContext().getObjectFileType() == MCContext::IsMachO &&
".zerofill is a Mach-O specific directive");
// This is a mach-o specific directive.
@@ -1130,7 +1130,7 @@ void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
// Instead of using the Section we'll just use the shortcut.
- assert(Section->getVariant() == MCSection::SV_MachO &&
+ assert(getContext().getObjectFileType() == MCContext::IsMachO &&
".zerofill is a Mach-O specific directive");
// This is a mach-o specific directive and section.
@@ -1906,15 +1906,24 @@ void MCAsmStreamer::emitIdent(StringRef IdentString) {
EmitEOL();
}
-void MCAsmStreamer::emitCFISections(bool EH, bool Debug) {
- MCStreamer::emitCFISections(EH, Debug);
+void MCAsmStreamer::emitCFISections(bool EH, bool Debug, bool SFrame) {
+ MCStreamer::emitCFISections(EH, Debug, SFrame);
OS << "\t.cfi_sections ";
+ bool C = false;
if (EH) {
OS << ".eh_frame";
- if (Debug)
- OS << ", .debug_frame";
- } else if (Debug) {
+ C = true;
+ }
+ if (Debug) {
+ if (C)
+ OS << ", ";
OS << ".debug_frame";
+ C = true;
+ }
+ if (SFrame) {
+ if (C)
+ OS << ", ";
+ OS << ".sframe";
}
EmitEOL();
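A sketch of the extended output when both EH and SFrame are requested (any subset of the three keywords can appear, comma-separated):

    .cfi_sections .eh_frame, .sframe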
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index e142ac1..8500fd1 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -106,7 +106,6 @@ void MCAssembler::reset() {
bool MCAssembler::registerSection(MCSection &Section) {
if (Section.isRegistered())
return false;
- assert(Section.curFragList()->Head && "allocInitialFragment not called");
Sections.push_back(&Section);
Section.setIsRegistered(true);
return true;
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 12b3fba..39bf628 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -200,16 +200,6 @@ MCInst *MCContext::createMCInst() {
return new (MCInstAllocator.Allocate()) MCInst;
}
-// Allocate the initial MCFragment for the begin symbol.
-MCFragment *MCContext::allocInitialFragment(MCSection &Sec) {
- assert(!Sec.curFragList()->Head);
- auto *F = allocFragment<MCFragment>();
- F->setParent(&Sec);
- Sec.curFragList()->Head = F;
- Sec.curFragList()->Tail = F;
- return F;
-}
-
//===----------------------------------------------------------------------===//
// Symbol Manipulation
//===----------------------------------------------------------------------===//
@@ -443,17 +433,19 @@ MCSymbol *MCContext::getDirectionalLocalSymbol(unsigned LocalLabelVal,
return getOrCreateDirectionalLocalSymbol(LocalLabelVal, Instance);
}
+// Create a section symbol, with a distinct one for each section of the same name.
+// The first symbol is used for assembly code references.
template <typename Symbol>
Symbol *MCContext::getOrCreateSectionSymbol(StringRef Section) {
Symbol *R;
auto &SymEntry = getSymbolTableEntry(Section);
MCSymbol *Sym = SymEntry.second.Symbol;
- // A section symbol can not redefine regular symbols. There may be multiple
- // sections with the same name, in which case the first such section wins.
if (Sym && Sym->isDefined() &&
(!Sym->isInSection() || Sym->getSection().getBeginSymbol() != Sym))
reportError(SMLoc(), "invalid symbol redefinition");
- if (Sym && Sym->isUndefined()) {
+ // Use the symbol's index to track if it has been used as a section symbol.
+ // Set to -1 to catch potential bugs if misused as a symbol index.
+ if (Sym && Sym->getIndex() != -1u) {
R = cast<Symbol>(Sym);
} else {
SymEntry.second.Used = true;
@@ -461,6 +453,8 @@ Symbol *MCContext::getOrCreateSectionSymbol(StringRef Section) {
if (!Sym)
SymEntry.second.Symbol = R;
}
+ // Mark as section symbol.
+ R->setIndex(-1u);
return R;
}
@@ -568,7 +562,6 @@ MCSectionMachO *MCContext::getMachOSection(StringRef Segment, StringRef Section,
MCSectionMachO(Segment, Name.substr(Name.size() - Section.size()),
TypeAndAttributes, Reserved2, Kind, Begin);
R.first->second = Ret;
- allocInitialFragment(*Ret);
return Ret;
}
@@ -579,15 +572,8 @@ MCSectionELF *MCContext::createELFSectionImpl(StringRef Section, unsigned Type,
bool Comdat, unsigned UniqueID,
const MCSymbolELF *LinkedToSym) {
auto *R = getOrCreateSectionSymbol<MCSymbolELF>(Section);
- R->setBinding(ELF::STB_LOCAL);
- R->setType(ELF::STT_SECTION);
-
- auto *Ret = new (ELFAllocator.Allocate()) MCSectionELF(
+ return new (ELFAllocator.Allocate()) MCSectionELF(
Section, Type, Flags, EntrySize, Group, Comdat, UniqueID, R, LinkedToSym);
-
- auto *F = allocInitialFragment(*Ret);
- R->setFragment(F);
- return Ret;
}
MCSectionELF *
@@ -743,7 +729,6 @@ MCSectionGOFF *MCContext::getGOFFSection(SectionKind Kind, StringRef Name,
MCSectionGOFF(CachedName, Kind, IsVirtual, Attributes,
static_cast<MCSectionGOFF *>(Parent));
Iter->second = GOFFSection;
- allocInitialFragment(*GOFFSection);
return GOFFSection;
}
@@ -782,8 +767,8 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
if (Selection != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE &&
COMDATSymbol->isDefined() &&
(!COMDATSymbol->isInSection() ||
- cast<MCSectionCOFF>(COMDATSymbol->getSection()).getCOMDATSymbol() !=
- COMDATSymbol))
+ static_cast<const MCSectionCOFF &>(COMDATSymbol->getSection())
+ .getCOMDATSymbol() != COMDATSymbol))
reportError(SMLoc(), "invalid symbol redefinition");
}
@@ -798,8 +783,7 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
MCSectionCOFF *Result = new (COFFAllocator.Allocate()) MCSectionCOFF(
CachedName, Characteristics, COMDATSymbol, Selection, UniqueID, Begin);
Iter->second = Result;
- auto *F = allocInitialFragment(*Result);
- Begin->setFragment(F);
+ Begin->setFragment(&Result->getDummyFragment());
return Result;
}
@@ -870,8 +854,6 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind,
MCSectionWasm(CachedName, Kind, Flags, GroupSym, UniqueID, Begin);
Entry.second = Result;
- auto *F = allocInitialFragment(*Result);
- Begin->setFragment(F);
return Result;
}
@@ -927,24 +909,11 @@ MCSectionXCOFF *MCContext::getXCOFFSection(
MultiSymbolsAllowed);
Entry.second = Result;
-
- auto *F = allocInitialFragment(*Result);
-
- // We might miss calculating the symbols difference as absolute value before
- // adding fixups when symbol_A without the fragment set is the csect itself
- // and symbol_B is in it.
- // TODO: Currently we only set the fragment for XMC_PR csects and DWARF
- // sections because we don't have other cases that hit this problem yet.
- if (IsDwarfSec || CsectProp->MappingClass == XCOFF::XMC_PR)
- QualName->setFragment(F);
-
return Result;
}
MCSectionSPIRV *MCContext::getSPIRVSection() {
MCSectionSPIRV *Result = new (SPIRVAllocator.Allocate()) MCSectionSPIRV();
-
- allocInitialFragment(*Result);
return Result;
}
@@ -964,7 +933,6 @@ MCSectionDXContainer *MCContext::getDXContainerSection(StringRef Section,
new (DXCAllocator.Allocate()) MCSectionDXContainer(Name, K, nullptr);
// The first fragment will store the header
- allocInitialFragment(*MapIt->second);
return MapIt->second;
}
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index b8cbaea5..38744a0 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -89,7 +89,9 @@ void MCELFStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
getWriter().markGnuAbi();
MCObjectStreamer::changeSection(Section, Subsection);
- Asm.registerSymbol(*Section->getBeginSymbol());
+ auto *Sym = static_cast<MCSymbolELF *>(Section->getBeginSymbol());
+ Sym->setBinding(ELF::STB_LOCAL);
+ Sym->setType(ELF::STT_SECTION);
}
void MCELFStreamer::emitWeakReference(MCSymbol *Alias, const MCSymbol *Target) {
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index dbb2fd1..c24c82d 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -346,17 +346,16 @@ static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
Displacement *= -1;
}
- // Track whether B is before a relaxable instruction and whether A is after
- // a relaxable instruction. If SA and SB are separated by a linker-relaxable
- // instruction, the difference cannot be resolved as it may be changed by
- // the linker.
+ // Track whether B is before a relaxable instruction/alignment and whether A
+ // is after a relaxable instruction/alignment. If SA and SB are separated by
+ // a linker-relaxable instruction/alignment, the difference cannot be
+ // resolved as it may be changed by the linker.
bool BBeforeRelax = false, AAfterRelax = false;
for (auto F = FB; F; F = F->getNext()) {
- auto DF = F->getKind() == MCFragment::FT_Data ? F : nullptr;
- if (DF && DF->isLinkerRelaxable()) {
- if (&*F != FB || SBOffset != DF->getContents().size())
+ if (F && F->isLinkerRelaxable()) {
+ if (&*F != FB || SBOffset != F->getSize())
BBeforeRelax = true;
- if (&*F != FA || SAOffset == DF->getContents().size())
+ if (&*F != FA || SAOffset == F->getSize())
AAfterRelax = true;
if (BBeforeRelax && AAfterRelax)
return;
@@ -370,17 +369,15 @@ static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
}
int64_t Num;
- if (DF) {
- Displacement += DF->getContents().size();
- } else if (F->getKind() == MCFragment::FT_Relaxable &&
+ if (F->getKind() == MCFragment::FT_Data) {
+ Displacement += F->getFixedSize();
+ } else if ((F->getKind() == MCFragment::FT_Relaxable ||
+ F->getKind() == MCFragment::FT_Align) &&
Asm->hasFinalLayout()) {
// Before finishLayout, a relaxable fragment's size is indeterminate.
// After layout, during relocation generation, it can be treated as a
// data fragment.
Displacement += F->getSize();
- } else if (F->getKind() == MCFragment::FT_Align && Layout &&
- F->isLinkerRelaxable()) {
- Displacement += Asm->computeFragmentSize(*F);
} else if (auto *FF = dyn_cast<MCFillFragment>(F);
FF && FF->getNumValues().evaluateAsAbsolute(Num)) {
Displacement += Num * FF->getValueSize();
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index 3c395e5..6cbdf74 100644
--- a/llvm/lib/MC/MCFragment.cpp
+++ b/llvm/lib/MC/MCFragment.cpp
@@ -35,7 +35,7 @@ MCFragment::MCFragment(FragmentType Kind, bool HasInstructions)
}
const MCSymbol *MCFragment::getAtom() const {
- return cast<MCSectionMachO>(Parent)->getAtom(LayoutOrder);
+ return static_cast<const MCSectionMachO *>(Parent)->getAtom(LayoutOrder);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/MC/MCGOFFStreamer.cpp b/llvm/lib/MC/MCGOFFStreamer.cpp
index b702191..1718e2a 100644
--- a/llvm/lib/MC/MCGOFFStreamer.cpp
+++ b/llvm/lib/MC/MCGOFFStreamer.cpp
@@ -26,19 +26,15 @@ GOFFObjectWriter &MCGOFFStreamer::getWriter() {
return static_cast<GOFFObjectWriter &>(getAssembler().getWriter());
}
-// Make sure that all section are registered in the correct order.
-static void registerSectionHierarchy(MCAssembler &Asm, MCSectionGOFF *Section) {
- if (Section->isRegistered())
- return;
- if (Section->getParent())
- registerSectionHierarchy(Asm, Section->getParent());
- Asm.registerSection(*Section);
-}
-
void MCGOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
- registerSectionHierarchy(getAssembler(),
- static_cast<MCSectionGOFF *>(Section));
- MCObjectStreamer::changeSection(Section, Subsection);
+ // Make sure that all sections are registered in the correct order.
+ SmallVector<MCSectionGOFF *> Sections;
+ for (auto *S = static_cast<MCSectionGOFF *>(Section); S; S = S->getParent())
+ Sections.push_back(S);
+ while (!Sections.empty()) {
+ auto *S = Sections.pop_back_val();
+ MCObjectStreamer::changeSection(S, Sections.empty() ? Subsection : 0);
+ }
}
MCStreamer *llvm::createGOFFStreamer(MCContext &Context,
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index 8c3332c..a214513 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -140,6 +140,8 @@ void MCMachOStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
MCSymbol *Label = getContext().createLinkerPrivateTempSymbol();
Section->setBeginSymbol(Label);
HasSectionLabel[Section] = true;
+ if (!Label->isInSection())
+ emitLabel(Label);
}
}
@@ -441,13 +443,13 @@ void MCMachOStreamer::finishImpl() {
// Set the fragment atom associations by tracking the last seen atom defining
// symbol.
for (MCSection &Sec : getAssembler()) {
- cast<MCSectionMachO>(Sec).allocAtoms();
+ static_cast<MCSectionMachO &>(Sec).allocAtoms();
const MCSymbol *CurrentAtom = nullptr;
size_t I = 0;
for (MCFragment &Frag : Sec) {
if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(&Frag))
CurrentAtom = Symbol;
- cast<MCSectionMachO>(Sec).setAtom(I++, CurrentAtom);
+ static_cast<MCSectionMachO &>(Sec).setAtom(I++, CurrentAtom);
}
}
@@ -482,7 +484,8 @@ void MCMachOStreamer::finalizeCGProfile() {
// For each entry, reserve space for 2 32-bit indices and a 64-bit count.
size_t SectionBytes =
W.getCGProfile().size() * (2 * sizeof(uint32_t) + sizeof(uint64_t));
- (*CGProfileSection->begin()).appendContents(SectionBytes, 0);
+ (*CGProfileSection->begin())
+ .setVarContents(std::vector<char>(SectionBytes, 0));
}
MCStreamer *llvm::createMachOStreamer(MCContext &Context,
@@ -518,5 +521,6 @@ void MCMachOStreamer::createAddrSigSection() {
// (instead of emitting a zero-sized section) so these relocations are
// technically valid, even though we don't expect these relocations to
// actually be applied by the linker.
- Frag->appendContents(8, 0);
+ constexpr char zero[8] = {};
+ Frag->setVarContents(zero);
}
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 89f4da5..e277143 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -57,6 +57,10 @@ void MCObjectStreamer::insert(MCFragment *F) {
newFragment();
}
+void MCObjectStreamer::appendContents(ArrayRef<char> Contents) {
+ CurFrag->appendContents(Contents);
+}
+
void MCObjectStreamer::appendContents(size_t Num, char Elt) {
CurFrag->appendContents(Num, Elt);
}
@@ -129,10 +133,11 @@ void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) {
Assembler->registerSymbol(Sym);
}
-void MCObjectStreamer::emitCFISections(bool EH, bool Debug) {
- MCStreamer::emitCFISections(EH, Debug);
+void MCObjectStreamer::emitCFISections(bool EH, bool Debug, bool SFrame) {
+ MCStreamer::emitCFISections(EH, Debug, SFrame);
EmitEHFrame = EH;
EmitDebugFrame = Debug;
+ EmitSFrame = SFrame;
}
void MCObjectStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
@@ -184,10 +189,10 @@ void MCObjectStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
getAssembler().registerSymbol(*Symbol);
- // If there is a current fragment, mark the symbol as pointing into it.
- // Otherwise queue the label and set its fragment pointer when we emit the
- // next fragment.
- MCFragment *F = getCurrentFragment();
+ // Set the fragment and offset. This function might be called by
+ // changeSection, when the section stack top hasn't been changed to the new
+ // section.
+ MCFragment *F = CurFrag;
Symbol->setFragment(F);
Symbol->setOffset(F->getContents().size());
@@ -246,6 +251,15 @@ void MCObjectStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
assert(Section && "Cannot switch to a null section!");
getContext().clearDwarfLocSeen();
+ // Register the section and create an initial fragment for subsection 0
+ // if `Subsection` is non-zero.
+ bool NewSec = getAssembler().registerSection(*Section);
+ MCFragment *F0 = nullptr;
+ if (NewSec && Subsection) {
+ changeSection(Section, 0);
+ F0 = CurFrag;
+ }
+
auto &Subsections = Section->Subsections;
size_t I = 0, E = Subsections.size();
while (I != E && Subsections[I].first < Subsection)
@@ -261,12 +275,13 @@ void MCObjectStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
Section->CurFragList = &Subsections[I].second;
CurFrag = Section->CurFragList->Tail;
- getAssembler().registerSection(*Section);
-}
-
-void MCObjectStreamer::switchSectionNoPrint(MCSection *Section) {
- MCStreamer::switchSectionNoPrint(Section);
- changeSection(Section, 0);
+ // Define the section symbol at subsection 0's initial fragment if required.
+ if (!NewSec)
+ return;
+ if (auto *Sym = Section->getBeginSymbol()) {
+ Sym->setFragment(Subsection ? F0 : CurFrag);
+ getAssembler().registerSymbol(*Sym);
+ }
}
void MCObjectStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
@@ -329,31 +344,33 @@ void MCObjectStreamer::emitInstToData(const MCInst &Inst,
MCFragment *F = getCurrentFragment();
// Append the instruction to the data fragment.
- size_t FixupStartIndex = F->getFixups().size();
size_t CodeOffset = F->getContents().size();
SmallVector<MCFixup, 1> Fixups;
getAssembler().getEmitter().encodeInstruction(
Inst, F->getContentsForAppending(), Fixups, STI);
F->doneAppending();
- if (!Fixups.empty())
- F->appendFixups(Fixups);
F->setHasInstructions(STI);
+ if (Fixups.empty())
+ return;
bool MarkedLinkerRelaxable = false;
- for (auto &Fixup : MutableArrayRef(F->getFixups()).slice(FixupStartIndex)) {
+ for (auto &Fixup : Fixups) {
Fixup.setOffset(Fixup.getOffset() + CodeOffset);
- if (!Fixup.isLinkerRelaxable())
+ if (!Fixup.isLinkerRelaxable() || MarkedLinkerRelaxable)
continue;
- F->setLinkerRelaxable();
+ MarkedLinkerRelaxable = true;
+ // Set the fragment's order within the subsection for use by
+ // MCAssembler::relaxAlign.
+ auto *Sec = F->getParent();
+ if (!Sec->isLinkerRelaxable())
+ Sec->setLinkerRelaxable();
// Do not add data after a linker-relaxable instruction. The difference
// between a new label and a label at or before the linker-relaxable
// instruction cannot be resolved at assemble-time.
- if (!MarkedLinkerRelaxable) {
- MarkedLinkerRelaxable = true;
- getCurrentSectionOnly()->setLinkerRelaxable();
- newFragment();
- }
+ F->setLinkerRelaxable();
+ newFragment();
}
+ F->appendFixups(Fixups);
}
void MCObjectStreamer::emitInstToFragment(const MCInst &Inst,
@@ -525,8 +542,7 @@ void MCObjectStreamer::emitCVFileChecksumOffsetDirective(unsigned FileNo) {
void MCObjectStreamer::emitBytes(StringRef Data) {
MCDwarfLineEntry::make(this, getCurrentSectionOnly());
- MCFragment *DF = getCurrentFragment();
- DF->appendContents(ArrayRef(Data.data(), Data.size()));
+ appendContents(ArrayRef(Data.data(), Data.size()));
}
void MCObjectStreamer::emitValueToAlignment(Align Alignment, int64_t Fill,
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index d0b6ea4..9f64a98 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -3413,7 +3413,7 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, uint8_t ValueSize) {
// Check whether we should use optimal code alignment for this .align
// directive.
- if (Section->useCodeAlign() && !HasFillExpr) {
+ if (MAI.useCodeAlign(*Section) && !HasFillExpr) {
getStreamer().emitCodeAlignment(
Align(Alignment), &getTargetParser().getSTI(), MaxBytesToFill);
} else {
@@ -4093,27 +4093,30 @@ bool AsmParser::parseDirectiveCVFPOData() {
}
/// parseDirectiveCFISections
-/// ::= .cfi_sections section [, section]
+/// ::= .cfi_sections section [, section][, section]
bool AsmParser::parseDirectiveCFISections() {
StringRef Name;
bool EH = false;
bool Debug = false;
+ bool SFrame = false;
if (!parseOptionalToken(AsmToken::EndOfStatement)) {
for (;;) {
if (parseIdentifier(Name))
- return TokError("expected .eh_frame or .debug_frame");
+ return TokError("expected .eh_frame, .debug_frame, or .sframe");
if (Name == ".eh_frame")
EH = true;
else if (Name == ".debug_frame")
Debug = true;
+ else if (Name == ".sframe")
+ SFrame = true;
if (parseOptionalToken(AsmToken::EndOfStatement))
break;
if (parseComma())
return true;
}
}
- getStreamer().emitCFISections(EH, Debug);
+ getStreamer().emitCFISections(EH, Debug, SFrame);
return false;
}
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index c7c3df3..2e251cc 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -644,8 +644,8 @@ EndStmt:
}
if (UseLastGroup) {
- if (const MCSectionELF *Section =
- cast_or_null<MCSectionELF>(getStreamer().getCurrentSectionOnly()))
+ if (auto *Section = static_cast<const MCSectionELF *>(
+ getStreamer().getCurrentSectionOnly()))
if (const MCSymbol *Group = Section->getGroup()) {
GroupName = Group->getName();
IsComdat = Section->isComdat();
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index f4684e6..780289e 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -4228,8 +4228,7 @@ bool MasmParser::emitAlignTo(int64_t Alignment) {
// Check whether we should use optimal code alignment for this align
// directive.
const MCSection *Section = getStreamer().getCurrentSectionOnly();
- assert(Section && "must have section to emit alignment");
- if (Section->useCodeAlign()) {
+ if (MAI.useCodeAlign(*Section)) {
getStreamer().emitCodeAlignment(Align(Alignment),
&getTargetParser().getSTI(),
/*MaxBytesToEmit=*/0);
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 1f824b8..d97f4f5 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -252,7 +252,7 @@ public:
if (TypeName == "function") {
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
auto *Current =
- cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly());
+ static_cast<MCSectionWasm *>(getStreamer().getCurrentSectionOnly());
if (Current->getGroup())
WasmSym->setComdat(true);
} else if (TypeName == "global")
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index 023f7f2..4f28267 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -18,12 +18,10 @@
using namespace llvm;
-MCSection::MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsBss,
- MCSymbol *Begin)
+MCSection::MCSection(StringRef Name, bool IsText, bool IsBss, MCSymbol *Begin)
: Begin(Begin), HasInstructions(false), IsRegistered(false), IsText(IsText),
- IsBss(IsBss), LinkerRelaxable(false), Name(Name), Variant(V) {
- // The initial subsection number is 0. Create a fragment list.
- CurFragList = &Subsections.emplace_back(0u, FragList{}).second;
+ IsBss(IsBss), LinkerRelaxable(false), Name(Name) {
+ DummyFragment.setParent(this);
}
MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
diff --git a/llvm/lib/MC/MCSectionCOFF.cpp b/llvm/lib/MC/MCSectionCOFF.cpp
deleted file mode 100644
index 5bf1473..0000000
--- a/llvm/lib/MC/MCSectionCOFF.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionCOFF.h"
-#include "llvm/BinaryFormat/COFF.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-
-using namespace llvm;
-
-// shouldOmitSectionDirective - Decides whether a '.section' directive
-// should be printed before the section name
-bool MCSectionCOFF::shouldOmitSectionDirective(StringRef Name,
- const MCAsmInfo &MAI) const {
- if (COMDATSymbol || isUnique())
- return false;
-
- // FIXME: Does .section .bss/.data/.text work everywhere??
- if (Name == ".text" || Name == ".data" || Name == ".bss")
- return true;
-
- return false;
-}
-
-void MCSectionCOFF::setSelection(int Selection) const {
- assert(Selection != 0 && "invalid COMDAT selection type");
- this->Selection = Selection;
- Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
-}
-
-void MCSectionCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
- // standard sections don't require the '.section'
- if (shouldOmitSectionDirective(getName(), MAI)) {
- OS << '\t' << getName() << '\n';
- return;
- }
-
- OS << "\t.section\t" << getName() << ",\"";
- if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
- OS << 'd';
- if (getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
- OS << 'b';
- if (getCharacteristics() & COFF::IMAGE_SCN_MEM_EXECUTE)
- OS << 'x';
- if (getCharacteristics() & COFF::IMAGE_SCN_MEM_WRITE)
- OS << 'w';
- else if (getCharacteristics() & COFF::IMAGE_SCN_MEM_READ)
- OS << 'r';
- else
- OS << 'y';
- if (getCharacteristics() & COFF::IMAGE_SCN_LNK_REMOVE)
- OS << 'n';
- if (getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED)
- OS << 's';
- if ((getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) &&
- !isImplicitlyDiscardable(getName()))
- OS << 'D';
- if (getCharacteristics() & COFF::IMAGE_SCN_LNK_INFO)
- OS << 'i';
- OS << '"';
-
- // unique should be tail of .section directive.
- if (isUnique() && !COMDATSymbol)
- OS << ",unique," << UniqueID;
-
- if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
- if (COMDATSymbol)
- OS << ",";
- else
- OS << "\n\t.linkonce\t";
- switch (Selection) {
- case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES:
- OS << "one_only";
- break;
- case COFF::IMAGE_COMDAT_SELECT_ANY:
- OS << "discard";
- break;
- case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE:
- OS << "same_size";
- break;
- case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH:
- OS << "same_contents";
- break;
- case COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE:
- OS << "associative";
- break;
- case COFF::IMAGE_COMDAT_SELECT_LARGEST:
- OS << "largest";
- break;
- case COFF::IMAGE_COMDAT_SELECT_NEWEST:
- OS << "newest";
- break;
- default:
- assert(false && "unsupported COFF selection type");
- break;
- }
- if (COMDATSymbol) {
- OS << ",";
- COMDATSymbol->print(OS, &MAI);
- }
- }
-
- if (isUnique() && COMDATSymbol)
- OS << ",unique," << UniqueID;
-
- OS << '\n';
-}
-
-bool MCSectionCOFF::useCodeAlign() const { return isText(); }
diff --git a/llvm/lib/MC/MCSectionDXContainer.cpp b/llvm/lib/MC/MCSectionDXContainer.cpp
deleted file mode 100644
index 7eee59d..0000000
--- a/llvm/lib/MC/MCSectionDXContainer.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-//===- lib/MC/MCSectionDXContainer.cpp - DXContainer Section --------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionDXContainer.h"
-
-using namespace llvm;
-
-void MCSectionDXContainer::printSwitchToSection(const MCAsmInfo &,
- const Triple &, raw_ostream &,
- uint32_t) const {}
diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp
deleted file mode 100644
index ef33f9c..0000000
--- a/llvm/lib/MC/MCSectionELF.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/TargetParser/Triple.h"
-#include <cassert>
-
-using namespace llvm;
-
-// Decides whether a '.section' directive
-// should be printed before the section name.
-bool MCSectionELF::shouldOmitSectionDirective(StringRef Name,
- const MCAsmInfo &MAI) const {
- if (isUnique())
- return false;
-
- return MAI.shouldOmitSectionDirective(Name);
-}
-
-static void printName(raw_ostream &OS, StringRef Name) {
- if (Name.find_first_not_of("0123456789_."
- "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) {
- OS << Name;
- return;
- }
- OS << '"';
- for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) {
- if (*B == '"') // Unquoted "
- OS << "\\\"";
- else if (*B != '\\') // Neither " or backslash
- OS << *B;
- else if (B + 1 == E) // Trailing backslash
- OS << "\\\\";
- else {
- OS << B[0] << B[1]; // Quoted character
- ++B;
- }
- }
- OS << '"';
-}
-
-void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
- if (shouldOmitSectionDirective(getName(), MAI)) {
- OS << '\t' << getName();
- if (Subsection)
- OS << '\t' << Subsection;
- OS << '\n';
- return;
- }
-
- OS << "\t.section\t";
- printName(OS, getName());
-
- // Handle the weird solaris syntax if desired.
- if (MAI.usesSunStyleELFSectionSwitchSyntax() &&
- !(Flags & ELF::SHF_MERGE)) {
- if (Flags & ELF::SHF_ALLOC)
- OS << ",#alloc";
- if (Flags & ELF::SHF_EXECINSTR)
- OS << ",#execinstr";
- if (Flags & ELF::SHF_WRITE)
- OS << ",#write";
- if (Flags & ELF::SHF_EXCLUDE)
- OS << ",#exclude";
- if (Flags & ELF::SHF_TLS)
- OS << ",#tls";
- OS << '\n';
- return;
- }
-
- OS << ",\"";
- if (Flags & ELF::SHF_ALLOC)
- OS << 'a';
- if (Flags & ELF::SHF_EXCLUDE)
- OS << 'e';
- if (Flags & ELF::SHF_EXECINSTR)
- OS << 'x';
- if (Flags & ELF::SHF_WRITE)
- OS << 'w';
- if (Flags & ELF::SHF_MERGE)
- OS << 'M';
- if (Flags & ELF::SHF_STRINGS)
- OS << 'S';
- if (Flags & ELF::SHF_TLS)
- OS << 'T';
- if (Flags & ELF::SHF_LINK_ORDER)
- OS << 'o';
- if (Flags & ELF::SHF_GROUP)
- OS << 'G';
- if (Flags & ELF::SHF_GNU_RETAIN)
- OS << 'R';
-
- // If there are os-specific flags, print them.
- if (T.isOSSolaris())
- if (Flags & ELF::SHF_SUNW_NODISCARD)
- OS << 'R';
-
- // If there are target-specific flags, print them.
- Triple::ArchType Arch = T.getArch();
- if (Arch == Triple::xcore) {
- if (Flags & ELF::XCORE_SHF_CP_SECTION)
- OS << 'c';
- if (Flags & ELF::XCORE_SHF_DP_SECTION)
- OS << 'd';
- } else if (T.isARM() || T.isThumb()) {
- if (Flags & ELF::SHF_ARM_PURECODE)
- OS << 'y';
- } else if (T.isAArch64()) {
- if (Flags & ELF::SHF_AARCH64_PURECODE)
- OS << 'y';
- } else if (Arch == Triple::hexagon) {
- if (Flags & ELF::SHF_HEX_GPREL)
- OS << 's';
- } else if (Arch == Triple::x86_64) {
- if (Flags & ELF::SHF_X86_64_LARGE)
- OS << 'l';
- }
-
- OS << '"';
-
- OS << ',';
-
- // If comment string is '@', e.g. as on ARM - use '%' instead
- if (MAI.getCommentString()[0] == '@')
- OS << '%';
- else
- OS << '@';
-
- if (Type == ELF::SHT_INIT_ARRAY)
- OS << "init_array";
- else if (Type == ELF::SHT_FINI_ARRAY)
- OS << "fini_array";
- else if (Type == ELF::SHT_PREINIT_ARRAY)
- OS << "preinit_array";
- else if (Type == ELF::SHT_NOBITS)
- OS << "nobits";
- else if (Type == ELF::SHT_NOTE)
- OS << "note";
- else if (Type == ELF::SHT_PROGBITS)
- OS << "progbits";
- else if (Type == ELF::SHT_X86_64_UNWIND)
- OS << "unwind";
- else if (Type == ELF::SHT_MIPS_DWARF)
- // Print hex value of the flag while we do not have
- // any standard symbolic representation of the flag.
- OS << "0x7000001e";
- else if (Type == ELF::SHT_LLVM_ODRTAB)
- OS << "llvm_odrtab";
- else if (Type == ELF::SHT_LLVM_LINKER_OPTIONS)
- OS << "llvm_linker_options";
- else if (Type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE)
- OS << "llvm_call_graph_profile";
- else if (Type == ELF::SHT_LLVM_DEPENDENT_LIBRARIES)
- OS << "llvm_dependent_libraries";
- else if (Type == ELF::SHT_LLVM_SYMPART)
- OS << "llvm_sympart";
- else if (Type == ELF::SHT_LLVM_BB_ADDR_MAP)
- OS << "llvm_bb_addr_map";
- else if (Type == ELF::SHT_LLVM_OFFLOADING)
- OS << "llvm_offloading";
- else if (Type == ELF::SHT_LLVM_LTO)
- OS << "llvm_lto";
- else if (Type == ELF::SHT_LLVM_JT_SIZES)
- OS << "llvm_jt_sizes";
- else if (Type == ELF::SHT_LLVM_CFI_JUMP_TABLE)
- OS << "llvm_cfi_jump_table";
- else
- OS << "0x" << Twine::utohexstr(Type);
-
- if (EntrySize) {
- assert((Flags & ELF::SHF_MERGE) || Type == ELF::SHT_LLVM_CFI_JUMP_TABLE);
- OS << "," << EntrySize;
- }
-
- if (Flags & ELF::SHF_LINK_ORDER) {
- OS << ",";
- if (LinkedToSym)
- printName(OS, LinkedToSym->getName());
- else
- OS << '0';
- }
-
- if (Flags & ELF::SHF_GROUP) {
- OS << ",";
- printName(OS, Group.getPointer()->getName());
- if (isComdat())
- OS << ",comdat";
- }
-
- if (isUnique())
- OS << ",unique," << UniqueID;
-
- OS << '\n';
-
- if (Subsection) {
- OS << "\t.subsection\t" << Subsection;
- OS << '\n';
- }
-}
-
-bool MCSectionELF::useCodeAlign() const {
- return getFlags() & ELF::SHF_EXECINSTR;
-}
diff --git a/llvm/lib/MC/MCSectionGOFF.cpp b/llvm/lib/MC/MCSectionGOFF.cpp
deleted file mode 100644
index 8163e5b..0000000
--- a/llvm/lib/MC/MCSectionGOFF.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//===- MCSectionGOFF.cpp - GOFF Code Section Representation ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionGOFF.h"
-#include "llvm/BinaryFormat/GOFF.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static void emitCATTR(raw_ostream &OS, StringRef Name, GOFF::ESDRmode Rmode,
- GOFF::ESDAlignment Alignment,
- GOFF::ESDLoadingBehavior LoadBehavior,
- GOFF::ESDExecutable Executable, bool IsReadOnly,
- uint32_t SortKey, uint8_t FillByteValue,
- StringRef PartName) {
- OS << Name << " CATTR ";
- OS << "ALIGN(" << static_cast<unsigned>(Alignment) << "),"
- << "FILL(" << static_cast<unsigned>(FillByteValue) << ")";
- switch (LoadBehavior) {
- case GOFF::ESD_LB_Deferred:
- OS << ",DEFLOAD";
- break;
- case GOFF::ESD_LB_NoLoad:
- OS << ",NOLOAD";
- break;
- default:
- break;
- }
- switch (Executable) {
- case GOFF::ESD_EXE_CODE:
- OS << ",EXECUTABLE";
- break;
- case GOFF::ESD_EXE_DATA:
- OS << ",NOTEXECUTABLE";
- break;
- default:
- break;
- }
- if (IsReadOnly)
- OS << ",READONLY";
- if (Rmode != GOFF::ESD_RMODE_None) {
- OS << ',';
- OS << "RMODE(";
- switch (Rmode) {
- case GOFF::ESD_RMODE_24:
- OS << "24";
- break;
- case GOFF::ESD_RMODE_31:
- OS << "31";
- break;
- case GOFF::ESD_RMODE_64:
- OS << "64";
- break;
- case GOFF::ESD_RMODE_None:
- break;
- }
- OS << ')';
- }
- if (SortKey)
- OS << ",PRIORITY(" << SortKey << ")";
- if (!PartName.empty())
- OS << ",PART(" << PartName << ")";
- OS << '\n';
-}
-
-static void emitXATTR(raw_ostream &OS, StringRef Name,
- GOFF::ESDLinkageType Linkage,
- GOFF::ESDExecutable Executable,
- GOFF::ESDBindingScope BindingScope) {
- OS << Name << " XATTR ";
- OS << "LINKAGE(" << (Linkage == GOFF::ESD_LT_OS ? "OS" : "XPLINK") << "),";
- if (Executable != GOFF::ESD_EXE_Unspecified)
- OS << "REFERENCE(" << (Executable == GOFF::ESD_EXE_CODE ? "CODE" : "DATA")
- << "),";
- if (BindingScope != GOFF::ESD_BSC_Unspecified) {
- OS << "SCOPE(";
- switch (BindingScope) {
- case GOFF::ESD_BSC_Section:
- OS << "SECTION";
- break;
- case GOFF::ESD_BSC_Module:
- OS << "MODULE";
- break;
- case GOFF::ESD_BSC_Library:
- OS << "LIBRARY";
- break;
- case GOFF::ESD_BSC_ImportExport:
- OS << "EXPORT";
- break;
- default:
- break;
- }
- OS << ')';
- }
- OS << '\n';
-}
-
-void MCSectionGOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
- switch (SymbolType) {
- case GOFF::ESD_ST_SectionDefinition: {
- OS << Name << " CSECT\n";
- Emitted = true;
- break;
- }
- case GOFF::ESD_ST_ElementDefinition: {
- getParent()->printSwitchToSection(MAI, T, OS, Subsection);
- if (!Emitted) {
- emitCATTR(OS, Name, EDAttributes.Rmode, EDAttributes.Alignment,
- EDAttributes.LoadBehavior, GOFF::ESD_EXE_Unspecified,
- EDAttributes.IsReadOnly, 0, EDAttributes.FillByteValue,
- StringRef());
- Emitted = true;
- } else
- OS << Name << " CATTR\n";
- break;
- }
- case GOFF::ESD_ST_PartReference: {
- MCSectionGOFF *ED = getParent();
- ED->getParent()->printSwitchToSection(MAI, T, OS, Subsection);
- if (!Emitted) {
- emitCATTR(OS, ED->getName(), ED->getEDAttributes().Rmode,
- ED->EDAttributes.Alignment, ED->EDAttributes.LoadBehavior,
- PRAttributes.Executable, ED->EDAttributes.IsReadOnly,
- PRAttributes.SortKey, ED->EDAttributes.FillByteValue, Name);
- emitXATTR(OS, Name, PRAttributes.Linkage, PRAttributes.Executable,
- PRAttributes.BindingScope);
- ED->Emitted = true;
- Emitted = true;
- } else
- OS << ED->getName() << " CATTR PART(" << Name << ")\n";
- break;
- }
- default:
- llvm_unreachable("Wrong section type");
- }
-}
\ No newline at end of file
diff --git a/llvm/lib/MC/MCSectionMachO.cpp b/llvm/lib/MC/MCSectionMachO.cpp
index 67453ce..67c8235 100644
--- a/llvm/lib/MC/MCSectionMachO.cpp
+++ b/llvm/lib/MC/MCSectionMachO.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/raw_ostream.h"
@@ -92,7 +93,7 @@ ENTRY("" /*FIXME*/, S_ATTR_LOC_RELOC)
MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
unsigned TAA, unsigned reserved2, SectionKind K,
MCSymbol *Begin)
- : MCSection(SV_MachO, Section, K.isText(),
+ : MCSection(Section, K.isText(),
MachO::isVirtualSection(TAA & MachO::SECTION_TYPE), Begin),
TypeAndAttributes(TAA), Reserved2(reserved2) {
assert(Segment.size() <= 16 && Section.size() <= 16 &&
@@ -105,19 +106,20 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
}
}
-void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
- OS << "\t.section\t" << getSegmentName() << ',' << getName();
+void MCAsmInfoDarwin::printSwitchToSection(const MCSection &Section, uint32_t,
+ const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec = static_cast<const MCSectionMachO &>(Section);
+ OS << "\t.section\t" << Sec.getSegmentName() << ',' << Sec.getName();
// Get the section type and attributes.
- unsigned TAA = getTypeAndAttributes();
+ unsigned TAA = Sec.getTypeAndAttributes();
if (TAA == 0) {
OS << '\n';
return;
}
- MachO::SectionType SectionType = getType();
+ MachO::SectionType SectionType = Sec.getType();
assert(SectionType <= MachO::LAST_KNOWN_SECTION_TYPE &&
"Invalid SectionType specified!");
@@ -135,8 +137,8 @@ void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
if (SectionAttrs == 0) {
// If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as
// the attribute specifier.
- if (Reserved2 != 0)
- OS << ",none," << Reserved2;
+ if (Sec.Reserved2 != 0)
+ OS << ",none," << Sec.Reserved2;
OS << '\n';
return;
}
@@ -164,15 +166,11 @@ void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
assert(SectionAttrs == 0 && "Unknown section attributes!");
// If we have a S_SYMBOL_STUBS size specified, print it.
- if (Reserved2 != 0)
- OS << ',' << Reserved2;
+ if (Sec.Reserved2 != 0)
+ OS << ',' << Sec.Reserved2;
OS << '\n';
}
-bool MCSectionMachO::useCodeAlign() const {
- return hasAttribute(MachO::S_ATTR_PURE_INSTRUCTIONS);
-}
-
/// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
/// This is a string that can appear after a .section directive in a mach-o
/// flavored .s file. If successful, this fills in the specified Out
diff --git a/llvm/lib/MC/MCSectionWasm.cpp b/llvm/lib/MC/MCSectionWasm.cpp
deleted file mode 100644
index e25af1c..0000000
--- a/llvm/lib/MC/MCSectionWasm.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-//===- lib/MC/MCSectionWasm.cpp - Wasm Code Section Representation --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionWasm.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbolWasm.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-// Decides whether a '.section' directive
-// should be printed before the section name.
-bool MCSectionWasm::shouldOmitSectionDirective(StringRef Name,
- const MCAsmInfo &MAI) const {
- return MAI.shouldOmitSectionDirective(Name);
-}
-
-static void printName(raw_ostream &OS, StringRef Name) {
- if (Name.find_first_not_of("0123456789_."
- "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) {
- OS << Name;
- return;
- }
- OS << '"';
- for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) {
- if (*B == '"') // Unquoted "
- OS << "\\\"";
- else if (*B != '\\') // Neither " or backslash
- OS << *B;
- else if (B + 1 == E) // Trailing backslash
- OS << "\\\\";
- else {
- OS << B[0] << B[1]; // Quoted character
- ++B;
- }
- }
- OS << '"';
-}
-
-void MCSectionWasm::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
-
- if (shouldOmitSectionDirective(getName(), MAI)) {
- OS << '\t' << getName();
- if (Subsection)
- OS << '\t' << Subsection;
- OS << '\n';
- return;
- }
-
- OS << "\t.section\t";
- printName(OS, getName());
- OS << ",\"";
-
- if (IsPassive)
- OS << 'p';
- if (Group)
- OS << 'G';
- if (SegmentFlags & wasm::WASM_SEG_FLAG_STRINGS)
- OS << 'S';
- if (SegmentFlags & wasm::WASM_SEG_FLAG_TLS)
- OS << 'T';
- if (SegmentFlags & wasm::WASM_SEG_FLAG_RETAIN)
- OS << 'R';
-
- OS << '"';
-
- OS << ',';
-
- // If comment string is '@', e.g. as on ARM - use '%' instead
- if (MAI.getCommentString()[0] == '@')
- OS << '%';
- else
- OS << '@';
-
- // TODO: Print section type.
-
- if (Group) {
- OS << ",";
- printName(OS, Group->getName());
- OS << ",comdat";
- }
-
- if (isUnique())
- OS << ",unique," << UniqueID;
-
- OS << '\n';
-
- if (Subsection)
- OS << "\t.subsection\t" << Subsection << '\n';
-}
-
-bool MCSectionWasm::useCodeAlign() const { return false; }
diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp
deleted file mode 100644
index 41043b2..0000000
--- a/llvm/lib/MC/MCSectionXCOFF.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-//===- lib/MC/MCSectionXCOFF.cpp - XCOFF Code Section Representation ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionXCOFF.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-namespace llvm {
-class MCExpr;
-class Triple;
-} // namespace llvm
-
-using namespace llvm;
-
-MCSectionXCOFF::~MCSectionXCOFF() = default;
-
-void MCSectionXCOFF::printCsectDirective(raw_ostream &OS) const {
- OS << "\t.csect " << QualName->getName() << "," << Log2(getAlign()) << '\n';
-}
-
-void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
- if (getKind().isText()) {
- if (getMappingClass() != XCOFF::XMC_PR)
- report_fatal_error("Unhandled storage-mapping class for .text csect");
-
- printCsectDirective(OS);
- return;
- }
-
- if (getKind().isReadOnly()) {
- if (getMappingClass() != XCOFF::XMC_RO &&
- getMappingClass() != XCOFF::XMC_TD)
- report_fatal_error("Unhandled storage-mapping class for .rodata csect.");
- printCsectDirective(OS);
- return;
- }
-
- if (getKind().isReadOnlyWithRel()) {
- if (getMappingClass() != XCOFF::XMC_RW &&
- getMappingClass() != XCOFF::XMC_RO &&
- getMappingClass() != XCOFF::XMC_TD)
- report_fatal_error(
- "Unexepected storage-mapping class for ReadOnlyWithRel kind");
- printCsectDirective(OS);
- return;
- }
-
- // Initialized TLS data.
- if (getKind().isThreadData()) {
- // We only expect XMC_TL here for initialized TLS data.
- if (getMappingClass() != XCOFF::XMC_TL)
- report_fatal_error("Unhandled storage-mapping class for .tdata csect.");
- printCsectDirective(OS);
- return;
- }
-
- if (getKind().isData()) {
- switch (getMappingClass()) {
- case XCOFF::XMC_RW:
- case XCOFF::XMC_DS:
- case XCOFF::XMC_TD:
- printCsectDirective(OS);
- break;
- case XCOFF::XMC_TC:
- case XCOFF::XMC_TE:
- break;
- case XCOFF::XMC_TC0:
- OS << "\t.toc\n";
- break;
- default:
- report_fatal_error(
- "Unhandled storage-mapping class for .data csect.");
- }
- return;
- }
-
- if (isCsect() && getMappingClass() == XCOFF::XMC_TD) {
- // Common csect type (uninitialized storage) does not have to print csect
- // directive for section switching unless it is local.
- if (getKind().isCommon() && !getKind().isBSSLocal())
- return;
-
- assert(getKind().isBSS() && "Unexpected section kind for toc-data");
- printCsectDirective(OS);
- return;
- }
- // Common csect type (uninitialized storage) does not have to print csect
- // directive for section switching.
- if (isCsect() && getCSectType() == XCOFF::XTY_CM) {
- assert((getMappingClass() == XCOFF::XMC_RW ||
- getMappingClass() == XCOFF::XMC_BS ||
- getMappingClass() == XCOFF::XMC_UL) &&
- "Generated a storage-mapping class for a common/bss/tbss csect we "
- "don't "
- "understand how to switch to.");
- // Common symbols and local zero-initialized symbols for TLS and Non-TLS are
- // eligible for .bss/.tbss csect, getKind().isThreadBSS() is used to cover
- // TLS common and zero-initialized local symbols since linkage type (in the
- // GlobalVariable) is not accessible in this class.
- assert((getKind().isBSSLocal() || getKind().isCommon() ||
- getKind().isThreadBSS()) &&
- "wrong symbol type for .bss/.tbss csect");
- // Don't have to print a directive for switching to section for commons and
- // zero-initialized TLS data. The '.comm' and '.lcomm' directives of the
- // variable will create the needed csect.
- return;
- }
-
- // Zero-initialized TLS data with weak or external linkage are not eligible to
- // be put into common csect.
- if (getKind().isThreadBSS()) {
- printCsectDirective(OS);
- return;
- }
-
- // XCOFF debug sections.
- if (getKind().isMetadata() && isDwarfSect()) {
- OS << "\n\t.dwsect " << format("0x%" PRIx32, *getDwarfSubtypeFlags())
- << '\n';
- OS << getName() << ':' << '\n';
- return;
- }
-
- report_fatal_error("Printing for this SectionKind is unimplemented.");
-}
-
-bool MCSectionXCOFF::useCodeAlign() const { return getKind().isText(); }
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index 30198c9..bc73981 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -56,12 +56,11 @@ void MCTargetStreamer::finish() {}
void MCTargetStreamer::emitConstantPools() {}
-void MCTargetStreamer::changeSection(const MCSection *CurSection,
- MCSection *Section, uint32_t Subsection,
- raw_ostream &OS) {
- Section->printSwitchToSection(*Streamer.getContext().getAsmInfo(),
- Streamer.getContext().getTargetTriple(), OS,
- Subsection);
+void MCTargetStreamer::changeSection(const MCSection *, MCSection *Sec,
+ uint32_t Subsection, raw_ostream &OS) {
+ auto &MAI = *Streamer.getContext().getAsmInfo();
+ MAI.printSwitchToSection(*Sec, Subsection,
+ Streamer.getContext().getTargetTriple(), OS);
}
void MCTargetStreamer::emitDwarfFileDirective(StringRef Directive) {
@@ -415,7 +414,7 @@ void MCStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
void MCStreamer::emitConditionalAssignment(MCSymbol *Symbol,
const MCExpr *Value) {}
-void MCStreamer::emitCFISections(bool EH, bool Debug) {}
+void MCStreamer::emitCFISections(bool EH, bool Debug, bool SFrame) {}
void MCStreamer::emitCFIStartProc(bool IsSimple, SMLoc Loc) {
if (!FrameInfoStack.empty() &&
@@ -838,8 +837,8 @@ static MCSection *getWinCFISection(MCContext &Context, unsigned *NextWinCFIID,
if (TextSec == Context.getObjectFileInfo()->getTextSection())
return MainCFISec;
- const auto *TextSecCOFF = cast<MCSectionCOFF>(TextSec);
- auto *MainCFISecCOFF = cast<MCSectionCOFF>(MainCFISec);
+ const auto *TextSecCOFF = static_cast<const MCSectionCOFF *>(TextSec);
+ auto *MainCFISecCOFF = static_cast<MCSectionCOFF *>(MainCFISec);
unsigned UniqueID = TextSecCOFF->getOrAssignWinCFISectionID(NextWinCFIID);
// If this section is COMDAT, this unwind section should be COMDAT associative
@@ -1314,9 +1313,20 @@ void MCStreamer::emitZerofill(MCSection *, MCSymbol *, uint64_t, Align, SMLoc) {
}
void MCStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, Align ByteAlignment) {}
-void MCStreamer::changeSection(MCSection *Section, uint32_t) {
- CurFrag = &Section->getDummyFragment();
+
+void MCStreamer::changeSection(MCSection *Sec, uint32_t) {
+ CurFrag = &Sec->getDummyFragment();
+ auto *Sym = Sec->getBeginSymbol();
+ if (!Sym || !Sym->isUndefined())
+ return;
+ // In Mach-O, DWARF sections use Begin as a temporary label, requiring a label
+ // definition, unlike section symbols in other file formats.
+ if (getContext().getObjectFileType() == MCContext::IsMachO)
+ emitLabel(Sym);
+ else
+ Sym->setFragment(CurFrag);
}
+
void MCStreamer::emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {}
void MCStreamer::emitBytes(StringRef Data) {}
void MCStreamer::emitBinaryData(StringRef Data) { emitBytes(Data); }
@@ -1358,9 +1368,6 @@ void MCStreamer::switchSection(MCSection *Section, uint32_t Subsection) {
changeSection(Section, Subsection);
SectionStack.back().first = MCSectionSubPair(Section, Subsection);
assert(!Section->hasEnded() && "Section already ended");
- MCSymbol *Sym = Section->getBeginSymbol();
- if (Sym && !Sym->isInSection())
- emitLabel(Sym);
}
}
@@ -1387,9 +1394,6 @@ void MCStreamer::switchSectionNoPrint(MCSection *Section) {
SectionStack.back().second = SectionStack.back().first;
SectionStack.back().first = MCSectionSubPair(Section, 0);
changeSection(Section, 0);
- MCSymbol *Sym = Section->getBeginSymbol();
- if (Sym && !Sym->isInSection())
- emitLabel(Sym);
}
MCSymbol *MCStreamer::endSection(MCSection *Section) {
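
The new MCStreamer::changeSection above binds an undefined section begin symbol directly to the section's first fragment, except on Mach-O where DWARF sections still need a real temporary label; the label emission previously done in switchSection/switchSectionNoPrint is therefore dropped. A minimal standalone sketch of that decision, using hypothetical stand-in types rather than the real MC classes:

    #include <cassert>
    #include <string>

    // Hypothetical stand-ins for MCSymbol/MCFragment state; illustration only.
    enum class ObjectFormat { ELF, COFF, MachO, XCOFF, Wasm };

    struct Fragment {};
    struct Symbol {
      std::string Name;
      bool Defined = false;     // set when an explicit label is emitted
      Fragment *Frag = nullptr; // set when attached to a fragment directly
    };

    // Models the begin-symbol handling performed on a section switch.
    void bindBeginSymbol(Symbol &Begin, Fragment &First, ObjectFormat OF) {
      if (Begin.Defined || Begin.Frag)
        return; // already bound; nothing to do
      if (OF == ObjectFormat::MachO)
        Begin.Defined = true; // Mach-O DWARF wants a real label definition
      else
        Begin.Frag = &First;  // elsewhere the symbol just tracks the fragment
    }

    int main() {
      Fragment F;
      Symbol S{".Ltext_begin"};
      bindBeginSymbol(S, F, ObjectFormat::ELF);
      assert(S.Frag == &F && !S.Defined);
      return 0;
    }
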
diff --git a/llvm/lib/MC/MCTargetOptions.cpp b/llvm/lib/MC/MCTargetOptions.cpp
index bff4b8d..be6d19d 100644
--- a/llvm/lib/MC/MCTargetOptions.cpp
+++ b/llvm/lib/MC/MCTargetOptions.cpp
@@ -19,7 +19,8 @@ MCTargetOptions::MCTargetOptions()
PreserveAsmComments(true), Dwarf64(false),
EmitDwarfUnwind(EmitDwarfUnwindType::Default),
MCUseDwarfDirectory(DefaultDwarfDirectory),
- EmitCompactUnwindNonCanonical(false), PPCUseFullRegisterNames(false) {}
+ EmitCompactUnwindNonCanonical(false), EmitSFrameUnwind(false),
+ PPCUseFullRegisterNames(false) {}
StringRef MCTargetOptions::getABIName() const {
return ABIName;
diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
index 2adc291..ff95ff7 100644
--- a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
+++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
@@ -41,6 +41,7 @@ MCOPT(int, DwarfVersion)
MCOPT(bool, Dwarf64)
MCOPT(EmitDwarfUnwindType, EmitDwarfUnwind)
MCOPT(bool, EmitCompactUnwindNonCanonical)
+MCOPT(bool, EmitSFrameUnwind)
MCOPT(bool, ShowMCInst)
MCOPT(bool, FatalWarnings)
MCOPT(bool, NoWarn)
@@ -105,6 +106,11 @@ llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() {
false)); // By default, use DWARF for non-canonical personalities.
MCBINDOPT(EmitCompactUnwindNonCanonical);
+ static cl::opt<bool> EmitSFrameUnwind(
+ "gsframe", cl::desc("Whether to emit .sframe unwind sections."),
+ cl::init(false));
+ MCBINDOPT(EmitSFrameUnwind);
+
static cl::opt<bool> ShowMCInst(
"asm-show-inst",
cl::desc("Emit internal instruction representation to assembly file"));
@@ -188,6 +194,7 @@ MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() {
Options.X86Sse2Avx = getX86Sse2Avx();
Options.EmitDwarfUnwind = getEmitDwarfUnwind();
Options.EmitCompactUnwindNonCanonical = getEmitCompactUnwindNonCanonical();
+ Options.EmitSFrameUnwind = getEmitSFrameUnwind();
Options.AsSecureLogFile = getAsSecureLogFile();
return Options;
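
For context, the -gsframe flag added above follows the usual cl::opt pattern wrapped by the MCOPT/MCBINDOPT macros. A minimal sketch of the same pattern outside those macros, assuming only the public llvm::cl API (the tool description and printed output are made up for illustration):

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // A boolean option, off by default, mirroring EmitSFrameUnwind.
    static cl::opt<bool>
        EmitSFrame("gsframe",
                   cl::desc("Whether to emit .sframe unwind sections."),
                   cl::init(false));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv, "sframe flag demo\n");
      outs() << "EmitSFrame=" << (EmitSFrame ? "true" : "false") << "\n";
      return 0;
    }
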
diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp
index 5891420c..e3ef111 100644
--- a/llvm/lib/MC/MCWasmStreamer.cpp
+++ b/llvm/lib/MC/MCWasmStreamer.cpp
@@ -58,7 +58,7 @@ void MCWasmStreamer::emitLabelAtPos(MCSymbol *S, SMLoc Loc, MCFragment &F,
void MCWasmStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
MCAssembler &Asm = getAssembler();
- auto *SectionWasm = cast<MCSectionWasm>(Section);
+ auto *SectionWasm = static_cast<const MCSectionWasm *>(Section);
const MCSymbol *Grp = SectionWasm->getGroup();
if (Grp)
Asm.registerSymbol(*Grp);
diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp
index 9369bea..1ffe25c 100644
--- a/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -157,7 +157,8 @@ void MCWinCOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
// Ensure that the first and the second symbols relative to the section are
// the section symbol and the COMDAT symbol.
getAssembler().registerSymbol(*Section->getBeginSymbol());
- if (auto *Sym = cast<MCSectionCOFF>(Section)->getCOMDATSymbol())
+ if (auto *Sym =
+ static_cast<const MCSectionCOFF *>(Section)->getCOMDATSymbol())
getAssembler().registerSymbol(*Sym);
}
diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp
index 63381b4..898ac5d 100644
--- a/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -36,6 +36,20 @@ XCOFFObjectWriter &MCXCOFFStreamer::getWriter() {
return static_cast<XCOFFObjectWriter &>(getAssembler().getWriter());
}
+void MCXCOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
+ MCObjectStreamer::changeSection(Section, Subsection);
+ auto *Sec = static_cast<const MCSectionXCOFF *>(Section);
+  // If symbol_A is the csect itself and has no fragment set while symbol_B
+  // lies inside it, we might miss folding their difference to an absolute
+  // value before adding fixups.
+  // TODO: Currently we only set the fragment for XMC_PR csects and DWARF
+  // sections because we don't have other cases that hit this problem yet.
+ if (Sec->isDwarfSect() || Sec->getMappingClass() == XCOFF::XMC_PR)
+ Sec->getQualNameSymbol()->setFragment(CurFrag);
+}
+
bool MCXCOFFStreamer::emitSymbolAttribute(MCSymbol *Sym,
MCSymbolAttr Attribute) {
auto *Symbol = cast<MCSymbolXCOFF>(Sym);
diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index 48d2fc6..e87696a 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -126,7 +126,8 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S) const {
uint64_t MachObjectWriter::getPaddingSize(const MCAssembler &Asm,
const MCSection *Sec) const {
uint64_t EndAddr = getSectionAddress(Sec) + Asm.getSectionAddressSize(*Sec);
- unsigned Next = cast<MCSectionMachO>(Sec)->getLayoutOrder() + 1;
+ unsigned Next =
+ static_cast<const MCSectionMachO *>(Sec)->getLayoutOrder() + 1;
if (Next >= SectionOrder.size())
return 0;
@@ -259,15 +260,12 @@ void MachObjectWriter::writeSegmentLoadCommand(
}
void MachObjectWriter::writeSection(const MCAssembler &Asm,
- const MCSection &Sec, uint64_t VMAddr,
+ const MCSectionMachO &Sec, uint64_t VMAddr,
uint64_t FileOffset, unsigned Flags,
uint64_t RelocationsStart,
unsigned NumRelocations) {
- uint64_t SectionSize = Asm.getSectionAddressSize(Sec);
- const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
-
// The offset is unused for virtual sections.
- if (Section.isBssSection()) {
+ if (Sec.isBssSection()) {
assert(Asm.getSectionFileSize(Sec) == 0 && "Invalid file size!");
FileOffset = 0;
}
@@ -275,11 +273,11 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm,
// struct section (68 bytes) or
// struct section_64 (80 bytes)
+ uint64_t SectionSize = Asm.getSectionAddressSize(Sec);
uint64_t Start = W.OS.tell();
(void) Start;
-
- writeWithPadding(Section.getName(), 16);
- writeWithPadding(Section.getSegmentName(), 16);
+ writeWithPadding(Sec.getName(), 16);
+ writeWithPadding(Sec.getSegmentName(), 16);
if (is64Bit()) {
W.write<uint64_t>(VMAddr); // address
W.write<uint64_t>(SectionSize); // size
@@ -290,14 +288,14 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm,
assert(isUInt<32>(FileOffset) && "Cannot encode offset of section");
W.write<uint32_t>(FileOffset);
- W.write<uint32_t>(Log2(Section.getAlign()));
+ W.write<uint32_t>(Log2(Sec.getAlign()));
assert((!NumRelocations || isUInt<32>(RelocationsStart)) &&
"Cannot encode offset of relocations");
W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
W.write<uint32_t>(NumRelocations);
W.write<uint32_t>(Flags);
W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
- W.write<uint32_t>(Section.getStubSize()); // reserved2
+ W.write<uint32_t>(Sec.getStubSize()); // reserved2
if (is64Bit())
W.write<uint32_t>(0); // reserved3
@@ -531,7 +529,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
// Report errors for use of .indirect_symbol not in a symbol pointer section
// or stub section.
for (IndirectSymbolData &ISD : IndirectSymbols) {
- const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section);
+ const MCSectionMachO &Section = static_cast<MCSectionMachO &>(*ISD.Section);
if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
@@ -545,7 +543,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
// Bind non-lazy symbol pointers first.
for (auto [IndirectIndex, ISD] : enumerate(IndirectSymbols)) {
- const auto &Section = cast<MCSectionMachO>(*ISD.Section);
+ const auto &Section = static_cast<MCSectionMachO &>(*ISD.Section);
if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
@@ -559,7 +557,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
// Then lazy symbol pointers and symbol stubs.
for (auto [IndirectIndex, ISD] : enumerate(IndirectSymbols)) {
- const auto &Section = cast<MCSectionMachO>(*ISD.Section);
+ const auto &Section = static_cast<MCSectionMachO &>(*ISD.Section);
if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_SYMBOL_STUBS)
@@ -684,13 +682,13 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm) {
for (MCSection &Sec : Asm) {
if (!Sec.isBssSection()) {
SectionOrder.push_back(&Sec);
- cast<MCSectionMachO>(Sec).setLayoutOrder(i++);
+ static_cast<MCSectionMachO &>(Sec).setLayoutOrder(i++);
}
}
for (MCSection &Sec : Asm) {
if (Sec.isBssSection()) {
SectionOrder.push_back(&Sec);
- cast<MCSectionMachO>(Sec).setLayoutOrder(i++);
+ static_cast<MCSectionMachO &>(Sec).setLayoutOrder(i++);
}
}
@@ -808,7 +806,7 @@ uint64_t MachObjectWriter::writeObject() {
}
MCSection *Sec = getContext().getMachOSection("__LLVM", "__cg_profile", 0,
SectionKind::getMetadata());
- llvm::copy(OS.str(), Sec->curFragList()->Head->getContents().data());
+ llvm::copy(OS.str(), Sec->curFragList()->Head->getVarContents().data());
}
unsigned NumSections = Asm.end() - Asm.begin();
@@ -907,7 +905,7 @@ uint64_t MachObjectWriter::writeObject() {
// ... and then the section headers.
uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
for (const MCSection &Section : Asm) {
- const auto &Sec = cast<MCSectionMachO>(Section);
+ const auto &Sec = static_cast<const MCSectionMachO &>(Section);
std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
unsigned NumRelocs = Relocs.size();
uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
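
The repeated cast<MCSectionMachO> to static_cast changes in this file (and in the Wasm/COFF/XCOFF writers below) rely on the writer knowing, by construction, the concrete section type, so the checked LLVM-style cast is unnecessary. A toy sketch of the distinction, with a hypothetical hierarchy rather than the real MC classes:

    #include <cassert>

    // Toy stand-ins for MCSection/MCSectionMachO; illustration only.
    struct Section { virtual ~Section() = default; };
    struct MachOSection : Section { unsigned LayoutOrder = 0; };

    unsigned layoutOrder(const Section &S) {
      // The caller guarantees S really is a MachOSection, so an unchecked
      // static_cast suffices; llvm::cast<> would additionally assert the
      // dynamic type via its custom RTTI (classof).
      return static_cast<const MachOSection &>(S).LayoutOrder;
    }

    int main() {
      MachOSection S;
      S.LayoutOrder = 3;
      assert(layoutOrder(S) == 3);
      return 0;
    }
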
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 3b99af4..bfd6334 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -480,7 +480,7 @@ void WasmObjectWriter::recordRelocation(const MCFragment &F,
// The WebAssembly backend should never generate FKF_IsPCRel fixups
assert(!Fixup.isPCRel());
- const auto &FixupSection = cast<MCSectionWasm>(*F.getParent());
+ const auto &FixupSection = static_cast<MCSectionWasm &>(*F.getParent());
uint64_t C = Target.getConstant();
uint64_t FixupOffset = Asm->getFragmentOffset(F) + Fixup.getOffset();
MCContext &Ctx = getContext();
diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 6ad4334..856850d 100644
--- a/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -373,7 +373,7 @@ void WinCOFFWriter::defineSymbol(const MCSymbol &MCSym) {
COFFSection *Sec = nullptr;
MCSectionCOFF *MCSec = nullptr;
if (Base && Base->getFragment()) {
- MCSec = cast<MCSectionCOFF>(Base->getFragment()->getParent());
+ MCSec = static_cast<MCSectionCOFF *>(Base->getFragment()->getParent());
Sec = SectionMap[MCSec];
}
@@ -1057,7 +1057,8 @@ uint64_t WinCOFFWriter::writeObject() {
continue;
}
- const auto *AssocMCSec = cast<MCSectionCOFF>(&AssocMCSym->getSection());
+ const auto *AssocMCSec =
+ static_cast<const MCSectionCOFF *>(&AssocMCSym->getSection());
assert(SectionMap.count(AssocMCSec));
COFFSection *AssocSec = SectionMap[AssocMCSec];
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 2f6785f..65f543b 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -550,13 +550,13 @@ CsectGroup &XCOFFWriter::getCsectGroup(const MCSectionXCOFF *MCSec) {
static MCSectionXCOFF *getContainingCsect(const MCSymbolXCOFF *XSym) {
if (XSym->isDefined())
- return cast<MCSectionXCOFF>(XSym->getFragment()->getParent());
+ return static_cast<MCSectionXCOFF *>(XSym->getFragment()->getParent());
return XSym->getRepresentedCsect();
}
void XCOFFWriter::executePostLayoutBinding() {
for (const auto &S : *Asm) {
- const auto *MCSec = cast<const MCSectionXCOFF>(&S);
+ auto *MCSec = static_cast<const MCSectionXCOFF *>(&S);
assert(!SectionMap.contains(MCSec) && "Cannot add a section twice.");
// If the name does not fit in the storage provided in the symbol table
@@ -747,7 +747,7 @@ void XCOFFWriter::recordRelocation(const MCFragment &F, const MCFixup &Fixup,
FixedValue = TOCEntryOffset;
}
} else if (Type == XCOFF::RelocationType::R_RBR) {
- MCSectionXCOFF *ParentSec = cast<MCSectionXCOFF>(F.getParent());
+ auto *ParentSec = static_cast<MCSectionXCOFF *>(F.getParent());
assert((SymASec->getMappingClass() == XCOFF::XMC_PR &&
ParentSec->getMappingClass() == XCOFF::XMC_PR) &&
"Only XMC_PR csect may have the R_RBR relocation.");
@@ -768,7 +768,7 @@ void XCOFFWriter::recordRelocation(const MCFragment &F, const MCFixup &Fixup,
}
XCOFFRelocation Reloc = {Index, FixupOffsetInCsect, SignAndSize, Type};
- MCSectionXCOFF *RelocationSec = cast<MCSectionXCOFF>(F.getParent());
+ auto *RelocationSec = static_cast<MCSectionXCOFF *>(F.getParent());
assert(SectionMap.contains(RelocationSec) &&
"Expected containing csect to exist in map.");
SectionMap[RelocationSec]->Relocations.push_back(Reloc);
diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp
index 2579fa3..0f19495 100644
--- a/llvm/lib/Object/IRSymtab.cpp
+++ b/llvm/lib/Object/IRSymtab.cpp
@@ -8,11 +8,11 @@
#include "llvm/Object/IRSymtab.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Comdat.h"
@@ -213,9 +213,10 @@ Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {
return P.first->second;
}
-static DenseSet<StringRef> buildPreservedSymbolsSet(const Triple &TT) {
- DenseSet<StringRef> PreservedSymbolSet(std::begin(PreservedSymbols),
- std::end(PreservedSymbols));
+static StringSet<> buildPreservedSymbolsSet(const Triple &TT) {
+ StringSet<> PreservedSymbolSet;
+ PreservedSymbolSet.insert(std::begin(PreservedSymbols),
+ std::end(PreservedSymbols));
// FIXME: Do we need to pass in ABI fields from TargetOptions?
RTLIB::RuntimeLibcallsInfo Libcalls(TT);
for (RTLIB::LibcallImpl Impl : Libcalls.getLibcallImpls()) {
@@ -280,7 +281,7 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
setStr(Sym.IRName, GV->getName());
- static const DenseSet<StringRef> PreservedSymbolsSet =
+ static const StringSet<> PreservedSymbolsSet =
buildPreservedSymbolsSet(GV->getParent()->getTargetTriple());
bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName());
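
Switching the preserved-symbol container from DenseSet<StringRef> to StringSet<> means the set now owns copies of the key strings instead of referencing external storage, which removes any reliance on the inserted names outliving the set. A small sketch of that ownership property, assuming only the documented llvm::StringSet behavior:

    #include "llvm/ADT/StringSet.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      StringSet<> Preserved; // copies each inserted key into its own storage
      {
        std::string Temp = "__stack_chk_guard";
        Preserved.insert(Temp); // safe: the set keeps its own copy
      } // Temp is destroyed here; the entry remains valid
      outs() << Preserved.contains("__stack_chk_guard") << "\n"; // prints 1
      return 0;
    }
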
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index bb7ccdb..1b111dc 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -84,6 +84,7 @@ MODULE_PASS("global-merge-func", GlobalMergeFuncPass())
MODULE_PASS("globalopt", GlobalOptPass())
MODULE_PASS("globalsplit", GlobalSplitPass())
MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass())
+MODULE_PASS("hipstdpar-math-fixup", HipStdParMathFixupPass())
MODULE_PASS("hipstdpar-select-accelerator-code",
HipStdParAcceleratorCodeSelectionPass())
MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
@@ -119,7 +120,6 @@ MODULE_PASS("module-inline", ModuleInlinerPass())
MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
MODULE_PASS("nsan", NumericalStabilitySanitizerPass())
-MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
MODULE_PASS("openmp-opt", OpenMPOptPass())
MODULE_PASS("openmp-opt-postlink",
OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink))
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 5c7b9e0..886add7 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1295,7 +1295,7 @@ Error IndexedInstrProfReader::readHeader() {
// Writer first writes the length of compressed string, and then the actual
// content.
const char *VTableNamePtr = (const char *)Ptr;
- if (VTableNamePtr > (const char *)DataBuffer->getBufferEnd())
+ if (VTableNamePtr > DataBuffer->getBufferEnd())
return make_error<InstrProfError>(instrprof_error::truncated);
VTableName = StringRef(VTableNamePtr, CompressedVTableNamesLen);
diff --git a/llvm/lib/Support/AArch64AttributeParser.cpp b/llvm/lib/Support/AArch64AttributeParser.cpp
index c675ef2..eed8dba 100644
--- a/llvm/lib/Support/AArch64AttributeParser.cpp
+++ b/llvm/lib/Support/AArch64AttributeParser.cpp
@@ -8,6 +8,7 @@
//===---------------------------------------------------------------------===//
#include "llvm/Support/AArch64AttributeParser.h"
+#include "llvm/Support/AArch64BuildAttributes.h"
std::vector<llvm::SubsectionAndTagToTagName> &
llvm::AArch64AttributeParser::returnTagsNamesMap() {
@@ -19,3 +20,29 @@ llvm::AArch64AttributeParser::returnTagsNamesMap() {
{"aeabi_feature_and_bits", 2, "Tag_Feature_GCS"}};
return TagsNamesMap;
}
+
+llvm::AArch64BuildAttrSubsections llvm::extractBuildAttributesSubsections(
+ const llvm::AArch64AttributeParser &Attributes) {
+
+ llvm::AArch64BuildAttrSubsections SubSections;
+ auto GetPauthValue = [&Attributes](unsigned Tag) {
+ return Attributes.getAttributeValue("aeabi_pauthabi", Tag).value_or(0);
+ };
+ SubSections.Pauth.TagPlatform =
+ GetPauthValue(llvm::AArch64BuildAttributes::TAG_PAUTH_PLATFORM);
+ SubSections.Pauth.TagSchema =
+ GetPauthValue(llvm::AArch64BuildAttributes::TAG_PAUTH_SCHEMA);
+
+ auto GetFeatureValue = [&Attributes](unsigned Tag) {
+ return Attributes.getAttributeValue("aeabi_feature_and_bits", Tag)
+ .value_or(0);
+ };
+ SubSections.AndFeatures |=
+ GetFeatureValue(llvm::AArch64BuildAttributes::TAG_FEATURE_BTI);
+ SubSections.AndFeatures |=
+ GetFeatureValue(llvm::AArch64BuildAttributes::TAG_FEATURE_PAC) << 1;
+ SubSections.AndFeatures |=
+ GetFeatureValue(llvm::AArch64BuildAttributes::TAG_FEATURE_GCS) << 2;
+
+ return SubSections;
+}
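
extractBuildAttributesSubsections above folds the aeabi_feature_and_bits values into a single AndFeatures mask with BTI in bit 0, PAC in bit 1, and GCS in bit 2, matching the shifts in the code. A tiny standalone sketch of that packing, with hypothetical names:

    #include <cassert>
    #include <cstdint>

    // Bit positions mirror the shifts used in extractBuildAttributesSubsections.
    enum FeatureBit : uint32_t { BTI = 1u << 0, PAC = 1u << 1, GCS = 1u << 2 };

    uint32_t packAndFeatures(bool HasBTI, bool HasPAC, bool HasGCS) {
      uint32_t Mask = 0;
      if (HasBTI) Mask |= BTI;
      if (HasPAC) Mask |= PAC;
      if (HasGCS) Mask |= GCS;
      return Mask;
    }

    int main() {
      assert(packAndFeatures(true, true, false) == 0x3u); // BTI | PAC
      return 0;
    }
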
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index d5c3cba..8491633 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -68,11 +68,19 @@ template class LLVM_EXPORT_TEMPLATE basic_parser<float>;
template class LLVM_EXPORT_TEMPLATE basic_parser<std::string>;
template class LLVM_EXPORT_TEMPLATE basic_parser<char>;
-template class opt<unsigned>;
-template class opt<int>;
-template class opt<std::string>;
-template class opt<char>;
-template class opt<bool>;
+#if !(defined(LLVM_ENABLE_LLVM_EXPORT_ANNOTATIONS) && defined(_MSC_VER))
+// Only instantiate opt<std::string> when not building a Windows DLL. When
+// exporting opt<std::string>, MSVC implicitly exports symbols for
+// std::basic_string through transitive inheritance via std::string. These
+// symbols may appear in clients, leading to duplicate symbol conflicts.
+template class LLVM_EXPORT_TEMPLATE opt<std::string>;
+#endif
+
+template class LLVM_EXPORT_TEMPLATE opt<bool>;
+template class LLVM_EXPORT_TEMPLATE opt<char>;
+template class LLVM_EXPORT_TEMPLATE opt<int>;
+template class LLVM_EXPORT_TEMPLATE opt<unsigned>;
+
} // namespace cl
} // namespace llvm
@@ -95,6 +103,15 @@ void parser<float>::anchor() {}
void parser<std::string>::anchor() {}
void parser<char>::anchor() {}
+// These anchor functions instantiate opt<T> and reference its virtual
+// destructor to ensure MSVC exports the corresponding vtable and typeinfo when
+// building a Windows DLL. Without an explicit reference, MSVC may omit the
+// instantiation at link time even if it is marked DLL-export.
+void opt_bool_anchor() { opt<bool> anchor{""}; }
+void opt_char_anchor() { opt<char> anchor{""}; }
+void opt_int_anchor() { opt<int> anchor{""}; }
+void opt_unsigned_anchor() { opt<unsigned> anchor{""}; }
+
//===----------------------------------------------------------------------===//
const static size_t DefaultPad = 2;
diff --git a/llvm/lib/Support/Debug.cpp b/llvm/lib/Support/Debug.cpp
index 5bb04d0..b6f338f 100644
--- a/llvm/lib/Support/Debug.cpp
+++ b/llvm/lib/Support/Debug.cpp
@@ -24,11 +24,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Debug.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Support/raw_ostream.h"
+#include <utility>
#include "DebugOptions.h"
@@ -38,27 +40,62 @@
using namespace llvm;
+/// Parse a debug type string into a pair of the debug type and the debug level.
+/// The expected format is "type[:level]", where the level is an optional
+/// integer.
+static std::pair<std::string, std::optional<int>>
+parseDebugType(StringRef DbgType) {
+ std::optional<int> Level;
+ size_t ColonPos = DbgType.find(':');
+ if (ColonPos != StringRef::npos) {
+ StringRef LevelStr = DbgType.substr(ColonPos + 1);
+ DbgType = DbgType.take_front(ColonPos);
+ if (LevelStr.empty())
+ Level = 0;
+ else {
+      int ParsedLevel;
+      if (to_integer(LevelStr, ParsedLevel, 10))
+        Level = ParsedLevel;
+ }
+ }
+ return std::make_pair(DbgType.str(), Level);
+}
+
// Even though LLVM might be built with NDEBUG, define symbols that the code
// built without NDEBUG can depend on via the llvm/Support/Debug.h header.
namespace llvm {
/// Exported boolean set by the -debug option.
bool DebugFlag = false;
-static ManagedStatic<std::vector<std::string>> CurrentDebugType;
+/// The currently enabled debug types, each with an optional debug level.
+/// The debug level controls the verbosity of the debug output for that type.
+/// Level 0 is special: it acts as an opt-out for this specific debug type.
+/// If a level is provided, debug output is enabled only when the level
+/// requested at the use site does not exceed the user-specified level.
+static ManagedStatic<std::vector<std::pair<std::string, std::optional<int>>>>
+ CurrentDebugType;
/// Return true if the specified string is the debug type
/// specified on the command line, or if none was specified on the command line
/// with the -debug-only=X option.
-bool isCurrentDebugType(const char *DebugType) {
+bool isCurrentDebugType(const char *DebugType, int Level) {
if (CurrentDebugType->empty())
return true;
+  // Track whether at least one of the listed debug types is enabled; this
+  // makes it possible to opt out of specific debug types while leaving all
+  // the others enabled.
+ bool HasEnabledDebugType = false;
// See if DebugType is in list. Note: do not use find() as that forces us to
// unnecessarily create an std::string instance.
- for (auto &d : *CurrentDebugType) {
- if (d == DebugType)
+ for (auto &D : *CurrentDebugType) {
+ HasEnabledDebugType =
+ HasEnabledDebugType || (!D.second.has_value() || D.second.value() > 0);
+ if (D.first != DebugType)
+ continue;
+ if (!D.second.has_value())
return true;
+ return D.second >= Level;
}
- return false;
+ return !HasEnabledDebugType;
}
/// Set the current debug type, as if the -debug-only=X
@@ -73,8 +110,11 @@ void setCurrentDebugType(const char *Type) {
void setCurrentDebugTypes(const char **Types, unsigned Count) {
CurrentDebugType->clear();
- llvm::append_range(*CurrentDebugType, ArrayRef(Types, Count));
+ CurrentDebugType->reserve(Count);
+ for (const char *Type : ArrayRef(Types, Count))
+ CurrentDebugType->push_back(parseDebugType(Type));
}
+
} // namespace llvm
// All Debug.h functionality is a no-op in NDEBUG mode.
@@ -114,10 +154,10 @@ struct DebugOnlyOpt {
if (Val.empty())
return;
DebugFlag = true;
- SmallVector<StringRef,8> dbgTypes;
- StringRef(Val).split(dbgTypes, ',', -1, false);
- for (auto dbgType : dbgTypes)
- CurrentDebugType->push_back(std::string(dbgType));
+ SmallVector<StringRef, 8> DbgTypes;
+ StringRef(Val).split(DbgTypes, ',', -1, false);
+ for (auto DbgType : DbgTypes)
+ CurrentDebugType->push_back(parseDebugType(DbgType));
}
};
} // namespace
@@ -129,8 +169,13 @@ struct CreateDebugOnly {
static void *call() {
return new cl::opt<DebugOnlyOpt, true, cl::parser<std::string>>(
"debug-only",
- cl::desc("Enable a specific type of debug output (comma separated list "
- "of types)"),
+ cl::desc(
+ "Enable a specific type of debug output (comma separated list "
+ "of types using the format \"type[:level]\", where the level "
+ "is an optional integer. The level can be set to 1, 2, 3, etc. to "
+ "control the verbosity of the output. Setting a debug-type level "
+ "to zero acts as an opt-out for this specific debug-type without "
+ "affecting the others."),
cl::Hidden, cl::value_desc("debug string"),
cl::location(DebugOnlyOptLoc), cl::ValueRequired);
}
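
The -debug-only change above accepts "type[:level]" specifiers: no level means always enabled, a positive level enables output up to that verbosity, and level 0 (including a bare "type:") opts that type out while leaving the rest enabled. A simplified standalone model of the parsing (well-formed input only; parseOne is a made-up helper, not the LLVM function):

    #include <cassert>
    #include <optional>
    #include <string>
    #include <utility>

    std::pair<std::string, std::optional<int>> parseOne(const std::string &Spec) {
      auto Colon = Spec.find(':');
      if (Colon == std::string::npos)
        return {Spec, std::nullopt};       // plain type: always enabled
      std::string Type = Spec.substr(0, Colon);
      std::string LevelStr = Spec.substr(Colon + 1);
      if (LevelStr.empty())
        return {Type, 0};                  // "type:" means opt out (level 0)
      return {Type, std::stoi(LevelStr)};  // "type:N" caps the verbosity at N
    }

    int main() {
      assert(parseOne("isel").second == std::nullopt);
      assert(*parseOne("isel:2").second == 2); // enables levels up to 2
      assert(*parseOne("isel:").second == 0);  // opts "isel" out
      return 0;
    }
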
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index c4b43e1..c52487a 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -176,6 +176,9 @@ public:
std::optional<AArch64PACKey::ID> PACKey,
uint64_t PACDisc, Register PACAddrDisc);
+ // Emit the sequence for PAC.
+ void emitPtrauthSign(const MachineInstr *MI);
+
// Emit the sequence to compute the discriminator.
//
// The returned register is either unmodified AddrDisc or ScratchReg.
@@ -2175,6 +2178,37 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(
OutStreamer->emitLabel(EndSym);
}
+void AArch64AsmPrinter::emitPtrauthSign(const MachineInstr *MI) {
+ Register Val = MI->getOperand(1).getReg();
+ auto Key = (AArch64PACKey::ID)MI->getOperand(2).getImm();
+ uint64_t Disc = MI->getOperand(3).getImm();
+ Register AddrDisc = MI->getOperand(4).getReg();
+ bool AddrDiscKilled = MI->getOperand(4).isKill();
+
+ // As long as at least one of Val and AddrDisc is in GPR64noip, a scratch
+ // register is available.
+ Register ScratchReg = Val == AArch64::X16 ? AArch64::X17 : AArch64::X16;
+ assert(ScratchReg != AddrDisc &&
+ "Neither X16 nor X17 is available as a scratch register");
+
+  // Compute the PAC discriminator.
+ assert(isUInt<16>(Disc));
+ Register DiscReg = emitPtrauthDiscriminator(
+ Disc, AddrDisc, ScratchReg, /*MayUseAddrAsScratch=*/AddrDiscKilled);
+ bool IsZeroDisc = DiscReg == AArch64::XZR;
+ unsigned Opc = getPACOpcodeForKey(Key, IsZeroDisc);
+
+ // paciza x16 ; if IsZeroDisc
+ // pacia x16, x17 ; if !IsZeroDisc
+ MCInst PACInst;
+ PACInst.setOpcode(Opc);
+ PACInst.addOperand(MCOperand::createReg(Val));
+ PACInst.addOperand(MCOperand::createReg(Val));
+ if (!IsZeroDisc)
+ PACInst.addOperand(MCOperand::createReg(DiscReg));
+ EmitToStreamer(*OutStreamer, PACInst);
+}
+
void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
bool IsCall = MI->getOpcode() == AArch64::BLRA;
unsigned BrTarget = MI->getOperand(0).getReg();
@@ -2890,6 +2924,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
MI->getOperand(4).getImm(), MI->getOperand(5).getReg());
return;
+ case AArch64::PAC:
+ emitPtrauthSign(MI);
+ return;
+
case AArch64::LOADauthptrstatic:
LowerLOADauthptrstatic(*MI);
return;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 02ee517..7b49754 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -164,6 +164,9 @@ static cl::opt<bool> UseFEATCPACodegen(
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
+/// Value type used for NZCV flags.
+static constexpr MVT FlagsVT = MVT::i32;
+
static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7};
@@ -3098,6 +3101,83 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr &MI,
return BB;
}
+// Helper function to find the instruction that defined a virtual register,
+// looking through COPY and SUBREG_TO_REG. Returns nullptr if no such
+// instruction can be found.
+static const MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI,
+ Register Reg) {
+ while (Reg.isVirtual()) {
+ MachineInstr *DefMI = MRI.getVRegDef(Reg);
+ assert(DefMI && "Virtual register definition not found");
+ unsigned Opcode = DefMI->getOpcode();
+
+ if (Opcode == AArch64::COPY) {
+ Reg = DefMI->getOperand(1).getReg();
+ // Vreg is defined by copying from physreg.
+ if (Reg.isPhysical())
+ return DefMI;
+ continue;
+ }
+ if (Opcode == AArch64::SUBREG_TO_REG) {
+ Reg = DefMI->getOperand(2).getReg();
+ continue;
+ }
+
+ return DefMI;
+ }
+ return nullptr;
+}
+
+void AArch64TargetLowering::fixupPtrauthDiscriminator(
+ MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp,
+ MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ Register AddrDisc = AddrDiscOp.getReg();
+ int64_t IntDisc = IntDiscOp.getImm();
+ assert(IntDisc == 0 && "Blend components are already expanded");
+
+ const MachineInstr *DiscMI = stripVRegCopies(MRI, AddrDisc);
+ if (DiscMI) {
+ switch (DiscMI->getOpcode()) {
+ case AArch64::MOVKXi:
+      // blend(addr, imm), which is lowered as "MOVK addr, #imm, LSL #48".
+ // #imm should be an immediate and not a global symbol, for example.
+ if (DiscMI->getOperand(2).isImm() &&
+ DiscMI->getOperand(3).getImm() == 48) {
+ AddrDisc = DiscMI->getOperand(1).getReg();
+ IntDisc = DiscMI->getOperand(2).getImm();
+ }
+ break;
+ case AArch64::MOVi32imm:
+ case AArch64::MOVi64imm:
+ // Small immediate integer constant passed via VReg.
+ if (DiscMI->getOperand(1).isImm() &&
+ isUInt<16>(DiscMI->getOperand(1).getImm())) {
+ AddrDisc = AArch64::NoRegister;
+ IntDisc = DiscMI->getOperand(1).getImm();
+ }
+ break;
+ }
+ }
+
+ // For uniformity, always use NoRegister, as XZR is not necessarily contained
+ // in the requested register class.
+ if (AddrDisc == AArch64::XZR)
+ AddrDisc = AArch64::NoRegister;
+
+ // Make sure AddrDisc operand respects the register class imposed by MI.
+ if (AddrDisc && MRI.getRegClass(AddrDisc) != AddrDiscRC) {
+ Register TmpReg = MRI.createVirtualRegister(AddrDiscRC);
+ BuildMI(*BB, MI, DL, TII->get(AArch64::COPY), TmpReg).addReg(AddrDisc);
+ AddrDisc = TmpReg;
+ }
+
+ AddrDiscOp.setReg(AddrDisc);
+ IntDiscOp.setImm(IntDisc);
+}
+
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
@@ -3196,6 +3276,11 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
case AArch64::MOVT_TIZ_PSEUDO:
return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true);
+
+ case AArch64::PAC:
+ fixupPtrauthDiscriminator(MI, BB, MI.getOperand(3), MI.getOperand(4),
+ &AArch64::GPR64noipRegClass);
+ return BB;
}
}
@@ -3451,7 +3536,7 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
}
unsigned Opcode =
IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
- return DAG.getNode(Opcode, DL, {MVT::i32, MVT::Other}, {Chain, LHS, RHS});
+ return DAG.getNode(Opcode, DL, {FlagsVT, MVT::Other}, {Chain, LHS, RHS});
}
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
@@ -3465,7 +3550,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
}
- return DAG.getNode(AArch64ISD::FCMP, DL, MVT::i32, LHS, RHS);
+ return DAG.getNode(AArch64ISD::FCMP, DL, FlagsVT, LHS, RHS);
}
// The CMP instruction is just an alias for SUBS, and representing it as
@@ -3490,7 +3575,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
// of the signed comparisons.
const SDValue ANDSNode =
- DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(VT, MVT_CC),
+ DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(VT, FlagsVT),
LHS.getOperand(0), LHS.getOperand(1));
// Replace all users of (and X, Y) with newly generated (ands X, Y)
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
@@ -3501,7 +3586,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
- return DAG.getNode(Opcode, DL, DAG.getVTList(VT, MVT_CC), LHS, RHS)
+ return DAG.getNode(Opcode, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS)
.getValue(1);
}
@@ -3597,7 +3682,7 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
- return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
+ return DAG.getNode(Opcode, DL, FlagsVT, LHS, RHS, NZCVOp, Condition, CCOp);
}
/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
@@ -4036,7 +4121,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
// Check that the result fits into a 32-bit integer.
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
+ SDVTList VTs = DAG.getVTList(MVT::i64, FlagsVT);
if (IsSigned) {
// cmp xreg, wreg, sxtw
SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
@@ -4059,12 +4144,12 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(63, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
+ SDVTList VTs = DAG.getVTList(MVT::i64, FlagsVT);
Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
.getValue(1);
} else {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
+ SDVTList VTs = DAG.getVTList(MVT::i64, FlagsVT);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs,
DAG.getConstant(0, DL, MVT::i64),
@@ -4075,7 +4160,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
} // switch (...)
if (Opc) {
- SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
+ SDVTList VTs = DAG.getVTList(Op->getValueType(0), FlagsVT);
// Emit the AArch64 operation with overflow check.
Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
@@ -4177,7 +4262,7 @@ static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) {
SDValue Op0 = Invert ? DAG.getConstant(0, DL, VT) : Value;
SDValue Op1 = Invert ? Value : DAG.getConstant(1, DL, VT);
SDValue Cmp =
- DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::Glue), Op0, Op1);
+ DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), Op0, Op1);
return Cmp.getValue(1);
}
@@ -4220,16 +4305,15 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG, InvertCarry);
SDLoc DL(Op);
- SDVTList VTs = DAG.getVTList(VT0, VT1);
- SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLHS,
+ SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, FlagsVT), OpLHS,
OpRHS, OpCarryIn);
SDValue OutFlag =
IsSigned ? overflowFlagToValue(Sum.getValue(1), VT1, DAG)
: carryFlagToValue(Sum.getValue(1), VT1, DAG, InvertCarry);
- return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag);
+ return DAG.getMergeValues({Sum, OutFlag}, DL);
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
@@ -4254,8 +4338,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
Overflow =
DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, FVal, TVal, CCVal, Overflow);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Value, Overflow);
+ return DAG.getMergeValues({Value, Overflow}, DL);
}
// Prefetch operands are:
@@ -6813,7 +6896,8 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
- {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
+ {StoreNode->getChain(), DAG.getBitcast(MVT::v2i64, Lo),
+ DAG.getBitcast(MVT::v2i64, Hi), StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
@@ -7037,9 +7121,8 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(0));
// Generate SUBS & CSEL.
- SDValue Cmp =
- DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
- Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+ Op.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
Cmp.getValue(1));
@@ -11108,7 +11191,7 @@ SDValue AArch64TargetLowering::LowerSETCCCARRY(SDValue Op,
SDValue Carry = Op.getOperand(2);
// SBCS uses a carry not a borrow so the carry flag should be inverted first.
SDValue InvCarry = valueToCarryFlag(Carry, DAG, true);
- SDValue Cmp = DAG.getNode(AArch64ISD::SBCS, DL, DAG.getVTList(VT, MVT::Glue),
+ SDValue Cmp = DAG.getNode(AArch64ISD::SBCS, DL, DAG.getVTList(VT, FlagsVT),
LHS, RHS, InvCarry);
EVT OpVT = Op.getValueType();
@@ -12441,10 +12524,10 @@ SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
// Get NZCV register. Only update chain when copyfrom is glued.
if (Glue.getNode()) {
- Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32, Glue);
+ Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, FlagsVT, Glue);
Chain = Glue.getValue(1);
} else
- Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32);
+ Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, FlagsVT);
// Extract CC code.
SDValue CC = getSETCC(Cond, Glue, DL, DAG);
@@ -18020,11 +18103,14 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
unsigned ShlAmt = C2->getZExtValue();
if (auto ShouldADD = *N->user_begin();
ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
- if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
- unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
- if ((1ULL << ShlAmt) == ByteVT &&
- isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
- return false;
+ if (auto Load = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
+ EVT MemVT = Load->getMemoryVT();
+
+ if (Load->getValueType(0).isScalableVector())
+ return (8ULL << ShlAmt) != MemVT.getScalarSizeInBits();
+
+ if (isIndexedLoadLegal(ISD::PRE_INC, MemVT))
+ return (8ULL << ShlAmt) != MemVT.getFixedSizeInBits();
}
}
}
@@ -18593,7 +18679,7 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
Created.push_back(And.getNode());
} else {
SDValue CCVal = DAG.getConstant(AArch64CC::MI, DL, MVT_CC);
- SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ SDVTList VTs = DAG.getVTList(VT, FlagsVT);
SDValue Negs = DAG.getNode(AArch64ISD::SUBS, DL, VTs, Zero, N0);
SDValue AndPos = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne);
@@ -19482,10 +19568,10 @@ static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
// can select to CCMN to avoid the extra mov
SDValue AbsOp1 =
DAG.getConstant(Op1->getAPIntValue().abs(), DL, Op1->getValueType(0));
- CCmp = DAG.getNode(AArch64ISD::CCMN, DL, MVT_CC, Cmp1.getOperand(0), AbsOp1,
- NZCVOp, Condition, Cmp0);
+ CCmp = DAG.getNode(AArch64ISD::CCMN, DL, FlagsVT, Cmp1.getOperand(0),
+ AbsOp1, NZCVOp, Condition, Cmp0);
} else {
- CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
+ CCmp = DAG.getNode(AArch64ISD::CCMP, DL, FlagsVT, Cmp1.getOperand(0),
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
}
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
@@ -25134,8 +25220,9 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
if (!TReassocOp && !FReassocOp)
return SDValue();
- SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
- DAG.getVTList(VT, MVT_CC), CmpOpOther, SubsOp);
+ SDValue NewCmp =
+ DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
+ DAG.getVTList(VT, FlagsVT), CmpOpOther, SubsOp);
auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) {
if (!ReassocOp)
@@ -27161,7 +27248,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
: AArch64SysReg::RNDRRS);
SDLoc DL(N);
SDValue A = DAG.getNode(
- AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::i32, MVT::Other),
+ AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, FlagsVT, MVT::Other),
N->getOperand(0), DAG.getConstant(Register, DL, MVT::i32));
SDValue B = DAG.getNode(
AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
@@ -27907,16 +27994,16 @@ void AArch64TargetLowering::ReplaceNodeResults(
MemVT.getScalarSizeInBits() == 32u ||
MemVT.getScalarSizeInBits() == 64u)) {
+ EVT HalfVT = MemVT.getHalfNumVectorElementsVT(*DAG.getContext());
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::LDNP, SDLoc(N),
- DAG.getVTList({MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
- MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
- MVT::Other}),
+ DAG.getVTList({MVT::v2i64, MVT::v2i64, MVT::Other}),
{LoadNode->getChain(), LoadNode->getBasePtr()},
LoadNode->getMemoryVT(), LoadNode->getMemOperand());
SDValue Pair = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), MemVT,
- Result.getValue(0), Result.getValue(1));
+ DAG.getBitcast(HalfVT, Result.getValue(0)),
+ DAG.getBitcast(HalfVT, Result.getValue(1)));
Results.append({Pair, Result.getValue(2) /* Chain */});
return;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index d8403c2..95d0e3b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -182,6 +182,13 @@ public:
MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ /// Replace (0, vreg) discriminator components with the operands of blend
+ /// or with (immediate, NoRegister) when possible.
+ void fixupPtrauthDiscriminator(MachineInstr &MI, MachineBasicBlock *BB,
+ MachineOperand &IntDiscOp,
+ MachineOperand &AddrDiscOp,
+ const TargetRegisterClass *AddrDiscRC) const;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
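
fixupPtrauthDiscriminator, declared above, recognizes a discriminator produced by blending an address with a 16-bit integer, which on AArch64 is materialized as "movk addr, #imm, lsl #48", i.e. the integer replaces the top 16 bits of the address discriminator. A small sketch of that blend semantics (the helper name is made up; this is not part of the LLVM API):

    #include <cassert>
    #include <cstdint>

    // blend(addr, imm): put the 16-bit integer discriminator in bits [63:48].
    uint64_t blendDiscriminator(uint64_t AddrDisc, uint16_t IntDisc) {
      return (AddrDisc & 0x0000FFFFFFFFFFFFULL) |
             (static_cast<uint64_t>(IntDisc) << 48);
    }

    int main() {
      assert(blendDiscriminator(0x0000123456789abcULL, 0xbeef) ==
             0xbeef123456789abcULL);
      return 0;
    }
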
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index bc57537..8685d7a0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -20,7 +20,6 @@
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -36,7 +35,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -533,8 +531,9 @@ bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
MBP.LHS = LastInst->getOperand(0);
MBP.RHS = MachineOperand::CreateImm(0);
- MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
- : MachineBranchPredicate::PRED_EQ;
+ MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
+ ? MachineBranchPredicate::PRED_NE
+ : MachineBranchPredicate::PRED_EQ;
return false;
}
@@ -7353,9 +7352,6 @@ bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
- case AArch64MachineCombinerPattern::GATHER_LANE_i32:
- case AArch64MachineCombinerPattern::GATHER_LANE_i16:
- case AArch64MachineCombinerPattern::GATHER_LANE_i8:
return true;
} // end switch (Pattern)
return false;
@@ -7396,252 +7392,11 @@ static bool getMiscPatterns(MachineInstr &Root,
return false;
}
-static bool getGatherPattern(MachineInstr &Root,
- SmallVectorImpl<unsigned> &Patterns,
- unsigned LoadLaneOpCode, unsigned NumLanes) {
- const MachineFunction *MF = Root.getMF();
-
- // Early exit if optimizing for size.
- if (MF->getFunction().hasMinSize())
- return false;
-
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
-
- // The root of the pattern must load into the last lane of the vector.
- if (Root.getOperand(2).getImm() != NumLanes - 1)
- return false;
-
- // Check that we have load into all lanes except lane 0.
- // For each load we also want to check that:
- // 1. It has a single non-debug use (since we will be replacing the virtual
- // register)
- // 2. That the addressing mode only uses a single offset register.
- auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
- auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
- SmallSet<unsigned, 4> RemainingLanes(Range.begin(), Range.end());
- while (!RemainingLanes.empty() && CurrInstr &&
- CurrInstr->getOpcode() == LoadLaneOpCode &&
- MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
- CurrInstr->getNumOperands() == 4) {
- RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
- CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
- }
-
- if (!RemainingLanes.empty())
- return false;
-
- // Match the SUBREG_TO_REG sequence.
- if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
- return false;
-
- // Verify that the subreg to reg loads an integer into the first lane.
- auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
- unsigned SingleLaneSizeInBits = 128 / NumLanes;
- if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
- return false;
-
- // Verify that it also has a single non debug use.
- if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
- return false;
-
- switch (NumLanes) {
- case 4:
- Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
- break;
- case 8:
- Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16);
- break;
- case 16:
- Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8);
- break;
- default:
- llvm_unreachable("Got bad number of lanes for gather pattern.");
- }
-
- return true;
-}
-
-/// Search for patterns where we use LD1 instructions to load into
-/// separate lanes of an 128 bit Neon register. We can increase Memory Level
-/// Parallelism by loading into 2 Neon registers instead.
-static bool getLoadPatterns(MachineInstr &Root,
- SmallVectorImpl<unsigned> &Patterns) {
-
- // The pattern searches for loads into single lanes.
- switch (Root.getOpcode()) {
- case AArch64::LD1i32:
- return getGatherPattern(Root, Patterns, Root.getOpcode(), 4);
- case AArch64::LD1i16:
- return getGatherPattern(Root, Patterns, Root.getOpcode(), 8);
- case AArch64::LD1i8:
- return getGatherPattern(Root, Patterns, Root.getOpcode(), 16);
- default:
- return false;
- }
-}
-
-static void
-generateGatherPattern(MachineInstr &Root,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<Register, unsigned> &InstrIdxForVirtReg,
- unsigned Pattern, unsigned NumLanes) {
-
- MachineFunction &MF = *Root.getParent()->getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
-
- // Gather the initial load instructions to build the pattern
- SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
- MachineInstr *CurrInstr = &Root;
- for (unsigned i = 0; i < NumLanes - 1; ++i) {
- LoadToLaneInstrs.push_back(CurrInstr);
- CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
- }
-
- // Sort the load instructions according to the lane.
- llvm::sort(LoadToLaneInstrs,
- [](const MachineInstr *A, const MachineInstr *B) {
- return A->getOperand(2).getImm() > B->getOperand(2).getImm();
- });
-
- MachineInstr *SubregToReg = CurrInstr;
- LoadToLaneInstrs.push_back(
- MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
- auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
-
- const TargetRegisterClass *FPR128RegClass =
- MRI.getRegClass(Root.getOperand(0).getReg());
-
- auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr,
- Register SrcRegister, unsigned Lane,
- Register OffsetRegister) {
- auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
- MachineInstrBuilder LoadIndexIntoRegister =
- BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
- NewRegister)
- .addReg(SrcRegister)
- .addImm(Lane)
- .addReg(OffsetRegister, getKillRegState(true));
- InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
- InsInstrs.push_back(LoadIndexIntoRegister);
- return NewRegister;
- };
-
- // Helper to create load instruction based on opcode
- auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
- Register OffsetReg) -> MachineInstrBuilder {
- unsigned Opcode;
- switch (NumLanes) {
- case 4:
- Opcode = AArch64::LDRSui;
- break;
- case 8:
- Opcode = AArch64::LDRHui;
- break;
- case 16:
- Opcode = AArch64::LDRBui;
- break;
- default:
- llvm_unreachable(
- "Got unsupported number of lanes in machine-combiner gather pattern");
- }
- // Immediate offset load
- return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
- .addReg(OffsetReg)
- .addImm(0); // immediate offset
- };
-
- // Load the remaining lanes into register 0.
- auto LanesToLoadToReg0 =
- llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
- LoadToLaneInstrsAscending.begin() + NumLanes / 2);
- auto PrevReg = SubregToReg->getOperand(0).getReg();
- for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
- PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
- LoadInstr->getOperand(3).getReg());
- DelInstrs.push_back(LoadInstr);
- }
- auto LastLoadReg0 = PrevReg;
-
- // First load into register 1. Perform a LDRSui to zero out the upper lanes in
- // a single instruction.
- auto Lane0Load = *LoadToLaneInstrsAscending.begin();
- auto OriginalSplitLoad =
- *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
- auto DestRegForMiddleIndex = MRI.createVirtualRegister(
- MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
-
- MachineInstrBuilder MiddleIndexLoadInstr =
- CreateLoadInstruction(NumLanes, DestRegForMiddleIndex,
- OriginalSplitLoad->getOperand(3).getReg());
-
- InstrIdxForVirtReg.insert(
- std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
- InsInstrs.push_back(MiddleIndexLoadInstr);
- DelInstrs.push_back(OriginalSplitLoad);
-
- // Subreg To Reg instruction for register 1.
- auto DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
- unsigned SubregType;
- switch (NumLanes) {
- case 4:
- SubregType = AArch64::ssub;
- break;
- case 8:
- SubregType = AArch64::hsub;
- break;
- case 16:
- SubregType = AArch64::bsub;
- break;
- default:
- llvm_unreachable(
- "Got invalid NumLanes for machine-combiner gather pattern");
- }
-
- auto SubRegToRegInstr =
- BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
- DestRegForSubregToReg)
- .addImm(0)
- .addReg(DestRegForMiddleIndex, getKillRegState(true))
- .addImm(SubregType);
- InstrIdxForVirtReg.insert(
- std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
- InsInstrs.push_back(SubRegToRegInstr);
-
- // Load remaining lanes into register 1.
- auto LanesToLoadToReg1 =
- llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
- LoadToLaneInstrsAscending.end());
- PrevReg = SubRegToRegInstr->getOperand(0).getReg();
- for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
- PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
- LoadInstr->getOperand(3).getReg());
- if (Index == NumLanes / 2 - 2) {
- break;
- }
- DelInstrs.push_back(LoadInstr);
- }
- auto LastLoadReg1 = PrevReg;
-
- // Create the final zip instruction to combine the results.
- MachineInstrBuilder ZipInstr =
- BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
- Root.getOperand(0).getReg())
- .addReg(LastLoadReg0)
- .addReg(LastLoadReg1);
- InsInstrs.push_back(ZipInstr);
-}
-
CombinerObjective
AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
switch (Pattern) {
case AArch64MachineCombinerPattern::SUBADD_OP1:
case AArch64MachineCombinerPattern::SUBADD_OP2:
- case AArch64MachineCombinerPattern::GATHER_LANE_i32:
- case AArch64MachineCombinerPattern::GATHER_LANE_i16:
- case AArch64MachineCombinerPattern::GATHER_LANE_i8:
return CombinerObjective::MustReduceDepth;
default:
return TargetInstrInfo::getCombinerObjective(Pattern);
@@ -7671,10 +7426,6 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getMiscPatterns(Root, Patterns))
return true;
- // Load patterns
- if (getLoadPatterns(Root, Patterns))
- return true;
-
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
DoRegPressureReduce);
}
@@ -8930,21 +8681,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
break;
}
- case AArch64MachineCombinerPattern::GATHER_LANE_i32: {
- generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
- Pattern, 4);
- break;
- }
- case AArch64MachineCombinerPattern::GATHER_LANE_i16: {
- generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
- Pattern, 8);
- break;
- }
- case AArch64MachineCombinerPattern::GATHER_LANE_i8: {
- generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
- Pattern, 16);
- break;
- }
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 02734866..7c255da 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -172,10 +172,6 @@ enum AArch64MachineCombinerPattern : unsigned {
FMULv8i16_indexed_OP2,
FNMADD,
-
- GATHER_LANE_i32,
- GATHER_LANE_i16,
- GATHER_LANE_i8
};
class AArch64InstrInfo final : public AArch64GenInstrInfo {
const AArch64RegisterInfo RI;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9f8a257..07cacfa 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -430,26 +430,27 @@ def UseWzrToVecMove : Predicate<"Subtarget->useWzrToVecMove()">;
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
- SDTCisInt<0>, SDTCisVT<1, i32>]>;
+ SDTCisInt<0>,
+ SDTCisVT<1, FlagsVT>]>;
// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisInt<0>,
- SDTCisVT<3, i32>]>;
+ SDTCisVT<3, FlagsVT>]>;
// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>,
- SDTCisVT<1, i32>,
- SDTCisVT<4, i32>]>;
+ SDTCisVT<1, FlagsVT>,
+ SDTCisVT<4, FlagsVT>]>;
def SDT_AArch64Brcond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
- SDTCisVT<2, i32>]>;
+ SDTCisVT<2, FlagsVT>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, OtherVT>]>;
@@ -458,22 +459,22 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisInt<3>,
- SDTCisVT<4, i32>]>;
+ SDTCisVT<4, FlagsVT>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
- [SDTCisVT<0, i32>,
+ [SDTCisVT<0, FlagsVT>,
SDTCisInt<1>,
SDTCisSameAs<1, 2>,
SDTCisInt<3>,
SDTCisInt<4>,
SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
- [SDTCisVT<0, i32>,
+ [SDTCisVT<0, FlagsVT>,
SDTCisFP<1>,
SDTCisSameAs<1, 2>,
SDTCisInt<3>,
SDTCisInt<4>,
SDTCisVT<5, i32>]>;
-def SDT_AArch64FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+def SDT_AArch64FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, FlagsVT>,
SDTCisFP<1>,
SDTCisSameAs<2, 1>]>;
def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
@@ -518,10 +519,10 @@ def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v2i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
// Generates the general dynamic sequences, i.e.
// adrp x0, :tlsdesc:var
@@ -1124,10 +1125,10 @@ def AArch64probedalloca
SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPMayStore]>;
-// MRS, also sets the flags via a glue.
+// MRS, also sets the flags.
def AArch64mrs : SDNode<"AArch64ISD::MRS",
SDTypeProfile<2, 1, [SDTCisVT<0, i64>,
- SDTCisVT<1, i32>,
+ SDTCisVT<1, FlagsVT>,
SDTCisVT<2, i32>]>,
[SDNPHasChain]>;
@@ -2032,7 +2033,7 @@ let Predicates = [HasPAuth] in {
def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>;
}
- defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
+ defm PAC : SignAuth<0b000, 0b010, "pac", null_frag>;
defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
def XPACI : ClearAuth<0, "xpaci">;
@@ -2152,6 +2153,26 @@ let Predicates = [HasPAuth] in {
let Uses = [];
}
+ // PAC pseudo instruction. In AsmPrinter, it is expanded into an actual PAC*
+ // instruction immediately preceded by the discriminator computation.
+  // This enforces that the expected immediate modifier is used for signing, even
+ // if an attacker is able to substitute AddrDisc.
+ def PAC : Pseudo<(outs GPR64:$SignedVal),
+ (ins GPR64:$Val, i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc),
+ [], "$SignedVal = $Val">, Sched<[WriteI, ReadI]> {
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ let mayStore = 0;
+ let mayLoad = 0;
+ let Size = 12;
+ let Defs = [X16, X17];
+ let usesCustomInserter = 1;
+ }
+
+ // A standalone pattern is used, so that literal 0 can be passed as $Disc.
+ def : Pat<(int_ptrauth_sign GPR64:$Val, timm:$Key, GPR64noip:$AddrDisc),
+ (PAC GPR64:$Val, $Key, 0, GPR64noip:$AddrDisc)>;
+
// AUT and re-PAC a value, using different keys/data.
// This directly manipulates x16/x17, which are the only registers that
// certain OSs guarantee are safe to use for sensitive operations.
@@ -3934,6 +3955,26 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
+// load zero-extended i32, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+
+// load zero-extended i16, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+
+// load zero-extended i8, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+
+// load zero-extended i16, bitcast to f32
+def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+
+// load zero-extended i8, bitcast to f32
+def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch timm:$Rt,
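
As a rough illustration of what the new zext-load/bitcast patterns above are aimed at (not part of the patch; the function names are made up and the exact DAG shape depends on the frontend and optimization level), C++ along these lines can produce an integer zero-extending load whose result is immediately reinterpreted as a floating-point value:

#include <cstdint>
#include <cstring>

// Hypothetical examples: a narrow zero-extended load bitcast to FP.
double load_u32_as_f64(const uint32_t *p) {
  uint64_t wide = *p;                // zero-extending 32-bit load (zextloadi32)
  double d;
  std::memcpy(&d, &wide, sizeof(d)); // f64 (bitcast ...)
  return d;
}

float load_u16_as_f32(const uint16_t *p) {
  uint32_t wide = *p;                // zero-extending 16-bit load (zextloadi16)
  float f;
  std::memcpy(&f, &wide, sizeof(f)); // f32 (bitcast ...)
  return f;
}

The intent of the patterns is that such code selects a single load into an FP register (e.g. LDRSui/LDRHui wrapped in SUBREG_TO_REG) rather than a GPR load followed by a register move.
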
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 61bf87f..1a7609b 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -305,7 +305,8 @@ def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand<48>">;
def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand<64>">;
// Condition code regclass.
-def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
+defvar FlagsVT = i32;
+def CCR : RegisterClass<"AArch64", [FlagsVT], 32, (add NZCV)> {
let CopyCost = -1; // Don't allow copying of status registers.
// CCR is not allocatable.
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index bafb8d0..8a5b5ba 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -32,10 +32,29 @@ AArch64SelectionDAGInfo::AArch64SelectionDAGInfo()
void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
const SDNode *N) const {
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
+
#ifndef NDEBUG
+ // Some additional checks not yet implemented by verifyTargetNode.
+ constexpr MVT FlagsVT = MVT::i32;
switch (N->getOpcode()) {
- default:
- return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
+ case AArch64ISD::SUBS:
+ assert(N->getValueType(1) == FlagsVT);
+ break;
+ case AArch64ISD::ADC:
+ case AArch64ISD::SBC:
+ assert(N->getOperand(2).getValueType() == FlagsVT);
+ break;
+ case AArch64ISD::ADCS:
+ case AArch64ISD::SBCS:
+ assert(N->getValueType(1) == FlagsVT);
+ assert(N->getOperand(2).getValueType() == FlagsVT);
+ break;
+ case AArch64ISD::CSEL:
+ case AArch64ISD::CSINC:
+ case AArch64ISD::BRCOND:
+ assert(N->getOperand(3).getValueType() == FlagsVT);
+ break;
case AArch64ISD::SADDWT:
case AArch64ISD::SADDWB:
case AArch64ISD::UADDWT:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index c218831..85de2d5 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -36,7 +36,7 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
// SHF_AARCH64_PURECODE flag set if the "+execute-only" target feature is
// present.
if (TM.getMCSubtargetInfo()->hasFeature(AArch64::FeatureExecuteOnly)) {
- auto *Text = cast<MCSectionELF>(TextSection);
+ auto *Text = static_cast<MCSectionELF *>(TextSection);
Text->setFlags(Text->getFlags() | ELF::SHF_AARCH64_PURECODE);
}
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 08f547a..6257e99 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -523,7 +523,8 @@ void AArch64TargetELFStreamer::finish() {
// mark it execute-only if it is empty and there is at least one
// execute-only section in the object.
if (any_of(Asm, [](const MCSection &Sec) {
- return cast<MCSectionELF>(Sec).getFlags() & ELF::SHF_AARCH64_PURECODE;
+ return static_cast<const MCSectionELF &>(Sec).getFlags() &
+ ELF::SHF_AARCH64_PURECODE;
})) {
auto *Text =
static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection());
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 1ac340a..a22a17a 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -132,7 +132,8 @@ static bool canUseLocalRelocation(const MCSectionMachO &Section,
// But only if they don't point to a few forbidden sections.
if (!Symbol.isInSection())
return true;
- const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection());
+ const MCSectionMachO &RefSec =
+ static_cast<MCSectionMachO &>(Symbol.getSection());
if (RefSec.getType() == MachO::S_CSTRING_LITERALS)
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 2a36f3d..8b8fc8b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -149,6 +149,12 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
>;
+def FeatureFmaMixBF16Insts : SubtargetFeature<"fma-mix-bf16-insts",
+ "HasFmaMixBF16Insts",
+ "true",
+ "Has v_fma_mix_f32_bf16, v_fma_mixlo_bf16, v_fma_mixhi_bf16 instructions"
+>;
+
def FeatureIEEEMinimumMaximumInsts : SubtargetFeature<"ieee-minimum-maximum-insts",
"HasIEEEMinimumMaximumInsts",
"true",
@@ -262,12 +268,24 @@ def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
"S_INST_PREFETCH instruction causes shader to hang"
>;
+def FeatureVmemPrefInsts : SubtargetFeature<"vmem-pref-insts",
+ "HasVmemPrefInsts",
+ "true",
+ "Has flat_prefect_b8 and global_prefetch_b8 instructions"
+>;
+
def FeatureSafeSmemPrefetch : SubtargetFeature<"safe-smem-prefetch",
"HasSafeSmemPrefetch",
"true",
"SMEM prefetches do not fail on illegal address"
>;
+def FeatureSafeCUPrefetch : SubtargetFeature<"safe-cu-prefetch",
+ "HasSafeCUPrefetch",
+ "true",
+ "VMEM CU scope prefetches do not fail on illegal address"
+>;
+
def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
"HasVcmpxExecWARHazard",
"true",
@@ -2007,6 +2025,7 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureBF16ConversionInsts,
FeatureBF16PackedInsts,
FeatureCvtPkF16F32Inst,
+ FeatureFmaMixBF16Insts,
FeatureMin3Max3PKF16,
FeatureMinimum3Maximum3PKF16,
FeaturePrngInst,
@@ -2020,6 +2039,7 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureFlatBufferGlobalAtomicFaddF64Inst,
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureKernargPreload,
+ FeatureVmemPrefInsts,
FeatureLshlAddU64Inst,
FeatureAddSubU64Insts,
FeatureLdsBarrierArriveAtomic,
@@ -2599,6 +2619,9 @@ def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
AssemblerPredicate<(all_of FeatureFmaMixInsts)>;
+def HasFmaMixBF16Insts : Predicate<"Subtarget->hasFmaMixBF16Insts()">,
+ AssemblerPredicate<(all_of FeatureFmaMixBF16Insts)>;
+
def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
AssemblerPredicate<(all_of FeatureDLInsts)>;
@@ -2797,6 +2820,9 @@ def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">;
def HasXF32Insts : Predicate<"Subtarget->hasXF32Insts()">,
AssemblerPredicate<(all_of FeatureXF32Insts)>;
+def HasVmemPrefInsts : Predicate<"Subtarget->hasVmemPrefInsts()">,
+ AssemblerPredicate<(all_of FeatureVmemPrefInsts)>;
+
def HasAshrPkInsts : Predicate<"Subtarget->hasAshrPkInsts()">,
AssemblerPredicate<(all_of FeatureAshrPkInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index dedee46..59cc1df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -13,7 +13,6 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
@@ -1383,7 +1382,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
- &AAIndirectCallInfo::ID, &AAInstanceInfo::ID});
+ &AAIndirectCallInfo::ID});
AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 891d362..c01e5d3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -137,6 +137,9 @@ def gi_global_offset :
def gi_global_saddr :
GIComplexOperandMatcher<s64, "selectGlobalSAddr">,
GIComplexPatternEquiv<GlobalSAddr>;
+def gi_global_saddr_cpol :
+ GIComplexOperandMatcher<s64, "selectGlobalSAddrCPol">,
+ GIComplexPatternEquiv<GlobalSAddrCPol>;
def gi_global_saddr_glc :
GIComplexOperandMatcher<s64, "selectGlobalSAddrGLC">,
GIComplexPatternEquiv<GlobalSAddrGLC>;
@@ -446,5 +449,8 @@ def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
GISDNodeXFormEquiv<as_hw_round_mode>;
+def gi_prefetch_loc : GICustomOperandRenderer<"renderPrefetchLoc">,
+ GISDNodeXFormEquiv<PrefetchLoc>;
+
def gi_MFMALdScaleModifierOp : GICustomOperandRenderer<"renderScaledMAIIntrinsicOperand">,
GISDNodeXFormEquiv<MFMALdScaleXForm>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5a2416de..dfaa145 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2020,6 +2020,22 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(SDNode *N, SDValue Addr,
+ SDValue &SAddr, SDValue &VOffset,
+ SDValue &Offset,
+ SDValue &CPol) const {
+ bool ScaleOffset;
+ if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
+ return false;
+
+ // We are assuming CPol is always the last operand of the intrinsic.
+ auto PassedCPol =
+ N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
+ CPol = CurDAG->getTargetConstant(
+ (ScaleOffset ? AMDGPU::CPol::SCAL : 0) | PassedCPol, SDLoc(), MVT::i32);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(SDNode *N, SDValue Addr,
SDValue &SAddr, SDValue &VOffset,
SDValue &Offset,
@@ -3861,58 +3877,114 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
+// Match lowered fpext from bf16 to f32. This is a bit operation extending
+// a 16-bit value with 16 bits of zeroes at the LSB:
+//
+// 1. (f32 (bitcast (build_vector (i16 0), (i16 (bitcast bf16:val)))))
+// 2. (f32 (bitcast (and i32:val, 0xffff0000))) -> IsExtractHigh = true
+// 3. (f32 (bitcast (shl i32:val, 16))) -> IsExtractHigh = false
+static SDValue matchBF16FPExtendLike(SDValue Op, bool &IsExtractHigh) {
+ if (Op.getValueType() != MVT::f32 || Op.getOpcode() != ISD::BITCAST)
+ return SDValue();
+ Op = Op.getOperand(0);
+
+ IsExtractHigh = false;
+ if (Op.getValueType() == MVT::v2i16 && Op.getOpcode() == ISD::BUILD_VECTOR) {
+ auto Low16 = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ if (!Low16 || !Low16->isZero())
+ return SDValue();
+ Op = stripBitcast(Op.getOperand(1));
+ if (Op.getValueType() != MVT::bf16)
+ return SDValue();
+ return Op;
+ }
+
+ if (Op.getValueType() != MVT::i32)
+ return SDValue();
+
+ if (Op.getOpcode() == ISD::AND) {
+ if (auto Mask = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (Mask->getZExtValue() == 0xffff0000) {
+ IsExtractHigh = true;
+ return Op.getOperand(0);
+ }
+ }
+ return SDValue();
+ }
+
+ if (Op.getOpcode() == ISD::SHL) {
+ if (auto Amt = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (Amt->getZExtValue() == 16)
+ return Op.getOperand(0);
+ }
+ }
+
+ return SDValue();
+}
+
// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
- unsigned &Mods) const {
+ unsigned &Mods,
+ MVT VT) const {
Mods = 0;
SelectVOP3ModsImpl(In, Src, Mods);
+ bool IsExtractHigh = false;
if (Src.getOpcode() == ISD::FP_EXTEND) {
Src = Src.getOperand(0);
- assert(Src.getValueType() == MVT::f16);
- Src = stripBitcast(Src);
+ } else if (VT == MVT::bf16) {
+ SDValue B16 = matchBF16FPExtendLike(Src, IsExtractHigh);
+ if (!B16)
+ return false;
+ Src = B16;
+ } else
+ return false;
- // Be careful about folding modifiers if we already have an abs. fneg is
- // applied last, so we don't want to apply an earlier fneg.
- if ((Mods & SISrcMods::ABS) == 0) {
- unsigned ModsTmp;
- SelectVOP3ModsImpl(Src, Src, ModsTmp);
+ if (Src.getValueType() != VT &&
+ (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
+ return false;
- if ((ModsTmp & SISrcMods::NEG) != 0)
- Mods ^= SISrcMods::NEG;
+ Src = stripBitcast(Src);
- if ((ModsTmp & SISrcMods::ABS) != 0)
- Mods |= SISrcMods::ABS;
- }
+ // Be careful about folding modifiers if we already have an abs. fneg is
+ // applied last, so we don't want to apply an earlier fneg.
+ if ((Mods & SISrcMods::ABS) == 0) {
+ unsigned ModsTmp;
+ SelectVOP3ModsImpl(Src, Src, ModsTmp);
- // op_sel/op_sel_hi decide the source type and source.
- // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
- // If the sources's op_sel is set, it picks the high half of the source
- // register.
+ if ((ModsTmp & SISrcMods::NEG) != 0)
+ Mods ^= SISrcMods::NEG;
- Mods |= SISrcMods::OP_SEL_1;
- if (isExtractHiElt(Src, Src)) {
- Mods |= SISrcMods::OP_SEL_0;
+ if ((ModsTmp & SISrcMods::ABS) != 0)
+ Mods |= SISrcMods::ABS;
+ }
- // TODO: Should we try to look for neg/abs here?
- }
+ // op_sel/op_sel_hi decide the source type and source.
+ // If the source's op_sel_hi is set, it indicates to do a conversion from
+  // fp16. If the source's op_sel is set, it picks the high half of the source
+ // register.
- // Prevent unnecessary subreg COPY to VGPR_16
- if (Src.getOpcode() == ISD::TRUNCATE &&
- Src.getOperand(0).getValueType() == MVT::i32) {
- Src = Src.getOperand(0);
- }
- return true;
+ Mods |= SISrcMods::OP_SEL_1;
+ if (IsExtractHigh ||
+ (Src.getValueSizeInBits() == 16 && isExtractHiElt(Src, Src))) {
+ Mods |= SISrcMods::OP_SEL_0;
+
+ // TODO: Should we try to look for neg/abs here?
}
- return false;
+ // Prevent unnecessary subreg COPY to VGPR_16
+ if (Src.getOpcode() == ISD::TRUNCATE &&
+ Src.getOperand(0).getValueType() == MVT::i32) {
+ Src = Src.getOperand(0);
+ }
+ return true;
}
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
- if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))
+ if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
return false;
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;
@@ -3921,7 +3993,24 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
- SelectVOP3PMadMixModsImpl(In, Src, Mods);
+ SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ unsigned Mods = 0;
+ if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
+ return false;
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ unsigned Mods = 0;
+ SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;
}
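
For context on the shapes matchBF16FPExtendLike recognizes above, here is a minimal standalone sketch (illustrative only, not from the patch) of the underlying bit trick: bf16 is the upper 16 bits of an IEEE-754 binary32 value, so extending it to f32 amounts to a 16-bit left shift at the bit level.

#include <cstdint>
#include <cstring>

// Hypothetical helper: widen bf16 bits to f32 by shifting into the high half.
float bf16_bits_to_f32(uint16_t b) {
  uint32_t bits = uint32_t(b) << 16; // corresponds to the (shl i32:val, 16) form above
  float f;
  std::memcpy(&f, &bits, sizeof(f)); // corresponds to the (f32 (bitcast ...)) wrapper
  return f;
}
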
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 6123d75..5636d89 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -19,6 +19,7 @@
#include "SIModeRegisterDefaults.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -167,6 +168,9 @@ private:
bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &VOffset, SDValue &Offset,
SDValue &CPol) const;
+ bool SelectGlobalSAddrCPol(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &VOffset, SDValue &Offset,
+ SDValue &CPol) const;
bool SelectGlobalSAddrGLC(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &VOffset, SDValue &Offset,
SDValue &CPol) const;
@@ -254,11 +258,15 @@ private:
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
- unsigned &Mods) const;
+ bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods,
+ MVT VT) const;
bool SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
SDValue &SrcMods) const;
bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PMadMixBF16ModsExt(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const;
+ bool SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const;
bool SelectBITOP3(SDValue In, SDValue &Src0, SDValue &Src1, SDValue &Src2,
SDValue &Tbl) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e3ca09e..6118933 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -391,8 +391,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
- ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
- MVT::f32, Legal);
+ ISD::FROUNDEVEN, ISD::FTRUNC},
+ {MVT::f16, MVT::f32}, Legal);
+ setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
@@ -412,9 +413,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
- if (Subtarget->has16BitInsts())
+ if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
- else {
+ setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Legal);
+ } else {
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
}
@@ -4844,94 +4846,11 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
}
-// Detect when CMP and SELECT use the same constant and fold them to avoid
-// loading the constant twice. Specifically handles patterns like:
-// %cmp = icmp eq i32 %val, 4242
-// %sel = select i1 %cmp, i32 4242, i32 %other
-// It can be optimized to reuse %val instead of 4242 in select.
-static SDValue
-foldCmpSelectWithSharedConstant(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
- const AMDGPUSubtarget *ST) {
- SDValue Cond = N->getOperand(0);
- SDValue TrueVal = N->getOperand(1);
- SDValue FalseVal = N->getOperand(2);
-
- // Check if condition is a comparison.
- if (Cond.getOpcode() != ISD::SETCC)
- return SDValue();
-
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
-
- bool isFloatingPoint = LHS.getValueType().isFloatingPoint();
- bool isInteger = LHS.getValueType().isInteger();
-
- // Handle simple floating-point and integer types only.
- if (!isFloatingPoint && !isInteger)
- return SDValue();
-
- bool isEquality = CC == (isFloatingPoint ? ISD::SETOEQ : ISD::SETEQ);
- bool isNonEquality = CC == (isFloatingPoint ? ISD::SETONE : ISD::SETNE);
- if (!isEquality && !isNonEquality)
- return SDValue();
-
- SDValue ArgVal, ConstVal;
- if ((isFloatingPoint && isa<ConstantFPSDNode>(RHS)) ||
- (isInteger && isa<ConstantSDNode>(RHS))) {
- ConstVal = RHS;
- ArgVal = LHS;
- } else if ((isFloatingPoint && isa<ConstantFPSDNode>(LHS)) ||
- (isInteger && isa<ConstantSDNode>(LHS))) {
- ConstVal = LHS;
- ArgVal = RHS;
- } else {
- return SDValue();
- }
-
- // Check if constant should not be optimized - early return if not.
- if (isFloatingPoint) {
- const APFloat &Val = cast<ConstantFPSDNode>(ConstVal)->getValueAPF();
- const GCNSubtarget *GCNST = static_cast<const GCNSubtarget *>(ST);
-
- // Only optimize normal floating-point values (finite, non-zero, and
- // non-subnormal as per IEEE 754), skip optimization for inlinable
- // floating-point constants.
- if (!Val.isNormal() || GCNST->getInstrInfo()->isInlineConstant(Val))
- return SDValue();
- } else {
- int64_t IntVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
-
- // Skip optimization for inlinable integer immediates.
- // Inlinable immediates include: -16 to 64 (inclusive).
- if (IntVal >= -16 && IntVal <= 64)
- return SDValue();
- }
-
- // For equality and non-equality comparisons, patterns:
- // select (setcc x, const), const, y -> select (setcc x, const), x, y
- // select (setccinv x, const), y, const -> select (setccinv x, const), y, x
- if (!(isEquality && TrueVal == ConstVal) &&
- !(isNonEquality && FalseVal == ConstVal))
- return SDValue();
-
- SDValue SelectLHS = (isEquality && TrueVal == ConstVal) ? ArgVal : TrueVal;
- SDValue SelectRHS =
- (isNonEquality && FalseVal == ConstVal) ? ArgVal : FalseVal;
- return DCI.DAG.getNode(ISD::SELECT, SDLoc(N), N->getValueType(0), Cond,
- SelectLHS, SelectRHS);
-}
-
SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
return Folded;
- // Try to fold CMP + SELECT patterns with shared constants (both FP and
- // integer).
- if (SDValue Folded = foldCmpSelectWithSharedConstant(N, DCI, Subtarget))
- return Folded;
-
SDValue Cond = N->getOperand(0);
if (Cond.getOpcode() != ISD::SETCC)
return SDValue();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 877c3ac..266dee1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5774,6 +5774,16 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectGlobalSAddrCPol(MachineOperand &Root) const {
+ const MachineInstr &I = *Root.getParent();
+
+ // We are assuming CPol is always the last operand of the intrinsic.
+ auto PassedCPol =
+ I.getOperand(I.getNumOperands() - 1).getImm() & ~AMDGPU::CPol::SCAL;
+ return selectGlobalSAddr(Root, PassedCPol);
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddrGLC(MachineOperand &Root) const {
return selectGlobalSAddr(Root, AMDGPU::CPol::GLC);
}
@@ -7068,6 +7078,17 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
}
+void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ uint32_t V = MI.getOperand(2).getImm();
+ V = (AMDGPU::CPol::SCOPE_MASK - (V & AMDGPU::CPol::SCOPE_MASK))
+ << AMDGPU::CPol::SCOPE_SHIFT;
+ if (!Subtarget->hasSafeCUPrefetch())
+ V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe
+ MIB.addImm(V);
+}
+
/// Convert from 2-bit value to enum values used for op_sel* source modifiers.
void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 5f7f05c..fe9743d0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -261,6 +261,8 @@ private:
InstructionSelector::ComplexRendererFns
selectGlobalSAddr(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectGlobalSAddrCPol(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
selectGlobalSAddrGLC(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
@@ -414,6 +416,10 @@ private:
void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+
+ void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
void renderScaledMAIIntrinsicOperand(MachineInstrBuilder &MIB,
const MachineInstr &MI, int OpIdx) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 9b05f7c..c5a1d9e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3501,19 +3501,24 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
applyMappingMAD_64_32(B, OpdMapper);
return;
case AMDGPU::G_PREFETCH: {
- if (!Subtarget.hasPrefetch() || !Subtarget.hasSafeSmemPrefetch()) {
+ if (!Subtarget.hasSafeSmemPrefetch() && !Subtarget.hasVmemPrefInsts()) {
MI.eraseFromParent();
return;
}
Register PtrReg = MI.getOperand(0).getReg();
unsigned PtrBank = getRegBankID(PtrReg, MRI, AMDGPU::SGPRRegBankID);
- if (PtrBank == AMDGPU::VGPRRegBankID) {
+ if (PtrBank == AMDGPU::VGPRRegBankID &&
+ (!Subtarget.hasVmemPrefInsts() || !MI.getOperand(3).getImm())) {
+ // Cannot do I$ prefetch with divergent pointer.
MI.eraseFromParent();
return;
}
unsigned AS = MRI.getType(PtrReg).getAddressSpace();
- if (!AMDGPU::isFlatGlobalAddrSpace(AS) &&
- AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ if ((!AMDGPU::isFlatGlobalAddrSpace(AS) &&
+ AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
+ (!Subtarget.hasSafeSmemPrefetch() &&
+ (AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ !MI.getOperand(3).getImm() /* I$ prefetch */))) {
MI.eraseFromParent();
return;
}
@@ -5175,6 +5180,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_ds_load_tr16_b128:
case Intrinsic::amdgcn_ds_load_tr4_b64:
case Intrinsic::amdgcn_ds_load_tr6_b96:
+ case Intrinsic::amdgcn_flat_load_monitor_b32:
+ case Intrinsic::amdgcn_flat_load_monitor_b64:
+ case Intrinsic::amdgcn_flat_load_monitor_b128:
+ case Intrinsic::amdgcn_global_load_monitor_b32:
+ case Intrinsic::amdgcn_global_load_monitor_b64:
+ case Intrinsic::amdgcn_global_load_monitor_b128:
case Intrinsic::amdgcn_ds_read_tr4_b64:
case Intrinsic::amdgcn_ds_read_tr6_b96:
case Intrinsic::amdgcn_ds_read_tr8_b64:
@@ -5437,6 +5448,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_flat_prefetch:
+ case Intrinsic::amdgcn_global_prefetch:
+ return getDefaultMappingVOP(MI);
default:
return getInvalidInstructionMapping();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index a8e1967..f580f43 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -159,7 +159,8 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
// If the inputs are tied and the same register, we can shortcut and
// directly replace the register.
- if (Src2->getReg() != CopySrcReg) {
+ if (!Src2->isReg() || Src2->getReg() != CopySrcReg ||
+ Src2->getSubReg() != DefMI->getOperand(1).getSubReg()) {
LLVM_DEBUG(
dbgs()
<< "Replacing untied VGPR MFMAs with AGPR form not yet handled\n");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c865082..38f9ee5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -836,8 +836,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
// When we are not using -fgpu-rdc, we can run accelerator code
// selection relatively early, but still after linking to prevent
// eager removal of potentially reachable symbols.
- if (EnableHipStdPar)
+ if (EnableHipStdPar) {
+ PM.addPass(HipStdParMathFixupPass());
PM.addPass(HipStdParAcceleratorCodeSelectionPass());
+ }
PM.addPass(AMDGPUPrintfRuntimeBindingPass());
}
@@ -916,8 +918,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
// selection after linking; otherwise we end up removing
// potentially reachable symbols that were exported as external in other
// modules.
- if (EnableHipStdPar)
+ if (EnableHipStdPar) {
+ PM.addPass(HipStdParMathFixupPass());
PM.addPass(HipStdParAcceleratorCodeSelectionPass());
+ }
// We want to support the -lto-partitions=N option as "best effort".
// For that, we need to lower LDS earlier in the pipeline before the
// module is partitioned for codegen.
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 679c55d..0f172e0d 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -13,6 +13,7 @@ let WantsRoot = true in {
def GlobalSAddr : ComplexPattern<iPTR, 4, "SelectGlobalSAddr", [], [], -10>;
def GlobalSAddrGLC : ComplexPattern<iPTR, 4, "SelectGlobalSAddrGLC", [], [], -10>;
+ def GlobalSAddrCPol : ComplexPattern<iPTR, 4, "SelectGlobalSAddrCPol", [], [], -10>;
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [], -10>;
def ScratchSVAddr : ComplexPattern<iPTR, 4, "SelectScratchSVAddr", [], [], -10>;
}
@@ -368,31 +369,68 @@ multiclass FLAT_Global_Store_Pseudo_t16<string opName> {
}
}
-class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
+// Async loads, introduced in gfx1250, will store directly
+// to a DS address in vdst (they will not use M0 for DS address).
+class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0, bit IsAsync = 0> : FLAT_Pseudo<
opName,
(outs ),
!con(
- !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
- (ins flat_offset:$offset, CPol_0:$cpol)),
- " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
- let LGKM_CNT = 1;
+ !if(IsAsync, (ins VGPR_32:$vdst), (ins)),
+ !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
+ (ins flat_offset:$offset, CPol_0:$cpol)),
+ !if(IsAsync, " $vdst,", "")#" $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
+ let LGKM_CNT = !not(IsAsync);
+ let VM_CNT = !not(IsAsync);
+ let ASYNC_CNT = IsAsync;
let is_flat_global = 1;
let lds = 1;
let has_data = 0;
+ let has_vdst = IsAsync; // vdst for ds address with IsAsync
+ let mayLoad = 1;
+ let mayStore = 1;
+ let has_saddr = 1;
+ let enabled_saddr = EnableSaddr;
+ let VALU = 1;
+ let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+ let Uses = !if(IsAsync, [EXEC, ASYNCcnt], [M0, EXEC]);
+ let Defs = !if(IsAsync, [ASYNCcnt], []);
+ let SchedRW = [WriteVMEM, WriteLDS];
+}
+
+multiclass FLAT_Global_Load_LDS_Pseudo<string opName, bit IsAsync = 0> {
+ def "" : FLAT_Global_Load_LDS_Pseudo<opName, 0, IsAsync>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1, IsAsync>,
+ GlobalSaddrTable<1, opName>;
+}
+
+class FLAT_Global_STORE_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
+ opName,
+ (outs ),
+ !con(
+ !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)), (ins VGPR_32:$vdata),
+ (ins flat_offset:$offset, CPol_0:$cpol)),
+ " $vaddr, $vdata"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
+ let VM_CNT = 0;
+ let ASYNC_CNT = 1;
+ let is_flat_global = 1;
+ let lds = 1;
+ let has_data = 1; // vdata for ds address
let has_vdst = 0;
let mayLoad = 1;
let mayStore = 1;
let has_saddr = 1;
let enabled_saddr = EnableSaddr;
let VALU = 1;
- let Uses = [M0, EXEC];
+ let Uses = [EXEC, ASYNCcnt];
+ let Defs = [ASYNCcnt];
let SchedRW = [WriteVMEM, WriteLDS];
}
-multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
- def "" : FLAT_Global_Load_LDS_Pseudo<opName>,
+multiclass FLAT_Global_STORE_LDS_Pseudo<string opName> {
+ def "" : FLAT_Global_STORE_LDS_Pseudo<opName>,
GlobalSaddrTable<0, opName>;
- def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>,
+ def _SADDR : FLAT_Global_STORE_LDS_Pseudo<opName, 1>,
GlobalSaddrTable<1, opName>;
}
@@ -464,6 +502,37 @@ class FLAT_Global_Invalidate_Writeback<string opName, SDPatternOperator node = n
let sve = 0;
}
+class FLAT_Prefetch_Pseudo<string opName, dag addr = (ins VReg_64:$vaddr), string asm = " $vaddr"> :
+ FLAT_Pseudo<opName, (outs), !con(addr, (ins flat_offset:$offset, CPol_0:$cpol)), asm#"$offset$cpol"> {
+ let has_vdst = 0;
+ let has_data = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let VM_CNT = 0;
+ let LGKM_CNT = 0;
+}
+
+multiclass FLAT_Flat_Prefetch_Pseudo<string opName> {
+ def "" : FLAT_Prefetch_Pseudo<opName>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Prefetch_Pseudo<opName, (ins SReg_64:$saddr, VGPR_32:$vaddr), " $vaddr, $saddr">,
+ GlobalSaddrTable<1, opName> {
+ let OtherPredicates = [HasFlatGVSMode];
+ let enabled_saddr = 1;
+ }
+}
+
+multiclass FLAT_Global_Prefetch_Pseudo<string opName> {
+ let is_flat_global = 1, has_saddr = 1 in {
+ def "" : FLAT_Prefetch_Pseudo<opName, (ins VReg_64:$vaddr), " $vaddr, off">,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Prefetch_Pseudo<opName, (ins SReg_64:$saddr, VGPR_32:$vaddr), " $vaddr, $saddr">,
+ GlobalSaddrTable<1, opName> {
+ let enabled_saddr = 1;
+ }
+ }
+}
+
class FlatScratchInst <string sv_op, string mode> {
string SVOp = sv_op;
string Mode = mode;
@@ -1124,6 +1193,15 @@ let SubtargetPredicate = isGFX12Plus in {
let SubtargetPredicate = isGFX1250Plus in {
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B8 : FLAT_Global_Load_LDS_Pseudo<"global_load_async_to_lds_b8", 1>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B32 : FLAT_Global_Load_LDS_Pseudo<"global_load_async_to_lds_b32", 1>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B64 : FLAT_Global_Load_LDS_Pseudo<"global_load_async_to_lds_b64", 1>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B128 : FLAT_Global_Load_LDS_Pseudo<"global_load_async_to_lds_b128", 1>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B8 : FLAT_Global_STORE_LDS_Pseudo<"global_store_async_from_lds_b8">;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B32 : FLAT_Global_STORE_LDS_Pseudo<"global_store_async_from_lds_b32">;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B64 : FLAT_Global_STORE_LDS_Pseudo<"global_store_async_from_lds_b64">;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B128 : FLAT_Global_STORE_LDS_Pseudo<"global_store_async_from_lds_b128">;
+
def TENSOR_SAVE : FLAT_Global_Tensor_Pseudo<"tensor_save", 1>;
def TENSOR_STOP : FLAT_Global_Tensor_Pseudo<"tensor_stop">;
} // End SubtargetPredicate = isGFX1250Plus
@@ -1162,6 +1240,16 @@ defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_u
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">;
defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
+let SubtargetPredicate = isGFX125xOnly in {
+defm FLAT_LOAD_MONITOR_B32 : FLAT_Flat_Load_Pseudo <"flat_load_monitor_b32", VGPR_32>;
+defm FLAT_LOAD_MONITOR_B64 : FLAT_Flat_Load_Pseudo <"flat_load_monitor_b64", VReg_64>;
+defm FLAT_LOAD_MONITOR_B128 : FLAT_Flat_Load_Pseudo <"flat_load_monitor_b128", VReg_128>;
+
+defm GLOBAL_LOAD_MONITOR_B32 : FLAT_Global_Load_Pseudo <"global_load_monitor_b32", VGPR_32>;
+defm GLOBAL_LOAD_MONITOR_B64 : FLAT_Global_Load_Pseudo <"global_load_monitor_b64", VReg_64>;
+defm GLOBAL_LOAD_MONITOR_B128 : FLAT_Global_Load_Pseudo <"global_load_monitor_b128", VReg_128>;
+} // End SubtargetPredicate = isGFX125xOnly
+
let SubtargetPredicate = isGFX12Plus in {
let Uses = [EXEC, M0] in {
defm GLOBAL_LOAD_BLOCK : FLAT_Global_Load_Pseudo <"global_load_block", VReg_1024>;
@@ -1218,6 +1306,11 @@ let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
"global_atomic_pk_add_f16", VGPR_32, v2f16
>;
+let SubtargetPredicate = HasVmemPrefInsts in {
+ defm FLAT_PREFETCH_B8 : FLAT_Flat_Prefetch_Pseudo<"flat_prefetch_b8">;
+ defm GLOBAL_PREFETCH_B8 : FLAT_Global_Prefetch_Pseudo<"global_prefetch_b8">;
+}
+
//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//
@@ -1228,6 +1321,11 @@ class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCN
(inst $vaddr, $offset)
>;
+class FlatLoadPat_CPOL <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (FlatOffset i64:$vaddr, i32:$offset), (i32 timm:$cpol))),
+ (inst $vaddr, $offset, $cpol)
+>;
+
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
(inst $vaddr, $offset, 0, $in)
@@ -1249,8 +1347,8 @@ class FlatSignedLoadPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, Value
>;
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)),
- (inst $saddr, $voffset, $offset, 0, $in)
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol), vt:$in)),
+ (inst $saddr, $voffset, $offset, $cpol, $in)
>;
class FlatLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1264,8 +1362,8 @@ class FlatLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
>;
class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
- (inst $saddr, $voffset, $offset, (i32 0))
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol))),
+ (inst $saddr, $voffset, $offset, $cpol)
>;
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1278,6 +1376,16 @@ class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
(inst $saddr, $voffset, $offset, $cpol)
>;
+class FlatLoadSignedPat_CPOL <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), (i32 timm:$cpol))),
+ (inst $vaddr, $offset, $cpol)
+>;
+
+class GlobalLoadSaddrPat_CPOL <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (GlobalSAddrCPol (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol), (i32 timm))),
+ (inst $saddr, $voffset, $offset, $cpol)
+>;
+
class FlatStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
(node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol)),
@@ -1459,8 +1567,8 @@ class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
>;
class ScratchLoadSVaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))),
- (inst $vaddr, $saddr, $offset, 0)
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol))),
+ (inst $vaddr, $saddr, $offset, $cpol)
>;
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
@@ -1473,6 +1581,16 @@ multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueTyp
}
}
+multiclass GlobalFLATLoadPats_CPOL<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
+ def : FlatLoadSignedPat_CPOL<inst, node, vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalLoadSaddrPat_CPOL<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let AddedComplexity = 11;
+ }
+}
+
multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatSignedLoadPat_D16 <inst, node, vt> {
let AddedComplexity = 10;
@@ -2009,6 +2127,16 @@ let WaveSizePredicate = isWave32, OtherPredicates = [HasTransposeLoadF4F6Insts]
defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR6_B96, int_amdgcn_global_load_tr6_b96, v3i32>;
}
+let OtherPredicates = [isGFX125xOnly] in {
+ def : FlatLoadPat_CPOL <FLAT_LOAD_MONITOR_B32, int_amdgcn_flat_load_monitor_b32, i32>;
+ def : FlatLoadPat_CPOL <FLAT_LOAD_MONITOR_B64, int_amdgcn_flat_load_monitor_b64, v2i32>;
+ def : FlatLoadPat_CPOL <FLAT_LOAD_MONITOR_B128, int_amdgcn_flat_load_monitor_b128, v4i32>;
+
+ defm : GlobalFLATLoadPats_CPOL <GLOBAL_LOAD_MONITOR_B32, int_amdgcn_global_load_monitor_b32, i32>;
+ defm : GlobalFLATLoadPats_CPOL <GLOBAL_LOAD_MONITOR_B64, int_amdgcn_global_load_monitor_b64, v2i32>;
+ defm : GlobalFLATLoadPats_CPOL <GLOBAL_LOAD_MONITOR_B128, int_amdgcn_global_load_monitor_b128, v4i32>;
+} // End OtherPredicates = [isGFX125xOnly]
+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
@@ -2138,6 +2266,77 @@ defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f
} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]
+def PrefetchLoc: SDNodeXForm<timm, [{
+ uint32_t V = N->getZExtValue();
+ V = (AMDGPU::CPol::SCOPE_MASK - (V & AMDGPU::CPol::SCOPE_MASK)) << AMDGPU::CPol::SCOPE_SHIFT;
+ if (!Subtarget->hasSafeCUPrefetch())
+ V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe
+ return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
+}]>;
+
+def prefetch_flat : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
+ (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
+ [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; }]> {
+ let GISelPredicateCode = [{
+ return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
+ }];
+}
+
+def prefetch_global : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
+ (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
+ [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ (cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ !Subtarget->hasSafeSmemPrefetch()); }]> {
+ let GISelPredicateCode = [{
+ return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ ((*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ !Subtarget->hasSafeSmemPrefetch());
+ }];
+}
+
+multiclass FlatPrefetchPats<string inst, SDPatternOperator prefetch_kind, SDPatternOperator rw> {
+ def : GCNPat <
+ (prefetch_kind (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
+ (!cast<FLAT_Pseudo>(inst) $vaddr, $offset, (i32 (PrefetchLoc $loc)))
+ > {
+ let AddedComplexity = !if(!eq(rw, i32imm_zero), 0, 25);
+ }
+
+ def : GCNPat <
+ (prefetch_kind (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
+ (!cast<FLAT_Pseudo>(inst#"_SADDR") $saddr, $voffset, $offset, (i32 (PrefetchLoc $loc)))
+ > {
+ let AddedComplexity = !if(!eq(rw, i32imm_zero), 11, 30);
+ }
+}
+
+multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
+ def : GCNPat <
+ (intr (FlatOffset i64:$vaddr, i32:$offset), timm:$cpol),
+ (!cast<FLAT_Pseudo>(inst) $vaddr, $offset, $cpol)
+ >;
+
+ def : GCNPat <
+ (intr (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), timm:$cpol),
+ (!cast<FLAT_Pseudo>(inst#"_SADDR") $saddr, $voffset, $offset, $cpol)> {
+ let AddedComplexity = 11;
+ }
+}
+
+let SubtargetPredicate = HasVmemPrefInsts in {
+ defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_zero>;
+ defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_zero>;
+
+ // Patterns for forced vector prefetch with rw = 1.
+ defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_one>;
+ defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_one>;
+
+ // Patterns for target intrinsics
+ defm : FlatIntrPrefetchPats<"FLAT_PREFETCH_B8", int_amdgcn_flat_prefetch>;
+ defm : FlatIntrPrefetchPats<"GLOBAL_PREFETCH_B8", int_amdgcn_global_prefetch>;
+} // End SubtargetPredicate = HasVmemPrefInsts
+
//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//
@@ -3210,6 +3409,26 @@ multiclass VFLAT_Real_Atomics_gfx1250<bits<8> op, string name = get_FLAT_ps<NAME
defm TENSOR_SAVE : VFLAT_Real_gfx1250<0x06e>;
defm TENSOR_STOP : VFLAT_Real_gfx1250<0x06f>;
+defm FLAT_PREFETCH_B8 : VFLAT_Real_AllAddr_gfx1250<0x05d>;
+defm GLOBAL_PREFETCH_B8 : VFLAT_Real_AllAddr_gfx1250<0x05d>;
+
+defm FLAT_LOAD_MONITOR_B32 : VFLAT_Real_AllAddr_gfx1250<0x070>;
+defm FLAT_LOAD_MONITOR_B64 : VFLAT_Real_AllAddr_gfx1250<0x071>;
+defm FLAT_LOAD_MONITOR_B128 : VFLAT_Real_AllAddr_gfx1250<0x072>;
+
+defm GLOBAL_LOAD_MONITOR_B32 : VFLAT_Real_AllAddr_gfx1250<0x070>;
+defm GLOBAL_LOAD_MONITOR_B64 : VFLAT_Real_AllAddr_gfx1250<0x071>;
+defm GLOBAL_LOAD_MONITOR_B128 : VFLAT_Real_AllAddr_gfx1250<0x072>;
+
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B8 : VFLAT_Real_AllAddr_gfx1250<0x5f>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B32 : VFLAT_Real_AllAddr_gfx1250<0x60>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B64 : VFLAT_Real_AllAddr_gfx1250<0x61>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B128 : VFLAT_Real_AllAddr_gfx1250<0x62>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B8 : VFLAT_Real_AllAddr_gfx1250<0x63>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B32 : VFLAT_Real_AllAddr_gfx1250<0x64>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B64 : VFLAT_Real_AllAddr_gfx1250<0x65>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B128 : VFLAT_Real_AllAddr_gfx1250<0x66>;
+
defm GLOBAL_LOAD_TR_B128_w32 : VFLAT_Real_AllAddr_gfx1250<0x057, "global_load_tr16_b128">;
defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "global_load_tr8_b64">;
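
A hypothetical usage sketch for the prefetch selection added earlier in this file (the FlatPrefetchPats/FlatIntrPrefetchPats patterns and the PrefetchLoc transform). This is not part of the patch; it only illustrates how a generic prefetch reaches these patterns, assuming the usual lowering of __builtin_prefetch to the prefetch node with (ptr, rw, locality, cache-type) operands:

// Illustrative only. The locality hint (0-3) is inverted into a cache
// scope by PrefetchLoc (higher locality -> nearer scope), and the rw = 1
// patterns carry higher AddedComplexity so the VMEM prefetch form wins.
void warm_cache(const int *p) {
  __builtin_prefetch(p, /*rw=*/0, /*locality=*/3); // read, keep close
  __builtin_prefetch(p, /*rw=*/1, /*locality=*/0); // write, distant scope
}
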
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 7d6723a..334afd3 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -38,7 +38,11 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
unsigned GCNRegPressure::getRegKind(const TargetRegisterClass *RC,
const SIRegisterInfo *STI) {
- return STI->isSGPRClass(RC) ? SGPR : (STI->isAGPRClass(RC) ? AGPR : VGPR);
+ return STI->isSGPRClass(RC)
+ ? SGPR
+ : (STI->isAGPRClass(RC)
+ ? AGPR
+ : (STI->isVectorSuperClass(RC) ? AVGPR : VGPR));
}
void GCNRegPressure::inc(unsigned Reg,
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 3749b6d..ea33a22 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -29,43 +29,57 @@ class raw_ostream;
class SlotIndex;
struct GCNRegPressure {
- enum RegKind { SGPR, VGPR, AGPR, TOTAL_KINDS };
+ enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS };
GCNRegPressure() {
clear();
}
- bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR]; }
+ bool empty() const {
+ return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR];
+ }
void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }
/// \returns the SGPR32 pressure
unsigned getSGPRNum() const { return Value[SGPR]; }
- /// \returns the aggregated ArchVGPR32, AccVGPR32 pressure dependent upon \p
- /// UnifiedVGPRFile
+ /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
+ /// dependent upon \p UnifiedVGPRFile
unsigned getVGPRNum(bool UnifiedVGPRFile) const {
if (UnifiedVGPRFile) {
- return Value[AGPR] ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR])
- : Value[VGPR];
+ return Value[AGPR]
+ ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR])
+ : Value[VGPR] + Value[AVGPR];
}
- return std::max(Value[VGPR], Value[AGPR]);
+ // AVGPR assignment priority is based on the width of the register. Account
+ // AVGPR pressure as VGPR.
+ return std::max(Value[VGPR] + Value[AVGPR], Value[AGPR]);
}
/// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs
- /// and \p NumAGPRs AGPRS, for a target with a unified VGPR file.
+  /// \p NumAGPRs AGPRs, and \p NumAVGPRs AVGPRs for a target with a unified
+ /// VGPR file.
inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs,
- unsigned NumAGPRs) {
- return alignTo(NumArchVGPRs, AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
+ unsigned NumAGPRs,
+ unsigned NumAVGPRs) {
+
+ // Assume AVGPRs will be assigned as VGPRs.
+ return alignTo(NumArchVGPRs + NumAVGPRs,
+ AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
NumAGPRs;
}
- /// \returns the ArchVGPR32 pressure
- unsigned getArchVGPRNum() const { return Value[VGPR]; }
+  /// \returns the ArchVGPR32 pressure, plus the AVGPRs which we assume will be
+ /// allocated as VGPR
+ unsigned getArchVGPRNum() const { return Value[VGPR] + Value[AVGPR]; }
/// \returns the AccVGPR32 pressure
unsigned getAGPRNum() const { return Value[AGPR]; }
+ /// \returns the AVGPR32 pressure
+ unsigned getAVGPRNum() const { return Value[AVGPR]; }
unsigned getVGPRTuplesWeight() const {
- return std::max(Value[TOTAL_KINDS + VGPR], Value[TOTAL_KINDS + AGPR]);
+ return std::max(Value[TOTAL_KINDS + VGPR] + Value[TOTAL_KINDS + AVGPR],
+ Value[TOTAL_KINDS + AGPR]);
}
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a655308..ce1ce68 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1911,14 +1911,12 @@ void PreRARematStage::rematerialize() {
for (auto &[DefMI, Remat] : Rematerializations) {
MachineBasicBlock::iterator InsertPos(Remat.UseMI);
Register Reg = DefMI->getOperand(0).getReg();
- unsigned SubReg = DefMI->getOperand(0).getSubReg();
unsigned DefRegion = MIRegion.at(DefMI);
// Rematerialize DefMI to its use block.
- TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg, SubReg, *DefMI,
- *DAG.TRI);
+ TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
+ AMDGPU::NoSubRegister, *DefMI, *DAG.TRI);
Remat.RematMI = &*std::prev(InsertPos);
- Remat.RematMI->getOperand(0).setSubReg(SubReg);
DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);
// Update region boundaries in regions we sinked from (remove defining MI)
@@ -2064,14 +2062,13 @@ void PreRARematStage::finalizeGCNSchedStage() {
MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
MachineBasicBlock *MBB = RegionBB[DefRegion];
Register Reg = RematMI.getOperand(0).getReg();
- unsigned SubReg = RematMI.getOperand(0).getSubReg();
// Re-rematerialize MI at the end of its original region. Note that it may
// not be rematerialized exactly in the same position as originally within
// the region, but it should not matter much.
- TII->reMaterialize(*MBB, InsertPos, Reg, SubReg, RematMI, *DAG.TRI);
+ TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI,
+ *DAG.TRI);
MachineInstr *NewMI = &*std::prev(InsertPos);
- NewMI->getOperand(0).setSubReg(SubReg);
DAG.LIS->InsertMachineInstrInMaps(*NewMI);
auto UseRegion = MIRegion.find(Remat.UseMI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 9a2bab1..0a0a107 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -537,6 +537,63 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
return getMaxNumVGPRs(MF.getFunction());
}
+std::pair<unsigned, unsigned>
+GCNSubtarget::getMaxNumVectorRegs(const Function &F) const {
+ const unsigned MaxVectorRegs = getMaxNumVGPRs(F);
+
+ unsigned MaxNumVGPRs = MaxVectorRegs;
+ unsigned MaxNumAGPRs = 0;
+
+ // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
+ // a wave may have up to 512 total vector registers combining together both
+ // VGPRs and AGPRs. Hence, in an entry function without calls and without
+ // AGPRs used within it, it is possible to use the whole vector register
+ // budget for VGPRs.
+ //
+ // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
+ // register file accordingly.
+ if (hasGFX90AInsts()) {
+ unsigned MinNumAGPRs = 0;
+ const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
+ const unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+ const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};
+
+ // TODO: The lower bound should probably force the number of required
+ // registers up, overriding amdgpu-waves-per-eu.
+ std::tie(MinNumAGPRs, MaxNumAGPRs) =
+ AMDGPU::getIntegerPairAttribute(F, "amdgpu-agpr-alloc", DefaultNumAGPR,
+ /*OnlyFirstRequired=*/true);
+
+ if (MinNumAGPRs == DefaultNumAGPR.first) {
+ // Default to splitting half the registers if AGPRs are required.
+ MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
+ } else {
+ // Align to accum_offset's allocation granularity.
+ MinNumAGPRs = alignTo(MinNumAGPRs, 4);
+
+ MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
+ }
+
+ // Clamp values to be inbounds of our limits, and ensure min <= max.
+
+ MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
+ MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);
+
+ MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, TotalNumVGPRs);
+ MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);
+
+ assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
+ MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= TotalNumVGPRs &&
+ "invalid register counts");
+ } else if (hasMAIInsts()) {
+ // On gfx908 the number of AGPRs always equals the number of VGPRs.
+ MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
+ }
+
+ return std::pair(MaxNumVGPRs, MaxNumAGPRs);
+}
+
void GCNSubtarget::adjustSchedDependency(
SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
const TargetSchedModel *SchedModel) const {
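A minimal sketch of how the gfx90a budget split above plays out with concrete numbers (illustrative only, not the patch's code; it assumes a total vector budget of 512 registers and the 256-register VGPR_32/AGPR_32 files):

#include <algorithm>
#include <cassert>
#include <utility>

static std::pair<unsigned, unsigned> splitBudget(unsigned MaxVectorRegs,
                                                 unsigned MinA, unsigned MaxA) {
  const unsigned TotalAGPRs = 256, TotalVGPRs = 256;
  MinA = std::min((MinA + 3) & ~3u, TotalAGPRs);        // align to granule of 4
  MaxA = std::min(std::max(MinA, MaxA), MaxVectorRegs);  // ensure min <= max
  MinA = std::min(std::min(MinA, TotalAGPRs), MaxA);
  unsigned MaxV = std::min(MaxVectorRegs - MinA, TotalVGPRs);
  MaxA = std::min(MaxVectorRegs - MaxV, MaxA);
  return {MaxV, MaxA};
}

int main() {
  // "amdgpu-agpr-alloc"="4,8": reserve at least 4 AGPRs, allow at most 8.
  assert(splitBudget(512, 4, 8) == std::make_pair(256u, 8u));
  // No attribute: the budget is split in half (256 VGPRs, 256 AGPRs).
  assert(splitBudget(512, 256, 256) == std::make_pair(256u, 256u));
  return 0;
}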
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index b22d421..785ede3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -123,6 +123,7 @@ protected:
bool HasSMemRealTime = false;
bool HasIntClamp = false;
bool HasFmaMixInsts = false;
+ bool HasFmaMixBF16Insts = false;
bool HasMovrel = false;
bool HasVGPRIndexMode = false;
bool HasScalarDwordx3Loads = false;
@@ -244,7 +245,9 @@ protected:
bool HasVMEMtoScalarWriteHazard = false;
bool HasSMEMtoVectorWriteHazard = false;
bool HasInstFwdPrefetchBug = false;
+ bool HasVmemPrefInsts = false;
bool HasSafeSmemPrefetch = false;
+ bool HasSafeCUPrefetch = false;
bool HasVcmpxExecWARHazard = false;
bool HasLdsBranchVmemWARHazard = false;
bool HasNSAtoVMEMBug = false;
@@ -462,6 +465,8 @@ public:
return HasFmaMixInsts;
}
+ bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }
+
bool hasCARRY() const {
return true;
}
@@ -987,8 +992,12 @@ public:
bool hasPrefetch() const { return GFX12Insts; }
+ bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
+
bool hasSafeSmemPrefetch() const { return HasSafeSmemPrefetch; }
+ bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
+
// Has s_cmpk_* instructions.
bool hasSCmpK() const { return getGeneration() < GFX12; }
@@ -1308,7 +1317,7 @@ public:
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
- bool hasVALUReadSGPRHazard() const { return getGeneration() == GFX12; }
+ bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
/// Return if operations acting on VGPR tuples require even alignment.
bool needsAlignedVGPRs() const { return GFX90AInsts || GFX1250Insts; }
@@ -1658,6 +1667,10 @@ public:
return getMaxNumVGPRs(F);
}
+ /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
+ /// of waves per execution unit required for the function \p F.
+ std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
+
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of VGPRs explicitly
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 3902d4c..40b8bcd 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -392,11 +392,13 @@ enum CPol {
TH_ATOMIC_CASCADE = 4, // Cascading vs regular
// Scope
- SCOPE = 0x3 << 3, // All Scope bits
- SCOPE_CU = 0 << 3,
- SCOPE_SE = 1 << 3,
- SCOPE_DEV = 2 << 3,
- SCOPE_SYS = 3 << 3,
+ SCOPE_SHIFT = 3,
+ SCOPE_MASK = 0x3,
+ SCOPE = SCOPE_MASK << SCOPE_SHIFT, // All Scope bits
+ SCOPE_CU = 0 << SCOPE_SHIFT,
+ SCOPE_SE = 1 << SCOPE_SHIFT,
+ SCOPE_DEV = 2 << SCOPE_SHIFT,
+ SCOPE_SYS = 3 << SCOPE_SHIFT,
NV = 1 << 5, // Non-volatile bit
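As a quick illustration of what the shift/mask split buys (not part of the patch; constants copied from the enum above), the scope field can now be decoded and rewritten without magic numbers:

#include <cassert>

enum : unsigned {
  SCOPE_SHIFT = 3,
  SCOPE_MASK = 0x3,
  SCOPE = SCOPE_MASK << SCOPE_SHIFT,
  SCOPE_CU = 0 << SCOPE_SHIFT,
  SCOPE_SE = 1 << SCOPE_SHIFT,
  SCOPE_DEV = 2 << SCOPE_SHIFT,
  SCOPE_SYS = 3 << SCOPE_SHIFT,
};

int main() {
  unsigned CPol = SCOPE_DEV | 0x4; // some other cache-policy bits set
  assert(((CPol & SCOPE) >> SCOPE_SHIFT) == 2); // decode the raw scope value
  CPol = (CPol & ~SCOPE) | SCOPE_SYS;           // rewrite scope, keep the rest
  assert((CPol & SCOPE) == SCOPE_SYS && (CPol & 0x4));
  return 0;
}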
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index e5d1eaa..b77da4d 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1062,9 +1062,13 @@ bool SIFoldOperandsImpl::tryFoldRegSeqSplat(
switch (OpTy) {
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0);
break;
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1);
break;
default:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f1a8ee1..9017f4f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -882,7 +882,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasMad64_32())
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom);
- if (Subtarget->hasPrefetch() && Subtarget->hasSafeSmemPrefetch())
+ if (Subtarget->hasSafeSmemPrefetch() || Subtarget->hasVmemPrefInsts())
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
if (Subtarget->hasIEEEMinimumMaximumInsts()) {
@@ -1061,10 +1061,12 @@ ArrayRef<MCPhysReg> SITargetLowering::getRoundingControlRegisters() const {
// where this is OK to use.
bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
EVT DestVT, EVT SrcVT) const {
- return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
- (Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
- DestVT.getScalarType() == MVT::f32 &&
- SrcVT.getScalarType() == MVT::f16 &&
+ return DestVT.getScalarType() == MVT::f32 &&
+ ((((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
+ (Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
+ SrcVT.getScalarType() == MVT::f16) ||
+ (Opcode == ISD::FMA && Subtarget->hasFmaMixBF16Insts() &&
+ SrcVT.getScalarType() == MVT::bf16)) &&
// TODO: This probably only requires no input flushing?
denormalModeIsFlushAllF32(DAG.getMachineFunction());
}
@@ -1475,6 +1477,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineMemOperand::MOVolatile;
return true;
}
+ case Intrinsic::amdgcn_flat_load_monitor_b32:
+ case Intrinsic::amdgcn_flat_load_monitor_b64:
+ case Intrinsic::amdgcn_flat_load_monitor_b128:
+ case Intrinsic::amdgcn_global_load_monitor_b32:
+ case Intrinsic::amdgcn_global_load_monitor_b64:
+ case Intrinsic::amdgcn_global_load_monitor_b128:
case Intrinsic::amdgcn_ds_load_tr6_b96:
case Intrinsic::amdgcn_ds_load_tr4_b64:
case Intrinsic::amdgcn_ds_load_tr8_b64:
@@ -1548,7 +1556,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
return true;
}
- case Intrinsic::amdgcn_s_prefetch_data: {
+ case Intrinsic::amdgcn_s_prefetch_data:
+ case Intrinsic::amdgcn_flat_prefetch:
+ case Intrinsic::amdgcn_global_prefetch: {
Info.opc = ISD::INTRINSIC_VOID;
Info.memVT = EVT::getIntegerVT(CI.getContext(), 8);
Info.ptrVal = CI.getArgOperand(0);
@@ -1599,10 +1609,16 @@ bool SITargetLowering::getAddrModeArguments(const IntrinsicInst *II,
case Intrinsic::amdgcn_ds_atomic_barrier_arrive_rtn_b64:
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmin_num:
+ case Intrinsic::amdgcn_flat_load_monitor_b128:
+ case Intrinsic::amdgcn_flat_load_monitor_b32:
+ case Intrinsic::amdgcn_flat_load_monitor_b64:
case Intrinsic::amdgcn_global_atomic_csub:
case Intrinsic::amdgcn_global_atomic_fmax_num:
case Intrinsic::amdgcn_global_atomic_fmin_num:
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
+ case Intrinsic::amdgcn_global_load_monitor_b128:
+ case Intrinsic::amdgcn_global_load_monitor_b32:
+ case Intrinsic::amdgcn_global_load_monitor_b64:
case Intrinsic::amdgcn_global_load_tr_b64:
case Intrinsic::amdgcn_global_load_tr_b128:
case Intrinsic::amdgcn_global_load_tr4_b64:
@@ -4440,19 +4456,28 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
}
SDValue SITargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const {
- if (Op->isDivergent())
+ if (Op->isDivergent() &&
+ (!Subtarget->hasVmemPrefInsts() || !Op.getConstantOperandVal(4)))
+ // Cannot do I$ prefetch with divergent pointer.
return SDValue();
switch (cast<MemSDNode>(Op)->getAddressSpace()) {
case AMDGPUAS::FLAT_ADDRESS:
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS:
- case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
break;
+ case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
+ if (Subtarget->hasSafeSmemPrefetch())
+ break;
+ [[fallthrough]];
default:
return SDValue();
}
+ // I$ prefetch
+ if (!Subtarget->hasSafeSmemPrefetch() && !Op.getConstantOperandVal(4))
+ return SDValue();
+
return Op;
}
@@ -14154,6 +14179,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
(VT == MVT::f32 || VT == MVT::f64 ||
(VT == MVT::f16 && Subtarget->has16BitInsts()) ||
+ (VT == MVT::bf16 && Subtarget->hasBF16PackedInsts()) ||
+ (VT == MVT::v2bf16 && Subtarget->hasBF16PackedInsts()) ||
(VT == MVT::v2f16 && Subtarget->hasVOP3PInsts())) &&
Op0.hasOneUse()) {
if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
@@ -15869,6 +15896,78 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
return SDValue(CSrc, 0);
}
+SDValue SITargetLowering::performSelectCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+
+ // Try to fold CMP + SELECT patterns with shared constants (both FP and
+ // integer).
+ // Detect when CMP and SELECT use the same constant and fold them to avoid
+ // loading the constant twice. Specifically handles patterns like:
+ // %cmp = icmp eq i32 %val, 4242
+ // %sel = select i1 %cmp, i32 4242, i32 %other
+ // It can be optimized to reuse %val instead of 4242 in the select.
+ SDValue Cond = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue FalseVal = N->getOperand(2);
+
+ // Check if condition is a comparison.
+ if (Cond.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ bool isFloatingPoint = LHS.getValueType().isFloatingPoint();
+ bool isInteger = LHS.getValueType().isInteger();
+
+ // Handle simple floating-point and integer types only.
+ if (!isFloatingPoint && !isInteger)
+ return SDValue();
+
+ bool isEquality = CC == (isFloatingPoint ? ISD::SETOEQ : ISD::SETEQ);
+ bool isNonEquality = CC == (isFloatingPoint ? ISD::SETONE : ISD::SETNE);
+ if (!isEquality && !isNonEquality)
+ return SDValue();
+
+ SDValue ArgVal, ConstVal;
+ if ((isFloatingPoint && isa<ConstantFPSDNode>(RHS)) ||
+ (isInteger && isa<ConstantSDNode>(RHS))) {
+ ConstVal = RHS;
+ ArgVal = LHS;
+ } else if ((isFloatingPoint && isa<ConstantFPSDNode>(LHS)) ||
+ (isInteger && isa<ConstantSDNode>(LHS))) {
+ ConstVal = LHS;
+ ArgVal = RHS;
+ } else {
+ return SDValue();
+ }
+
+ // Skip optimization for inlinable immediates.
+ if (isFloatingPoint) {
+ const APFloat &Val = cast<ConstantFPSDNode>(ConstVal)->getValueAPF();
+ if (!Val.isNormal() || Subtarget->getInstrInfo()->isInlineConstant(Val))
+ return SDValue();
+ } else {
+ if (AMDGPU::isInlinableIntLiteral(
+ cast<ConstantSDNode>(ConstVal)->getSExtValue()))
+ return SDValue();
+ }
+
+ // For equality and non-equality comparisons, patterns:
+ // select (setcc x, const), const, y -> select (setcc x, const), x, y
+ // select (setccinv x, const), y, const -> select (setccinv x, const), y, x
+ if (!(isEquality && TrueVal == ConstVal) &&
+ !(isNonEquality && FalseVal == ConstVal))
+ return SDValue();
+
+ SDValue SelectLHS = (isEquality && TrueVal == ConstVal) ? ArgVal : TrueVal;
+ SDValue SelectRHS =
+ (isNonEquality && FalseVal == ConstVal) ? ArgVal : FalseVal;
+ return DCI.DAG.getNode(ISD::SELECT, SDLoc(N), N->getValueType(0), Cond,
+ SelectLHS, SelectRHS);
+}
+
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
@@ -15917,6 +16016,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performFMulCombine(N, DCI);
case ISD::SETCC:
return performSetCCCombine(N, DCI);
+ case ISD::SELECT:
+ if (auto Res = performSelectCombine(N, DCI))
+ return Res;
+ break;
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMAXNUM_IEEE:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index acf6158..dedd9ae 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -211,6 +211,7 @@ private:
SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFPRoundCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const;
unsigned getFusedOpcode(const SelectionDAG &DAG,
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 9faf497..520c321 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -552,7 +552,7 @@ public:
(!Inst.mayLoad() || SIInstrInfo::isAtomicNoRet(Inst))) {
// FLAT and SCRATCH instructions may access scratch. Other VMEM
// instructions do not.
- if (SIInstrInfo::isFLAT(Inst) && mayAccessScratchThroughFlat(Inst))
+ if (TII->mayAccessScratchThroughFlat(Inst))
return SCRATCH_WRITE_ACCESS;
return VMEM_WRITE_ACCESS;
}
@@ -565,7 +565,6 @@ public:
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const;
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
- bool mayAccessScratchThroughFlat(const MachineInstr &MI) const;
bool isVmemAccess(const MachineInstr &MI) const;
bool generateWaitcntInstBefore(MachineInstr &MI,
WaitcntBrackets &ScoreBrackets,
@@ -2108,8 +2107,9 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
bool SIInsertWaitcnts::mayAccessVMEMThroughFlat(const MachineInstr &MI) const {
assert(TII->isFLAT(MI));
- // All flat instructions use the VMEM counter.
- assert(TII->usesVM_CNT(MI));
+ // All flat instructions use the VMEM counter except prefetch.
+ if (!TII->usesVM_CNT(MI))
+ return false;
// If there are no memory operands then conservatively assume the flat
// operation may access VMEM.
@@ -2159,32 +2159,6 @@ bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const {
return false;
}
-// This is a flat memory operation. Check to see if it has memory tokens for
-// either scratch or FLAT.
-bool SIInsertWaitcnts::mayAccessScratchThroughFlat(
- const MachineInstr &MI) const {
- assert(TII->isFLAT(MI));
-
- // SCRATCH instructions always access scratch.
- if (TII->isFLATScratch(MI))
- return true;
-
- // GLOBAL instructions never access scratch.
- if (TII->isFLATGlobal(MI))
- return false;
-
- // If there are no memory operands then conservatively assume the flat
- // operation may access scratch.
- if (MI.memoperands_empty())
- return true;
-
- // See if any memory operand specifies an address space that involves scratch.
- return any_of(MI.memoperands(), [](const MachineMemOperand *Memop) {
- unsigned AS = Memop->getAddrSpace();
- return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
- });
-}
-
bool SIInsertWaitcnts::isVmemAccess(const MachineInstr &MI) const {
return (TII->isFLAT(MI) && mayAccessVMEMThroughFlat(MI)) ||
(TII->isVMEM(MI) && !AMDGPU::getMUBUFIsBufferInv(MI.getOpcode()));
@@ -2295,9 +2269,6 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
}
- // A Flat memory operation must access at least one address space.
- assert(FlatASCount);
-
// This is a flat memory operation that access both VMEM and LDS, so note it
// - it will require that both the VM and LGKM be flushed to zero if it is
// pending when a VM or LGKM dependency occurs.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 40e6871..2aa6b4e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2508,7 +2508,20 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(DstHi);
}
break;
+
+ case AMDGPU::V_MAX_BF16_PSEUDO_e64:
+ assert(ST.hasBF16PackedInsts());
+ MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
+ MI.addOperand(MachineOperand::CreateImm(0)); // op_sel
+ MI.addOperand(MachineOperand::CreateImm(0)); // neg_lo
+ MI.addOperand(MachineOperand::CreateImm(0)); // neg_hi
+ auto Op0 = getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
+ Op0->setImm(Op0->getImm() | SISrcMods::OP_SEL_1);
+ auto Op1 = getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
+ Op1->setImm(Op1->getImm() | SISrcMods::OP_SEL_1);
+ break;
}
+
return true;
}
@@ -2733,49 +2746,47 @@ static MachineInstr *swapImmOperands(MachineInstr &MI,
}
bool SIInstrInfo::isLegalToSwap(const MachineInstr &MI, unsigned OpIdx0,
- const MachineOperand *MO0, unsigned OpIdx1,
- const MachineOperand *MO1) const {
+ unsigned OpIdx1) const {
const MCInstrDesc &InstDesc = MI.getDesc();
const MCOperandInfo &OpInfo0 = InstDesc.operands()[OpIdx0];
const MCOperandInfo &OpInfo1 = InstDesc.operands()[OpIdx1];
- const TargetRegisterClass *DefinedRC1 =
- OpInfo1.RegClass != -1 ? RI.getRegClass(OpInfo1.RegClass) : nullptr;
- const TargetRegisterClass *DefinedRC0 =
- OpInfo1.RegClass != -1 ? RI.getRegClass(OpInfo0.RegClass) : nullptr;
unsigned Opc = MI.getOpcode();
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ const MachineOperand &MO0 = MI.getOperand(OpIdx0);
+ const MachineOperand &MO1 = MI.getOperand(OpIdx1);
+
// Swap doesn't breach constant bus or literal limits
// It may move literal to position other than src0, this is not allowed
// pre-gfx10 However, most test cases need literals in Src0 for VOP
// FIXME: After gfx9, literal can be in place other than Src0
if (isVALU(MI)) {
- if ((int)OpIdx0 == Src0Idx && !MO0->isReg() &&
- !isInlineConstant(*MO0, OpInfo1))
+ if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
+ !isInlineConstant(MO0, OpInfo1))
return false;
- if ((int)OpIdx1 == Src0Idx && !MO1->isReg() &&
- !isInlineConstant(*MO1, OpInfo0))
+ if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&
+ !isInlineConstant(MO1, OpInfo0))
return false;
}
- if ((int)OpIdx1 != Src0Idx && MO0->isReg()) {
- if (!DefinedRC1)
+ if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
+ if (OpInfo1.RegClass == -1)
return OpInfo1.OperandType == MCOI::OPERAND_UNKNOWN;
- return isLegalRegOperand(MI, OpIdx1, *MO0) &&
- (!MO1->isReg() || isLegalRegOperand(MI, OpIdx0, *MO1));
+ return isLegalRegOperand(MI, OpIdx1, MO0) &&
+ (!MO1.isReg() || isLegalRegOperand(MI, OpIdx0, MO1));
}
- if ((int)OpIdx0 != Src0Idx && MO1->isReg()) {
- if (!DefinedRC0)
+ if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {
+ if (OpInfo0.RegClass == -1)
return OpInfo0.OperandType == MCOI::OPERAND_UNKNOWN;
- return (!MO0->isReg() || isLegalRegOperand(MI, OpIdx1, *MO0)) &&
- isLegalRegOperand(MI, OpIdx0, *MO1);
+ return (!MO0.isReg() || isLegalRegOperand(MI, OpIdx1, MO0)) &&
+ isLegalRegOperand(MI, OpIdx0, MO1);
}
// No need to check 64-bit literals since swapping does not bring new
// 64-bit literals into current instruction to fold to 32-bit
- return isImmOperandLegal(MI, OpIdx1, *MO0);
+ return isImmOperandLegal(MI, OpIdx1, MO0);
}
MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
@@ -2797,12 +2808,12 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
static_cast<int>(Src1Idx) &&
"inconsistency with findCommutedOpIndices");
- MachineOperand &Src0 = MI.getOperand(Src0Idx);
- MachineOperand &Src1 = MI.getOperand(Src1Idx);
- if (!isLegalToSwap(MI, Src0Idx, &Src0, Src1Idx, &Src1)) {
+ if (!isLegalToSwap(MI, Src0Idx, Src1Idx))
return nullptr;
- }
+
MachineInstr *CommutedMI = nullptr;
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
+ MachineOperand &Src1 = MI.getOperand(Src1Idx);
if (Src0.isReg() && Src1.isReg()) {
// Be sure to copy the source modifiers to the right place.
CommutedMI =
@@ -4238,6 +4249,32 @@ bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
}
+bool SIInstrInfo::mayAccessScratchThroughFlat(const MachineInstr &MI) const {
+ if (!isFLAT(MI) || isFLATGlobal(MI))
+ return false;
+
+ // If scratch is not initialized, we can never access it.
+ if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))
+ return false;
+
+ // SCRATCH instructions always access scratch.
+ if (isFLATScratch(MI))
+ return true;
+
+ // If there are no memory operands then conservatively assume the flat
+ // operation may access scratch.
+ if (MI.memoperands_empty())
+ return true;
+
+ // TODO (?): Does this need to be taught how to read noalias.addrspace?
+
+ // See if any memory operand specifies an address space that involves scratch.
+ return any_of(MI.memoperands(), [](const MachineMemOperand *Memop) {
+ unsigned AS = Memop->getAddrSpace();
+ return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
+ });
+}
+
bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) {
// Skip the full operand and register alias search modifiesRegister
// does. There's only a handful of instructions that touch this, it's only an
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 800ea9a..e042b59 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -197,8 +197,7 @@ protected:
AMDGPU::OpName Src0OpName, MachineOperand &Src1,
AMDGPU::OpName Src1OpName) const;
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx,
- const MachineOperand *fromMO, unsigned toIdx,
- const MachineOperand *toMO) const;
+ unsigned toIdx) const;
MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned OpIdx0,
unsigned OpIdx1) const override;
@@ -679,6 +678,12 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
}
+ /// \returns true for SCRATCH_ instructions, or FLAT_ instructions with
+ /// SCRATCH_ memory operands.
+ /// Conservatively correct; will return true if \p MI cannot be proven
+ /// to not hit scratch.
+ bool mayAccessScratchThroughFlat(const MachineInstr &MI) const;
+
static bool isBlockLoadStore(uint16_t Opcode) {
switch (Opcode) {
case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 485ca78..83b0490 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1662,6 +1662,8 @@ def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
+def VOP3PMadMixBF16ModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixBF16ModsExt">;
+def VOP3PMadMixBF16Mods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixBF16Mods">;
def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
@@ -2863,9 +2865,11 @@ def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>;
+def VOP_BF16_BF16_BF16 : VOPProfile <[bf16, bf16, bf16, untyped]>;
def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
+def VOP_BF16_BF16_BF16_BF16 : VOPProfile <[bf16, bf16, bf16, bf16, untyped]>;
def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;
@@ -2917,6 +2921,7 @@ def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=
def VOP_I64_I64_I64_ARITH : VOPProfile <[i64, i64, i64, untyped], /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
+def VOP_F32_BF16_BF16_BF16 : VOPProfile <[f32, bf16, bf16, bf16]>;
def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>;
def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>;
def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d05be8f..54fa192 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1894,6 +1894,9 @@ let SubtargetPredicate = UseRealTrue16Insts in
def : ClampPat<V_MAX_F16_t16_e64, f16>;
let SubtargetPredicate = UseFakeTrue16Insts in
def : ClampPat<V_MAX_F16_fake16_e64, f16>;
+// FIXME-TRUE16: Pseudo expansion of this won't work with True16.
+let True16Predicate = UseFakeTrue16Insts in
+def : ClampPat<V_MAX_BF16_PSEUDO_e64, bf16>;
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
@@ -1903,6 +1906,13 @@ def : GCNPat <
>;
}
+let SubtargetPredicate = HasBF16PackedInsts in {
+def : GCNPat <
+ (v2bf16 (AMDGPUclamp (VOP3PMods v2bf16:$src0, i32:$src0_modifiers))),
+ (V_PK_MAX_NUM_BF16 $src0_modifiers, $src0,
+ $src0_modifiers, $src0, DSTCLAMP.ENABLE)
+>;
+} // End SubtargetPredicate = HasBF16PackedInsts
/********** ================================ **********/
/********** Floating point absolute/negative **********/
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 9f61bf8..9509199 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -351,6 +351,7 @@ void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
MachineRegisterInfo &MRI = MF.getRegInfo();
BitVector ReservedRegs = TRI->getReservedRegs(MF);
BitVector NonWwmAllocMask(TRI->getNumRegs());
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
// FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
// to have a balanced allocation between WWM values and per-thread vector
@@ -359,7 +360,7 @@ void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
NumRegs =
std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);
- auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF);
+ auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
// Try to use the highest available registers for now. Later after
// vgpr-regalloc, they can be shifted to the lowest range.
unsigned I = 0;
@@ -376,7 +377,7 @@ void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
// Reserve an arbitrary register and report the error.
TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);
MF.getFunction().getContext().emitError(
- "can't find enough VGPRs for wwm-regalloc");
+ "cannot find enough VGPRs for wwm-regalloc");
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index f0be204..9a1448f 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -81,11 +81,15 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
}
- MayNeedAGPRs = ST.hasMAIInsts() && !MFMAVGPRForm;
- if (!MFMAVGPRForm && ST.hasGFX90AInsts() &&
- ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
- !mayUseAGPRs(F))
- MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
+ MayNeedAGPRs = ST.hasMAIInsts();
+ if (ST.hasGFX90AInsts()) {
+ // FIXME: MayNeedAGPRs is a misnomer for how this is used. MFMA selection
+ // should be separated from availability of AGPRs
+ if (MFMAVGPRForm ||
+ (ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
+ !mayUseAGPRs(F)))
+ MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
+ }
if (AMDGPU::isChainCC(CC)) {
// Chain functions don't receive an SP from their caller, but are free to
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 3212060..f1262e11 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -321,7 +321,7 @@ public:
bool IsNonTemporal,
bool IsLastUse = false) const = 0;
- virtual bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const {
+ virtual bool finalizeStore(MachineInstr &MI, bool Atomic) const {
return false;
};
@@ -602,7 +602,7 @@ public:
bool IsVolatile, bool IsNonTemporal,
bool IsLastUse) const override;
- bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const override;
+ bool finalizeStore(MachineInstr &MI, bool Atomic) const override;
bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
@@ -704,16 +704,16 @@ void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
DiagnosticInfoUnsupported(Fn, Str.str(), MI.getDebugLoc(), DS_Warning));
}
-/// Reads \p MI's MMRAs to parse the "amdgpu-as" MMRA.
-/// If this tag isn't present, or if it has no meaningful values, returns \p
-/// Default. Otherwise returns all the address spaces concerned by the MMRA.
-static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI,
- SIAtomicAddrSpace Default) {
- static constexpr StringLiteral FenceASPrefix = "amdgpu-as";
+/// Reads \p MI's MMRAs to parse the "amdgpu-synchronize-as" MMRA.
+/// If this tag isn't present, or if it has no meaningful values, returns
+/// std::nullopt; otherwise returns the address spaces specified by the MD.
+static std::optional<SIAtomicAddrSpace>
+getSynchronizeAddrSpaceMD(const MachineInstr &MI) {
+ static constexpr StringLiteral FenceASPrefix = "amdgpu-synchronize-as";
auto MMRA = MMRAMetadata(MI.getMMRAMetadata());
if (!MMRA)
- return Default;
+ return std::nullopt;
SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
for (const auto &[Prefix, Suffix] : MMRA) {
@@ -726,7 +726,10 @@ static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI,
diagnoseUnknownMMRAASName(MI, Suffix);
}
- return (Result != SIAtomicAddrSpace::NONE) ? Result : Default;
+ if (Result == SIAtomicAddrSpace::NONE)
+ return std::nullopt;
+
+ return Result;
}
} // end anonymous namespace
@@ -903,12 +906,19 @@ SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
*ScopeOrNone;
- if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
- ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
+ if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
+ // We currently expect refineOrderingAS to be the only place that
+ // can refine the AS ordered by the fence.
+ // If that changes, we need to review the semantics of that function
+ // in case it needs to preserve certain address spaces.
reportUnsupported(MI, "Unsupported atomic address space");
return std::nullopt;
}
+ auto SynchronizeAS = getSynchronizeAddrSpaceMD(*MI);
+ if (SynchronizeAS)
+ OrderingAddrSpace = *SynchronizeAS;
+
return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
}
@@ -2526,9 +2536,6 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
if (IsVolatile) {
Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
- if (Op == SIMemOp::STORE)
- Changed |= insertWaitsBeforeSystemScopeStore(MI);
-
// Ensure operation has completed at system scope to cause all volatile
// operations to be visible outside the program in a global order. Do not
// request cross address space as only the global address space can be
@@ -2541,11 +2548,24 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
return Changed;
}
-bool SIGfx12CacheControl::expandSystemScopeStore(
- MachineBasicBlock::iterator &MI) const {
- MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
- if (CPol && ((CPol->getImm() & CPol::SCOPE) == CPol::SCOPE_SYS))
- return insertWaitsBeforeSystemScopeStore(MI);
+bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
+ MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
+ if (!CPol)
+ return false;
+
+ const unsigned Scope = CPol->getImm() & CPol::SCOPE;
+
+ // GFX12.0 only: Extra waits needed before system scope stores.
+ if (!ST.hasGFX1250Insts()) {
+ if (!Atomic && Scope == CPol::SCOPE_SYS)
+ return insertWaitsBeforeSystemScopeStore(MI);
+ return false;
+ }
+
+ // GFX12.5 only: Require SCOPE_SE on stores that may hit the scratch address
+ // space.
+ if (TII->mayAccessScratchThroughFlat(MI) && Scope == CPol::SCOPE_CU)
+ return setScope(MI, CPol::SCOPE_SE);
return false;
}
@@ -2648,6 +2668,8 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
assert(!MI->mayLoad() && MI->mayStore());
bool Changed = false;
+ // FIXME: Necessary hack because iterator can lose track of the store.
+ MachineInstr &StoreMI = *MI;
if (MOI.isAtomic()) {
if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
@@ -2664,6 +2686,7 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
MOI.getIsCrossAddressSpaceOrdering(),
Position::BEFORE);
+ Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/true);
return Changed;
}
@@ -2676,7 +2699,7 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
// GFX12 specific, scope(desired coherence domain in cache hierarchy) is
// instruction field, do not confuse it with atomic scope.
- Changed |= CC->expandSystemScopeStore(MI);
+ Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/false);
return Changed;
}
@@ -2687,11 +2710,7 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
AtomicPseudoMIs.push_back(MI);
bool Changed = false;
- // Refine fenced address space based on MMRAs.
- //
- // TODO: Should we support this MMRA on other atomic operations?
- auto OrderingAddrSpace =
- getFenceAddrSpaceMMRA(*MI, MOI.getOrderingAddrSpace());
+ const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();
if (MOI.isAtomic()) {
const AtomicOrdering Order = MOI.getOrdering();
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 84cfa87..f3acc5c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -572,65 +572,6 @@ MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
}
-std::pair<unsigned, unsigned>
-SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
- const unsigned MaxVectorRegs = ST.getMaxNumVGPRs(MF);
-
- unsigned MaxNumVGPRs = MaxVectorRegs;
- unsigned MaxNumAGPRs = 0;
-
- // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
- // a wave may have up to 512 total vector registers combining together both
- // VGPRs and AGPRs. Hence, in an entry function without calls and without
- // AGPRs used within it, it is possible to use the whole vector register
- // budget for VGPRs.
- //
- // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
- // register file accordingly.
- if (ST.hasGFX90AInsts()) {
- unsigned MinNumAGPRs = 0;
- const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
- const unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
-
- const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};
-
- // TODO: Move this logic into subtarget on IR function
- //
- // TODO: The lower bound should probably force the number of required
- // registers up, overriding amdgpu-waves-per-eu.
- std::tie(MinNumAGPRs, MaxNumAGPRs) = AMDGPU::getIntegerPairAttribute(
- MF.getFunction(), "amdgpu-agpr-alloc", DefaultNumAGPR,
- /*OnlyFirstRequired=*/true);
-
- if (MinNumAGPRs == DefaultNumAGPR.first) {
- // Default to splitting half the registers if AGPRs are required.
- MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
- } else {
- // Align to accum_offset's allocation granularity.
- MinNumAGPRs = alignTo(MinNumAGPRs, 4);
-
- MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
- }
-
- // Clamp values to be inbounds of our limits, and ensure min <= max.
-
- MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
- MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);
-
- MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, TotalNumVGPRs);
- MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);
-
- assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
- MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= TotalNumVGPRs &&
- "invalid register counts");
- } else if (ST.hasMAIInsts()) {
- // On gfx908 the number of AGPRs always equals the number of VGPRs.
- MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
- }
-
- return std::pair(MaxNumVGPRs, MaxNumAGPRs);
-}
-
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::MODE);
@@ -742,7 +683,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Reserve VGPRs/AGPRs.
//
- auto [MaxNumVGPRs, MaxNumAGPRs] = getMaxNumVectorRegs(MF);
+ auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
for (const TargetRegisterClass *RC : regclasses()) {
if (RC->isBaseClass() && isVGPRClass(RC)) {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 0008e5f..5508f07 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -90,11 +90,6 @@ public:
/// spilling is needed.
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
- /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
- /// of waves per execution unit required for the function \p MF.
- std::pair<unsigned, unsigned>
- getMaxNumVectorRegs(const MachineFunction &MF) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isAsmClobberable(const MachineFunction &MF,
MCRegister PhysReg) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 0039d2f..218841d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -109,6 +109,23 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
let TSFlags{2} = HasVGPR;
let TSFlags{3} = HasAGPR;
let TSFlags{4} = HasSGPR;
+
+ // RA uses the RegisterClass AllocationPriority, amongst other info (e.g. ordering in the basic block),
+ // to decide which registers to try to assign first. Usually this RegisterClass priority is weighted
+ // very heavily, if not treated as the deciding factor, when choosing which VirtReg to allocate next.
+ //
+ // We have 5 bits to assign AllocationPriorities to RegisterClasses. Generally, it is beneficial to
+ // assign more constrained RegisterClasses first. As a result, we prioritize register classes whose
+ // tuples cover more 32-bit registers (e.g. VReg_512) over classes with smaller tuples (e.g. VGPR_32).
+ //
+ // The interesting case is the vector registers on architectures that have ARegs, VRegs, and AVRegs.
+ // Here we would like to assign ARegs and VRegs before AVRegs, as AVRegs are less constrained and can
+ // be assigned to either AGPRs or VGPRs. We use the 5th bit to encode this in the RegisterClass
+ // AllocationPriority: BaseClassPriority turns the bit on, and BaseClassScaleFactor scales it (i.e. 1 << 4).
+ field int BaseClassPriority = 1;
+ field int BaseClassScaleFactor = 16;
+
}
multiclass SIRegLoHi16 <string n, bits<8> regIdx, bit ArtificialHigh = 1,
@@ -575,7 +592,7 @@ let HasVGPR = 1 in {
def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (interleave (sequence "VGPR%u_LO16", 0, 255),
(sequence "VGPR%u_HI16", 0, 255)))> {
- let AllocationPriority = 2;
+ let AllocationPriority = !add(2, !mul(BaseClassPriority, BaseClassScaleFactor));
let Size = 16;
let GeneratePressureSet = 0;
@@ -601,7 +618,7 @@ def VGPR_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
// i16/f16 only on VI+
def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
(add (sequence "VGPR%u", 0, 255))> {
- let AllocationPriority = 0;
+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor));
let Size = 32;
let Weight = 1;
let BaseClassOrder = 32;
@@ -610,7 +627,7 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types
// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
(add (sequence "VGPR%u", 0, 127))> {
- let AllocationPriority = 0;
+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor));
let GeneratePressureSet = 0;
let Size = 32;
let Weight = 1;
@@ -668,7 +685,7 @@ def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
// AccVGPR 32-bit registers
def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add (sequence "AGPR%u", 0, 255))> {
- let AllocationPriority = 0;
+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor));
let Size = 32;
let Weight = 1;
let BaseClassOrder = 32;
@@ -940,14 +957,23 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
// Requires n v_mov_b32 to copy
let CopyCost = numRegs;
- let AllocationPriority = !sub(numRegs, 1);
+
+ // Since we only have 5 bits for the RegisterClass AllocationPriority, and since we use the
+ // 5th bit for BaseClassPriority, we need to encode the SizePriority into 4 bits. As a result
+ // of this encoding, registers with numRegs 15 or 16 get a SizePriority of 14, and registers
+ // with numRegs 17+ get a SizePriority of 15. In practice, there is only one RegClass per
+ // vector register type in each of these groups (i.e. numRegs = 15,16 : {VReg_512}, and
+ // numRegs = 17+ : {VReg_1024}), so no information is lost by compressing.
+ defvar SizePriority = !if(!le(numRegs, 14), !sub(numRegs, 1), !if(!le(numRegs, 16), 14, 15));
+
+ let AllocationPriority = !add(SizePriority, !mul(BaseClassPriority, BaseClassScaleFactor));
let Weight = numRegs;
}
// Define a register tuple class, along with one requiring an even
// aligned base register.
multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
- let HasVGPR = 1 in {
+ let HasVGPR = 1, BaseClassPriority = 1 in {
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, regList> {
let BaseClassOrder = !mul(numRegs, 32);
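A small sketch of the resulting 5-bit priorities (illustrative only, not part of the patch), matching the encoding described in the comments above:

#include <cassert>

// SizePriority compressed into 4 bits, base-class bit scaled by 16.
static unsigned sizePriority(unsigned NumRegs) {
  return NumRegs <= 14 ? NumRegs - 1 : (NumRegs <= 16 ? 14 : 15);
}
static unsigned allocPriority(unsigned NumRegs, bool IsBaseClass) {
  return sizePriority(NumRegs) + (IsBaseClass ? 16 : 0);
}

int main() {
  assert(allocPriority(1, true) == 16);   // VGPR_32 / AGPR_32
  assert(allocPriority(16, true) == 30);  // VReg_512 / AReg_512
  assert(allocPriority(16, false) == 14); // AV_512: less constrained, lower
  assert(allocPriority(32, false) == 15); // AV_1024; still fits in 5 bits
  return 0;
}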
@@ -981,7 +1007,7 @@ defm VReg_1024 : VRegClass<32, Reg1024Types.types, (add VGPR_1024)>;
}
multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
- let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
+ let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1, BaseClassPriority = 1 in {
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, regList> {
let BaseClassOrder = !mul(numRegs, 32);
@@ -1066,6 +1092,7 @@ def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_6
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
let HasVGPR = 1;
let HasAGPR = 1;
+ let BaseClassPriority = 0;
let Size = 32;
}
} // End GeneratePressureSet = 0
@@ -1074,7 +1101,7 @@ def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_3
// aligned base register.
multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
dag vregList, dag aregList> {
- let HasVGPR = 1, HasAGPR = 1 in {
+ let HasVGPR = 1, HasAGPR = 1, BaseClassPriority = 0 in {
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 38cc51b..4bda51d 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -856,9 +856,9 @@ def smrd_sextloadi16 : SMRDLoadPat<sextloadi16>;
def smrd_prefetch : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
(prefetch node:$ptr, node:$rw, node:$loc, node:$type),
- [{ return !N->getOperand(1)->isDivergent();}]> {
+ [{ return !N->getOperand(1)->isDivergent() && Subtarget->hasSafeSmemPrefetch();}]> {
let GISelPredicateCode = [{
- return isInstrUniform(MI);
+ return isInstrUniform(MI) && Subtarget->hasSafeSmemPrefetch();
}];
}
@@ -1152,6 +1152,7 @@ multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
}
defm : SMPrefetchPat<"INST", i32imm_zero>;
+let AddedComplexity = 12 in // Prefer scalar prefetch over global for r/o case.
defm : SMPrefetchPat<"DATA", i32imm_one>;
let SubtargetPredicate = isGFX12Plus in {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b5b3cc9..83e63ac 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -732,7 +732,14 @@ bool isGenericAtomic(unsigned Opc) {
}
bool isAsyncStore(unsigned Opc) {
- return false; // placeholder before async store implementation.
+ return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
}
bool isTensorStore(unsigned Opc) {
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index ea14c77..95fcd4a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -35,14 +35,18 @@ class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
bit useTiedOutput = 0> : VOP3P_Profile<P, Features, 1> {
bit UseTiedOutput = useTiedOutput;
+ defvar Src0RC = getVCSrcForVT<P.Src0VT>.ret;
+ defvar Src1RC = getVCSrcForVT<P.Src1VT>.ret;
+ defvar Src2RC = getVCSrcForVT<P.Src2VT>.ret;
+
dag srcs =
- (ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0,
- FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
- FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);
+ (ins FP16InputMods:$src0_modifiers, Src0RC:$src0,
+ FP16InputMods:$src1_modifiers, Src1RC:$src1,
+ FP16InputMods:$src2_modifiers, Src2RC:$src2);
dag dpp_srcs =
(ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0,
FPVRegInputMods:$src1_modifiers, VRegSrc_32:$src1,
- FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);
+ FP16InputMods:$src2_modifiers, Src2RC:$src2);
// FIXME: Clamp0 misbehaves with the non-default vdst_in
// following it. For now workaround this by requiring clamp
@@ -161,38 +165,42 @@ defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_
multiclass MadFmaMixPats<SDPatternOperator fma_like,
Instruction mix_inst,
Instruction mixlo_inst,
- Instruction mixhi_inst> {
+ Instruction mixhi_inst,
+ ValueType VT = f16,
+ ValueType vecVT = v2f16> {
+ defvar VOP3PMadMixModsPat = !if (!eq(VT, bf16), VOP3PMadMixBF16Mods, VOP3PMadMixMods);
+ defvar VOP3PMadMixModsExtPat = !if (!eq(VT, bf16), VOP3PMadMixBF16ModsExt, VOP3PMadMixModsExt);
// At least one of the operands needs to be an fpextend of an f16
// for this to be worthwhile, so we need three patterns here.
// TODO: Could we use a predicate to inspect src1/2/3 instead?
def : GCNPat <
- (f32 (fma_like (f32 (VOP3PMadMixModsExt f16:$src0, i32:$src0_mods)),
- (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_mods)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_mods)))),
+ (f32 (fma_like (f32 (VOP3PMadMixModsExtPat VT:$src0, i32:$src0_mods)),
+ (f32 (VOP3PMadMixModsPat VT:$src1, i32:$src1_mods)),
+ (f32 (VOP3PMadMixModsPat VT:$src2, i32:$src2_mods)))),
(mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
DSTCLAMP.NONE)>;
def : GCNPat <
- (f32 (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_mods)),
- (f32 (VOP3PMadMixModsExt f16:$src1, i32:$src1_mods)),
- (f32 (VOP3PMadMixMods f32:$src2, i32:$src2_mods)))),
+ (f32 (fma_like (f32 (VOP3PMadMixModsPat VT:$src0, i32:$src0_mods)),
+ (f32 (VOP3PMadMixModsExtPat VT:$src1, i32:$src1_mods)),
+ (f32 (VOP3PMadMixModsPat f32:$src2, i32:$src2_mods)))),
(mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
DSTCLAMP.NONE)>;
def : GCNPat <
- (f32 (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_mods)),
- (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_mods)),
- (f32 (VOP3PMadMixModsExt f16:$src2, i32:$src2_mods)))),
+ (f32 (fma_like (f32 (VOP3PMadMixModsPat VT:$src0, i32:$src0_mods)),
+ (f32 (VOP3PMadMixModsPat f32:$src1, i32:$src1_mods)),
+ (f32 (VOP3PMadMixModsExtPat VT:$src2, i32:$src2_mods)))),
(mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
DSTCLAMP.NONE)>;
def : GCNPat <
(AMDGPUclamp (build_vector
- (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
- (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers))))),
- (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
- (f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers))))))),
- (v2f16 (mixhi_inst $hi_src0_modifiers, $hi_src0,
+ (VT (fpround (fma_like (f32 (VOP3PMadMixModsPat VT:$lo_src0, i32:$lo_src0_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$lo_src1, i32:$lo_src1_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$lo_src2, i32:$lo_src2_modifiers))))),
+ (VT (fpround (fma_like (f32 (VOP3PMadMixModsPat VT:$hi_src0, i32:$hi_src0_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$hi_src1, i32:$hi_src1_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$hi_src2, i32:$hi_src2_modifiers))))))),
+ (vecVT (mixhi_inst $hi_src0_modifiers, $hi_src0,
$hi_src1_modifiers, $hi_src1,
$hi_src2_modifiers, $hi_src2,
DSTCLAMP.ENABLE,
@@ -204,8 +212,8 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
>;
def : GCNPat <
- (f16 (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers))))),
+ (VT (fpround (fmul (f32 (VOP3PMadMixModsPat f32:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixModsPat f32:$src1, i32:$src1_modifiers))))),
(mixlo_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
(i32 0), (i32 0),
@@ -214,9 +222,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
>;
def : GCNPat <
- (build_vector f16:$elt0, (f16 (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers)))))),
- (v2f16 (mixhi_inst $src0_modifiers, $src0,
+ (build_vector VT:$elt0, (VT (fpround (fmul (f32 (VOP3PMadMixModsPat f32:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixModsPat f32:$src1, i32:$src1_modifiers)))))),
+ (vecVT (mixhi_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
(i32 0), (i32 0),
DSTCLAMP.NONE,
@@ -224,9 +232,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
>;
def : GCNPat <
- (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
+ (VT (fpround (fma_like (f32 (VOP3PMadMixModsPat VT:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src2, i32:$src2_modifiers))))),
(mixlo_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
$src2_modifiers, $src2,
@@ -241,10 +249,10 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
let True16Predicate = p in {
def : GCNPat <
- (build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
- (v2f16 (mixhi_inst $src0_modifiers, $src0,
+ (build_vector VT:$elt0, (VT (fpround (fma_like (f32 (VOP3PMadMixModsPat VT:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src2, i32:$src2_modifiers)))))),
+ (vecVT (mixhi_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
$src2_modifiers, $src2,
DSTCLAMP.NONE,
@@ -253,11 +261,11 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
def : GCNPat <
(build_vector
- f16:$elt0,
- (AMDGPUclamp (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))))),
- (v2f16 (mixhi_inst $src0_modifiers, $src0,
+ VT:$elt0,
+ (AMDGPUclamp (VT (fpround (fma_like (f32 (VOP3PMadMixModsPat VT:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src2, i32:$src2_modifiers))))))),
+ (vecVT (mixhi_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
$src2_modifiers, $src2,
DSTCLAMP.ENABLE,
@@ -268,38 +276,38 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
let True16Predicate = UseRealTrue16Insts in {
def : GCNPat <
- (build_vector (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))), f16:$elt1),
- (v2f16 (mixlo_inst $src0_modifiers, $src0,
+ (build_vector (VT (fpround (fma_like (f32 (VOP3PMadMixModsPat VT:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src2, i32:$src2_modifiers))))), VT:$elt1),
+ (vecVT (mixlo_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
$src2_modifiers, $src2,
DSTCLAMP.NONE,
- (REG_SEQUENCE VGPR_32, (f16 (IMPLICIT_DEF)), lo16, $elt1, hi16)))
+ (REG_SEQUENCE VGPR_32, (VT (IMPLICIT_DEF)), lo16, $elt1, hi16)))
>;
def : GCNPat <
- (build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
- (v2f16 (mixhi_inst $src0_modifiers, $src0,
+ (build_vector VT:$elt0, (VT (fpround (fma_like (f32 (VOP3PMadMixModsPat VT:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src2, i32:$src2_modifiers)))))),
+ (vecVT (mixhi_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
$src2_modifiers, $src2,
DSTCLAMP.NONE,
- (REG_SEQUENCE VGPR_32, $elt0, lo16, (f16 (IMPLICIT_DEF)), hi16)))
+ (REG_SEQUENCE VGPR_32, $elt0, lo16, (VT (IMPLICIT_DEF)), hi16)))
>;
def : GCNPat <
(build_vector
- f16:$elt0,
- (AMDGPUclamp (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))))),
- (v2f16 (mixhi_inst $src0_modifiers, $src0,
+ VT:$elt0,
+ (AMDGPUclamp (VT (fpround (fma_like (f32 (VOP3PMadMixModsPat VT:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixModsPat VT:$src2, i32:$src2_modifiers))))))),
+ (vecVT (mixhi_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
$src2_modifiers, $src2,
DSTCLAMP.ENABLE,
- (REG_SEQUENCE VGPR_32, $elt0, lo16, (f16 (IMPLICIT_DEF)), hi16)))
+ (REG_SEQUENCE VGPR_32, $elt0, lo16, (VT (IMPLICIT_DEF)), hi16)))
>;
} // end True16Predicate
}
@@ -360,6 +368,24 @@ defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F
defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}
+let SubtargetPredicate = HasFmaMixBF16Insts in {
+let isCommutable = 1 in {
+
+let isReMaterializable = 1 in
+defm V_FMA_MIX_F32_BF16 : VOP3_VOP3PInst<"v_fma_mix_f32_bf16", VOP3P_Mix_Profile<VOP_F32_BF16_BF16_BF16, VOP3_OPSEL>>;
+
+let FPDPRounding = 1 in {
+defm V_FMA_MIXLO_BF16 : VOP3_VOP3PInst<"v_fma_mixlo_bf16", VOP3P_Mix_Profile<VOP_BF16_BF16_BF16_BF16, VOP3_OPSEL, 1>>;
+
+let ClampLo = 0, ClampHi = 1 in {
+defm V_FMA_MIXHI_BF16 : VOP3_VOP3PInst<"v_fma_mixhi_bf16", VOP3P_Mix_Profile<VOP_BF16_BF16_BF16_BF16, VOP3_OPSEL, 1>>;
+}
+} // End FPDPRounding = 1
+} // End isCommutable = 1
+
+defm : MadFmaMixPats<fma, V_FMA_MIX_F32_BF16, V_FMA_MIXLO_BF16, V_FMA_MIXHI_BF16, bf16, v2bf16>;
+} // End SubtargetPredicate = HasFmaMixBF16Insts
+
def PK_ADD_MINMAX_Profile : VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16, VOP3_PACKED> {
let HasModifiers = 0;
}
@@ -1210,6 +1236,12 @@ let isCommutable = 1, isReMaterializable = 1 in {
defm V_PK_MIN_NUM_BF16 : VOP3PInst<"v_pk_min_num_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, fminnum_like>;
defm V_PK_MAX_NUM_BF16 : VOP3PInst<"v_pk_max_num_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, fmaxnum_like>;
defm V_PK_FMA_BF16 : VOP3PInst<"v_pk_fma_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, any_fma>;
+
+ // Scalar pseudo used to emulate AMDGPUClamp.
+ // Expanded to V_PK_MAX_NUM_BF16 with unused high half.
+ // FIXME-TRUE16: Pseudo expansion of this won't work with True16.
+ let True16Predicate = UseFakeTrue16Insts in
+ defm V_MAX_BF16_PSEUDO : VOP3Inst <"v_max_bf16", VOP_BF16_BF16_BF16>;
}
} // End isCommutable = 1, isReMaterializable = 1
@@ -2247,6 +2279,13 @@ defm V_PK_MAXIMUM3_F16 : VOP3P_Real_gfx1250<0x37>;
defm V_PK_MIN3_NUM_F16 : VOP3P_Real_gfx1250<0x38>;
defm V_PK_MAX3_NUM_F16 : VOP3P_Real_gfx1250<0x39>;
+defm V_FMA_MIX_F32_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3d>;
+defm V_FMA_MIXLO_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3e>;
+defm V_FMA_MIXHI_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3f>;
+
+let AssemblerPredicate = isGFX1250Plus in
+def : AMDGPUMnemonicAlias<"v_fma_mix_f32_f16", "v_fma_mix_f32">;
+
defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>;
defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 50217c3..9e4dbec 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -4261,8 +4261,7 @@ std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI.getParent()->getParent();
- // FIXME: Use Function::hasOptSize().
- if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
+ if (MF->getFunction().hasOptSize())
--Latency;
}
return Latency;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8b7f06a..066b392 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -370,6 +370,11 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::FROUND, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
@@ -1507,6 +1512,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
+ setOperationAction(ISD::FRINT, MVT::f16, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f16, Legal);
}
if (Subtarget->hasNEON()) {
@@ -20347,6 +20358,13 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
+static bool isIncompatibleReg(const MCPhysReg &PR, MVT VT) {
+ if (PR == 0 || VT == MVT::Other)
+ return false;
+ return (ARM::SPRRegClass.contains(PR) && VT != MVT::f32 && VT != MVT::i32) ||
+ (ARM::DPRRegClass.contains(PR) && VT != MVT::f64);
+}
+
using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
@@ -20420,7 +20438,10 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
if (StringRef("{cc}").equals_insensitive(Constraint))
return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
- return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ auto RCP = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ if (isIncompatibleReg(RCP.first, VT))
+ return {0, nullptr};
+ return RCP;
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ec6f4e2..ece6c10 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -12327,7 +12327,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
}
assert(Section && "must have section to emit alignment");
- if (Section->useCodeAlign())
+ if (getContext().getAsmInfo()->useCodeAlign(*Section))
getStreamer().emitCodeAlignment(Align(2), &getSTI());
else
getStreamer().emitValueToAlignment(Align(2));
@@ -12525,7 +12525,7 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
// '.align' is target specifically handled to mean 2**2 byte alignment.
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
- if (Section->useCodeAlign())
+ if (getContext().getAsmInfo()->useCodeAlign(*Section))
getStreamer().emitCodeAlignment(Align(4), &getSTI(), 0);
else
getStreamer().emitValueToAlignment(Align(4), 0, 1, 0);
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index a7a9911..868556b 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -708,8 +708,6 @@ private:
void SwitchToExTabSection(const MCSymbol &FnStart);
void SwitchToExIdxSection(const MCSymbol &FnStart);
- void EmitFixup(const MCExpr *Expr, MCFixupKind Kind);
-
bool IsThumb;
bool IsAndroid;
@@ -1096,8 +1094,8 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
}
void ARMTargetELFStreamer::annotateTLSDescriptorSequence(
- const MCSymbolRefExpr *S) {
- getStreamer().EmitFixup(S, FK_Data_4);
+ const MCSymbolRefExpr *Expr) {
+ getStreamer().addFixup(Expr, FK_Data_4);
}
void ARMTargetELFStreamer::emitCode16() { getStreamer().setIsThumb(true); }
@@ -1140,7 +1138,8 @@ void ARMTargetELFStreamer::finish() {
MCContext &Ctx = getContext();
auto &Asm = getStreamer().getAssembler();
if (any_of(Asm, [](const MCSection &Sec) {
- return cast<MCSectionELF>(Sec).getFlags() & ELF::SHF_ARM_PURECODE;
+ return static_cast<const MCSectionELF &>(Sec).getFlags() &
+ ELF::SHF_ARM_PURECODE;
})) {
auto *Text =
static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection());
@@ -1206,11 +1205,6 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
SectionKind::getData(), FnStart);
}
-void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
- MCFragment *Frag = getCurrentFragment();
- Frag->addFixup(MCFixup::create(Frag->getContents().size(), Expr, Kind));
-}
-
void ARMELFStreamer::EHReset() {
ExTab = nullptr;
FnStart = nullptr;
diff --git a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
index ad8aa571..0fb33cd 100644
--- a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -260,7 +260,7 @@ bool AVRAsmPrinter::doFinalization(Module &M) {
continue;
}
- auto *Section = cast<MCSectionELF>(TLOF.SectionForGlobal(&GO, TM));
+ auto *Section = static_cast<MCSectionELF *>(TLOF.SectionForGlobal(&GO, TM));
if (Section->getName().starts_with(".data"))
NeedsCopyData = true;
else if (Section->getName().starts_with(".rodata") && SubTM->hasLPM())
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index c2c1bb4..0615ec9 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -24,8 +24,6 @@ AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) {
CalleeSaveStackSlotSize = 2;
CommentString = ";";
SeparatorString = "$";
- PrivateGlobalPrefix = ".L";
- PrivateLabelPrefix = ".L";
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
}
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
index fab2713..1915fa8 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
@@ -14,7 +14,7 @@
#define LLVM_AVR_ASM_INFO_H
#include "MCTargetDesc/AVRMCExpr.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/MC/MCExpr.h"
namespace llvm {
@@ -22,7 +22,7 @@ namespace llvm {
class Triple;
/// Specifies the format of AVR assembly files.
-class AVRMCAsmInfo : public MCAsmInfo {
+class AVRMCAsmInfo : public MCAsmInfoELF {
public:
explicit AVRMCAsmInfo(const Triple &TT, const MCTargetOptions &Options);
void printSpecifierExpr(raw_ostream &OS,
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index 1e29a0f..bed6bc9 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -957,47 +957,47 @@ void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) {
return;
}
- // MapDef type may be a struct type or a non-pointer derived type
- const DIType *OrigTy = Ty;
- while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
- auto Tag = DTy->getTag();
- if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type &&
- Tag != dwarf::DW_TAG_volatile_type &&
- Tag != dwarf::DW_TAG_restrict_type)
- break;
- Ty = DTy->getBaseType();
- }
-
- const auto *CTy = dyn_cast<DICompositeType>(Ty);
- if (!CTy)
- return;
-
- auto Tag = CTy->getTag();
- if (Tag != dwarf::DW_TAG_structure_type || CTy->isForwardDecl())
- return;
-
- // Visit all struct members to ensure their types are visited.
- const DINodeArray Elements = CTy->getElements();
- for (const auto *Element : Elements) {
- const auto *MemberType = cast<DIDerivedType>(Element);
- const DIType *MemberBaseType = MemberType->getBaseType();
-
- // If the member is a composite type, that may indicate the currently
- // visited composite type is a wrapper, and the member represents the
- // actual map definition.
- // In that case, visit the member with `visitMapDefType` instead of
- // `visitTypeEntry`, treating it specifically as a map definition rather
- // than as a regular composite type.
- const auto *MemberCTy = dyn_cast<DICompositeType>(MemberBaseType);
- if (MemberCTy) {
- visitMapDefType(MemberBaseType, TypeId);
- } else {
- visitTypeEntry(MemberBaseType);
+ uint32_t TmpId;
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ case dwarf::DW_TAG_restrict_type:
+ case dwarf::DW_TAG_pointer_type:
+ visitMapDefType(dyn_cast<DIDerivedType>(Ty)->getBaseType(), TmpId);
+ break;
+ case dwarf::DW_TAG_array_type:
+ // Visit nested map array and jump to the element type
+ visitMapDefType(dyn_cast<DICompositeType>(Ty)->getBaseType(), TmpId);
+ break;
+ case dwarf::DW_TAG_structure_type: {
+ // Visit all struct members to ensure their types are visited.
+ const auto *CTy = cast<DICompositeType>(Ty);
+ const DINodeArray Elements = CTy->getElements();
+ for (const auto *Element : Elements) {
+ const auto *MemberType = cast<DIDerivedType>(Element);
+ const DIType *MemberBaseType = MemberType->getBaseType();
+ // If the member is a composite type, that may indicate the currently
+ // visited composite type is a wrapper, and the member represents the
+ // actual map definition.
+ // In that case, visit the member with `visitMapDefType` instead of
+ // `visitTypeEntry`, treating it specifically as a map definition rather
+ // than as a regular composite type.
+ const auto *MemberCTy = dyn_cast<DICompositeType>(MemberBaseType);
+ if (MemberCTy) {
+ visitMapDefType(MemberBaseType, TmpId);
+ } else {
+ visitTypeEntry(MemberBaseType);
+ }
}
+ break;
+ }
+ default:
+ break;
}
// Visit this type, struct or a const/typedef/volatile/restrict type
- visitTypeEntry(OrigTy, TypeId, false, false);
+ visitTypeEntry(Ty, TypeId, false, false);
}
/// Read file contents from the actual file or from the source
@@ -1255,10 +1255,8 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
FuncInfo.Label = FuncLabel;
FuncInfo.TypeId = FuncTypeId;
if (FuncLabel->isInSection()) {
- MCSection &Section = FuncLabel->getSection();
- const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
- assert(SectionELF && "Null section for Function Label");
- SecNameOff = addString(SectionELF->getName());
+ auto &Sec = static_cast<const MCSectionELF &>(FuncLabel->getSection());
+ SecNameOff = addString(Sec.getName());
} else {
SecNameOff = addString(".text");
}
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 827e928..bb74f6a 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -54,11 +54,8 @@ unsigned BPFELFObjectWriter::getRelocType(const MCFixup &Fixup,
const MCSymbol &Sym = *A;
if (Sym.isDefined()) {
- MCSection &Section = Sym.getSection();
- const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
- assert(SectionELF && "Null section for reloc symbol");
-
- unsigned Flags = SectionELF->getFlags();
+ auto &Section = static_cast<const MCSectionELF &>(Sym.getSection());
+ unsigned Flags = Section.getFlags();
if (Sym.isTemporary()) {
// .BTF.ext generates FK_Data_4 relocations for
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 7b21684..63d6e6f 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -13,18 +13,19 @@
#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCASMINFO_H
#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/TargetParser/Triple.h"
namespace llvm {
-class BPFMCAsmInfo : public MCAsmInfo {
+class BPFMCAsmInfo : public MCAsmInfoELF {
public:
explicit BPFMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) {
if (TT.getArch() == Triple::bpfeb)
IsLittleEndian = false;
PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = "L";
WeakRefDirective = "\t.weak\t";
UsesELFSectionDirectiveForBSS = true;
diff --git a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
index f0e2e78..7e1436e 100644
--- a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
+++ b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
@@ -263,8 +263,13 @@ bool DXILFlattenArraysVisitor::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// merge the byte offsets. Otherwise, this GEP is itself the root of a GEP
// chain and we need to determine the root array type
if (auto *PtrOpGEP = dyn_cast<GEPOperator>(PtrOperand)) {
- assert(GEPChainInfoMap.contains(PtrOpGEP) &&
- "Expected parent GEP to be visited before this GEP");
+
+ // If the parent GEP was not processed, then we do not want to process its
+ // descendants. This can happen if the GEP chain is for an unsupported type
+ // such as a struct -- we do not flatten structs or GEP chains for structs
+ if (!GEPChainInfoMap.contains(PtrOpGEP))
+ return false;
+
GEPInfo &PGEPInfo = GEPChainInfoMap[PtrOpGEP];
Info.RootFlattenedArrayType = PGEPInfo.RootFlattenedArrayType;
Info.RootPointerOperand = PGEPInfo.RootPointerOperand;
diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index c73648f..3427968 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -24,18 +24,19 @@
using namespace llvm;
-static void legalizeFreeze(Instruction &I,
+static bool legalizeFreeze(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *>) {
auto *FI = dyn_cast<FreezeInst>(&I);
if (!FI)
- return;
+ return false;
FI->replaceAllUsesWith(FI->getOperand(0));
ToRemove.push_back(FI);
+ return true;
}
-static void fixI8UseChain(Instruction &I,
+static bool fixI8UseChain(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *> &ReplacedValues) {
@@ -74,19 +75,19 @@ static void fixI8UseChain(Instruction &I,
if (Trunc->getDestTy()->isIntegerTy(8)) {
ReplacedValues[Trunc] = Trunc->getOperand(0);
ToRemove.push_back(Trunc);
- return;
+ return true;
}
}
if (auto *Store = dyn_cast<StoreInst>(&I)) {
if (!Store->getValueOperand()->getType()->isIntegerTy(8))
- return;
+ return false;
SmallVector<Value *> NewOperands;
ProcessOperands(NewOperands);
Value *NewStore = Builder.CreateStore(NewOperands[0], NewOperands[1]);
ReplacedValues[Store] = NewStore;
ToRemove.push_back(Store);
- return;
+ return true;
}
if (auto *Load = dyn_cast<LoadInst>(&I);
@@ -104,17 +105,17 @@ static void fixI8UseChain(Instruction &I,
LoadInst *NewLoad = Builder.CreateLoad(ElementType, NewOperands[0]);
ReplacedValues[Load] = NewLoad;
ToRemove.push_back(Load);
- return;
+ return true;
}
if (auto *Load = dyn_cast<LoadInst>(&I);
Load && isa<ConstantExpr>(Load->getPointerOperand())) {
auto *CE = dyn_cast<ConstantExpr>(Load->getPointerOperand());
if (!(CE->getOpcode() == Instruction::GetElementPtr))
- return;
+ return false;
auto *GEP = dyn_cast<GEPOperator>(CE);
if (!GEP->getSourceElementType()->isIntegerTy(8))
- return;
+ return false;
Type *ElementType = Load->getType();
ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
@@ -143,12 +144,12 @@ static void fixI8UseChain(Instruction &I,
ReplacedValues[Load] = NewLoad;
Load->replaceAllUsesWith(NewLoad);
ToRemove.push_back(Load);
- return;
+ return true;
}
if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
if (!I.getType()->isIntegerTy(8))
- return;
+ return false;
SmallVector<Value *> NewOperands;
ProcessOperands(NewOperands);
Value *NewInst =
@@ -162,24 +163,24 @@ static void fixI8UseChain(Instruction &I,
}
ReplacedValues[BO] = NewInst;
ToRemove.push_back(BO);
- return;
+ return true;
}
if (auto *Sel = dyn_cast<SelectInst>(&I)) {
if (!I.getType()->isIntegerTy(8))
- return;
+ return false;
SmallVector<Value *> NewOperands;
ProcessOperands(NewOperands);
Value *NewInst = Builder.CreateSelect(Sel->getCondition(), NewOperands[1],
NewOperands[2]);
ReplacedValues[Sel] = NewInst;
ToRemove.push_back(Sel);
- return;
+ return true;
}
if (auto *Cmp = dyn_cast<CmpInst>(&I)) {
if (!Cmp->getOperand(0)->getType()->isIntegerTy(8))
- return;
+ return false;
SmallVector<Value *> NewOperands;
ProcessOperands(NewOperands);
Value *NewInst =
@@ -187,18 +188,18 @@ static void fixI8UseChain(Instruction &I,
Cmp->replaceAllUsesWith(NewInst);
ReplacedValues[Cmp] = NewInst;
ToRemove.push_back(Cmp);
- return;
+ return true;
}
if (auto *Cast = dyn_cast<CastInst>(&I)) {
if (!Cast->getSrcTy()->isIntegerTy(8))
- return;
+ return false;
ToRemove.push_back(Cast);
auto *Replacement = ReplacedValues[Cast->getOperand(0)];
if (Cast->getType() == Replacement->getType()) {
Cast->replaceAllUsesWith(Replacement);
- return;
+ return true;
}
Value *AdjustedCast = nullptr;
@@ -213,7 +214,7 @@ static void fixI8UseChain(Instruction &I,
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
if (!GEP->getType()->isPointerTy() ||
!GEP->getSourceElementType()->isIntegerTy(8))
- return;
+ return false;
Value *BasePtr = GEP->getPointerOperand();
if (ReplacedValues.count(BasePtr))
@@ -248,15 +249,17 @@ static void fixI8UseChain(Instruction &I,
ReplacedValues[GEP] = NewGEP;
GEP->replaceAllUsesWith(NewGEP);
ToRemove.push_back(GEP);
+ return true;
}
+ return false;
}
-static void upcastI8AllocasAndUses(Instruction &I,
+static bool upcastI8AllocasAndUses(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *> &ReplacedValues) {
auto *AI = dyn_cast<AllocaInst>(&I);
if (!AI || !AI->getAllocatedType()->isIntegerTy(8))
- return;
+ return false;
Type *SmallestType = nullptr;
@@ -291,16 +294,17 @@ static void upcastI8AllocasAndUses(Instruction &I,
}
if (!SmallestType)
- return; // no valid casts found
+ return false; // no valid casts found
// Replace alloca
IRBuilder<> Builder(AI);
auto *NewAlloca = Builder.CreateAlloca(SmallestType);
ReplacedValues[AI] = NewAlloca;
ToRemove.push_back(AI);
+ return true;
}
-static void
+static bool
downcastI64toI32InsertExtractElements(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *> &) {
@@ -318,6 +322,7 @@ downcastI64toI32InsertExtractElements(Instruction &I,
Extract->replaceAllUsesWith(NewExtract);
ToRemove.push_back(Extract);
+ return true;
}
}
@@ -335,8 +340,10 @@ downcastI64toI32InsertExtractElements(Instruction &I,
Insert->replaceAllUsesWith(Insert32Index);
ToRemove.push_back(Insert);
+ return true;
}
}
+ return false;
}
static void emitMemcpyExpansion(IRBuilder<> &Builder, Value *Dst, Value *Src,
@@ -453,17 +460,17 @@ static void emitMemsetExpansion(IRBuilder<> &Builder, Value *Dst, Value *Val,
// Expands the instruction `I` into corresponding loads and stores if it is a
// memcpy call. In that case, the call instruction is added to the `ToRemove`
// vector. `ReplacedValues` is unused.
-static void legalizeMemCpy(Instruction &I,
+static bool legalizeMemCpy(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *> &ReplacedValues) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
- return;
+ return false;
Intrinsic::ID ID = CI->getIntrinsicID();
if (ID != Intrinsic::memcpy)
- return;
+ return false;
IRBuilder<> Builder(&I);
Value *Dst = CI->getArgOperand(0);
@@ -476,19 +483,20 @@ static void legalizeMemCpy(Instruction &I,
assert(IsVolatile->getZExtValue() == 0 && "Expected IsVolatile to be false");
emitMemcpyExpansion(Builder, Dst, Src, Length);
ToRemove.push_back(CI);
+ return true;
}
-static void legalizeMemSet(Instruction &I,
+static bool legalizeMemSet(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *> &ReplacedValues) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
- return;
+ return false;
Intrinsic::ID ID = CI->getIntrinsicID();
if (ID != Intrinsic::memset)
- return;
+ return false;
IRBuilder<> Builder(&I);
Value *Dst = CI->getArgOperand(0);
@@ -497,23 +505,25 @@ static void legalizeMemSet(Instruction &I,
assert(Size && "Expected Size to be a ConstantInt");
emitMemsetExpansion(Builder, Dst, Val, Size, ReplacedValues);
ToRemove.push_back(CI);
+ return true;
}
-static void updateFnegToFsub(Instruction &I,
+static bool updateFnegToFsub(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *> &) {
const Intrinsic::ID ID = I.getOpcode();
if (ID != Instruction::FNeg)
- return;
+ return false;
IRBuilder<> Builder(&I);
Value *In = I.getOperand(0);
Value *Zero = ConstantFP::get(In->getType(), -0.0);
I.replaceAllUsesWith(Builder.CreateFSub(Zero, In));
ToRemove.push_back(&I);
+ return true;
}
-static void
+static bool
legalizeGetHighLowi64Bytes(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *> &ReplacedValues) {
@@ -523,13 +533,13 @@ legalizeGetHighLowi64Bytes(Instruction &I,
BitCast->getSrcTy()->isIntegerTy(64)) {
ToRemove.push_back(BitCast);
ReplacedValues[BitCast] = BitCast->getOperand(0);
- return;
+ return true;
}
}
if (auto *Extract = dyn_cast<ExtractElementInst>(&I)) {
if (!dyn_cast<BitCastInst>(Extract->getVectorOperand()))
- return;
+ return false;
auto *VecTy = dyn_cast<FixedVectorType>(Extract->getVectorOperandType());
if (VecTy && VecTy->getElementType()->isIntegerTy(32) &&
VecTy->getNumElements() == 2) {
@@ -557,12 +567,14 @@ legalizeGetHighLowi64Bytes(Instruction &I,
}
ToRemove.push_back(Extract);
Extract->replaceAllUsesWith(ReplacedValues[Extract]);
+ return true;
}
}
}
+ return false;
}
-static void
+static bool
legalizeScalarLoadStoreOnArrays(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *> &) {
@@ -579,14 +591,14 @@ legalizeScalarLoadStoreOnArrays(Instruction &I,
PtrOpIndex = SI->getPointerOperandIndex();
LoadStoreTy = SI->getValueOperand()->getType();
} else
- return;
+ return false;
// If the load/store is not of a single-value type (i.e., scalar or vector)
// then we do not modify it. It shouldn't be a vector either because the
// dxil-data-scalarization pass is expected to run before this, but it's not
// incorrect to apply this transformation to vector load/stores.
if (!LoadStoreTy->isSingleValueType())
- return;
+ return false;
Type *ArrayTy;
if (auto *GlobalVarPtrOp = dyn_cast<GlobalVariable>(PtrOp))
@@ -594,10 +606,10 @@ legalizeScalarLoadStoreOnArrays(Instruction &I,
else if (auto *AllocaPtrOp = dyn_cast<AllocaInst>(PtrOp))
ArrayTy = AllocaPtrOp->getAllocatedType();
else
- return;
+ return false;
if (!isa<ArrayType>(ArrayTy))
- return;
+ return false;
assert(ArrayTy->getArrayElementType() == LoadStoreTy &&
"Expected array element type to be the same as to the scalar load or "
@@ -607,6 +619,7 @@ legalizeScalarLoadStoreOnArrays(Instruction &I,
Value *GEP = GetElementPtrInst::Create(
ArrayTy, PtrOp, {Zero, Zero}, GEPNoWrapFlags::all(), "", I.getIterator());
I.setOperand(PtrOpIndex, GEP);
+ return true;
}
namespace {
@@ -624,13 +637,11 @@ public:
ReplacedValues.clear();
for (auto &I : instructions(F)) {
for (auto &LegalizationFn : LegalizationPipeline[Stage])
- LegalizationFn(I, ToRemove, ReplacedValues);
+ MadeChange |= LegalizationFn(I, ToRemove, ReplacedValues);
}
for (auto *Inst : reverse(ToRemove))
Inst->eraseFromParent();
-
- MadeChange |= !ToRemove.empty();
}
return MadeChange;
}
@@ -639,7 +650,7 @@ private:
enum LegalizationStage { Stage1 = 0, Stage2 = 1, NumStages };
using LegalizationFnTy =
- std::function<void(Instruction &, SmallVectorImpl<Instruction *> &,
+ std::function<bool(Instruction &, SmallVectorImpl<Instruction *> &,
DenseMap<Value *, Value *> &)>;
SmallVector<LegalizationFnTy> LegalizationPipeline[NumStages];
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 566f3a9..c33ec0e 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -241,7 +241,6 @@ static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) {
}
static bool transformResourcePointers(Function &F, DXILResourceTypeMap &DRTM) {
- bool Changed = false;
SmallVector<std::pair<IntrinsicInst *, dxil::ResourceTypeInfo>> Resources;
for (BasicBlock &BB : F)
for (Instruction &I : BB)
@@ -254,7 +253,7 @@ static bool transformResourcePointers(Function &F, DXILResourceTypeMap &DRTM) {
for (auto &[II, RI] : Resources)
replaceAccess(II, RI);
- return Changed;
+ return !Resources.empty();
}
PreservedAnalyses DXILResourceAccess::run(Function &F,
diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index dfc8162..ebdfcaa 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/DXILMetadataAnalysis.h"
#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/Frontend/HLSL/RootSignatureMetadata.h"
#include "llvm/Frontend/HLSL/RootSignatureValidations.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -29,25 +30,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
-#include <optional>
-#include <utility>
using namespace llvm;
using namespace llvm::dxil;
-static bool reportError(LLVMContext *Ctx, Twine Message,
- DiagnosticSeverity Severity = DS_Error) {
- Ctx->diagnose(DiagnosticInfoGeneric(Message, Severity));
- return true;
-}
-
-static bool reportValueError(LLVMContext *Ctx, Twine ParamName,
- uint32_t Value) {
- Ctx->diagnose(DiagnosticInfoGeneric(
- "Invalid value for " + ParamName + ": " + Twine(Value), DS_Error));
- return true;
-}
-
static std::optional<uint32_t> extractMdIntValue(MDNode *Node,
unsigned int OpId) {
if (auto *CI =
@@ -56,453 +42,10 @@ static std::optional<uint32_t> extractMdIntValue(MDNode *Node,
return std::nullopt;
}
-static std::optional<float> extractMdFloatValue(MDNode *Node,
- unsigned int OpId) {
- if (auto *CI = mdconst::dyn_extract<ConstantFP>(Node->getOperand(OpId).get()))
- return CI->getValueAPF().convertToFloat();
- return std::nullopt;
-}
-
-static std::optional<StringRef> extractMdStringValue(MDNode *Node,
- unsigned int OpId) {
- MDString *NodeText = dyn_cast<MDString>(Node->getOperand(OpId));
- if (NodeText == nullptr)
- return std::nullopt;
- return NodeText->getString();
-}
-
-static bool parseRootFlags(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
- MDNode *RootFlagNode) {
-
- if (RootFlagNode->getNumOperands() != 2)
- return reportError(Ctx, "Invalid format for RootFlag Element");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RootFlagNode, 1))
- RSD.Flags = *Val;
- else
- return reportError(Ctx, "Invalid value for RootFlag");
-
- return false;
-}
-
-static bool parseRootConstants(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
- MDNode *RootConstantNode) {
-
- if (RootConstantNode->getNumOperands() != 5)
- return reportError(Ctx, "Invalid format for RootConstants Element");
-
- dxbc::RTS0::v1::RootParameterHeader Header;
- // The parameter offset doesn't matter here - we recalculate it during
- // serialization Header.ParameterOffset = 0;
- Header.ParameterType =
- llvm::to_underlying(dxbc::RootParameterType::Constants32Bit);
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 1))
- Header.ShaderVisibility = *Val;
- else
- return reportError(Ctx, "Invalid value for ShaderVisibility");
-
- dxbc::RTS0::v1::RootConstants Constants;
- if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 2))
- Constants.ShaderRegister = *Val;
- else
- return reportError(Ctx, "Invalid value for ShaderRegister");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 3))
- Constants.RegisterSpace = *Val;
- else
- return reportError(Ctx, "Invalid value for RegisterSpace");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 4))
- Constants.Num32BitValues = *Val;
- else
- return reportError(Ctx, "Invalid value for Num32BitValues");
-
- RSD.ParametersContainer.addParameter(Header, Constants);
-
- return false;
-}
-
-static bool parseRootDescriptors(LLVMContext *Ctx,
- mcdxbc::RootSignatureDesc &RSD,
- MDNode *RootDescriptorNode,
- RootSignatureElementKind ElementKind) {
- assert(ElementKind == RootSignatureElementKind::SRV ||
- ElementKind == RootSignatureElementKind::UAV ||
- ElementKind == RootSignatureElementKind::CBV &&
- "parseRootDescriptors should only be called with RootDescriptor "
- "element kind.");
- if (RootDescriptorNode->getNumOperands() != 5)
- return reportError(Ctx, "Invalid format for Root Descriptor Element");
-
- dxbc::RTS0::v1::RootParameterHeader Header;
- switch (ElementKind) {
- case RootSignatureElementKind::SRV:
- Header.ParameterType = llvm::to_underlying(dxbc::RootParameterType::SRV);
- break;
- case RootSignatureElementKind::UAV:
- Header.ParameterType = llvm::to_underlying(dxbc::RootParameterType::UAV);
- break;
- case RootSignatureElementKind::CBV:
- Header.ParameterType = llvm::to_underlying(dxbc::RootParameterType::CBV);
- break;
- default:
- llvm_unreachable("invalid Root Descriptor kind");
- break;
- }
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 1))
- Header.ShaderVisibility = *Val;
- else
- return reportError(Ctx, "Invalid value for ShaderVisibility");
-
- dxbc::RTS0::v2::RootDescriptor Descriptor;
- if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 2))
- Descriptor.ShaderRegister = *Val;
- else
- return reportError(Ctx, "Invalid value for ShaderRegister");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 3))
- Descriptor.RegisterSpace = *Val;
- else
- return reportError(Ctx, "Invalid value for RegisterSpace");
-
- if (RSD.Version == 1) {
- RSD.ParametersContainer.addParameter(Header, Descriptor);
- return false;
- }
- assert(RSD.Version > 1);
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 4))
- Descriptor.Flags = *Val;
- else
- return reportError(Ctx, "Invalid value for Root Descriptor Flags");
-
- RSD.ParametersContainer.addParameter(Header, Descriptor);
- return false;
-}
-
-static bool parseDescriptorRange(LLVMContext *Ctx,
- mcdxbc::DescriptorTable &Table,
- MDNode *RangeDescriptorNode) {
-
- if (RangeDescriptorNode->getNumOperands() != 6)
- return reportError(Ctx, "Invalid format for Descriptor Range");
-
- dxbc::RTS0::v2::DescriptorRange Range;
-
- std::optional<StringRef> ElementText =
- extractMdStringValue(RangeDescriptorNode, 0);
-
- if (!ElementText.has_value())
- return reportError(Ctx, "Descriptor Range, first element is not a string.");
-
- Range.RangeType =
- StringSwitch<uint32_t>(*ElementText)
- .Case("CBV", llvm::to_underlying(dxbc::DescriptorRangeType::CBV))
- .Case("SRV", llvm::to_underlying(dxbc::DescriptorRangeType::SRV))
- .Case("UAV", llvm::to_underlying(dxbc::DescriptorRangeType::UAV))
- .Case("Sampler",
- llvm::to_underlying(dxbc::DescriptorRangeType::Sampler))
- .Default(~0U);
-
- if (Range.RangeType == ~0U)
- return reportError(Ctx, "Invalid Descriptor Range type: " + *ElementText);
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 1))
- Range.NumDescriptors = *Val;
- else
- return reportError(Ctx, "Invalid value for Number of Descriptor in Range");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 2))
- Range.BaseShaderRegister = *Val;
- else
- return reportError(Ctx, "Invalid value for BaseShaderRegister");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 3))
- Range.RegisterSpace = *Val;
- else
- return reportError(Ctx, "Invalid value for RegisterSpace");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 4))
- Range.OffsetInDescriptorsFromTableStart = *Val;
- else
- return reportError(Ctx,
- "Invalid value for OffsetInDescriptorsFromTableStart");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 5))
- Range.Flags = *Val;
- else
- return reportError(Ctx, "Invalid value for Descriptor Range Flags");
-
- Table.Ranges.push_back(Range);
- return false;
-}
-
-static bool parseDescriptorTable(LLVMContext *Ctx,
- mcdxbc::RootSignatureDesc &RSD,
- MDNode *DescriptorTableNode) {
- const unsigned int NumOperands = DescriptorTableNode->getNumOperands();
- if (NumOperands < 2)
- return reportError(Ctx, "Invalid format for Descriptor Table");
-
- dxbc::RTS0::v1::RootParameterHeader Header;
- if (std::optional<uint32_t> Val = extractMdIntValue(DescriptorTableNode, 1))
- Header.ShaderVisibility = *Val;
- else
- return reportError(Ctx, "Invalid value for ShaderVisibility");
-
- mcdxbc::DescriptorTable Table;
- Header.ParameterType =
- llvm::to_underlying(dxbc::RootParameterType::DescriptorTable);
-
- for (unsigned int I = 2; I < NumOperands; I++) {
- MDNode *Element = dyn_cast<MDNode>(DescriptorTableNode->getOperand(I));
- if (Element == nullptr)
- return reportError(Ctx, "Missing Root Element Metadata Node.");
-
- if (parseDescriptorRange(Ctx, Table, Element))
- return true;
- }
-
- RSD.ParametersContainer.addParameter(Header, Table);
- return false;
-}
-
-static bool parseStaticSampler(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
- MDNode *StaticSamplerNode) {
- if (StaticSamplerNode->getNumOperands() != 14)
- return reportError(Ctx, "Invalid format for Static Sampler");
-
- dxbc::RTS0::v1::StaticSampler Sampler;
- if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 1))
- Sampler.Filter = *Val;
- else
- return reportError(Ctx, "Invalid value for Filter");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 2))
- Sampler.AddressU = *Val;
- else
- return reportError(Ctx, "Invalid value for AddressU");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 3))
- Sampler.AddressV = *Val;
- else
- return reportError(Ctx, "Invalid value for AddressV");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 4))
- Sampler.AddressW = *Val;
- else
- return reportError(Ctx, "Invalid value for AddressW");
-
- if (std::optional<float> Val = extractMdFloatValue(StaticSamplerNode, 5))
- Sampler.MipLODBias = *Val;
- else
- return reportError(Ctx, "Invalid value for MipLODBias");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 6))
- Sampler.MaxAnisotropy = *Val;
- else
- return reportError(Ctx, "Invalid value for MaxAnisotropy");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 7))
- Sampler.ComparisonFunc = *Val;
- else
- return reportError(Ctx, "Invalid value for ComparisonFunc ");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 8))
- Sampler.BorderColor = *Val;
- else
- return reportError(Ctx, "Invalid value for ComparisonFunc ");
-
- if (std::optional<float> Val = extractMdFloatValue(StaticSamplerNode, 9))
- Sampler.MinLOD = *Val;
- else
- return reportError(Ctx, "Invalid value for MinLOD");
-
- if (std::optional<float> Val = extractMdFloatValue(StaticSamplerNode, 10))
- Sampler.MaxLOD = *Val;
- else
- return reportError(Ctx, "Invalid value for MaxLOD");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 11))
- Sampler.ShaderRegister = *Val;
- else
- return reportError(Ctx, "Invalid value for ShaderRegister");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 12))
- Sampler.RegisterSpace = *Val;
- else
- return reportError(Ctx, "Invalid value for RegisterSpace");
-
- if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 13))
- Sampler.ShaderVisibility = *Val;
- else
- return reportError(Ctx, "Invalid value for ShaderVisibility");
-
- RSD.StaticSamplers.push_back(Sampler);
- return false;
-}
-
-static bool parseRootSignatureElement(LLVMContext *Ctx,
- mcdxbc::RootSignatureDesc &RSD,
- MDNode *Element) {
- std::optional<StringRef> ElementText = extractMdStringValue(Element, 0);
- if (!ElementText.has_value())
- return reportError(Ctx, "Invalid format for Root Element");
-
- RootSignatureElementKind ElementKind =
- StringSwitch<RootSignatureElementKind>(*ElementText)
- .Case("RootFlags", RootSignatureElementKind::RootFlags)
- .Case("RootConstants", RootSignatureElementKind::RootConstants)
- .Case("RootCBV", RootSignatureElementKind::CBV)
- .Case("RootSRV", RootSignatureElementKind::SRV)
- .Case("RootUAV", RootSignatureElementKind::UAV)
- .Case("DescriptorTable", RootSignatureElementKind::DescriptorTable)
- .Case("StaticSampler", RootSignatureElementKind::StaticSamplers)
- .Default(RootSignatureElementKind::Error);
-
- switch (ElementKind) {
-
- case RootSignatureElementKind::RootFlags:
- return parseRootFlags(Ctx, RSD, Element);
- case RootSignatureElementKind::RootConstants:
- return parseRootConstants(Ctx, RSD, Element);
- case RootSignatureElementKind::CBV:
- case RootSignatureElementKind::SRV:
- case RootSignatureElementKind::UAV:
- return parseRootDescriptors(Ctx, RSD, Element, ElementKind);
- case RootSignatureElementKind::DescriptorTable:
- return parseDescriptorTable(Ctx, RSD, Element);
- case RootSignatureElementKind::StaticSamplers:
- return parseStaticSampler(Ctx, RSD, Element);
- case RootSignatureElementKind::Error:
- return reportError(Ctx, "Invalid Root Signature Element: " + *ElementText);
- }
-
- llvm_unreachable("Unhandled RootSignatureElementKind enum.");
-}
-
-static bool parse(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
- MDNode *Node) {
- bool HasError = false;
-
- // Loop through the Root Elements of the root signature.
- for (const auto &Operand : Node->operands()) {
- MDNode *Element = dyn_cast<MDNode>(Operand);
- if (Element == nullptr)
- return reportError(Ctx, "Missing Root Element Metadata Node.");
-
- HasError = HasError || parseRootSignatureElement(Ctx, RSD, Element);
- }
-
- return HasError;
-}
-
-static bool validate(LLVMContext *Ctx, const mcdxbc::RootSignatureDesc &RSD) {
-
- if (!llvm::hlsl::rootsig::verifyVersion(RSD.Version)) {
- return reportValueError(Ctx, "Version", RSD.Version);
- }
-
- if (!llvm::hlsl::rootsig::verifyRootFlag(RSD.Flags)) {
- return reportValueError(Ctx, "RootFlags", RSD.Flags);
- }
-
- for (const mcdxbc::RootParameterInfo &Info : RSD.ParametersContainer) {
- if (!dxbc::isValidShaderVisibility(Info.Header.ShaderVisibility))
- return reportValueError(Ctx, "ShaderVisibility",
- Info.Header.ShaderVisibility);
-
- assert(dxbc::isValidParameterType(Info.Header.ParameterType) &&
- "Invalid value for ParameterType");
-
- switch (Info.Header.ParameterType) {
-
- case llvm::to_underlying(dxbc::RootParameterType::CBV):
- case llvm::to_underlying(dxbc::RootParameterType::UAV):
- case llvm::to_underlying(dxbc::RootParameterType::SRV): {
- const dxbc::RTS0::v2::RootDescriptor &Descriptor =
- RSD.ParametersContainer.getRootDescriptor(Info.Location);
- if (!llvm::hlsl::rootsig::verifyRegisterValue(Descriptor.ShaderRegister))
- return reportValueError(Ctx, "ShaderRegister",
- Descriptor.ShaderRegister);
-
- if (!llvm::hlsl::rootsig::verifyRegisterSpace(Descriptor.RegisterSpace))
- return reportValueError(Ctx, "RegisterSpace", Descriptor.RegisterSpace);
-
- if (RSD.Version > 1) {
- if (!llvm::hlsl::rootsig::verifyRootDescriptorFlag(RSD.Version,
- Descriptor.Flags))
- return reportValueError(Ctx, "RootDescriptorFlag", Descriptor.Flags);
- }
- break;
- }
- case llvm::to_underlying(dxbc::RootParameterType::DescriptorTable): {
- const mcdxbc::DescriptorTable &Table =
- RSD.ParametersContainer.getDescriptorTable(Info.Location);
- for (const dxbc::RTS0::v2::DescriptorRange &Range : Table) {
- if (!llvm::hlsl::rootsig::verifyRangeType(Range.RangeType))
- return reportValueError(Ctx, "RangeType", Range.RangeType);
-
- if (!llvm::hlsl::rootsig::verifyRegisterSpace(Range.RegisterSpace))
- return reportValueError(Ctx, "RegisterSpace", Range.RegisterSpace);
-
- if (!llvm::hlsl::rootsig::verifyNumDescriptors(Range.NumDescriptors))
- return reportValueError(Ctx, "NumDescriptors", Range.NumDescriptors);
-
- if (!llvm::hlsl::rootsig::verifyDescriptorRangeFlag(
- RSD.Version, Range.RangeType, Range.Flags))
- return reportValueError(Ctx, "DescriptorFlag", Range.Flags);
- }
- break;
- }
- }
- }
-
- for (const dxbc::RTS0::v1::StaticSampler &Sampler : RSD.StaticSamplers) {
- if (!llvm::hlsl::rootsig::verifySamplerFilter(Sampler.Filter))
- return reportValueError(Ctx, "Filter", Sampler.Filter);
-
- if (!llvm::hlsl::rootsig::verifyAddress(Sampler.AddressU))
- return reportValueError(Ctx, "AddressU", Sampler.AddressU);
-
- if (!llvm::hlsl::rootsig::verifyAddress(Sampler.AddressV))
- return reportValueError(Ctx, "AddressV", Sampler.AddressV);
-
- if (!llvm::hlsl::rootsig::verifyAddress(Sampler.AddressW))
- return reportValueError(Ctx, "AddressW", Sampler.AddressW);
-
- if (!llvm::hlsl::rootsig::verifyMipLODBias(Sampler.MipLODBias))
- return reportValueError(Ctx, "MipLODBias", Sampler.MipLODBias);
-
- if (!llvm::hlsl::rootsig::verifyMaxAnisotropy(Sampler.MaxAnisotropy))
- return reportValueError(Ctx, "MaxAnisotropy", Sampler.MaxAnisotropy);
-
- if (!llvm::hlsl::rootsig::verifyComparisonFunc(Sampler.ComparisonFunc))
- return reportValueError(Ctx, "ComparisonFunc", Sampler.ComparisonFunc);
-
- if (!llvm::hlsl::rootsig::verifyBorderColor(Sampler.BorderColor))
- return reportValueError(Ctx, "BorderColor", Sampler.BorderColor);
-
- if (!llvm::hlsl::rootsig::verifyLOD(Sampler.MinLOD))
- return reportValueError(Ctx, "MinLOD", Sampler.MinLOD);
-
- if (!llvm::hlsl::rootsig::verifyLOD(Sampler.MaxLOD))
- return reportValueError(Ctx, "MaxLOD", Sampler.MaxLOD);
-
- if (!llvm::hlsl::rootsig::verifyRegisterValue(Sampler.ShaderRegister))
- return reportValueError(Ctx, "ShaderRegister", Sampler.ShaderRegister);
-
- if (!llvm::hlsl::rootsig::verifyRegisterSpace(Sampler.RegisterSpace))
- return reportValueError(Ctx, "RegisterSpace", Sampler.RegisterSpace);
-
- if (!dxbc::isValidShaderVisibility(Sampler.ShaderVisibility))
- return reportValueError(Ctx, "ShaderVisibility",
- Sampler.ShaderVisibility);
- }
-
- return false;
+static bool reportError(LLVMContext *Ctx, Twine Message,
+ DiagnosticSeverity Severity = DS_Error) {
+ Ctx->diagnose(DiagnosticInfoGeneric(Message, Severity));
+ return true;
}
static SmallDenseMap<const Function *, mcdxbc::RootSignatureDesc>
@@ -584,7 +127,9 @@ analyzeModule(Module &M) {
// static sampler offset is calculated when writing dxcontainer.
RSD.StaticSamplersOffset = 0u;
- if (parse(Ctx, RSD, RootElementListNode) || validate(Ctx, RSD)) {
+ hlsl::rootsig::MetadataParser MDParser(RootElementListNode);
+
+ if (MDParser.ParseRootSignature(Ctx, RSD)) {
return RSDMap;
}
diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.h b/llvm/lib/Target/DirectX/DXILRootSignature.h
index fc39b38..254b7ff 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.h
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.h
@@ -26,17 +26,6 @@
namespace llvm {
namespace dxil {
-enum class RootSignatureElementKind {
- Error = 0,
- RootFlags = 1,
- RootConstants = 2,
- SRV = 3,
- UAV = 4,
- CBV = 5,
- DescriptorTable = 6,
- StaticSamplers = 7
-};
-
class RootSignatureBindingInfo {
private:
SmallDenseMap<const Function *, mcdxbc::RootSignatureDesc> FuncToRsMap;
diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
index eb4adfe..e7e7f2c 100644
--- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
+++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
@@ -106,11 +106,11 @@ void ModuleShaderFlags::updateFunctionFlags(ComputedShaderFlags &CSF,
DXILResourceTypeMap &DRTM,
const ModuleMetadataInfo &MMDI) {
if (!CSF.Doubles)
- CSF.Doubles = I.getType()->isDoubleTy();
+ CSF.Doubles = I.getType()->getScalarType()->isDoubleTy();
if (!CSF.Doubles) {
for (const Value *Op : I.operands()) {
- if (Op->getType()->isDoubleTy()) {
+ if (Op->getType()->getScalarType()->isDoubleTy()) {
CSF.Doubles = true;
break;
}
@@ -130,12 +130,13 @@ void ModuleShaderFlags::updateFunctionFlags(ComputedShaderFlags &CSF,
}
if (!CSF.LowPrecisionPresent)
- CSF.LowPrecisionPresent =
- I.getType()->isIntegerTy(16) || I.getType()->isHalfTy();
+ CSF.LowPrecisionPresent = I.getType()->getScalarType()->isIntegerTy(16) ||
+ I.getType()->getScalarType()->isHalfTy();
if (!CSF.LowPrecisionPresent) {
for (const Value *Op : I.operands()) {
- if (Op->getType()->isIntegerTy(16) || Op->getType()->isHalfTy()) {
+ if (Op->getType()->getScalarType()->isIntegerTy(16) ||
+ Op->getType()->getScalarType()->isHalfTy()) {
CSF.LowPrecisionPresent = true;
break;
}
@@ -150,11 +151,11 @@ void ModuleShaderFlags::updateFunctionFlags(ComputedShaderFlags &CSF,
}
if (!CSF.Int64Ops)
- CSF.Int64Ops = I.getType()->isIntegerTy(64);
+ CSF.Int64Ops = I.getType()->getScalarType()->isIntegerTy(64);
if (!CSF.Int64Ops && !isa<LifetimeIntrinsic>(&I)) {
for (const Value *Op : I.operands()) {
- if (Op->getType()->isIntegerTy(64)) {
+ if (Op->getType()->getScalarType()->isIntegerTy(64)) {
CSF.Int64Ops = true;
break;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index c86fa2b..54c3cea 100644
--- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -457,7 +457,7 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
TII = ST->getInstrInfo();
const Function &F = MF.getFunction();
- bool OptForSize = F.hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptForSize = F.hasOptSize();
// Combine aggressively (for code size)
ShouldCombineAggressively =
diff --git a/llvm/lib/Target/Hexagon/HexagonMask.cpp b/llvm/lib/Target/Hexagon/HexagonMask.cpp
index 6eccf80..9d7776d 100644
--- a/llvm/lib/Target/Hexagon/HexagonMask.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonMask.cpp
@@ -76,7 +76,7 @@ bool HexagonMask::runOnMachineFunction(MachineFunction &MF) {
HII = HST.getInstrInfo();
const Function &F = MF.getFunction();
- if (!F.hasFnAttribute(Attribute::OptimizeForSize))
+ if (!F.hasOptSize())
return false;
// Mask instruction is available only from v66
if (!HST.hasV66Ops())
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e915a3c4..d96136c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2385,23 +2385,6 @@ SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
return Res;
}
-static bool isConstantOrUndef(const SDValue Op) {
- if (Op->isUndef())
- return true;
- if (isa<ConstantSDNode>(Op))
- return true;
- if (isa<ConstantFPSDNode>(Op))
- return true;
- return false;
-}
-
-static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
- for (unsigned i = 0; i < Op->getNumOperands(); ++i)
- if (isConstantOrUndef(Op->getOperand(i)))
- return true;
- return false;
-}
-
// Lower BUILD_VECTOR as broadcast load (if possible).
// For example:
// %a = load i8, ptr %ptr
@@ -2451,10 +2434,14 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
EVT ResTy = Op->getValueType(0);
+ unsigned NumElts = ResTy.getVectorNumElements();
SDLoc DL(Op);
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
+ bool IsConstant = false;
+ bool UseSameConstant = true;
+ SDValue ConstantValue;
bool Is128Vec = ResTy.is128BitVector();
bool Is256Vec = ResTy.is256BitVector();
@@ -2505,20 +2492,45 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
return Op;
- if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Opi = Node->getOperand(i);
+ if (isIntOrFPConstant(Opi)) {
+ IsConstant = true;
+ if (!ConstantValue.getNode())
+ ConstantValue = Opi;
+ else if (ConstantValue != Opi)
+ UseSameConstant = false;
+ }
+ }
+
+ // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
+ if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
+ SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Opi = Node->getOperand(i);
+ if (!isIntOrFPConstant(Opi))
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
+ DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+ }
+ return Result;
+ }
+
+ if (!IsConstant) {
// Use INSERT_VECTOR_ELT operations rather than expand to stores.
// The resulting code is the same length as the expansion, but it doesn't
// use memory operations.
- EVT ResTy = Node->getValueType(0);
-
assert(ResTy.isVector());
- unsigned NumElts = ResTy.getVectorNumElements();
- SDValue Vector =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Node->getOperand(0));
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Vector = DAG.getUNDEF(ResTy);
+
+ if (!Op0.isUndef())
+ Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
for (unsigned i = 1; i < NumElts; ++i) {
- Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
- Node->getOperand(i),
+ SDValue Opi = Node->getOperand(i);
+ if (Opi.isUndef())
+ continue;
+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
}
return Vector;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index 8fa72bc..d9ea88c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -254,6 +254,7 @@ bool LoongArchAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
MCFixup Fixup =
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN);
F.setVarFixups({Fixup});
+ F.setLinkerRelaxable();
F.getParent()->setLinkerRelaxable();
return true;
}
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index feb4eb3..d9680c7 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -969,7 +969,7 @@ void MipsTargetELFStreamer::finish() {
Align Alignment = Section.getAlign();
S.switchSection(&Section);
- if (Section.useCodeAlign())
+ if (getContext().getAsmInfo()->useCodeAlign(Section))
S.emitCodeAlignment(Alignment, &STI, Alignment.value());
else
S.emitValueToAlignment(Alignment, 0, 1, Alignment.value());
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index ca03310..a2e48ab 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -737,14 +737,18 @@ void MipsAsmPrinter::emitStartOfAsmFile(Module &M) {
if (FS.empty() && M.size() && F->hasFnAttribute("target-features"))
FS = F->getFnAttribute("target-features").getValueAsString();
+ std::string strFS = FS.str();
+ if (M.size() && F->getFnAttribute("use-soft-float").getValueAsBool())
+ strFS += strFS.empty() ? "+soft-float" : ",+soft-float";
+
// Compute MIPS architecture attributes based on the default subtarget
// that we'd have constructed.
// FIXME: For ifunc related functions we could iterate over and look
// for a feature string that doesn't match the default one.
StringRef CPU = MIPS_MC::selectMipsCPU(TT, TM.getTargetCPU());
const MipsTargetMachine &MTM = static_cast<const MipsTargetMachine &>(TM);
- const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM,
- std::nullopt);
+ const MipsSubtarget STI(TT, CPU, StringRef(strFS), MTM.isLittleEndian(),
+ MTM, std::nullopt);
bool IsABICalls = STI.isABICalls();
const MipsABIInfo &ABI = MTM.getABI();
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 0e581a7..ec6b382 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -522,9 +522,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::TRAP, MVT::Other, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
- setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
-
setTargetDAGCombine({ISD::SDIVREM, ISD::UDIVREM, ISD::SELECT, ISD::AND,
ISD::OR, ISD::ADD, ISD::SUB, ISD::AssertZext, ISD::SHL,
ISD::SIGN_EXTEND});
@@ -1360,8 +1357,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::FP_TO_SINT: return lowerFP_TO_SINT(Op, DAG);
case ISD::READCYCLECOUNTER:
return lowerREADCYCLECOUNTER(Op, DAG);
- case ISD::ConstantFP:
- return lowerConstantFP(Op, DAG);
}
return SDValue();
}
@@ -3019,30 +3014,6 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op.getValueType(), Trunc);
}
-SDValue MipsTargetLowering::lowerConstantFP(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT VT = Op.getSimpleValueType();
- SDNode *N = Op.getNode();
- ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(N);
-
- if (!CFP->isNaN() || Subtarget.isNaN2008()) {
- return SDValue();
- }
-
- APFloat NaNValue = CFP->getValueAPF();
- auto &Sem = NaNValue.getSemantics();
-
- // The MSB of the mantissa should be zero for QNaNs in the MIPS legacy NaN
- // encodings, and one for sNaNs. Check every NaN constants and make sure
- // they are correctly encoded for legacy encodings.
- if (!NaNValue.isSignaling()) {
- APFloat RealQNaN = NaNValue.getSNaN(Sem);
- return DAG.getConstantFP(RealQNaN, DL, VT);
- }
- return SDValue();
-}
-
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
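
The deleted lowerConstantFP hook existed because the MIPS legacy (pre-2008) NaN convention inverts the meaning of the most significant mantissa bit relative to IEEE 754-2008: legacy quiet NaNs have that bit clear and signaling NaNs have it set, as the removed comment notes. A standalone illustration of that bit (plain C++20 for std::bit_cast; the constants are ordinary single-precision NaN patterns, not anything taken from the removed code):

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  // IEEE-754 binary32: the quiet bit is the most significant mantissa bit.
  const uint32_t QuietBit = 0x00400000u;

  // Quiet NaN under IEEE 754-2008: quiet bit set.
  const uint32_t Nan2008Quiet = 0x7FC00000u;
  // Quiet NaN under the MIPS legacy convention: quiet bit clear, payload != 0
  // (the same pattern is a signaling NaN under the 2008 convention).
  const uint32_t LegacyQuiet = 0x7F800001u;

  std::printf("2008-style quiet NaN has quiet bit: %d\n",
              (Nan2008Quiet & QuietBit) != 0);
  std::printf("legacy-style quiet NaN has quiet bit: %d\n",
              (LegacyQuiet & QuietBit) != 0);

  // Both patterns are NaNs either way: exponent all ones, mantissa nonzero.
  const float F = std::bit_cast<float>(LegacyQuiet);
  std::printf("legacy pattern compares unequal to itself: %d\n", F != F);
}
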
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 31ac5d4..c65c76c 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -592,7 +592,6 @@ class TargetRegisterClass;
SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 614b321..ce9cd12 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -15,8 +15,6 @@
using namespace llvm;
-void NVPTXMCAsmInfo::anchor() {}
-
NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple,
const MCTargetOptions &Options) {
if (TheTriple.getArch() == Triple::nvptx64) {
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index 77c4dae..f071406 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -19,8 +19,6 @@ namespace llvm {
class Triple;
class NVPTXMCAsmInfo : public MCAsmInfo {
- virtual void anchor();
-
public:
explicit NVPTXMCAsmInfo(const Triple &TheTriple,
const MCTargetOptions &Options);
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index 9f91143..329e3b5 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -97,10 +97,7 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
if (isDwarfSection(FI, Section)) {
// Emit DWARF .file directives in the outermost scope.
outputDwarfFileDirectives();
- OS << "\t.section";
- Section->printSwitchToSection(*getStreamer().getContext().getAsmInfo(),
- getStreamer().getContext().getTargetTriple(),
- OS, SubSection);
+ OS << "\t.section\t" << Section->getName() << '\n';
// DWARF sections are enclosed into braces - emit the open one.
OS << "\t{\n";
HasSections = true;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 65e7c56..96f52275 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -145,18 +145,6 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
if (tryStoreVector(N))
return;
break;
- case NVPTXISD::LoadParam:
- case NVPTXISD::LoadParamV2:
- case NVPTXISD::LoadParamV4:
- if (tryLoadParam(N))
- return;
- break;
- case NVPTXISD::StoreParam:
- case NVPTXISD::StoreParamV2:
- case NVPTXISD::StoreParamV4:
- if (tryStoreParam(N))
- return;
- break;
case ISD::INTRINSIC_W_CHAIN:
if (tryIntrinsicChain(N))
return;
@@ -1462,267 +1450,6 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
return true;
}
-bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
- SDValue Chain = Node->getOperand(0);
- SDValue Offset = Node->getOperand(2);
- SDValue Glue = Node->getOperand(3);
- SDLoc DL(Node);
- MemSDNode *Mem = cast<MemSDNode>(Node);
-
- unsigned VecSize;
- switch (Node->getOpcode()) {
- default:
- return false;
- case NVPTXISD::LoadParam:
- VecSize = 1;
- break;
- case NVPTXISD::LoadParamV2:
- VecSize = 2;
- break;
- case NVPTXISD::LoadParamV4:
- VecSize = 4;
- break;
- }
-
- EVT EltVT = Node->getValueType(0);
- EVT MemVT = Mem->getMemoryVT();
-
- std::optional<unsigned> Opcode;
-
- switch (VecSize) {
- default:
- return false;
- case 1:
- Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
- NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
- NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64);
- break;
- case 2:
- Opcode =
- pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
- NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
- NVPTX::LoadParamMemV2I64);
- break;
- case 4:
- Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
- NVPTX::LoadParamMemV4I8, NVPTX::LoadParamMemV4I16,
- NVPTX::LoadParamMemV4I32, {/* no v4i64 */});
- break;
- }
- if (!Opcode)
- return false;
-
- SDVTList VTs;
- if (VecSize == 1) {
- VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
- } else if (VecSize == 2) {
- VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
- } else {
- EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
- VTs = CurDAG->getVTList(EVTs);
- }
-
- unsigned OffsetVal = Offset->getAsZExtVal();
-
- SmallVector<SDValue, 2> Ops(
- {CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32), Chain, Glue});
-
- ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
- return true;
-}
-
-// Helpers for constructing opcode (ex: NVPTX::StoreParamV4F32_iiri)
-#define getOpcV2H(ty, opKind0, opKind1) \
- NVPTX::StoreParamV2##ty##_##opKind0##opKind1
-
-#define getOpcV2H1(ty, opKind0, isImm1) \
- (isImm1) ? getOpcV2H(ty, opKind0, i) : getOpcV2H(ty, opKind0, r)
-
-#define getOpcodeForVectorStParamV2(ty, isimm) \
- (isimm[0]) ? getOpcV2H1(ty, i, isimm[1]) : getOpcV2H1(ty, r, isimm[1])
-
-#define getOpcV4H(ty, opKind0, opKind1, opKind2, opKind3) \
- NVPTX::StoreParamV4##ty##_##opKind0##opKind1##opKind2##opKind3
-
-#define getOpcV4H3(ty, opKind0, opKind1, opKind2, isImm3) \
- (isImm3) ? getOpcV4H(ty, opKind0, opKind1, opKind2, i) \
- : getOpcV4H(ty, opKind0, opKind1, opKind2, r)
-
-#define getOpcV4H2(ty, opKind0, opKind1, isImm2, isImm3) \
- (isImm2) ? getOpcV4H3(ty, opKind0, opKind1, i, isImm3) \
- : getOpcV4H3(ty, opKind0, opKind1, r, isImm3)
-
-#define getOpcV4H1(ty, opKind0, isImm1, isImm2, isImm3) \
- (isImm1) ? getOpcV4H2(ty, opKind0, i, isImm2, isImm3) \
- : getOpcV4H2(ty, opKind0, r, isImm2, isImm3)
-
-#define getOpcodeForVectorStParamV4(ty, isimm) \
- (isimm[0]) ? getOpcV4H1(ty, i, isimm[1], isimm[2], isimm[3]) \
- : getOpcV4H1(ty, r, isimm[1], isimm[2], isimm[3])
-
-#define getOpcodeForVectorStParam(n, ty, isimm) \
- (n == 2) ? getOpcodeForVectorStParamV2(ty, isimm) \
- : getOpcodeForVectorStParamV4(ty, isimm)
-
-static unsigned pickOpcodeForVectorStParam(SmallVector<SDValue, 8> &Ops,
- unsigned NumElts,
- MVT::SimpleValueType MemTy,
- SelectionDAG *CurDAG, SDLoc DL) {
- // Determine which inputs are registers and immediates make new operators
- // with constant values
- SmallVector<bool, 4> IsImm(NumElts, false);
- for (unsigned i = 0; i < NumElts; i++) {
- IsImm[i] = (isa<ConstantSDNode>(Ops[i]) || isa<ConstantFPSDNode>(Ops[i]));
- if (IsImm[i]) {
- SDValue Imm = Ops[i];
- if (MemTy == MVT::f32 || MemTy == MVT::f64) {
- const ConstantFPSDNode *ConstImm = cast<ConstantFPSDNode>(Imm);
- const ConstantFP *CF = ConstImm->getConstantFPValue();
- Imm = CurDAG->getTargetConstantFP(*CF, DL, Imm->getValueType(0));
- } else {
- const ConstantSDNode *ConstImm = cast<ConstantSDNode>(Imm);
- const ConstantInt *CI = ConstImm->getConstantIntValue();
- Imm = CurDAG->getTargetConstant(*CI, DL, Imm->getValueType(0));
- }
- Ops[i] = Imm;
- }
- }
-
- // Get opcode for MemTy, size, and register/immediate operand ordering
- switch (MemTy) {
- case MVT::i8:
- return getOpcodeForVectorStParam(NumElts, I8, IsImm);
- case MVT::i16:
- return getOpcodeForVectorStParam(NumElts, I16, IsImm);
- case MVT::i32:
- return getOpcodeForVectorStParam(NumElts, I32, IsImm);
- case MVT::i64:
- assert(NumElts == 2 && "MVT too large for NumElts > 2");
- return getOpcodeForVectorStParamV2(I64, IsImm);
- case MVT::f32:
- return getOpcodeForVectorStParam(NumElts, F32, IsImm);
- case MVT::f64:
- assert(NumElts == 2 && "MVT too large for NumElts > 2");
- return getOpcodeForVectorStParamV2(F64, IsImm);
-
- // These cases don't support immediates, just use the all register version
- // and generate moves.
- case MVT::i1:
- return (NumElts == 2) ? NVPTX::StoreParamV2I8_rr
- : NVPTX::StoreParamV4I8_rrrr;
- case MVT::f16:
- case MVT::bf16:
- return (NumElts == 2) ? NVPTX::StoreParamV2I16_rr
- : NVPTX::StoreParamV4I16_rrrr;
- case MVT::v2f16:
- case MVT::v2bf16:
- case MVT::v2i16:
- case MVT::v4i8:
- return (NumElts == 2) ? NVPTX::StoreParamV2I32_rr
- : NVPTX::StoreParamV4I32_rrrr;
- default:
- llvm_unreachable("Cannot select st.param for unknown MemTy");
- }
-}
-
-bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
- SDLoc DL(N);
- SDValue Chain = N->getOperand(0);
- SDValue Param = N->getOperand(1);
- unsigned ParamVal = Param->getAsZExtVal();
- SDValue Offset = N->getOperand(2);
- unsigned OffsetVal = Offset->getAsZExtVal();
- MemSDNode *Mem = cast<MemSDNode>(N);
- SDValue Glue = N->getOperand(N->getNumOperands() - 1);
-
- // How many elements do we have?
- unsigned NumElts;
- switch (N->getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode");
- case NVPTXISD::StoreParam:
- NumElts = 1;
- break;
- case NVPTXISD::StoreParamV2:
- NumElts = 2;
- break;
- case NVPTXISD::StoreParamV4:
- NumElts = 4;
- break;
- }
-
- // Build vector of operands
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < NumElts; ++i)
- Ops.push_back(N->getOperand(i + 3));
- Ops.append({CurDAG->getTargetConstant(ParamVal, DL, MVT::i32),
- CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32), Chain, Glue});
-
- // Determine target opcode
- // If we have an i1, use an 8-bit store. The lowering code in
- // NVPTXISelLowering will have already emitted an upcast.
- std::optional<unsigned> Opcode;
- switch (NumElts) {
- default:
- llvm_unreachable("Unexpected NumElts");
- case 1: {
- MVT::SimpleValueType MemTy = Mem->getMemoryVT().getSimpleVT().SimpleTy;
- SDValue Imm = Ops[0];
- if (MemTy != MVT::f16 && MemTy != MVT::bf16 &&
- (isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm))) {
- // Convert immediate to target constant
- if (MemTy == MVT::f32 || MemTy == MVT::f64) {
- const ConstantFPSDNode *ConstImm = cast<ConstantFPSDNode>(Imm);
- const ConstantFP *CF = ConstImm->getConstantFPValue();
- Imm = CurDAG->getTargetConstantFP(*CF, DL, Imm->getValueType(0));
- } else {
- const ConstantSDNode *ConstImm = cast<ConstantSDNode>(Imm);
- const ConstantInt *CI = ConstImm->getConstantIntValue();
- Imm = CurDAG->getTargetConstant(*CI, DL, Imm->getValueType(0));
- }
- Ops[0] = Imm;
- // Use immediate version of store param
- Opcode =
- pickOpcodeForVT(MemTy, NVPTX::StoreParamI8_i, NVPTX::StoreParamI16_i,
- NVPTX::StoreParamI32_i, NVPTX::StoreParamI64_i);
- } else
- Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
- NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
- NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r);
- if (Opcode == NVPTX::StoreParamI8_r) {
- // Fine tune the opcode depending on the size of the operand.
- // This helps to avoid creating redundant COPY instructions in
- // InstrEmitter::AddRegisterOperand().
- switch (Ops[0].getSimpleValueType().SimpleTy) {
- default:
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreParamI8TruncI32_r;
- break;
- case MVT::i64:
- Opcode = NVPTX::StoreParamI8TruncI64_r;
- break;
- }
- }
- break;
- }
- case 2:
- case 4: {
- MVT::SimpleValueType MemTy = Mem->getMemoryVT().getSimpleVT().SimpleTy;
- Opcode = pickOpcodeForVectorStParam(Ops, NumElts, MemTy, CurDAG, DL);
- break;
- }
- }
-
- SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
- SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
- MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
-
- ReplaceNode(N, Ret);
- return true;
-}
-
/// SelectBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction
bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
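
The removed tryStoreParam path chose st.param opcodes whose suffix records, per operand, whether the value is an immediate or a register; the deleted macro comment gives NVPTX::StoreParamV4F32_iiri as an example. A standalone sketch of just that suffix construction from an is-immediate mask (plain C++; the printed instruction name only echoes the removed definitions):

#include <cstdio>
#include <string>
#include <vector>

// Build the "_iiri"-style suffix the removed macros expanded to: one letter
// per stored element, 'i' for an immediate operand and 'r' for a register.
std::string stParamSuffix(const std::vector<bool> &IsImm) {
  std::string Suffix = "_";
  for (bool Imm : IsImm)
    Suffix += Imm ? 'i' : 'r';
  return Suffix;
}

int main() {
  // A v4 store where operands 0, 1 and 3 are immediates and operand 2 is a
  // register.
  std::printf("StoreParamV4F32%s\n",
              stParamSuffix({true, true, false, true}).c_str());
  // prints: StoreParamV4F32_iiri
}
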
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index b99b4ef..e504a8f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -78,8 +78,6 @@ private:
bool tryLDG(MemSDNode *N);
bool tryStore(SDNode *N);
bool tryStoreVector(SDNode *N);
- bool tryLoadParam(SDNode *N);
- bool tryStoreParam(SDNode *N);
bool tryFence(SDNode *N);
void SelectAddrSpaceCast(SDNode *N);
bool tryBFE(SDNode *N);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f2c2f46..f79b862 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -952,10 +952,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// promoted to f32. v2f16 is expanded to f16, which is then promoted
// to f32.
for (const auto &Op :
- {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS}) {
+ {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FTANH}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::f32, Legal);
- setOperationAction(Op, MVT::f64, Legal);
+ // only div/rem/sqrt are legal for f64
+ if (Op == ISD::FDIV || Op == ISD::FREM || Op == ISD::FSQRT) {
+ setOperationAction(Op, MVT::f64, Legal);
+ }
setOperationAction(Op, {MVT::v2f16, MVT::v2bf16, MVT::v2f32}, Expand);
setOperationAction(Op, MVT::bf16, Promote);
AddPromotedToType(Op, MVT::bf16, MVT::f32);
@@ -1072,12 +1075,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(NVPTXISD::DeclareArrayParam)
MAKE_CASE(NVPTXISD::DeclareScalarParam)
MAKE_CASE(NVPTXISD::CALL)
- MAKE_CASE(NVPTXISD::LoadParam)
- MAKE_CASE(NVPTXISD::LoadParamV2)
- MAKE_CASE(NVPTXISD::LoadParamV4)
- MAKE_CASE(NVPTXISD::StoreParam)
- MAKE_CASE(NVPTXISD::StoreParamV2)
- MAKE_CASE(NVPTXISD::StoreParamV4)
MAKE_CASE(NVPTXISD::MoveParam)
MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
MAKE_CASE(NVPTXISD::BUILD_VECTOR)
@@ -1315,105 +1312,6 @@ Align NVPTXTargetLowering::getArgumentAlignment(const CallBase *CB, Type *Ty,
return DL.getABITypeAlign(Ty);
}
-static bool adjustElementType(EVT &ElementType) {
- switch (ElementType.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::f16:
- case MVT::bf16:
- ElementType = MVT::i16;
- return true;
- case MVT::f32:
- case MVT::v2f16:
- case MVT::v2bf16:
- ElementType = MVT::i32;
- return true;
- case MVT::f64:
- ElementType = MVT::i64;
- return true;
- }
-}
-
-// Use byte-store when the param address of the argument value is unaligned.
-// This may happen when the return value is a field of a packed structure.
-//
-// This is called in LowerCall() when passing the param values.
-static SDValue LowerUnalignedStoreParam(SelectionDAG &DAG, SDValue Chain,
- uint64_t Offset, EVT ElementType,
- SDValue StVal, SDValue &InGlue,
- unsigned ArgID, const SDLoc &dl) {
- // Bit logic only works on integer types
- if (adjustElementType(ElementType))
- StVal = DAG.getNode(ISD::BITCAST, dl, ElementType, StVal);
-
- // Store each byte
- SDVTList StoreVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
- // Shift the byte to the last byte position
- SDValue ShiftVal = DAG.getNode(ISD::SRL, dl, ElementType, StVal,
- DAG.getConstant(i * 8, dl, MVT::i32));
- SDValue StoreOperands[] = {Chain, DAG.getConstant(ArgID, dl, MVT::i32),
- DAG.getConstant(Offset + i, dl, MVT::i32),
- ShiftVal, InGlue};
- // Trunc store only the last byte by using
- // st.param.b8
- // The register type can be larger than b8.
- Chain = DAG.getMemIntrinsicNode(
- NVPTXISD::StoreParam, dl, StoreVTs, StoreOperands, MVT::i8,
- MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
- InGlue = Chain.getValue(1);
- }
- return Chain;
-}
-
-// Use byte-load when the param adress of the returned value is unaligned.
-// This may happen when the returned value is a field of a packed structure.
-static SDValue
-LowerUnalignedLoadRetParam(SelectionDAG &DAG, SDValue &Chain, uint64_t Offset,
- EVT ElementType, SDValue &InGlue,
- SmallVectorImpl<SDValue> &TempProxyRegOps,
- const SDLoc &dl) {
- // Bit logic only works on integer types
- EVT MergedType = ElementType;
- adjustElementType(MergedType);
-
- // Load each byte and construct the whole value. Initial value to 0
- SDValue RetVal = DAG.getConstant(0, dl, MergedType);
- // LoadParamMemI8 loads into i16 register only
- SDVTList LoadVTs = DAG.getVTList(MVT::i16, MVT::Other, MVT::Glue);
- for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
- SDValue LoadOperands[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(Offset + i, dl, MVT::i32),
- InGlue};
- // This will be selected to LoadParamMemI8
- SDValue LdVal =
- DAG.getMemIntrinsicNode(NVPTXISD::LoadParam, dl, LoadVTs, LoadOperands,
- MVT::i8, MachinePointerInfo(), Align(1));
- SDValue TmpLdVal = LdVal.getValue(0);
- Chain = LdVal.getValue(1);
- InGlue = LdVal.getValue(2);
-
- TmpLdVal = DAG.getNode(NVPTXISD::ProxyReg, dl,
- TmpLdVal.getSimpleValueType(), TmpLdVal);
- TempProxyRegOps.push_back(TmpLdVal);
-
- SDValue CMask = DAG.getConstant(255, dl, MergedType);
- SDValue CShift = DAG.getConstant(i * 8, dl, MVT::i32);
- // Need to extend the i16 register to the whole width.
- TmpLdVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MergedType, TmpLdVal);
- // Mask off the high bits. Leave only the lower 8bits.
- // Do this because we are using loadparam.b8.
- TmpLdVal = DAG.getNode(ISD::AND, dl, MergedType, TmpLdVal, CMask);
- // Shift and merge
- TmpLdVal = DAG.getNode(ISD::SHL, dl, MergedType, TmpLdVal, CShift);
- RetVal = DAG.getNode(ISD::OR, dl, MergedType, RetVal, TmpLdVal);
- }
- if (ElementType != MergedType)
- RetVal = DAG.getNode(ISD::BITCAST, dl, ElementType, RetVal);
-
- return RetVal;
-}
-
static bool shouldConvertToIndirectCall(const CallBase *CB,
const GlobalAddressSDNode *Func) {
if (!Func)
@@ -1480,10 +1378,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SelectionDAG &DAG = CLI.DAG;
SDLoc dl = CLI.DL;
- SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
+ const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
ArgListTy &Args = CLI.getArgs();
Type *RetTy = CLI.RetTy;
const CallBase *CB = CLI.CB;
@@ -1493,6 +1389,36 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
return DAG.getConstant(I, dl, MVT::i32);
};
+ const unsigned UniqueCallSite = GlobalUniqueCallSite++;
+ const SDValue CallChain = CLI.Chain;
+ const SDValue StartChain =
+ DAG.getCALLSEQ_START(CallChain, UniqueCallSite, 0, dl);
+ SDValue DeclareGlue = StartChain.getValue(1);
+
+ SmallVector<SDValue, 16> CallPrereqs{StartChain};
+
+ const auto MakeDeclareScalarParam = [&](SDValue Symbol, unsigned Size) {
+ // PTX ABI requires integral types to be at least 32 bits in size. FP16 is
+ // loaded/stored using i16, so it's handled here as well.
+ const unsigned SizeBits = promoteScalarArgumentSize(Size * 8);
+ SDValue Declare =
+ DAG.getNode(NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
+ {StartChain, Symbol, GetI32(SizeBits), DeclareGlue});
+ CallPrereqs.push_back(Declare);
+ DeclareGlue = Declare.getValue(1);
+ return Declare;
+ };
+
+ const auto MakeDeclareArrayParam = [&](SDValue Symbol, Align Align,
+ unsigned Size) {
+ SDValue Declare = DAG.getNode(
+ NVPTXISD::DeclareArrayParam, dl, {MVT::Other, MVT::Glue},
+ {StartChain, Symbol, GetI32(Align.value()), GetI32(Size), DeclareGlue});
+ CallPrereqs.push_back(Declare);
+ DeclareGlue = Declare.getValue(1);
+ return Declare;
+ };
+
// Variadic arguments.
//
// Normally, for each argument, we declare a param scalar or a param
@@ -1508,15 +1434,17 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
//
// After all vararg is processed, 'VAOffset' holds the size of the
// vararg byte array.
+ assert((CLI.IsVarArg || CLI.Args.size() == CLI.NumFixedArgs) &&
+ "Non-VarArg function with extra arguments");
- SDValue VADeclareParam; // vararg byte array
const unsigned FirstVAArg = CLI.NumFixedArgs; // position of first variadic
- unsigned VAOffset = 0; // current offset in the param array
+ unsigned VAOffset = 0; // current offset in the param array
- const unsigned UniqueCallSite = GlobalUniqueCallSite++;
- SDValue TempChain = Chain;
- Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
- SDValue InGlue = Chain.getValue(1);
+ const SDValue VADeclareParam =
+ CLI.Args.size() > FirstVAArg
+ ? MakeDeclareArrayParam(getCallParamSymbol(DAG, FirstVAArg, MVT::i32),
+ Align(STI.getMaxRequiredAlignment()), 0)
+ : SDValue();
// Args.size() and Outs.size() need not match.
// Outs.size() will be larger
@@ -1577,43 +1505,19 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert((!IsByVal || TypeSize == ArgOuts[0].Flags.getByValSize()) &&
"type size mismatch");
- const std::optional<SDValue> ArgDeclare = [&]() -> std::optional<SDValue> {
- if (IsVAArg) {
- if (ArgI == FirstVAArg) {
- VADeclareParam = DAG.getNode(
- NVPTXISD::DeclareArrayParam, dl, {MVT::Other, MVT::Glue},
- {Chain, ParamSymbol, GetI32(STI.getMaxRequiredAlignment()),
- GetI32(0), InGlue});
- return VADeclareParam;
- }
- return std::nullopt;
- }
- if (IsByVal || shouldPassAsArray(Arg.Ty)) {
- // declare .param .align <align> .b8 .param<n>[<size>];
- return DAG.getNode(NVPTXISD::DeclareArrayParam, dl,
- {MVT::Other, MVT::Glue},
- {Chain, ParamSymbol, GetI32(ArgAlign.value()),
- GetI32(TypeSize), InGlue});
- }
+ const SDValue ArgDeclare = [&]() {
+ if (IsVAArg)
+ return VADeclareParam;
+
+ if (IsByVal || shouldPassAsArray(Arg.Ty))
+ return MakeDeclareArrayParam(ParamSymbol, ArgAlign, TypeSize);
+
assert(ArgOuts.size() == 1 && "We must pass only one value as non-array");
- // declare .param .b<size> .param<n>;
-
- // PTX ABI requires integral types to be at least 32 bits in
- // size. FP16 is loaded/stored using i16, so it's handled
- // here as well.
- const unsigned PromotedSize =
- (ArgOuts[0].VT.isInteger() || ArgOuts[0].VT.isFloatingPoint())
- ? promoteScalarArgumentSize(TypeSize * 8)
- : TypeSize * 8;
-
- return DAG.getNode(NVPTXISD::DeclareScalarParam, dl,
- {MVT::Other, MVT::Glue},
- {Chain, ParamSymbol, GetI32(PromotedSize), InGlue});
+ assert((ArgOuts[0].VT.isInteger() || ArgOuts[0].VT.isFloatingPoint()) &&
+ "Only int and float types are supported as non-array arguments");
+
+ return MakeDeclareScalarParam(ParamSymbol, TypeSize);
}();
- if (ArgDeclare) {
- Chain = ArgDeclare->getValue(0);
- InGlue = ArgDeclare->getValue(1);
- }
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter
// than 32-bits are sign extended or zero extended, depending on
@@ -1623,36 +1527,25 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg.Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Arg.Ty) < 32;
const auto GetStoredValue = [&](const unsigned I, EVT EltVT,
- const Align PartAlign) {
- SDValue StVal;
+ const MaybeAlign PartAlign) {
if (IsByVal) {
SDValue Ptr = ArgOutVals[0];
auto MPI = refinePtrAS(Ptr, DAG, DL, *this);
SDValue SrcAddr =
DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(Offsets[I]));
- StVal = DAG.getLoad(EltVT, dl, TempChain, SrcAddr, MPI, PartAlign);
- } else {
- StVal = ArgOutVals[I];
-
- auto PromotedVT = promoteScalarIntegerPTX(StVal.getValueType());
- if (PromotedVT != StVal.getValueType()) {
- StVal = DAG.getNode(getExtOpcode(ArgOuts[I].Flags), dl, PromotedVT,
- StVal);
- }
+ return DAG.getLoad(EltVT, dl, CallChain, SrcAddr, MPI, PartAlign);
}
+ SDValue StVal = ArgOutVals[I];
+ assert(promoteScalarIntegerPTX(StVal.getValueType()) ==
+ StVal.getValueType() &&
+ "OutVal type should always be legal");
- if (ExtendIntegerParam) {
- assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
- // zext/sext to i32
- StVal =
- DAG.getNode(getExtOpcode(ArgOuts[I].Flags), dl, MVT::i32, StVal);
- } else if (EltVT.getSizeInBits() < 16) {
- // Use 16-bit registers for small stores as it's the
- // smallest general purpose register size supported by NVPTX.
- StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
- }
- return StVal;
+ const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
+ const EVT StoreVT =
+ ExtendIntegerParam ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
+
+ return correctParamType(StVal, StoreVT, ArgOuts[I].Flags, DAG, dl);
};
const auto VectorInfo =
@@ -1661,23 +1554,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned J = 0;
for (const unsigned NumElts : VectorInfo) {
const int CurOffset = Offsets[J];
- EVT EltVT = promoteScalarIntegerPTX(VTs[J]);
- const Align PartAlign = commonAlignment(ArgAlign, CurOffset);
-
- // If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a
- // scalar store. In such cases, fall back to byte stores.
- if (NumElts == 1 && !IsVAArg && PartAlign < DAG.getEVTAlign(EltVT)) {
-
- SDValue StVal = GetStoredValue(J, EltVT, PartAlign);
- Chain = LowerUnalignedStoreParam(DAG, Chain,
- CurOffset + (IsByVal ? VAOffset : 0),
- EltVT, StVal, InGlue, ArgI, dl);
-
- // LowerUnalignedStoreParam took care of inserting the necessary nodes
- // into the SDAG, so just move on to the next element.
- J++;
- continue;
- }
+ const EVT EltVT = promoteScalarIntegerPTX(VTs[J]);
if (IsVAArg && !IsByVal)
// Align each part of the variadic argument to their type.
@@ -1685,44 +1562,45 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert((IsVAArg || VAOffset == 0) &&
"VAOffset must be 0 for non-VA args");
- SmallVector<SDValue, 6> StoreOperands{
- Chain, GetI32(IsVAArg ? FirstVAArg : ArgI),
- GetI32(VAOffset + ((IsVAArg && !IsByVal) ? 0 : CurOffset))};
- // Record the values to store.
- for (const unsigned K : llvm::seq(NumElts))
- StoreOperands.push_back(GetStoredValue(J + K, EltVT, PartAlign));
- StoreOperands.push_back(InGlue);
+ const unsigned Offset =
+ (VAOffset + ((IsVAArg && !IsByVal) ? 0 : CurOffset));
+ SDValue Ptr =
+ DAG.getObjectPtrOffset(dl, ParamSymbol, TypeSize::getFixed(Offset));
- NVPTXISD::NodeType Op;
- switch (NumElts) {
- case 1:
- Op = NVPTXISD::StoreParam;
- break;
- case 2:
- Op = NVPTXISD::StoreParamV2;
- break;
- case 4:
- Op = NVPTXISD::StoreParamV4;
- break;
- default:
- llvm_unreachable("Invalid vector info.");
+ const MaybeAlign CurrentAlign = ExtendIntegerParam
+ ? MaybeAlign(std::nullopt)
+ : commonAlignment(ArgAlign, Offset);
+
+ SDValue Val;
+ if (NumElts == 1) {
+ Val = GetStoredValue(J, EltVT, CurrentAlign);
+ } else {
+ SmallVector<SDValue, 8> StoreVals;
+ for (const unsigned K : llvm::seq(NumElts)) {
+ SDValue ValJ = GetStoredValue(J + K, EltVT, CurrentAlign);
+ if (ValJ.getValueType().isVector())
+ DAG.ExtractVectorElements(ValJ, StoreVals);
+ else
+ StoreVals.push_back(ValJ);
+ }
+
+ EVT VT = EVT::getVectorVT(
+ *DAG.getContext(), StoreVals[0].getValueType(), StoreVals.size());
+ Val = DAG.getBuildVector(VT, dl, StoreVals);
}
- // Adjust type of the store op if we've extended the scalar
- // return value.
- EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
- Chain = DAG.getMemIntrinsicNode(
- Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
- TheStoreType, MachinePointerInfo(), PartAlign,
- MachineMemOperand::MOStore);
- InGlue = Chain.getValue(1);
+ SDValue StoreParam =
+ DAG.getStore(ArgDeclare, dl, Val, Ptr,
+ MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
+ CallPrereqs.push_back(StoreParam);
// TODO: We may need to support vector types that can be passed
// as scalars in variadic arguments.
if (IsVAArg && !IsByVal) {
assert(NumElts == 1 &&
"Vectorization is expected to be disabled for variadics.");
+ const EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
VAOffset +=
DL.getTypeAllocSize(TheStoreType.getTypeForEVT(*DAG.getContext()));
}
@@ -1733,33 +1611,21 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
VAOffset += TypeSize;
}
- GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
-
// Handle Result
if (!Ins.empty()) {
- const SDValue RetDeclare = [&]() {
- const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
- const unsigned ResultSize = DL.getTypeAllocSizeInBits(RetTy);
- if (shouldPassAsArray(RetTy)) {
- const Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
- return DAG.getNode(NVPTXISD::DeclareArrayParam, dl,
- {MVT::Other, MVT::Glue},
- {Chain, RetSymbol, GetI32(RetAlign.value()),
- GetI32(ResultSize / 8), InGlue});
- }
- const auto PromotedResultSize = promoteScalarArgumentSize(ResultSize);
- return DAG.getNode(
- NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
- {Chain, RetSymbol, GetI32(PromotedResultSize), InGlue});
- }();
- Chain = RetDeclare.getValue(0);
- InGlue = RetDeclare.getValue(1);
+ const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
+ const unsigned ResultSize = DL.getTypeAllocSize(RetTy);
+ if (shouldPassAsArray(RetTy)) {
+ const Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
+ MakeDeclareArrayParam(RetSymbol, RetAlign, ResultSize);
+ } else {
+ MakeDeclareScalarParam(RetSymbol, ResultSize);
+ }
}
- const bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs);
// Set the size of the vararg param byte array if the callee is a variadic
// function and the variadic part is not empty.
- if (HasVAArgs) {
+ if (VADeclareParam) {
SDValue DeclareParamOps[] = {VADeclareParam.getOperand(0),
VADeclareParam.getOperand(1),
VADeclareParam.getOperand(2), GetI32(VAOffset),
@@ -1768,6 +1634,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
VADeclareParam->getVTList(), DeclareParamOps);
}
+ const auto *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
// If the type of the callsite does not match that of the function, convert
// the callsite to an indirect call.
const bool ConvertToIndirectCall = shouldConvertToIndirectCall(CB, Func);
@@ -1797,15 +1664,16 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// instruction.
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
+ const bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs);
std::string Proto =
getPrototype(DL, RetTy, Args, CLI.Outs,
HasVAArgs ? std::optional(FirstVAArg) : std::nullopt, *CB,
UniqueCallSite);
const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
- Chain = DAG.getNode(
- NVPTXISD::CallPrototype, dl, {MVT::Other, MVT::Glue},
- {Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InGlue});
- InGlue = Chain.getValue(1);
+ const SDValue PrototypeDeclare = DAG.getNode(
+ NVPTXISD::CallPrototype, dl, MVT::Other,
+ {StartChain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32)});
+ CallPrereqs.push_back(PrototypeDeclare);
}
if (ConvertToIndirectCall) {
@@ -1823,24 +1691,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const unsigned NumArgs =
std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size());
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
- /// NumParams, Callee, Proto, InGlue)
- Chain = DAG.getNode(NVPTXISD::CALL, dl, {MVT::Other, MVT::Glue},
- {Chain, GetI32(CLI.IsConvergent), GetI32(IsIndirectCall),
- GetI32(Ins.empty() ? 0 : 1), GetI32(NumArgs), Callee,
- GetI32(Proto), InGlue});
- InGlue = Chain.getValue(1);
-
+ /// NumParams, Callee, Proto)
+ const SDValue CallToken = DAG.getTokenFactor(dl, CallPrereqs);
+ const SDValue Call = DAG.getNode(
+ NVPTXISD::CALL, dl, MVT::Other,
+ {CallToken, GetI32(CLI.IsConvergent), GetI32(IsIndirectCall),
+ GetI32(Ins.empty() ? 0 : 1), GetI32(NumArgs), Callee, GetI32(Proto)});
+
+ SmallVector<SDValue, 16> LoadChains{Call};
SmallVector<SDValue, 16> ProxyRegOps;
- // An item of the vector is filled if the element does not need a ProxyReg
- // operation on it and should be added to InVals as is. ProxyRegOps and
- // ProxyRegTruncates contain empty/none items at the same index.
- SmallVector<SDValue, 16> RetElts;
- // A temporary ProxyReg operations inserted in `LowerUnalignedLoadRetParam()`
- // to use the values of `LoadParam`s and to be replaced later then
- // `CALLSEQ_END` is added.
- SmallVector<SDValue, 16> TempProxyRegOps;
-
- // Generate loads from param memory/moves from registers for result
if (!Ins.empty()) {
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
@@ -1857,104 +1716,65 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
unsigned I = 0;
- for (const unsigned VectorizedSize : VectorInfo) {
- EVT TheLoadType = promoteScalarIntegerPTX(VTs[I]);
- EVT EltType = Ins[I].VT;
- const Align EltAlign = commonAlignment(RetAlign, Offsets[I]);
-
- if (TheLoadType != VTs[I])
- EltType = TheLoadType;
-
- if (ExtendIntegerRetVal) {
- TheLoadType = MVT::i32;
- EltType = MVT::i32;
- } else if (TheLoadType.getSizeInBits() < 16) {
- EltType = MVT::i16;
- }
+ for (const unsigned NumElts : VectorInfo) {
+ const MaybeAlign CurrentAlign =
+ ExtendIntegerRetVal ? MaybeAlign(std::nullopt)
+ : commonAlignment(RetAlign, Offsets[I]);
- // If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a
- // scalar load. In such cases, fall back to byte loads.
- if (VectorizedSize == 1 && RetTy->isAggregateType() &&
- EltAlign < DAG.getEVTAlign(TheLoadType)) {
- SDValue Ret = LowerUnalignedLoadRetParam(
- DAG, Chain, Offsets[I], TheLoadType, InGlue, TempProxyRegOps, dl);
- ProxyRegOps.push_back(SDValue());
- RetElts.resize(I);
- RetElts.push_back(Ret);
-
- I++;
- continue;
- }
+ const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
+ const EVT LoadVT =
+ ExtendIntegerRetVal ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
- SmallVector<EVT, 6> LoadVTs(VectorizedSize, EltType);
- LoadVTs.append({MVT::Other, MVT::Glue});
+ const unsigned PackingAmt =
+ LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
- NVPTXISD::NodeType Op;
- switch (VectorizedSize) {
- case 1:
- Op = NVPTXISD::LoadParam;
- break;
- case 2:
- Op = NVPTXISD::LoadParamV2;
- break;
- case 4:
- Op = NVPTXISD::LoadParamV4;
- break;
- default:
- llvm_unreachable("Invalid vector info.");
- }
+ const EVT VecVT = NumElts == 1 ? LoadVT
+ : EVT::getVectorVT(*DAG.getContext(),
+ LoadVT.getScalarType(),
+ NumElts * PackingAmt);
- SDValue LoadOperands[] = {Chain, GetI32(1), GetI32(Offsets[I]), InGlue};
- SDValue RetVal = DAG.getMemIntrinsicNode(
- Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
- MachinePointerInfo(), EltAlign, MachineMemOperand::MOLoad);
+ const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
+ SDValue Ptr =
+ DAG.getObjectPtrOffset(dl, RetSymbol, TypeSize::getFixed(Offsets[I]));
- for (const unsigned J : llvm::seq(VectorizedSize)) {
- ProxyRegOps.push_back(RetVal.getValue(J));
- }
+ SDValue R =
+ DAG.getLoad(VecVT, dl, Call, Ptr,
+ MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
- Chain = RetVal.getValue(VectorizedSize);
- InGlue = RetVal.getValue(VectorizedSize + 1);
+ LoadChains.push_back(R.getValue(1));
- I += VectorizedSize;
+ if (NumElts == 1)
+ ProxyRegOps.push_back(R);
+ else
+ for (const unsigned J : llvm::seq(NumElts)) {
+ SDValue Elt = DAG.getNode(
+ LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR
+ : ISD::EXTRACT_VECTOR_ELT,
+ dl, LoadVT, R, DAG.getVectorIdxConstant(J * PackingAmt, dl));
+ ProxyRegOps.push_back(Elt);
+ }
+ I += NumElts;
}
}
- Chain =
- DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InGlue, dl);
- InGlue = Chain.getValue(1);
+ const SDValue EndToken = DAG.getTokenFactor(dl, LoadChains);
+ const SDValue CallEnd = DAG.getCALLSEQ_END(EndToken, UniqueCallSite,
+ UniqueCallSite + 1, SDValue(), dl);
// Append ProxyReg instructions to the chain to make sure that `callseq_end`
// will not get lost. Otherwise, during libcalls expansion, the nodes can become
// dangling.
- for (const unsigned I : llvm::seq(ProxyRegOps.size())) {
- if (I < RetElts.size() && RetElts[I]) {
- InVals.push_back(RetElts[I]);
- continue;
- }
-
- SDValue Ret =
- DAG.getNode(NVPTXISD::ProxyReg, dl, ProxyRegOps[I].getSimpleValueType(),
- {Chain, ProxyRegOps[I]});
-
- const EVT ExpectedVT = Ins[I].VT;
- if (!Ret.getValueType().bitsEq(ExpectedVT)) {
- Ret = DAG.getNode(ISD::TRUNCATE, dl, ExpectedVT, Ret);
- }
+ for (const auto [I, Reg] : llvm::enumerate(ProxyRegOps)) {
+ SDValue Proxy =
+ DAG.getNode(NVPTXISD::ProxyReg, dl, Reg.getValueType(), {CallEnd, Reg});
+ SDValue Ret = correctParamType(Proxy, Ins[I].VT, Ins[I].Flags, DAG, dl);
InVals.push_back(Ret);
}
- for (SDValue &T : TempProxyRegOps) {
- SDValue Repl = DAG.getNode(NVPTXISD::ProxyReg, dl, T.getSimpleValueType(),
- {Chain, T.getOperand(0)});
- DAG.ReplaceAllUsesWith(T, Repl);
- DAG.RemoveDeadNode(T.getNode());
- }
-
- // set isTailCall to false for now, until we figure out how to express
+ // set IsTailCall to false for now, until we figure out how to express
// tail call optimization in PTX
- isTailCall = false;
- return Chain;
+ CLI.IsTailCall = false;
+ return CallEnd;
}
SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
@@ -5114,10 +4934,6 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
Operands.push_back(DCI.DAG.getIntPtrConstant(
cast<LoadSDNode>(LD)->getExtensionType(), DL));
break;
- case NVPTXISD::LoadParamV2:
- OldNumOutputs = 2;
- Opcode = NVPTXISD::LoadParamV4;
- break;
case NVPTXISD::LoadV2:
OldNumOutputs = 2;
Opcode = NVPTXISD::LoadV4;
@@ -5198,12 +5014,6 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
MemVT = ST->getMemoryVT();
Opcode = NVPTXISD::StoreV2;
break;
- case NVPTXISD::StoreParam:
- Opcode = NVPTXISD::StoreParamV2;
- break;
- case NVPTXISD::StoreParamV2:
- Opcode = NVPTXISD::StoreParamV4;
- break;
case NVPTXISD::StoreV2:
MemVT = ST->getMemoryVT();
Opcode = NVPTXISD::StoreV4;
@@ -5215,7 +5025,6 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
return SDValue();
Opcode = NVPTXISD::StoreV8;
break;
- case NVPTXISD::StoreParamV4:
case NVPTXISD::StoreV8:
// PTX doesn't support the next doubling of operands
return SDValue();
@@ -5260,30 +5069,11 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
MemVT, ST->getMemOperand());
}
-static SDValue PerformStoreCombineHelper(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- unsigned Front, unsigned Back) {
- if (all_of(N->ops().drop_front(Front).drop_back(Back),
- [](const SDUse &U) { return U.get()->isUndef(); }))
- // Operand 0 is the previous value in the chain. Cannot return EntryToken
- // as the previous value will become unused and eliminated later.
- return N->getOperand(0);
-
- return combinePackingMovIntoStore(N, DCI, Front, Back);
-}
-
static SDValue PerformStoreCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
return combinePackingMovIntoStore(N, DCI, 1, 2);
}
-static SDValue PerformStoreParamCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // Operands from the 3rd to the 2nd last one are the values to be stored.
- // {Chain, ArgID, Offset, Val, Glue}
- return PerformStoreCombineHelper(N, DCI, 3, 1);
-}
-
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
@@ -5939,6 +5729,86 @@ static SDValue combinePRMT(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
N->getConstantOperandAPInt(2),
N->getConstantOperandVal(3)),
SDLoc(N), N->getValueType(0));
+ return SDValue();
+}
+
+// During call lowering we wrap the return values in a ProxyReg node, which
+// depends on the chain value produced by the completed call. This ensures that
+// the full call is emitted in cases where libcalls are used to legalize
+// operations. To improve the functioning of other DAG combines, we pull all
+// operations we can through one of these nodes, ensuring that the ProxyReg
+// directly wraps a load. That is:
+//
+// (ProxyReg (zext (load retval0))) => (zext (ProxyReg (load retval0)))
+//
+static SDValue sinkProxyReg(SDValue R, SDValue Chain,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ switch (R.getOpcode()) {
+ case ISD::TRUNCATE:
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::BITCAST: {
+ if (SDValue V = sinkProxyReg(R.getOperand(0), Chain, DCI))
+ return DCI.DAG.getNode(R.getOpcode(), SDLoc(R), R.getValueType(), V);
+ return SDValue();
+ }
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::OR: {
+ if (SDValue A = sinkProxyReg(R.getOperand(0), Chain, DCI))
+ if (SDValue B = sinkProxyReg(R.getOperand(1), Chain, DCI))
+ return DCI.DAG.getNode(R.getOpcode(), SDLoc(R), R.getValueType(), A, B);
+ return SDValue();
+ }
+ case ISD::Constant:
+ return R;
+ case ISD::LOAD:
+ case NVPTXISD::LoadV2:
+ case NVPTXISD::LoadV4: {
+ return DCI.DAG.getNode(NVPTXISD::ProxyReg, SDLoc(R), R.getValueType(),
+ {Chain, R});
+ }
+ case ISD::BUILD_VECTOR: {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ SmallVector<SDValue, 16> Ops;
+ for (auto &Op : R->ops()) {
+ SDValue V = sinkProxyReg(Op, Chain, DCI);
+ if (!V)
+ return SDValue();
+ Ops.push_back(V);
+ }
+ return DCI.DAG.getNode(ISD::BUILD_VECTOR, SDLoc(R), R.getValueType(), Ops);
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (SDValue V = sinkProxyReg(R.getOperand(0), Chain, DCI))
+ return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(R),
+ R.getValueType(), V, R.getOperand(1));
+ return SDValue();
+ }
+ default:
+ return SDValue();
+ }
+}
+
+static SDValue combineProxyReg(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Reg = N->getOperand(1);
+
+ // If the ProxyReg is not wrapping a load, try to pull the operations through
+ // the ProxyReg.
+ if (Reg.getOpcode() != ISD::LOAD) {
+ if (SDValue V = sinkProxyReg(Reg, Chain, DCI))
+ return V;
+ }
return SDValue();
}
@@ -5962,7 +5832,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FADD:
return PerformFADDCombine(N, DCI, OptLevel);
case ISD::LOAD:
- case NVPTXISD::LoadParamV2:
case NVPTXISD::LoadV2:
case NVPTXISD::LoadV4:
return combineUnpackingMovIntoLoad(N, DCI);
@@ -5970,6 +5839,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return PerformMULCombine(N, DCI, OptLevel);
case NVPTXISD::PRMT:
return combinePRMT(N, DCI, OptLevel);
+ case NVPTXISD::ProxyReg:
+ return combineProxyReg(N, DCI);
case ISD::SETCC:
return PerformSETCCCombine(N, DCI, STI.getSmVersion());
case ISD::SHL:
@@ -5977,10 +5848,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SREM:
case ISD::UREM:
return PerformREMCombine(N, DCI, OptLevel);
- case NVPTXISD::StoreParam:
- case NVPTXISD::StoreParamV2:
- case NVPTXISD::StoreParamV4:
- return PerformStoreParamCombine(N, DCI);
case ISD::STORE:
case NVPTXISD::StoreV2:
case NVPTXISD::StoreV4:
@@ -6329,6 +6196,22 @@ static void ReplaceCopyFromReg_128(SDNode *N, SelectionDAG &DAG,
Results.push_back(NewValue.getValue(3));
}
+static void replaceProxyReg(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Reg = N->getOperand(1);
+
+ MVT VT = TLI.getRegisterType(*DAG.getContext(), Reg.getValueType());
+
+ SDValue NewReg = DAG.getAnyExtOrTrunc(Reg, SDLoc(N), VT);
+ SDValue NewProxy =
+ DAG.getNode(NVPTXISD::ProxyReg, SDLoc(N), VT, {Chain, NewReg});
+ SDValue Res = DAG.getAnyExtOrTrunc(NewProxy, SDLoc(N), N->getValueType(0));
+
+ Results.push_back(Res);
+}
+
void NVPTXTargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
@@ -6346,6 +6229,9 @@ void NVPTXTargetLowering::ReplaceNodeResults(
case ISD::CopyFromReg:
ReplaceCopyFromReg_128(N, DAG, Results);
return;
+ case NVPTXISD::ProxyReg:
+ replaceProxyReg(N, DAG, *this, Results);
+ return;
}
}
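
The combineProxyReg/sinkProxyReg addition above rewrites (ProxyReg (zext (load retval0))) into (zext (ProxyReg (load retval0))) so the wrapper ends up directly on the load and other combines can still see through the arithmetic. A toy standalone model of that recursive sinking over a one-operand expression tree (plain C++, not SelectionDAG; node kinds and helper names are invented for illustration):

#include <cstdio>
#include <memory>
#include <string>
#include <utility>

// Minimal expression node: "load" is a leaf, anything with a child is a unary
// op. sinkProxy() pushes a proxy wrapper below unary ops until it sits
// directly on the load, mirroring the combine above.
struct Node {
  std::string Kind;            // "load", "zext", "trunc", ...
  std::shared_ptr<Node> Child; // single operand, null for leaves
};

static std::shared_ptr<Node> makeNode(std::string Kind,
                                      std::shared_ptr<Node> Child = nullptr) {
  return std::make_shared<Node>(Node{std::move(Kind), std::move(Child)});
}

// (proxy X): if X is a load (or another leaf), keep the proxy on it; if X is a
// unary op, rebuild the op above a recursively sunk proxy.
static std::shared_ptr<Node> sinkProxy(const std::shared_ptr<Node> &Wrapped) {
  if (!Wrapped->Child || Wrapped->Kind == "load")
    return makeNode("proxy", Wrapped);
  return makeNode(Wrapped->Kind, sinkProxy(Wrapped->Child));
}

static std::string dump(const std::shared_ptr<Node> &N) {
  return N->Child ? "(" + N->Kind + " " + dump(N->Child) + ")" : N->Kind;
}

int main() {
  auto Expr = makeNode("zext", makeNode("load"));     // zext(load)
  std::printf("%s\n", dump(sinkProxy(Expr)).c_str()); // (zext (proxy load))
}
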
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 228e2aa..cf72a1e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -38,7 +38,7 @@ enum NodeType : unsigned {
/// This node represents a PTX call instruction. It's operands are as follows:
///
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
- /// NumParams, Callee, Proto, InGlue)
+ /// NumParams, Callee, Proto)
CALL,
MoveParam,
@@ -84,13 +84,7 @@ enum NodeType : unsigned {
StoreV2,
StoreV4,
StoreV8,
- LoadParam,
- LoadParamV2,
- LoadParamV4,
- StoreParam,
- StoreParamV2,
- StoreParamV4,
- LAST_MEMORY_OPCODE = StoreParamV4,
+ LAST_MEMORY_OPCODE = StoreV8,
};
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index b5df4c6..86d6f7c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1234,7 +1234,7 @@ defm FMA_F32 : FMA<F32RT, allow_ftz = true>;
defm FMA_F32x2 : FMA<F32X2RT, allow_ftz = true, preds = [hasF32x2Instructions]>;
defm FMA_F64 : FMA<F64RT, allow_ftz = false>;
-// sin/cos
+// sin/cos/tanh
class UnaryOpAllowsApproxFn<SDPatternOperator operator>
: PatFrag<(ops node:$A),
@@ -1250,6 +1250,10 @@ def COS_APPROX_f32 :
BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz),
"cos.approx$ftz.f32",
[(set f32:$dst, (UnaryOpAllowsApproxFn<fcos> f32:$src))]>;
+def TANH_APPROX_f32 :
+ BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "tanh.approx.f32",
+ [(set f32:$dst, (UnaryOpAllowsApproxFn<ftanh> f32:$src))]>,
+ Requires<[hasPTX<70>, hasSM<75>]>;
//-----------------------------------
// Bitwise operations
@@ -1753,12 +1757,6 @@ def SDTDeclareArrayParam :
SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
def SDTDeclareScalarParam :
SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
-def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
-def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
-def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
-def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
-def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
def SDTProxyReg : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
@@ -1770,104 +1768,20 @@ def declare_array_param :
def declare_scalar_param :
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParam,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-
-def LoadParam :
- SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
- [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
-def LoadParamV2 :
- SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
- [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
-def LoadParamV4 :
- SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
- [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
-def StoreParam :
- SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParamV2 :
- SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParamV4 :
- SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveParam :
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
def proxy_reg :
SDNode<"NVPTXISD::ProxyReg", SDTProxyReg, [SDNPHasChain]>;
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
- /// NumParams, Callee, Proto, InGlue)
+ /// NumParams, Callee, Proto)
def SDTCallProfile : SDTypeProfile<0, 6,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>,
SDTCisVT<3, i32>, SDTCisVT<5, i32>]>;
-def call :
- SDNode<"NVPTXISD::CALL", SDTCallProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-
-let mayLoad = true in {
- class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
- !strconcat("ld.param", opstr, " \t$dst, [retval0$b];"),
- []>;
-
- class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins Offseti32imm:$b),
- !strconcat("ld.param.v2", opstr,
- " \t{{$dst, $dst2}}, [retval0$b];"), []>;
-
- class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
- regclass:$dst4),
- (ins Offseti32imm:$b),
- !strconcat("ld.param.v4", opstr,
- " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0$b];"),
- []>;
-}
-
-let mayStore = true in {
-
- multiclass StoreParamInst<NVPTXRegClass regclass, Operand IMMType, string opstr, bit support_imm = true> {
- foreach op = [IMMType, regclass] in
- if !or(support_imm, !isa<NVPTXRegClass>(op)) then
- def _ # !if(!isa<NVPTXRegClass>(op), "r", "i")
- : NVPTXInst<(outs),
- (ins op:$val, i32imm:$a, Offseti32imm:$b),
- "st.param" # opstr # " \t[param$a$b], $val;",
- []>;
- }
-
- multiclass StoreParamV2Inst<NVPTXRegClass regclass, Operand IMMType, string opstr> {
- foreach op1 = [IMMType, regclass] in
- foreach op2 = [IMMType, regclass] in
- def _ # !if(!isa<NVPTXRegClass>(op1), "r", "i")
- # !if(!isa<NVPTXRegClass>(op2), "r", "i")
- : NVPTXInst<(outs),
- (ins op1:$val1, op2:$val2,
- i32imm:$a, Offseti32imm:$b),
- "st.param.v2" # opstr # " \t[param$a$b], {{$val1, $val2}};",
- []>;
- }
-
- multiclass StoreParamV4Inst<NVPTXRegClass regclass, Operand IMMType, string opstr> {
- foreach op1 = [IMMType, regclass] in
- foreach op2 = [IMMType, regclass] in
- foreach op3 = [IMMType, regclass] in
- foreach op4 = [IMMType, regclass] in
- def _ # !if(!isa<NVPTXRegClass>(op1), "r", "i")
- # !if(!isa<NVPTXRegClass>(op2), "r", "i")
- # !if(!isa<NVPTXRegClass>(op3), "r", "i")
- # !if(!isa<NVPTXRegClass>(op4), "r", "i")
-
- : NVPTXInst<(outs),
- (ins op1:$val1, op2:$val2, op3:$val3, op4:$val4,
- i32imm:$a, Offseti32imm:$b),
- "st.param.v4" # opstr #
- " \t[param$a$b], {{$val1, $val2, $val3, $val4}};",
- []>;
- }
-}
+def call : SDNode<"NVPTXISD::CALL", SDTCallProfile, [SDNPHasChain, SDNPSideEffect]>;
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
-/// NumParams, Callee, Proto, InGlue)
+/// NumParams, Callee, Proto)
def CallOperand : Operand<i32> { let PrintMethod = "printCallOperand"; }
@@ -1904,43 +1818,6 @@ foreach is_convergent = [0, 1] in {
(call_uni_inst $addr, imm:$rets, imm:$params)>;
}
-def LoadParamMemI64 : LoadParamMemInst<B64, ".b64">;
-def LoadParamMemI32 : LoadParamMemInst<B32, ".b32">;
-def LoadParamMemI16 : LoadParamMemInst<B16, ".b16">;
-def LoadParamMemI8 : LoadParamMemInst<B16, ".b8">;
-def LoadParamMemV2I64 : LoadParamV2MemInst<B64, ".b64">;
-def LoadParamMemV2I32 : LoadParamV2MemInst<B32, ".b32">;
-def LoadParamMemV2I16 : LoadParamV2MemInst<B16, ".b16">;
-def LoadParamMemV2I8 : LoadParamV2MemInst<B16, ".b8">;
-def LoadParamMemV4I32 : LoadParamV4MemInst<B32, ".b32">;
-def LoadParamMemV4I16 : LoadParamV4MemInst<B16, ".b16">;
-def LoadParamMemV4I8 : LoadParamV4MemInst<B16, ".b8">;
-
-defm StoreParamI64 : StoreParamInst<B64, i64imm, ".b64">;
-defm StoreParamI32 : StoreParamInst<B32, i32imm, ".b32">;
-defm StoreParamI16 : StoreParamInst<B16, i16imm, ".b16">;
-defm StoreParamI8 : StoreParamInst<B16, i8imm, ".b8">;
-
-defm StoreParamI8TruncI32 : StoreParamInst<B32, i8imm, ".b8", /* support_imm */ false>;
-defm StoreParamI8TruncI64 : StoreParamInst<B64, i8imm, ".b8", /* support_imm */ false>;
-
-defm StoreParamV2I64 : StoreParamV2Inst<B64, i64imm, ".b64">;
-defm StoreParamV2I32 : StoreParamV2Inst<B32, i32imm, ".b32">;
-defm StoreParamV2I16 : StoreParamV2Inst<B16, i16imm, ".b16">;
-defm StoreParamV2I8 : StoreParamV2Inst<B16, i8imm, ".b8">;
-
-defm StoreParamV4I32 : StoreParamV4Inst<B32, i32imm, ".b32">;
-defm StoreParamV4I16 : StoreParamV4Inst<B16, i16imm, ".b16">;
-defm StoreParamV4I8 : StoreParamV4Inst<B16, i8imm, ".b8">;
-
-defm StoreParamF32 : StoreParamInst<B32, f32imm, ".b32">;
-defm StoreParamF64 : StoreParamInst<B64, f64imm, ".b64">;
-
-defm StoreParamV2F32 : StoreParamV2Inst<B32, f32imm, ".b32">;
-defm StoreParamV2F64 : StoreParamV2Inst<B64, f64imm, ".b64">;
-
-defm StoreParamV4F32 : StoreParamV4Inst<B32, f32imm, ".b32">;
-
def DECLARE_PARAM_array :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$align, i32imm:$size),
".param .align $align .b8 \t$a[$size];", []>;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 8baf866..1af2f9c 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -220,8 +220,6 @@ bool PPCELFMCAsmInfo::evaluateAsRelocatableImpl(const MCSpecifierExpr &Expr,
return evaluateAsRelocatable(Expr, Res, Asm);
}
-void PPCXCOFFMCAsmInfo::anchor() {}
-
PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle)
report_fatal_error("XCOFF is not supported for little-endian targets");
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 0f945b3..6af1bd7 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -33,8 +33,6 @@ public:
};
class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF {
- void anchor() override;
-
public:
explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &);
void printSpecifierExpr(raw_ostream &OS,
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 54497d9..3dad0e8 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -213,7 +213,7 @@ public:
void emitTCEntry(const MCSymbol &S, PPCMCExpr::Specifier Kind) override {
if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) {
MCSymbolXCOFF *TCSym =
- cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
+ static_cast<const MCSectionXCOFF *>(Streamer.getCurrentSectionOnly())
->getQualNameSymbol();
// On AIX, we have TLS variable offsets (symbol@({gd|ie|le|ld}) depending
// on the TLS access method (or model). For the general-dynamic access
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a091b21..ce1d51a 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2274,9 +2274,9 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
// Setup CurrentFnDescSym and its containing csect.
- MCSectionXCOFF *FnDescSec =
- cast<MCSectionXCOFF>(getObjFileLowering().getSectionForFunctionDescriptor(
- &MF.getFunction(), TM));
+ auto *FnDescSec = static_cast<MCSectionXCOFF *>(
+ getObjFileLowering().getSectionForFunctionDescriptor(&MF.getFunction(),
+ TM));
FnDescSec->setAlignment(Align(Subtarget->isPPC64() ? 8 : 4));
CurrentFnDescSym = FnDescSec->getQualNameSymbol();
@@ -2669,9 +2669,9 @@ void PPCAIXAsmPrinter::emitTracebackTable() {
MCSymbol *EHInfoSym =
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF);
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(EHInfoSym, TOCType_EHBlock);
- const MCSymbol *TOCBaseSym =
- cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
- ->getQualNameSymbol();
+ const MCSymbol *TOCBaseSym = static_cast<const MCSectionXCOFF *>(
+ getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
const MCExpr *Exp =
MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx),
MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx);
@@ -2788,7 +2788,7 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) {
}
}
- MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
+ auto *Csect = static_cast<MCSectionXCOFF *>(
getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
// Switch to the containing csect.
@@ -2869,9 +2869,9 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() {
OutStreamer->emitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
PointerSize);
// Emit TOC base address.
- const MCSymbol *TOCBaseSym =
- cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
- ->getQualNameSymbol();
+ const MCSymbol *TOCBaseSym = static_cast<const MCSectionXCOFF *>(
+ getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
OutStreamer->emitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
PointerSize);
// Emit a null environment pointer.
@@ -2996,10 +2996,10 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
Name += Prefix;
Name += cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName();
MCSymbol *S = OutContext.getOrCreateSymbol(Name);
- TCEntry = cast<MCSectionXCOFF>(
+ TCEntry = static_cast<MCSectionXCOFF *>(
getObjFileLowering().getSectionForTOCEntry(S, TM));
} else {
- TCEntry = cast<MCSectionXCOFF>(
+ TCEntry = static_cast<MCSectionXCOFF *>(
getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));
}
OutStreamer->switchSection(TCEntry);
@@ -3054,7 +3054,7 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
return;
SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM);
- MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
+ auto *Csect = static_cast<MCSectionXCOFF *>(
getObjFileLowering().SectionForGlobal(GO, GOKind, TM));
Align GOAlign = getGVAlignment(GO, GO->getDataLayout());
@@ -3316,9 +3316,9 @@ void PPCAIXAsmPrinter::emitTTypeReference(const GlobalValue *GV,
GlobalType = TOCType_GlobalExternal;
MCSymbol *TypeInfoSym = TM.getSymbol(GV);
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(TypeInfoSym, GlobalType);
- const MCSymbol *TOCBaseSym =
- cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
- ->getQualNameSymbol();
+ const MCSymbol *TOCBaseSym = static_cast<const MCSectionXCOFF *>(
+ getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
auto &Ctx = OutStreamer->getContext();
const MCExpr *Exp =
MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx),
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index a143d85..d71c42c 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3849,9 +3849,14 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
switch (Inst.getOpcode()) {
default:
break;
- case RISCV::PseudoC_ADDI_NOP:
- emitToStreamer(Out, MCInstBuilder(RISCV::C_NOP));
+ case RISCV::PseudoC_ADDI_NOP: {
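+    // c.addi x0, x0, 0 is the canonical c.nop; a non-zero immediate selects
+    // the corresponding c.nop hint encoding.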
+ if (Inst.getOperand(2).getImm() == 0)
+ emitToStreamer(Out, MCInstBuilder(RISCV::C_NOP));
+ else
+ emitToStreamer(
+ Out, MCInstBuilder(RISCV::C_NOP_HINT).addOperand(Inst.getOperand(2)));
return false;
+ }
case RISCV::PseudoLLAImm:
case RISCV::PseudoLAImm:
case RISCV::PseudoLI: {
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index fa7bcfa..67cc01e 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -193,21 +193,19 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, uint32_t RegNo,
static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- if (RegNo == 0) {
+ if (RegNo == 0)
return MCDisassembler::Fail;
- }
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus
-DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, uint32_t Address,
- const MCDisassembler *Decoder) {
- if (RegNo == 2) {
+static DecodeStatus DecodeGPRNoX2RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint32_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo == 2)
return MCDisassembler::Fail;
- }
- return DecodeGPRNoX0RegisterClass(Inst, RegNo, Address, Decoder);
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
static DecodeStatus DecodeGPRNoX31RegisterClass(MCInst &Inst, uint32_t RegNo,
@@ -536,41 +534,26 @@ static DecodeStatus decodeRTZArg(MCInst &Inst, uint32_t Imm, int64_t Address,
return MCDisassembler::Success;
}
-static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus decodeRVCInstrRdSImm6(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus decodeRVCInstrRdCLUIImm(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus
-decodeRVCInstrRdRs1UImmLog2XLenNonZero(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
static DecodeStatus decodeZcmpRlist(MCInst &Inst, uint32_t Imm,
uint64_t Address,
- const MCDisassembler *Decoder);
+ const MCDisassembler *Decoder) {
+ bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE);
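+  // Rlist encodings below {ra} are reserved; with RVE the list cannot include
+  // s2 or above.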
+ if (Imm < RISCVZC::RA || (IsRVE && Imm >= RISCVZC::RA_S0_S2))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
static DecodeStatus decodeXqccmpRlistS0(MCInst &Inst, uint32_t Imm,
uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (Imm < RISCVZC::RA_S0)
+ return MCDisassembler::Fail;
+ return decodeZcmpRlist(Inst, Imm, Address, Decoder);
+}
+
+static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
+ uint64_t Address,
const MCDisassembler *Decoder);
static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
@@ -579,18 +562,6 @@ static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
#include "RISCVGenDisassemblerTables.inc"
-static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- DecodeStatus S = MCDisassembler::Success;
- uint32_t Rd = fieldFromInstruction(Insn, 7, 5);
- if (!Check(S, DecodeGPRNoX0RegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- Inst.addOperand(Inst.getOperand(0));
- Inst.addOperand(MCOperand::createImm(0));
- return S;
-}
-
static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -601,66 +572,6 @@ static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus decodeRVCInstrRdSImm6(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- Inst.addOperand(MCOperand::createReg(RISCV::X0));
- uint32_t Imm =
- fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
- [[maybe_unused]] DecodeStatus Result =
- decodeSImmOperand<6>(Inst, Imm, Address, Decoder);
- assert(Result == MCDisassembler::Success && "Invalid immediate");
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeRVCInstrRdCLUIImm(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- Inst.addOperand(MCOperand::createReg(RISCV::X0));
- uint32_t Imm =
- fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
- return decodeCLUIImmOperand(Inst, Imm, Address, Decoder);
-}
-
-static DecodeStatus
-decodeRVCInstrRdRs1UImmLog2XLenNonZero(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- Inst.addOperand(MCOperand::createReg(RISCV::X0));
- Inst.addOperand(Inst.getOperand(0));
-
- uint32_t UImm6 =
- fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
- return decodeUImmLog2XLenNonZeroOperand(Inst, UImm6, Address, Decoder);
-}
-
-static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- DecodeStatus S = MCDisassembler::Success;
- uint32_t Rd = fieldFromInstruction(Insn, 7, 5);
- uint32_t Rs2 = fieldFromInstruction(Insn, 2, 5);
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder)))
- return MCDisassembler::Fail;
- return S;
-}
-
-static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- DecodeStatus S = MCDisassembler::Success;
- uint32_t Rd = fieldFromInstruction(Insn, 7, 5);
- uint32_t Rs2 = fieldFromInstruction(Insn, 2, 5);
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- Inst.addOperand(Inst.getOperand(0));
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder)))
- return MCDisassembler::Fail;
- return S;
-}
-
static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -691,24 +602,6 @@ static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
return S;
}
-static DecodeStatus decodeZcmpRlist(MCInst &Inst, uint32_t Imm,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE);
- if (Imm < RISCVZC::RA || (IsRVE && Imm >= RISCVZC::RA_S0_S2))
- return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::createImm(Imm));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeXqccmpRlistS0(MCInst &Inst, uint32_t Imm,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- if (Imm < RISCVZC::RA_S0)
- return MCDisassembler::Fail;
- return decodeZcmpRlist(Inst, Imm, Address, Decoder);
-}
-
// Add implied SP operand for C.*SP compressed instructions. The SP operand
// isn't explicitly encoded in the instruction.
void RISCVDisassembler::addSPOperands(MCInst &MI) const {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 2c37c3b..82e3b5c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -320,6 +320,7 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
MCFixup Fixup =
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN);
F.setVarFixups({Fixup});
+ F.setLinkerRelaxable();
F.getParent()->setLinkerRelaxable();
return true;
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 7ad5d5f..bddea43 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -330,7 +330,6 @@ enum OperandType : unsigned {
OPERAND_UIMM32,
OPERAND_UIMM48,
OPERAND_UIMM64,
- OPERAND_ZERO,
OPERAND_THREE,
OPERAND_FOUR,
OPERAND_SIMM5,
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index d4f5d8f..2f32e2a 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -293,7 +293,7 @@ void RISCVAsmPrinter::emitNTLHint(const MachineInstr *MI) {
MCInst Hint;
if (STI->hasStdExtZca())
- Hint.setOpcode(RISCV::C_ADD_HINT);
+ Hint.setOpcode(RISCV::C_ADD);
else
Hint.setOpcode(RISCV::ADD);
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index f9c0b54..171940e 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1272,7 +1272,7 @@ def FeatureVendorXSfmm128t
def FeatureVendorXSfvqmaccdod
: RISCVExtension<1, 0,
"SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2)",
- [FeatureStdExtZve32x]>;
+ [FeatureStdExtZve32x, FeatureStdExtZvl128b]>;
def HasVendorXSfvqmaccdod
: Predicate<"Subtarget->hasVendorXSfvqmaccdod()">,
AssemblerPredicate<(all_of FeatureVendorXSfvqmaccdod),
@@ -1281,7 +1281,7 @@ def HasVendorXSfvqmaccdod
def FeatureVendorXSfvqmaccqoq
: RISCVExtension<1, 0,
"SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4)",
- [FeatureStdExtZve32x]>;
+ [FeatureStdExtZve32x, FeatureStdExtZvl256b]>;
def HasVendorXSfvqmaccqoq
: Predicate<"Subtarget->hasVendorXSfvqmaccqoq()">,
AssemblerPredicate<(all_of FeatureVendorXSfvqmaccqoq),
@@ -1290,7 +1290,7 @@ def HasVendorXSfvqmaccqoq
def FeatureVendorXSfvfwmaccqqq
: RISCVExtension<1, 0,
"SiFive Matrix Multiply Accumulate Instruction (4-by-4)",
- [FeatureStdExtZvfbfmin]>;
+ [FeatureStdExtZvfbfmin, FeatureStdExtZvl128b]>;
def HasVendorXSfvfwmaccqqq
: Predicate<"Subtarget->hasVendorXSfvfwmaccqqq()">,
AssemblerPredicate<(all_of FeatureVendorXSfvfwmaccqqq),
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index b1ab76a..9fc0d81 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1581,7 +1581,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Set the register and all its subregisters.
if (!MRI.def_empty(CSReg) || MRI.getUsedPhysRegsMask().test(CSReg)) {
SavedRegs.set(CSReg);
- llvm::for_each(SubRegs, [&](unsigned Reg) { return SavedRegs.set(Reg); });
+ for (unsigned Reg : SubRegs)
+ SavedRegs.set(Reg);
}
// Combine to super register if all of its subregisters are marked.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3918dd2..607edd3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1618,6 +1618,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
+  // Customize the load and store operations for bf16 when Zfh isn't enabled.
+ if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
+ setOperationAction(ISD::LOAD, MVT::bf16, Custom);
+ setOperationAction(ISD::STORE, MVT::bf16, Custom);
+ }
+
// Function alignments.
const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
setMinFunctionAlignment(FunctionAlignment);
@@ -2733,6 +2739,27 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
}
}
+bool RISCVTargetLowering::isLegalLoadStoreElementTypeForRVV(
+ EVT ScalarTy) const {
+ if (!ScalarTy.isSimple())
+ return false;
+ switch (ScalarTy.getSimpleVT().SimpleTy) {
+ case MVT::iPTR:
+ return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::f16:
+ case MVT::bf16:
+ case MVT::f32:
+ return true;
+ case MVT::i64:
+ case MVT::f64:
+ return Subtarget.hasVInstructionsI64();
+ default:
+ return false;
+ }
+}
unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
return NumRepeatedDivisors;
@@ -7216,6 +7243,47 @@ static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
}
+SDValue
+RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
+ "Unexpected bfloat16 load lowering");
+
+ SDLoc DL(Op);
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ EVT MemVT = LD->getMemoryVT();
+ SDValue Load = DAG.getExtLoad(
+ ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
+ LD->getBasePtr(),
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
+ LD->getMemOperand());
+  // Use a mask to make the bf16 NaN-boxing valid when we don't have the flh
+  // instruction. -65536 has all of its low 12 bits clear, so it can be
+  // materialized directly with a single lui.
+ SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
+ SDValue OrSixteenOne =
+ DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
+ SDValue ConvertedResult =
+ DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
+ return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
+}
+
+SDValue
+RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
+ "Unexpected bfloat16 store lowering");
+
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ SDLoc DL(Op);
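+  // Move the bf16 value into an integer register, then store only the low
+  // 16 bits via a truncating store.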
+ SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
+ Subtarget.getXLenVT(), ST->getValue());
+ return DAG.getTruncStore(
+ ST->getChain(), DL, FMV, ST->getBasePtr(),
+ EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
+ ST->getMemOperand());
+}
+
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -7914,6 +7982,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getMergeValues({Pair, Chain}, DL);
}
+ if (VT == MVT::bf16)
+ return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
+
// Handle normal vector tuple load.
if (VT.isRISCVVectorTuple()) {
SDLoc DL(Op);
@@ -7998,6 +8069,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
{Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
Store->getMemOperand());
}
+
+ if (VT == MVT::bf16)
+ return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
+
// Handle normal vector tuple store.
if (VT.isRISCVVectorTuple()) {
SDLoc DL(Op);
@@ -16079,7 +16154,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt = CNode->getZExtValue();
// Don't do this if the Xqciac extension is enabled and the MulAmt in simm12.
- if (Subtarget.hasVendorXqciac() && isInt<12>(MulAmt))
+ if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
return SDValue();
const bool HasShlAdd = Subtarget.hasStdExtZba() ||
@@ -16184,10 +16259,12 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
// 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
for (uint64_t Offset : {3, 5, 9}) {
if (isPowerOf2_64(MulAmt + Offset)) {
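+        // Skip this offset if the shift amount would not fit in the value
+        // type.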
+ unsigned ShAmt = Log2_64(MulAmt + Offset);
+ if (ShAmt >= VT.getSizeInBits())
+ continue;
SDLoc DL(N);
SDValue Shift1 =
- DAG.getNode(ISD::SHL, DL, VT, X,
- DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
+ DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
SDValue Mul359 =
DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
@@ -24183,7 +24260,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
return false;
EVT ScalarType = DataType.getScalarType();
- if (!isLegalElementTypeForRVV(ScalarType))
+ if (!isLegalLoadStoreElementTypeForRVV(ScalarType))
return false;
if (!Subtarget.enableUnalignedVectorMem() &&
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index f0447e0..a788c0b7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -384,6 +384,7 @@ public:
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
bool isLegalElementTypeForRVV(EVT ScalarTy) const;
+ bool isLegalLoadStoreElementTypeForRVV(EVT ScalarTy) const;
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
@@ -578,6 +579,9 @@ private:
SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerXAndesBfHCvtBFloat16Load(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerXAndesBfHCvtBFloat16Store(SDValue Op, SelectionDAG &DAG) const;
+
bool isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 64f9e3e..085064e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2859,9 +2859,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_UIMM16_NONZERO:
Ok = isUInt<16>(Imm) && (Imm != 0);
break;
- case RISCVOp::OPERAND_ZERO:
- Ok = Imm == 0;
- break;
case RISCVOp::OPERAND_THREE:
Ok = Imm == 3;
break;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 8252a9b..c5551fb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -57,12 +57,6 @@ def simm6nonzero : RISCVOp,
}];
}
-def immzero : RISCVOp,
- ImmLeaf<XLenVT, [{return (Imm == 0);}]> {
- let ParserMatchClass = ImmZeroAsmOperand;
- let OperandType = "OPERAND_ZERO";
-}
-
def CLUIImmAsmOperand : AsmOperandClass {
let Name = "CLUIImm";
let RenderMethod = "addImmOperands";
@@ -272,7 +266,7 @@ class Bcz<bits<3> funct3, string OpcodeStr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class Shift_right<bits<2> funct2, string OpcodeStr>
: RVInst16CB<0b100, 0b01, (outs GPRC:$rd),
- (ins GPRC:$rs1, uimmlog2xlennonzero:$imm),
+ (ins GPRC:$rs1, uimmlog2xlen:$imm),
OpcodeStr, "$rs1, $imm"> {
let Constraints = "$rs1 = $rd";
let Inst{12} = imm{5};
@@ -402,17 +396,19 @@ def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">,
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
- (ins GPRNoX0:$rd, simm6nonzero:$imm),
+ (ins GPRNoX0:$rd, simm6:$imm),
"c.addi", "$rd, $imm">,
Sched<[WriteIALU, ReadIALU]> {
let Constraints = "$rd = $rd_wb";
}
-// Alternate syntax for c.nop. Converted to C_NOP by the assembler.
+// Alternate syntax for c.nop. Converted to C_NOP/C_NOP_HINT by the assembler.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
-def PseudoC_ADDI_NOP : Pseudo<(outs GPRX0:$rd), (ins GPRX0:$rs1, immzero:$imm),
- [], "c.addi", "$rd, $imm">;
+def PseudoC_ADDI_NOP : Pseudo<(outs GPRX0:$rd), (ins GPRX0:$rs1, simm6:$imm),
+ [], "c.addi", "$rd, $imm"> {
+ let Constraints = "$rs1 = $rd";
+}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1,
DecoderNamespace = "RV32Only", Defs = [X1],
@@ -430,7 +426,7 @@ def C_ADDIW : RVInst16CI<0b001, 0b01, (outs GPRNoX0:$rd_wb),
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_LI : RVInst16CI<0b010, 0b01, (outs GPRNoX0:$rd), (ins simm6:$imm),
+def C_LI : RVInst16CI<0b010, 0b01, (outs GPR:$rd), (ins simm6:$imm),
"c.li", "$rd, $imm">,
Sched<[WriteIALU]>;
@@ -449,7 +445,7 @@ def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb),
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd),
+def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX2:$rd),
(ins c_lui_imm:$imm),
"c.lui", "$rd, $imm">,
Sched<[WriteIALU]>;
@@ -497,8 +493,8 @@ def C_BEQZ : Bcz<0b110, "c.beqz">, Sched<[WriteJmp, ReadJmp]>;
def C_BNEZ : Bcz<0b111, "c.bnez">, Sched<[WriteJmp, ReadJmp]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb),
- (ins GPRNoX0:$rd, uimmlog2xlennonzero:$imm),
+def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb),
+ (ins GPR:$rd, uimmlog2xlen:$imm),
"c.slli", "$rd, $imm">,
Sched<[WriteShiftImm, ReadShiftImm]> {
let Constraints = "$rd = $rd_wb";
@@ -544,7 +540,7 @@ def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1),
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isMoveReg = 1,
isAsCheapAsAMove = 1 in
-def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2),
+def C_MV : RVInst16CR<0b1000, 0b10, (outs GPR:$rs1), (ins GPRNoX0:$rs2),
"c.mv", "$rs1, $rs2">,
Sched<[WriteIALU, ReadIALU]>;
@@ -557,8 +553,8 @@ def C_JALR : RVInst16CR<0b1001, 0b10, (outs), (ins GPRNoX0:$rs1),
"c.jalr", "$rs1">, Sched<[WriteJalr, ReadJalr]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPRNoX0:$rd),
- (ins GPRNoX0:$rs1, GPRNoX0:$rs2),
+def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPR:$rd),
+ (ins GPR:$rs1, GPRNoX0:$rs2),
"c.add", "$rs1, $rs2">,
Sched<[WriteIALU, ReadIALU, ReadIALU]> {
let Constraints = "$rs1 = $rd";
@@ -616,81 +612,6 @@ def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm),
let rd = 0;
}
-def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
- (ins GPRNoX0:$rd, immzero:$imm),
- "c.addi", "$rd, $imm">,
- Sched<[WriteIALU, ReadIALU]> {
- let Constraints = "$rd = $rd_wb";
- let imm = 0;
- let DecoderMethod = "decodeRVCInstrRdRs1ImmZero";
-}
-
-def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm),
- "c.li", "$rd, $imm">,
- Sched<[WriteIALU]> {
- let Inst{11-7} = 0;
- let DecoderMethod = "decodeRVCInstrRdSImm6";
-}
-
-def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd),
- (ins c_lui_imm:$imm),
- "c.lui", "$rd, $imm">,
- Sched<[WriteIALU]> {
- let Inst{11-7} = 0;
- let DecoderMethod = "decodeRVCInstrRdCLUIImm";
-}
-
-def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2),
- "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]> {
- let Inst{11-7} = 0;
- let DecoderMethod = "decodeRVCInstrRdRs2";
-}
-
-def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rd),
- (ins GPRX0:$rs1, GPRNoX0:$rs2),
- "c.add", "$rs1, $rs2">,
- Sched<[WriteIALU, ReadIALU, ReadIALU]> {
- let Constraints = "$rs1 = $rd";
- let Inst{11-7} = 0;
- let DecoderMethod = "decodeRVCInstrRdRs1Rs2";
-}
-
-def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb),
- (ins GPRX0:$rd, uimmlog2xlennonzero:$imm),
- "c.slli", "$rd, $imm">,
- Sched<[WriteShiftImm, ReadShiftImm]> {
- let Constraints = "$rd = $rd_wb";
- let Inst{11-7} = 0;
- let DecoderMethod = "decodeRVCInstrRdRs1UImmLog2XLenNonZero";
-}
-
-def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd),
- "c.slli64", "$rd">,
- Sched<[WriteShiftImm, ReadShiftImm]> {
- let Constraints = "$rd = $rd_wb";
- let imm = 0;
-}
-
-def C_SRLI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd),
- (ins GPRC:$rs1),
- "c.srli64", "$rs1">,
- Sched<[WriteShiftImm, ReadShiftImm]> {
- let Constraints = "$rs1 = $rd";
- let Inst{6-2} = 0;
- let Inst{11-10} = 0b00;
- let Inst{12} = 0;
-}
-
-def C_SRAI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd),
- (ins GPRC:$rs1),
- "c.srai64", "$rs1">,
- Sched<[WriteShiftImm, ReadShiftImm]> {
- let Constraints = "$rs1 = $rd";
- let Inst{6-2} = 0;
- let Inst{11-10} = 0b01;
- let Inst{12} = 0;
-}
-
} // Predicates = [HasStdExtZca], hasSideEffects = 0, mayLoad = 0,
// mayStore = 0
@@ -699,15 +620,17 @@ def C_SRAI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd),
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZca] in {
-// Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6.
-def : InstAlias<"c.addi x0, $imm", (C_NOP_HINT simm6nonzero:$imm), 0>;
+// Legacy aliases.
+def : InstAlias<"c.slli64 $rd", (C_SLLI GPR:$rd, 0), 0>;
+def : InstAlias<"c.srli64 $rs1", (C_SRLI GPRC:$rs1, 0), 0>;
+def : InstAlias<"c.srai64 $rs1", (C_SRAI GPRC:$rs1, 0), 0>;
}
let Predicates = [HasStdExtC, HasStdExtZihintntl] in {
-def : InstAlias<"c.ntl.p1", (C_ADD_HINT X0, X2)>;
-def : InstAlias<"c.ntl.pall", (C_ADD_HINT X0, X3)>;
-def : InstAlias<"c.ntl.s1", (C_ADD_HINT X0, X4)>;
-def : InstAlias<"c.ntl.all", (C_ADD_HINT X0, X5)>;
+def : InstAlias<"c.ntl.p1", (C_ADD X0, X2)>;
+def : InstAlias<"c.ntl.pall", (C_ADD X0, X3)>;
+def : InstAlias<"c.ntl.s1", (C_ADD X0, X4)>;
+def : InstAlias<"c.ntl.all", (C_ADD X0, X5)>;
} // Predicates = [HasStdExtC, HasStdExtZihintntl]
let EmitPriority = 0 in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index dfa532a..6afc942d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -788,7 +788,7 @@ class VPseudoUSLoadNoMask<VReg RetClass,
DAGOperand sewop = sew> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMemZeroOffset:$rs1, AVL:$vl,
- sewop:$sew, vec_policy:$policy), []>,
+ sewop:$sew, vec_policy:$policy)>,
RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -804,7 +804,7 @@ class VPseudoUSLoadMask<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru,
GPRMemZeroOffset:$rs1, VMaskOp:$vm, AVL:$vl, sew:$sew,
- vec_policy:$policy), []>,
+ vec_policy:$policy)>,
RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -821,7 +821,7 @@ class VPseudoUSLoadFFNoMask<VReg RetClass,
int EEW> :
RISCVVPseudo<(outs RetClass:$rd, GPR:$vl),
(ins RetClass:$dest, GPRMemZeroOffset:$rs1, AVL:$avl,
- sew:$sew, vec_policy:$policy), []>,
+ sew:$sew, vec_policy:$policy)>,
RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -837,7 +837,7 @@ class VPseudoUSLoadFFMask<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl),
(ins GetVRegNoV0<RetClass>.R:$passthru,
GPRMemZeroOffset:$rs1, VMaskOp:$vm, AVL:$avl, sew:$sew,
- vec_policy:$policy), []>,
+ vec_policy:$policy)>,
RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -854,7 +854,7 @@ class VPseudoSLoadNoMask<VReg RetClass,
int EEW> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMemZeroOffset:$rs1, GPR:$rs2,
- AVL:$vl, sew:$sew, vec_policy:$policy), []>,
+ AVL:$vl, sew:$sew, vec_policy:$policy)>,
RISCVVLE</*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -870,7 +870,7 @@ class VPseudoSLoadMask<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru,
GPRMemZeroOffset:$rs1, GPR:$rs2, VMaskOp:$vm, AVL:$vl,
- sew:$sew, vec_policy:$policy), []>,
+ sew:$sew, vec_policy:$policy)>,
RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -892,7 +892,7 @@ class VPseudoILoadNoMask<VReg RetClass,
bits<2> TargetConstraintType = 1> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMemZeroOffset:$rs1, IdxClass:$rs2,
- AVL:$vl, sew:$sew, vec_policy:$policy), []>,
+ AVL:$vl, sew:$sew, vec_policy:$policy)>,
RISCVVLX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
@@ -914,7 +914,7 @@ class VPseudoILoadMask<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru,
GPRMemZeroOffset:$rs1, IdxClass:$rs2, VMaskOp:$vm,
- AVL:$vl, sew:$sew, vec_policy:$policy), []>,
+ AVL:$vl, sew:$sew, vec_policy:$policy)>,
RISCVVLX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
@@ -933,7 +933,7 @@ class VPseudoUSStoreNoMask<VReg StClass,
DAGOperand sewop = sew> :
RISCVVPseudo<(outs),
(ins StClass:$rd, GPRMemZeroOffset:$rs1, AVL:$vl,
- sewop:$sew), []>,
+ sewop:$sew)>,
RISCVVSE</*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
@@ -946,7 +946,7 @@ class VPseudoUSStoreMask<VReg StClass,
int EEW> :
RISCVVPseudo<(outs),
(ins StClass:$rd, GPRMemZeroOffset:$rs1,
- VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
+ VMaskOp:$vm, AVL:$vl, sew:$sew)>,
RISCVVSE</*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
@@ -960,7 +960,7 @@ class VPseudoSStoreNoMask<VReg StClass,
int EEW> :
RISCVVPseudo<(outs),
(ins StClass:$rd, GPRMemZeroOffset:$rs1, GPR:$rs2,
- AVL:$vl, sew:$sew), []>,
+ AVL:$vl, sew:$sew)>,
RISCVVSE</*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
@@ -973,7 +973,7 @@ class VPseudoSStoreMask<VReg StClass,
int EEW> :
RISCVVPseudo<(outs),
(ins StClass:$rd, GPRMemZeroOffset:$rs1, GPR:$rs2,
- VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
+ VMaskOp:$vm, AVL:$vl, sew:$sew)>,
RISCVVSE</*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
@@ -986,7 +986,7 @@ class VPseudoSStoreMask<VReg StClass,
class VPseudoNullaryNoMask<VReg RegClass> :
RISCVVPseudo<(outs RegClass:$rd),
(ins RegClass:$passthru,
- AVL:$vl, sew:$sew, vec_policy:$policy), []> {
+ AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1015,7 +1015,7 @@ class VPseudoNullaryMask<VReg RegClass> :
// Nullary for pseudo instructions. They are expanded in
// RISCVExpandPseudoInsts pass.
class VPseudoNullaryPseudoM<string BaseInst> :
- RISCVVPseudo<(outs VR:$rd), (ins AVL:$vl, sew_mask:$sew), []> {
+ RISCVVPseudo<(outs VR:$rd), (ins AVL:$vl, sew_mask:$sew)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1031,7 +1031,7 @@ class VPseudoUnaryNoMask<DAGOperand RetClass,
bits<2> TargetConstraintType = 1> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$passthru, OpClass:$rs2,
- AVL:$vl, sew:$sew, vec_policy:$policy), []> {
+ AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1047,7 +1047,7 @@ class VPseudoUnaryNoMaskNoPolicy<DAGOperand RetClass,
string Constraint = "",
bits<2> TargetConstraintType = 1> :
RISCVVPseudo<(outs RetClass:$rd),
- (ins OpClass:$rs2, AVL:$vl, sew_mask:$sew), []> {
+ (ins OpClass:$rs2, AVL:$vl, sew_mask:$sew)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1063,7 +1063,7 @@ class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass,
bits<2> TargetConstraintType = 1> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$passthru, OpClass:$rs2, vec_rm:$rm,
- AVL:$vl, sew:$sew, vec_policy:$policy), []> {
+ AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1084,7 +1084,7 @@ class VPseudoUnaryMask<VReg RetClass,
DAGOperand sewop = sew> :
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru, OpClass:$rs2,
- VMaskOp:$vm, AVL:$vl, sewop:$sew, vec_policy:$policy), []> {
+ VMaskOp:$vm, AVL:$vl, sewop:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1104,7 +1104,7 @@ class VPseudoUnaryMaskRoundingMode<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru, OpClass:$rs2,
VMaskOp:$vm, vec_rm:$rm,
- AVL:$vl, sew:$sew, vec_policy:$policy), []> {
+ AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1139,7 +1139,7 @@ class VPseudoUnaryMask_NoExcept<VReg RetClass,
class VPseudoUnaryNoMaskGPROut :
RISCVVPseudo<(outs GPR:$rd),
- (ins VR:$rs2, AVL:$vl, sew_mask:$sew), []> {
+ (ins VR:$rs2, AVL:$vl, sew_mask:$sew)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1149,7 +1149,7 @@ class VPseudoUnaryNoMaskGPROut :
class VPseudoUnaryMaskGPROut :
RISCVVPseudo<(outs GPR:$rd),
- (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, sew_mask:$sew), []> {
+ (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, sew_mask:$sew)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1163,7 +1163,7 @@ class VPseudoUnaryAnyMask<VReg RetClass,
VReg Op1Class> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$passthru, Op1Class:$rs2,
- VR:$vm, AVL:$vl, sew:$sew), []> {
+ VR:$vm, AVL:$vl, sew:$sew)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1197,7 +1197,7 @@ class VPseudoBinaryNoMaskPolicy<VReg RetClass,
bits<2> TargetConstraintType = 1> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1,
- AVL:$vl, sew:$sew, vec_policy:$policy), []> {
+ AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1241,7 +1241,7 @@ class VPseudoBinaryMaskPolicyRoundingMode<VReg RetClass,
(ins GetVRegNoV0<RetClass>.R:$passthru,
Op1Class:$rs2, Op2Class:$rs1,
VMaskOp:$vm, vec_rm:$rm, AVL:$vl,
- sew:$sew, vec_policy:$policy), []> {
+ sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1266,7 +1266,7 @@ class VPseudoTiedBinaryNoMask<VReg RetClass,
bits<2> TargetConstraintType = 1> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew,
- vec_policy:$policy), []> {
+ vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1288,7 +1288,7 @@ class VPseudoTiedBinaryNoMaskRoundingMode<VReg RetClass,
(ins RetClass:$rs2, Op2Class:$rs1,
vec_rm:$rm,
AVL:$vl, sew:$sew,
- vec_policy:$policy), []> {
+ vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1380,7 +1380,7 @@ class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass,
Op1Class:$rs2, Op2Class:$rs1,
VMaskOp:$vm,
vec_rm:$rm,
- AVL:$vl, sew:$sew, vec_policy:$policy), []> {
+ AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1451,7 +1451,7 @@ class VPseudoTiedBinaryMaskRoundingMode<VReg RetClass,
Op2Class:$rs1,
VMaskOp:$vm,
vec_rm:$rm,
- AVL:$vl, sew:$sew, vec_policy:$policy), []> {
+ AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1480,7 +1480,7 @@ class VPseudoBinaryCarry<VReg RetClass,
(ins Op1Class:$rs2, Op2Class:$rs1,
VMV0:$carry, AVL:$vl, sew:$sew),
(ins Op1Class:$rs2, Op2Class:$rs1,
- AVL:$vl, sew:$sew)), []> {
+ AVL:$vl, sew:$sew))> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1498,7 +1498,7 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass,
bits<2> TargetConstraintType = 1> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1,
- VMV0:$carry, AVL:$vl, sew:$sew), []> {
+ VMV0:$carry, AVL:$vl, sew:$sew)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1516,7 +1516,7 @@ class VPseudoTernaryNoMask<VReg RetClass,
string Constraint> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
- AVL:$vl, sew:$sew), []> {
+ AVL:$vl, sew:$sew)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1532,7 +1532,7 @@ class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
bits<2> TargetConstraintType = 1> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
- AVL:$vl, sew:$sew, vec_policy:$policy), []> {
+ AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1570,7 +1570,7 @@ class VPseudoUSSegLoadNoMask<VReg RetClass,
bits<4> NF> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMemZeroOffset:$rs1, AVL:$vl,
- sew:$sew, vec_policy:$policy), []>,
+ sew:$sew, vec_policy:$policy)>,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -1587,7 +1587,7 @@ class VPseudoUSSegLoadMask<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru,
GPRMemZeroOffset:$rs1, VMaskOp:$vm, AVL:$vl, sew:$sew,
- vec_policy:$policy), []>,
+ vec_policy:$policy)>,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -1605,7 +1605,7 @@ class VPseudoUSSegLoadFFNoMask<VReg RetClass,
bits<4> NF> :
RISCVVPseudo<(outs RetClass:$rd, GPR:$vl),
(ins RetClass:$dest, GPRMemZeroOffset:$rs1, AVL:$avl,
- sew:$sew, vec_policy:$policy), []>,
+ sew:$sew, vec_policy:$policy)>,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -1622,7 +1622,7 @@ class VPseudoUSSegLoadFFMask<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl),
(ins GetVRegNoV0<RetClass>.R:$passthru,
GPRMemZeroOffset:$rs1, VMaskOp:$vm, AVL:$avl, sew:$sew,
- vec_policy:$policy), []>,
+ vec_policy:$policy)>,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -1640,7 +1640,7 @@ class VPseudoSSegLoadNoMask<VReg RetClass,
bits<4> NF> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$passthru, GPRMemZeroOffset:$rs1, GPR:$offset,
- AVL:$vl, sew:$sew, vec_policy:$policy), []>,
+ AVL:$vl, sew:$sew, vec_policy:$policy)>,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -1657,7 +1657,7 @@ class VPseudoSSegLoadMask<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru,
GPRMemZeroOffset:$rs1, GPR:$offset, VMaskOp:$vm,
- AVL:$vl, sew:$sew, vec_policy:$policy), []>,
+ AVL:$vl, sew:$sew, vec_policy:$policy)>,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -1679,7 +1679,7 @@ class VPseudoISegLoadNoMask<VReg RetClass,
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$passthru, GPRMemZeroOffset:$rs1,
IdxClass:$offset, AVL:$vl, sew:$sew,
- vec_policy:$policy), []>,
+ vec_policy:$policy)>,
RISCVVLXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
@@ -1701,7 +1701,7 @@ class VPseudoISegLoadMask<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru,
GPRMemZeroOffset:$rs1, IdxClass:$offset, VMaskOp:$vm,
- AVL:$vl, sew:$sew, vec_policy:$policy), []>,
+ AVL:$vl, sew:$sew, vec_policy:$policy)>,
RISCVVLXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
@@ -1735,7 +1735,7 @@ class VPseudoUSSegStoreMask<VReg ValClass,
bits<4> NF> :
RISCVVPseudo<(outs),
(ins ValClass:$rd, GPRMemZeroOffset:$rs1,
- VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
+ VMaskOp:$vm, AVL:$vl, sew:$sew)>,
RISCVVSSEG<NF, /*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
@@ -1750,7 +1750,7 @@ class VPseudoSSegStoreNoMask<VReg ValClass,
bits<4> NF> :
RISCVVPseudo<(outs),
(ins ValClass:$rd, GPRMemZeroOffset:$rs1, GPR:$offset,
- AVL:$vl, sew:$sew), []>,
+ AVL:$vl, sew:$sew)>,
RISCVVSSEG<NF, /*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
@@ -1764,7 +1764,7 @@ class VPseudoSSegStoreMask<VReg ValClass,
bits<4> NF> :
RISCVVPseudo<(outs),
(ins ValClass:$rd, GPRMemZeroOffset:$rs1, GPR: $offset,
- VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
+ VMaskOp:$vm, AVL:$vl, sew:$sew)>,
RISCVVSSEG<NF, /*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
@@ -1782,7 +1782,7 @@ class VPseudoISegStoreNoMask<VReg ValClass,
bit Ordered> :
RISCVVPseudo<(outs),
(ins ValClass:$rd, GPRMemZeroOffset:$rs1, IdxClass: $index,
- AVL:$vl, sew:$sew), []>,
+ AVL:$vl, sew:$sew)>,
RISCVVSXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
let mayStore = 1;
@@ -1799,7 +1799,7 @@ class VPseudoISegStoreMask<VReg ValClass,
bit Ordered> :
RISCVVPseudo<(outs),
(ins ValClass:$rd, GPRMemZeroOffset:$rs1, IdxClass: $index,
- VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
+ VMaskOp:$vm, AVL:$vl, sew:$sew)>,
RISCVVSXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
let mayStore = 1;
@@ -6703,7 +6703,7 @@ let Predicates = [HasVInstructions] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
let HasSEWOp = 1, BaseInstr = VMV_X_S in
def PseudoVMV_X_S:
- RISCVVPseudo<(outs GPR:$rd), (ins VR:$rs2, sew:$sew), []>,
+ RISCVVPseudo<(outs GPR:$rd), (ins VR:$rs2, sew:$sew)>,
Sched<[WriteVMovXS, ReadVMovXS]>;
let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, isReMaterializable = 1,
Constraints = "$rd = $passthru" in
@@ -6723,8 +6723,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach f = FPList in {
let HasSEWOp = 1, BaseInstr = VFMV_F_S in
def "PseudoVFMV_" # f.FX # "_S" :
- RISCVVPseudo<(outs f.fprclass:$rd),
- (ins VR:$rs2, sew:$sew), []>,
+ RISCVVPseudo<(outs f.fprclass:$rd), (ins VR:$rs2, sew:$sew)>,
Sched<[WriteVMovFS, ReadVMovFS]>;
let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1,
Constraints = "$rd = $passthru" in
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
index 1bb67f4..c75addd9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
@@ -11,6 +11,20 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_NDS_FMV_BF16_X
+ : SDTypeProfile<1, 1, [SDTCisVT<0, bf16>, SDTCisVT<1, XLenVT>]>;
+def SDT_NDS_FMV_X_ANYEXTBF16
+ : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisVT<1, bf16>]>;
+
+def riscv_nds_fmv_bf16_x
+ : SDNode<"RISCVISD::NDS_FMV_BF16_X", SDT_NDS_FMV_BF16_X>;
+def riscv_nds_fmv_x_anyextbf16
+ : SDNode<"RISCVISD::NDS_FMV_X_ANYEXTBF16", SDT_NDS_FMV_X_ANYEXTBF16>;
+
+//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@@ -773,6 +787,25 @@ def : Pat<(bf16 (fpround FPR32:$rs)),
(NDS_FCVT_BF16_S FPR32:$rs)>;
} // Predicates = [HasVendorXAndesBFHCvt]
+let isCodeGenOnly = 1 in {
+def NDS_FMV_BF16_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR16, GPR, "fmv.w.x">,
+ Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>;
+def NDS_FMV_X_BF16 : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR16, "fmv.x.w">,
+ Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>;
+}
+
+let Predicates = [HasVendorXAndesBFHCvt] in {
+def : Pat<(riscv_nds_fmv_bf16_x GPR:$src), (NDS_FMV_BF16_X GPR:$src)>;
+def : Pat<(riscv_nds_fmv_x_anyextbf16 (bf16 FPR16:$src)),
+ (NDS_FMV_X_BF16 (bf16 FPR16:$src))>;
+} // Predicates = [HasVendorXAndesBFHCvt]
+
+// Use flh/fsh to load/store bf16 if zfh is enabled.
+let Predicates = [HasStdExtZfh, HasVendorXAndesBFHCvt] in {
+def : LdPat<load, FLH, bf16>;
+def : StPat<store, FSH, FPR16, bf16>;
+} // Predicates = [HasStdExtZfh, HasVendorXAndesBFHCvt]
+
let Predicates = [HasVendorXAndesVBFHCvt] in {
defm PseudoNDS_VFWCVT_S_BF16 : VPseudoVWCVT_S_BF16;
defm PseudoNDS_VFNCVT_BF16_S : VPseudoVNCVT_BF16_S;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
index f173440..ed1a60a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
@@ -291,31 +291,31 @@ def : CompressPat<(MUL GPRC:$rs1, GPRC:$rs2, GPRC:$rs1),
let Predicates = [HasStdExtZcb, HasStdExtZbb] in{
def : CompressPat<(SEXT_B GPRC:$rs1, GPRC:$rs1),
- (C_SEXT_B GPRC:$rs1, GPRC:$rs1)>;
+ (C_SEXT_B GPRC:$rs1)>;
def : CompressPat<(SEXT_H GPRC:$rs1, GPRC:$rs1),
- (C_SEXT_H GPRC:$rs1, GPRC:$rs1)>;
+ (C_SEXT_H GPRC:$rs1)>;
} // Predicates = [HasStdExtZcb, HasStdExtZbb]
let Predicates = [HasStdExtZcb, HasStdExtZbb] in{
def : CompressPat<(ZEXT_H_RV32 GPRC:$rs1, GPRC:$rs1),
- (C_ZEXT_H GPRC:$rs1, GPRC:$rs1)>;
+ (C_ZEXT_H GPRC:$rs1)>;
def : CompressPat<(ZEXT_H_RV64 GPRC:$rs1, GPRC:$rs1),
- (C_ZEXT_H GPRC:$rs1, GPRC:$rs1)>;
+ (C_ZEXT_H GPRC:$rs1)>;
} // Predicates = [HasStdExtZcb, HasStdExtZbb]
let Predicates = [HasStdExtZcb] in{
def : CompressPat<(ANDI GPRC:$rs1, GPRC:$rs1, 255),
- (C_ZEXT_B GPRC:$rs1, GPRC:$rs1)>;
+ (C_ZEXT_B GPRC:$rs1)>;
} // Predicates = [HasStdExtZcb]
let Predicates = [HasStdExtZcb, HasStdExtZba, IsRV64] in{
def : CompressPat<(ADD_UW GPRC:$rs1, GPRC:$rs1, X0),
- (C_ZEXT_W GPRC:$rs1, GPRC:$rs1)>;
+ (C_ZEXT_W GPRC:$rs1)>;
} // Predicates = [HasStdExtZcb, HasStdExtZba, IsRV64]
let Predicates = [HasStdExtZcb] in{
def : CompressPat<(XORI GPRC:$rs1, GPRC:$rs1, -1),
- (C_NOT GPRC:$rs1, GPRC:$rs1)>;
+ (C_NOT GPRC:$rs1)>;
}
let Predicates = [HasStdExtZcb] in{
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 0565fcd..3cbe668 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -32,7 +32,7 @@ bool RISCVTargetLowering::isLegalInterleavedAccessType(
if (!isTypeLegal(VT))
return false;
- if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
+ if (!isLegalLoadStoreElementTypeForRVV(VT.getScalarType()) ||
!allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
Alignment))
return false;
@@ -224,10 +224,10 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
- // Note: Same VL as above, but i32 not xlen due to signature of
- // vp.strided.load
- VL = Builder.CreateElementCount(Builder.getInt32Ty(),
- VTy->getElementCount());
+  // For rv64, we need to truncate the i64 VL to i32 to match the intrinsic's
+  // signature. As VL is at most the number of active lanes (which fits in
+  // i32), this is safe.
+ VL = Builder.CreateTrunc(VL, Builder.getInt32Ty());
+
CallInst *CI =
Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
{VTy, BasePtr->getType(), Stride->getType()},
@@ -302,10 +302,9 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
- // Note: Same VL as above, but i32 not xlen due to signature of
- // vp.strided.store
- VL = Builder.CreateElementCount(Builder.getInt32Ty(),
- VTy->getElementCount());
+    // For rv64, we need to truncate the i64 VL to i32 to match the
+    // intrinsic's signature. As VL is at most the number of active lanes
+    // (which fits in i32), this is safe.
+ VL = Builder.CreateTrunc(VL, Builder.getInt32Ty());
CallInst *CI =
Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index e87f452..ccb39e8 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -268,6 +268,11 @@ def GPRNoX0 : GPRRegisterClass<(sub GPR, X0)> {
let DiagnosticString = "register must be a GPR excluding zero (x0)";
}
+def GPRNoX2 : GPRRegisterClass<(sub GPR, X2)> {
+ let DiagnosticType = "InvalidRegClassGPRNoX2";
+ let DiagnosticString = "register must be a GPR excluding sp (x2)";
+}
+
def GPRNoX0X2 : GPRRegisterClass<(sub GPR, X0, X2)> {
let DiagnosticType = "InvalidRegClassGPRNoX0X2";
let DiagnosticString = "register must be a GPR excluding zero (x0) and sp (x2)";
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index 3e286a7..bf23812 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -24,6 +24,67 @@ class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0
bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
}
+defvar SMX60VLEN = 256;
+defvar SMX60DLEN = !div(SMX60VLEN, 2);
+
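+// Used for: LMUL-proportional scaling (1/2/4/8); the division/remainder
+// latencies below multiply this by a fixed base.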
+class Get1248Latency<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 2,
+ !eq(mx, "M4") : 4,
+ !eq(mx, "M8") : 8,
+ true: 1
+ );
+}
+
+// Used for: logical ops, shifts, sign ext, merge/move, FP sign/recip/convert, mask ops, slides
+class Get4816Latency<string mx> {
+ int c = !cond(
+ !eq(mx, "M4") : 8,
+ !eq(mx, "M8") : 16,
+ true: 4
+ );
+}
+
+// Used for: arithmetic (add/sub/min/max), saturating/averaging, FP add/sub/min/max
+class Get458Latency<string mx> {
+ int c = !cond(
+ !eq(mx, "M4") : 5,
+ !eq(mx, "M8") : 8,
+ true: 4
+ );
+}
+
+// Widening scaling pattern (4/4/4/4/5/8/8): plateaus at higher LMULs
+// Used for: widening operations
+class Get4588Latency<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 5,
+ !eq(mx, "M4") : 8,
+ !eq(mx, "M8") : 8, // M8 not supported for most widening, fallback
+ true: 4
+ );
+}
+
+// Used for: mask-producing comparisons, carry ops with mask, FP comparisons
+class Get461018Latency<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 6,
+ !eq(mx, "M4") : 10,
+ !eq(mx, "M8") : 18,
+ true: 4
+ );
+}
+
+// Used for: e64 multiply pattern, complex ops
+class Get781632Latency<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 8,
+ !eq(mx, "M4") : 16,
+ !eq(mx, "M8") : 32,
+ true: 7
+ );
+}
+
def SpacemitX60Model : SchedMachineModel {
let IssueWidth = 2; // dual-issue
let MicroOpBufferSize = 0; // in-order
@@ -322,58 +383,96 @@ foreach LMul = [1, 2, 4, 8] in {
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVIALUV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIALUX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIALUI", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVExtV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUI", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUMV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUMX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUMI", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpI", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeI", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovI", [SMX60_VIEU], mx, IsWorstCase>;
-
- defm "" : LMULWriteResMX<"WriteVShiftV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftI", [SMX60_VIEU], mx, IsWorstCase>;
-
- defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulAddX", [SMX60_VIEU], mx, IsWorstCase>;
+ let Latency = Get458Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in {
+ // Pattern of vadd, vsub, vrsub: 4/4/5/8
+ // Pattern of vand, vor, vxor: 4/4/8/16
+      // They are grouped together, so we use the worst case 4/4/8/16
+ // TODO: use InstRW to override individual instructions' scheduling data
+ defm "" : LMULWriteResMX<"WriteVIALUV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUI", [SMX60_VIEU], mx, IsWorstCase>;
+
+ defm "" : LMULWriteResMX<"WriteVExtV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftI", [SMX60_VIEU], mx, IsWorstCase>;
+
+ defm "" : LMULWriteResMX<"WriteVICALUV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defm "" : LMULWriteResMX<"WriteVICALUMV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+  // Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5/5/5/8,
+  // e64 = 7/8/16/32. We use the worst case until these can be split by SEW.
+ // TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites
+ let Latency = Get781632Latency<mx>.c, ReleaseAtCycles = [7] in {
+ defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
}
// Widening
+// Pattern of vwmul, vwmacc, etc.: e8/e16 = 4/4/5/8, e32 = 5/5/5/8
+// We use the worst case for all.
foreach mx = SchedMxListW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
- defm "" : LMULWriteResMX<"WriteVIWALUV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWALUX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWALUI", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SMX60_VIEU], mx, IsWorstCase>;
+ let Latency = Get4588Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defm "" : LMULWriteResMX<"WriteVIWALUV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWALUX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWALUI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
}
-// Vector Integer Division and Remainder
+// Division and remainder operations
+// Pattern of vdivu: 11/11/11/20/40/80/160
+// Pattern of vdiv: 12/12/12/22/44/88/176
+// Pattern of vremu: 12/12/12/22/44/88/176
+// Pattern of vrem: 13/13/13/24/48/96/192
+// We use 12/12/12/24/48/96/192 for all of them
+// TODO: Create separate WriteVIRem to more closely match the latencies
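+// For example, at M8 the loop below computes 8 * 24 = 192 cycles, and at the
+// fractional LMULs 1 * 12 = 12 cycles (assuming Get1248Latency yields 1/2/4/8
+// for M1-M8 and 1 for fractional LMULs), matching the chosen pattern.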
foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ // Slightly reduced for fractional LMULs
+ defvar Multiplier = !cond(
+ !eq(mx, "MF8") : 12,
+ !eq(mx, "MF4") : 12,
+ !eq(mx, "MF2") : 12,
+ true: 24
+ );
+
+ let Latency = !mul(Get1248Latency<mx>.c, Multiplier), ReleaseAtCycles = [12] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ }
}
}
@@ -381,12 +480,21 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxListW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
- defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNClipV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNClipX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNClipI", [SMX60_VIEU], mx, IsWorstCase>;
+  // Slightly increased for M2 and M4
+ defvar Multiplier = !cond(
+ !eq(mx, "M2") : 2,
+ !eq(mx, "M4") : 2,
+ true: 1
+ );
+
+ let Latency = !mul(Get4816Latency<mx>.c, Multiplier), ReleaseAtCycles = [4] in {
+ defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
}
// 12. Vector Fixed-Point Arithmetic Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b43b915..da6ac2f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -104,11 +104,6 @@ static cl::opt<bool> EnablePostMISchedLoadStoreClustering(
cl::desc("Enable PostRA load and store clustering in the machine scheduler"),
cl::init(true));
-static cl::opt<bool>
- EnableVLOptimizer("riscv-enable-vl-optimizer",
- cl::desc("Enable the RISC-V VL Optimizer pass"),
- cl::init(true), cl::Hidden);
-
static cl::opt<bool> DisableVectorMaskMutation(
"riscv-disable-vector-mask-mutation",
cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
@@ -617,8 +612,7 @@ void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVPreRAExpandPseudoPass());
if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createRISCVMergeBaseOffsetOptPass());
- if (EnableVLOptimizer)
- addPass(createRISCVVLOptimizerPass());
+ addPass(createRISCVVLOptimizerPass());
}
addPass(createRISCVInsertReadWriteCSRPass());
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index d62d99c..f0510ec 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -265,7 +265,7 @@ public:
if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
return false;
- return TLI->isLegalElementTypeForRVV(ElemType);
+ return TLI->isLegalLoadStoreElementTypeForRVV(ElemType);
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment,
@@ -297,7 +297,7 @@ public:
if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
return false;
- return TLI->isLegalElementTypeForRVV(ElemType);
+ return TLI->isLegalLoadStoreElementTypeForRVV(ElemType);
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override {
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index b53d919..c946451 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -114,14 +114,6 @@ FunctionPass *llvm::createRISCVVLOptimizerPass() {
return new RISCVVLOptimizer();
}
-/// Return true if R is a physical or virtual vector register, false otherwise.
-static bool isVectorRegClass(Register R, const MachineRegisterInfo *MRI) {
- if (R.isPhysical())
- return RISCV::VRRegClass.contains(R);
- const TargetRegisterClass *RC = MRI->getRegClass(R);
- return RISCVRI::isVRegClass(RC->TSFlags);
-}
-
LLVM_ATTRIBUTE_UNUSED
static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) {
OI.print(OS);
@@ -183,37 +175,28 @@ static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
return Log2EEW;
}
-/// Check whether MO is a mask operand of MI.
-static bool isMaskOperand(const MachineInstr &MI, const MachineOperand &MO,
- const MachineRegisterInfo *MRI) {
-
- if (!MO.isReg() || !isVectorRegClass(MO.getReg(), MRI))
- return false;
-
- const MCInstrDesc &Desc = MI.getDesc();
- return Desc.operands()[MO.getOperandNo()].RegClass == RISCV::VMV0RegClassID;
-}
-
static std::optional<unsigned>
getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
const MachineInstr &MI = *MO.getParent();
+ const MCInstrDesc &Desc = MI.getDesc();
const RISCVVPseudosTable::PseudoInfo *RVV =
RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
assert(RVV && "Could not find MI in PseudoTable");
// MI has a SEW associated with it. The RVV specification defines
// the EEW of each operand and definition in relation to MI.SEW.
- unsigned MILog2SEW =
- MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+ unsigned MILog2SEW = MI.getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
- const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
- const bool IsTied = RISCVII::isTiedPseudo(MI.getDesc().TSFlags);
+ const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
+ const bool IsTied = RISCVII::isTiedPseudo(Desc.TSFlags);
bool IsMODef = MO.getOperandNo() == 0 ||
(HasPassthru && MO.getOperandNo() == MI.getNumExplicitDefs());
// All mask operands have EEW=1
- if (isMaskOperand(MI, MO, MRI))
+ const MCOperandInfo &Info = Desc.operands()[MO.getOperandNo()];
+ if (Info.OperandType == MCOI::OPERAND_REGISTER &&
+ Info.RegClass == RISCV::VMV0RegClassID)
return 0;
// switch against BaseInstr to reduce number of cases that need to be
@@ -1296,8 +1279,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
TII->get(RISCV::getRVVMCOpcode(MI.getOpcode())).TSFlags) &&
"Instruction shouldn't be supported if elements depend on VL");
- assert(MI.getOperand(0).isReg() &&
- isVectorRegClass(MI.getOperand(0).getReg(), MRI) &&
+ assert(RISCVRI::isVRegClass(
+ MRI->getRegClass(MI.getOperand(0).getReg())->TSFlags) &&
"All supported instructions produce a vector register result");
LLVM_DEBUG(dbgs() << "Found a candidate for VL reduction: " << MI << "\n");
diff --git a/llvm/lib/Target/SPIRV/SPIRVAPI.cpp b/llvm/lib/Target/SPIRV/SPIRVAPI.cpp
index bbf1d87..cfe7ef4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVAPI.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVAPI.cpp
@@ -116,8 +116,8 @@ SPIRVTranslate(Module *M, std::string &SpirvObj, std::string &ErrMsg,
PM.add(new TargetLibraryInfoWrapperPass(TLII));
std::unique_ptr<MachineModuleInfoWrapperPass> MMIWP(
new MachineModuleInfoWrapperPass(Target.get()));
- const_cast<TargetLoweringObjectFile *>(Target->getObjFileLowering())
- ->Initialize(MMIWP->getMMI().getContext(), *Target);
+ Target->getObjFileLowering()->Initialize(MMIWP->getMMI().getContext(),
+ *Target);
SmallString<4096> OutBuffer;
raw_svector_ostream OutStream(OutBuffer);
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index b90e1aa..3c631ce 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -665,10 +665,10 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
auto *HandleType = cast<TargetExtType>(II->getOperand(0)->getType());
if (HandleType->getTargetExtName() == "spirv.Image" ||
HandleType->getTargetExtName() == "spirv.SignedImage") {
- if (II->hasOneUse()) {
- auto *U = *II->users().begin();
+ for (User *U : II->users()) {
Ty = cast<Instruction>(U)->getAccessType();
- assert(Ty && "Unable to get type for resource pointer.");
+ if (Ty)
+ break;
}
} else if (HandleType->getTargetExtName() == "spirv.VulkanBuffer") {
// This call is supposed to index into an array
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp
index 3ef6030..72bb372 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp
@@ -69,8 +69,8 @@ void SystemZHLASMAsmStreamer::EmitEOL() {
void SystemZHLASMAsmStreamer::changeSection(MCSection *Section,
uint32_t Subsection) {
- Section->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS,
- Subsection);
+ MAI->printSwitchToSection(*Section, Subsection,
+ getContext().getTargetTriple(), OS);
MCStreamer::changeSection(Section, Subsection);
}
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 19c9e9c..6ae69a4 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -900,7 +900,8 @@ public:
bool checkDataSection() {
if (CurrentState != DataSection) {
- auto *WS = cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly());
+ auto *WS = static_cast<const MCSectionWasm *>(
+ getStreamer().getCurrentSectionOnly());
if (WS && WS->isText())
return error("data directive must occur in a data segment: ",
Lexer.getTok());
@@ -1218,7 +1219,8 @@ public:
void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) override {
// Code below only applies to labels in text sections.
- auto *CWS = cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly());
+ auto *CWS = static_cast<const MCSectionWasm *>(
+ getStreamer().getCurrentSectionOnly());
if (!CWS->isText())
return;
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td
index 13603f8..a606209 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -71,6 +71,7 @@ def FeatureReferenceTypes :
SubtargetFeature<"reference-types", "HasReferenceTypes", "true",
"Enable reference types">;
+def FeatureGC : SubtargetFeature<"gc", "HasGC", "true", "Enable wasm gc">;
def FeatureRelaxedSIMD :
SubtargetFeature<"relaxed-simd", "SIMDLevel", "RelaxedSIMD",
"Enable relaxed-simd instructions">;
@@ -136,13 +137,13 @@ def : ProcessorModel<"lime1", NoSchedModel,
// Latest and greatest experimental version of WebAssembly. Bugs included!
def : ProcessorModel<"bleeding-edge", NoSchedModel,
- [FeatureAtomics, FeatureBulkMemory, FeatureBulkMemoryOpt,
- FeatureCallIndirectOverlong, FeatureExceptionHandling,
- FeatureExtendedConst, FeatureFP16, FeatureMultiMemory,
- FeatureMultivalue, FeatureMutableGlobals,
- FeatureNontrappingFPToInt, FeatureRelaxedSIMD,
- FeatureReferenceTypes, FeatureSIMD128, FeatureSignExt,
- FeatureTailCall]>;
+ [FeatureAtomics, FeatureBulkMemory, FeatureBulkMemoryOpt,
+ FeatureCallIndirectOverlong, FeatureExceptionHandling,
+ FeatureExtendedConst, FeatureFP16, FeatureMultiMemory,
+ FeatureMultivalue, FeatureMutableGlobals,
+ FeatureNontrappingFPToInt, FeatureRelaxedSIMD,
+ FeatureReferenceTypes, FeatureGC, FeatureSIMD128,
+ FeatureSignExt, FeatureTailCall]>;
//===----------------------------------------------------------------------===//
// Target Declaration
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 11936a3..cd434f7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -288,7 +288,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Expand float operations supported for scalars but not SIMD
for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
- ISD::FEXP, ISD::FEXP2})
+ ISD::FEXP, ISD::FEXP2, ISD::FEXP10})
for (auto T : {MVT::v4f32, MVT::v2f64})
setOperationAction(Op, T, Expand);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index b5e723e..2b632fd 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -76,6 +76,9 @@ def HasReferenceTypes :
Predicate<"Subtarget->hasReferenceTypes()">,
AssemblerPredicate<(all_of FeatureReferenceTypes), "reference-types">;
+def HasGC : Predicate<"Subtarget->hasGC()">,
+ AssemblerPredicate<(all_of FeatureGC), "gc">;
+
def HasRelaxedSIMD :
Predicate<"Subtarget->hasRelaxedSIMD()">,
AssemblerPredicate<(all_of FeatureRelaxedSIMD), "relaxed-simd">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
index 40b87a0..fc82e5b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
@@ -36,13 +36,10 @@ multiclass REF_I<WebAssemblyRegClass rc, ValueType vt, string ht> {
Requires<[HasReferenceTypes]>;
}
-defm REF_TEST_FUNCREF :
- I<(outs I32: $res),
- (ins TypeIndex:$type, FUNCREF: $ref),
- (outs),
- (ins TypeIndex:$type),
- [],
- "ref.test\t$type, $ref", "ref.test $type", 0xfb14>;
+defm REF_TEST_FUNCREF : I<(outs I32:$res), (ins TypeIndex:$type, FUNCREF:$ref),
+ (outs), (ins TypeIndex:$type), [],
+ "ref.test\t$type, $ref", "ref.test $type", 0xfb14>,
+ Requires<[HasGC]>;
defm "" : REF_I<FUNCREF, funcref, "func">;
defm "" : REF_I<EXTERNREF, externref, "extern">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index d13862f..143298b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1540,6 +1540,8 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
(!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
}
defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 7912aeb..ffd135d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -63,8 +63,10 @@ void OptimizeReturned::visitCallBase(CallBase &CB) {
if (isa<Constant>(Arg))
continue;
// Like replaceDominatedUsesWith but using Instruction/Use dominance.
- Arg->replaceUsesWithIf(&CB,
- [&](Use &U) { return DT->dominates(&CB, U); });
+ Arg->replaceUsesWithIf(&CB, [&](Use &U) {
+ auto *I = cast<Instruction>(U.getUser());
+ return !I->isLifetimeStartOrEnd() && DT->dominates(&CB, U);
+ });
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 40ea48a..a3ce40f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -43,6 +43,11 @@ WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU,
Bits.set(WebAssembly::FeatureBulkMemoryOpt);
}
+ // gc implies reference-types
+ if (HasGC) {
+ HasReferenceTypes = true;
+ }
+
// reference-types implies call-indirect-overlong
if (HasReferenceTypes) {
HasCallIndirectOverlong = true;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 591ce256..f814274 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -51,6 +51,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
bool HasMutableGlobals = false;
bool HasNontrappingFPToInt = false;
bool HasReferenceTypes = false;
+ bool HasGC = false;
bool HasSignExt = false;
bool HasTailCall = false;
bool HasWideArithmetic = false;
@@ -107,6 +108,7 @@ public:
bool hasMutableGlobals() const { return HasMutableGlobals; }
bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; }
bool hasReferenceTypes() const { return HasReferenceTypes; }
+ bool hasGC() const { return HasGC; }
bool hasRelaxedSIMD() const { return SIMDLevel >= RelaxedSIMD; }
bool hasSignExt() const { return HasSignExt; }
bool hasSIMD128() const { return SIMDLevel >= SIMD128; }
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b642c1c..d7671ed 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1042,8 +1042,8 @@ private:
}
PrevState = CurrState;
}
- void onRParen() {
- PrevState = State;
+ bool onRParen(StringRef &ErrMsg) {
+ IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
@@ -1054,9 +1054,27 @@ private:
case IES_RBRAC:
case IES_RPAREN:
State = IES_RPAREN;
+ // In the case of a multiply, onRegister has already set IndexReg
+ // directly, with appropriate scale.
+      // Otherwise, if we just saw a register, it has only been stored in
+      // TmpReg, so we need to store it into the state machine.
+ if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+ // If we already have a BaseReg, then assume this is the IndexReg with
+ // no explicit scale.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ if (IndexReg)
+ return regsUseUpError(ErrMsg);
+ IndexReg = TmpReg;
+ Scale = 0;
+ }
+ }
IC.pushOperator(IC_RPAREN);
break;
}
+ PrevState = CurrState;
+ return false;
}
bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
const InlineAsmIdentifierInfo &IDInfo,
@@ -2172,7 +2190,11 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
}
break;
case AsmToken::LParen: SM.onLParen(); break;
- case AsmToken::RParen: SM.onRParen(); break;
+ case AsmToken::RParen:
+ if (SM.onRParen(ErrMsg)) {
+ return Error(Tok.getLoc(), ErrMsg);
+ }
+ break;
}
if (SM.hadError())
return Error(Tok.getLoc(), "unknown token in expression");
@@ -4781,7 +4803,7 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) {
getStreamer().initSections(false, getSTI());
Section = getStreamer().getCurrentSectionOnly();
}
- if (Section->useCodeAlign())
+ if (getContext().getAsmInfo()->useCodeAlign(*Section))
getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
else
getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index e213923..7f9d474 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -388,36 +388,6 @@ static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
return false;
}
-/// Check if the instruction to be emitted is right after any data.
-static bool
-isRightAfterData(MCFragment *CurrentFragment,
- const std::pair<MCFragment *, size_t> &PrevInstPosition) {
- MCFragment *F = CurrentFragment;
- // Since data is always emitted into a DataFragment, our check strategy is
- // simple here.
- // - If the fragment is a DataFragment
- // - If it's empty (section start or data after align), return false.
- // - If it's not the fragment where the previous instruction is,
- // returns true.
- // - If it's the fragment holding the previous instruction but its
- // size changed since the previous instruction was emitted into
- // it, returns true.
- // - Otherwise returns false.
- // - If the fragment is not a DataFragment, returns false.
- if (F->getKind() == MCFragment::FT_Data)
- return F->getFixedSize() && (F != PrevInstPosition.first ||
- F->getFixedSize() != PrevInstPosition.second);
-
- return false;
-}
-
-/// \returns the fragment size if it has instructions, otherwise returns 0.
-static size_t getSizeForInstFragment(const MCFragment *F) {
- if (!F || !F->hasInstructions())
- return 0;
- return F->getSize();
-}
-
/// Return true if we can insert NOP or prefixes automatically before the
/// the instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
@@ -441,9 +411,11 @@ bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
// semantic.
return false;
- if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
- // If this instruction follows any data, there is no clear
- // instruction boundary, inserting a nop/prefix would change semantic.
+ // If this instruction follows any data, there is no clear instruction
+ // boundary, inserting a nop/prefix would change semantic.
+ auto Offset = OS.getCurFragSize();
+ if (Offset && (OS.getCurrentFragment() != PrevInstPosition.first ||
+ Offset != PrevInstPosition.second))
return false;
return true;
@@ -552,7 +524,7 @@ void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
// Update PrevInstOpcode here, canPadInst() reads that.
MCFragment *CF = OS.getCurrentFragment();
PrevInstOpcode = Inst.getOpcode();
- PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
+ PrevInstPosition = std::make_pair(CF, OS.getCurFragSize());
if (!canPadBranches(OS))
return;
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index 620526ff..3f2a433 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -12,8 +12,52 @@
// NOTE: NO INCLUDE GUARD DESIRED!
+#ifndef DUMMY_FUNCTION_PASS
+#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this))
+DUMMY_FUNCTION_PASS("lower-amx-type", X86LowerAMXTypePass(*this))
+DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction())
+DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
+#undef DUMMY_FUNCTION_PASS
+
#ifndef MACHINE_FUNCTION_PASS
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
MACHINE_FUNCTION_PASS("x86-isel", X86ISelDAGToDAGPass(*this))
#undef MACHINE_FUNCTION_PASS
+
+#ifndef DUMMY_MACHINE_FUNCTION_PASS
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME)
+#endif
+DUMMY_MACHINE_FUNCTION_PASS("x86-avoid-SFB", X86AvoidSFBPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-avoid-trailing-call", X86AvoidTrailingCallPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-cf-opt", X86CallFrameOptimization())
+DUMMY_MACHINE_FUNCTION_PASS("x86-cmov-conversion", X86CmovConverterPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-codege", FPS())
+DUMMY_MACHINE_FUNCTION_PASS("x86-compress-evex", CompressEVEXPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-domain-reassignment", X86DomainReassignment())
+DUMMY_MACHINE_FUNCTION_PASS("x86-dyn-alloca-expander", X86DynAllocaExpander())
+DUMMY_MACHINE_FUNCTION_PASS("x86-execution-domain-fix", X86ExecutionDomainFix())
+DUMMY_MACHINE_FUNCTION_PASS("fastpretileconfig", X86FastPreTileConfig())
+DUMMY_MACHINE_FUNCTION_PASS("fasttileconfig", X86FastTileConfig())
+DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-LEAs", FixupLEAPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-bw-inst", FixupBWInstPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-inst-tuning", X86FixupInstTuningPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-setcc", X86FixupSetCCPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-vector-constants", X86FixupVectorConstantsPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-flags-copy-lowering", X86FlagsCopyLoweringPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-lower-tile-copy", X86LowerTileCopy())
+DUMMY_MACHINE_FUNCTION_PASS("x86-lvi-load", X86LoadValueInjectionLoadHardeningPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-lvi-ret", X86LoadValueInjectionRetHardeningPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-optimize-LEAs", X86OptimizeLEAPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-pseudo", X86ExpandPseudo())
+DUMMY_MACHINE_FUNCTION_PASS("x86-return-thunks", X86ReturnThunks())
+DUMMY_MACHINE_FUNCTION_PASS("x86-seses", X86SpeculativeExecutionSideEffectSuppression())
+DUMMY_MACHINE_FUNCTION_PASS("x86-slh", X86SpeculativeLoadHardeningPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-suppress-apx-for-relocation", X86SuppressAPXForRelocationPass())
+DUMMY_MACHINE_FUNCTION_PASS("tile-pre-config", X86PreTileConfig())
+DUMMY_MACHINE_FUNCTION_PASS("tileconfig", X86TileConfig())
+DUMMY_MACHINE_FUNCTION_PASS("x86-wineh-unwindv2", X86WinEHUnwindV2())
+DUMMY_MACHINE_FUNCTION_PASS("x86argumentstackrebase", X86ArgumentStackSlotPass())
+#undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 37a7b37..90791fc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1838,14 +1838,15 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX512BWShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } }, // vpbroadcastb
+ { TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb
- { TTI::SK_Reverse, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_Reverse, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_Reverse, MVT::v32i16, { 2, 6, 2, 4 } }, // vpermw
+ { TTI::SK_Reverse, MVT::v32f16, { 2, 6, 2, 4 } }, // vpermw
{ TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_Reverse, MVT::v64i8, { 2, 2, 2, 2 } }, // pshufb + vshufi64x2
+ { TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_Reverse, MVT::v64i8, { 2, 9, 2, 3 } }, // pshufb + vshufi64x2
{ TTI::SK_PermuteSingleSrc, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
{ TTI::SK_PermuteSingleSrc, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
@@ -1874,18 +1875,25 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX512ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v8f64, { 1, 1, 1, 1 } }, // vbroadcastsd
- {TTI::SK_Broadcast, MVT::v16f32, { 1, 1, 1, 1 } }, // vbroadcastss
- {TTI::SK_Broadcast, MVT::v8i64, { 1, 1, 1, 1 } }, // vpbroadcastq
- {TTI::SK_Broadcast, MVT::v16i32, { 1, 1, 1, 1 } }, // vpbroadcastd
- {TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } }, // vpbroadcastb
-
- {TTI::SK_Reverse, MVT::v8f64, { 1, 3, 1, 1 } }, // vpermpd
- {TTI::SK_Reverse, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermps
- {TTI::SK_Reverse, MVT::v8i64, { 1, 3, 1, 1 } }, // vpermq
- {TTI::SK_Reverse, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermd
+ {TTI::SK_Broadcast, MVT::v8f64, { 1, 3, 1, 1 } }, // vbroadcastsd
+ {TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 1 } }, // vbroadcastsd
+ {TTI::SK_Broadcast, MVT::v16f32, { 1, 3, 1, 1 } }, // vbroadcastss
+ {TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 1 } }, // vbroadcastss
+ {TTI::SK_Broadcast, MVT::v8i64, { 1, 3, 1, 1 } }, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 1 } }, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v16i32, { 1, 3, 1, 1 } }, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 1 } }, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb
+ {TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 1 }}, // vpbroadcastb
+
+ {TTI::SK_Reverse, MVT::v8f64, { 1, 5, 2, 3 } }, // vpermpd
+ {TTI::SK_Reverse, MVT::v16f32, { 1, 3, 2, 3 } }, // vpermps
+ {TTI::SK_Reverse, MVT::v8i64, { 1, 5, 2, 3 } }, // vpermq
+ {TTI::SK_Reverse, MVT::v16i32, { 1, 3, 2, 3 } }, // vpermd
{TTI::SK_Reverse, MVT::v32i16, { 7, 7, 7, 7 } }, // per mca
{TTI::SK_Reverse, MVT::v32f16, { 7, 7, 7, 7 } }, // per mca
{TTI::SK_Reverse, MVT::v64i8, { 7, 7, 7, 7 } }, // per mca
@@ -1973,21 +1981,24 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX2ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f64, { 1, 1, 1, 1 } }, // vbroadcastpd
- { TTI::SK_Broadcast, MVT::v8f32, { 1, 1, 1, 1 } }, // vbroadcastps
- { TTI::SK_Broadcast, MVT::v4i64, { 1, 1, 1, 1 } }, // vpbroadcastq
- { TTI::SK_Broadcast, MVT::v8i32, { 1, 1, 1, 1 } }, // vpbroadcastd
- { TTI::SK_Broadcast, MVT::v16i16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v16f16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32i8, { 1, 1, 1, 1 } }, // vpbroadcastb
-
- { TTI::SK_Reverse, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermpd
- { TTI::SK_Reverse, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermps
- { TTI::SK_Reverse, MVT::v4i64, { 1, 1, 1, 1 } }, // vpermq
- { TTI::SK_Reverse, MVT::v8i32, { 1, 1, 1, 1 } }, // vpermd
- { TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v32i8, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
+ { TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 2 } }, // vbroadcastpd
+ { TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 2 } }, // vbroadcastps
+ { TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 2 } }, // vpbroadcastq
+ { TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 2 } }, // vpbroadcastd
+ { TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 2 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v8i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 2 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v8f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 2 } }, // vpbroadcastb
+ { TTI::SK_Broadcast, MVT::v16i8, { 1, 3, 1, 1 } }, // vpbroadcastb
+
+ { TTI::SK_Reverse, MVT::v4f64, { 1, 6, 1, 2 } }, // vpermpd
+ { TTI::SK_Reverse, MVT::v8f32, { 2, 7, 2, 4 } }, // vpermps
+ { TTI::SK_Reverse, MVT::v4i64, { 1, 6, 1, 2 } }, // vpermq
+ { TTI::SK_Reverse, MVT::v8i32, { 2, 7, 2, 4 } }, // vpermd
+ { TTI::SK_Reverse, MVT::v16i16, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v16f16, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v32i8, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb
{ TTI::SK_Select, MVT::v16i16, { 1, 1, 1, 1 } }, // vpblendvb
{ TTI::SK_Select, MVT::v16f16, { 1, 1, 1, 1 } }, // vpblendvb
@@ -2077,23 +2088,23 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX1ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Broadcast, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Broadcast, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Broadcast, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Broadcast, MVT::v16i16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
- {TTI::SK_Broadcast, MVT::v16f16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
- {TTI::SK_Broadcast, MVT::v32i8, {2,2,2,2}}, // vpshufb + vinsertf128
-
- {TTI::SK_Reverse, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Reverse, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Reverse, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Reverse, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Reverse, MVT::v16i16, {4,4,4,4}}, // vextractf128 + 2*pshufb
+ {TTI::SK_Broadcast, MVT::v4f64, {2,3,2,3}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8f32, {2,3,2,3}}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v4i64, {2,3,2,3}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8i32, {2,3,2,3}}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v16i16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v16f16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v32i8, {3,4,3,6}}, // vpshufb + vinsertf128
+
+ {TTI::SK_Reverse, MVT::v4f64, {2,6,2,2}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8f32, {2,7,2,4}}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v4i64, {2,6,2,2}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8i32, {2,7,2,4}}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v16i16, {2,9,5,5}}, // vextractf128 + 2*pshufb
// + vinsertf128
- {TTI::SK_Reverse, MVT::v16f16, {4,4,4,4}}, // vextractf128 + 2*pshufb
+ {TTI::SK_Reverse, MVT::v16f16, {2,9,5,5}}, // vextractf128 + 2*pshufb
// + vinsertf128
- {TTI::SK_Reverse, MVT::v32i8, {4,4,4,4}}, // vextractf128 + 2*pshufb
+ {TTI::SK_Reverse, MVT::v32i8, {2,9,5,5}}, // vextractf128 + 2*pshufb
// + vinsertf128
{TTI::SK_Select, MVT::v4i64, {1,1,1,1}}, // vblendpd
@@ -2156,13 +2167,13 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry SSSE3ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
- {TTI::SK_Broadcast, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
- {TTI::SK_Broadcast, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v8i16, {1, 3, 2, 2}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v8f16, {1, 3, 2, 2}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v16i8, {1, 3, 2, 2}}, // pshufb
- {TTI::SK_Reverse, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
- {TTI::SK_Reverse, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
- {TTI::SK_Reverse, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_Reverse, MVT::v8i16, {1, 2, 1, 2}}, // pshufb
+ {TTI::SK_Reverse, MVT::v8f16, {1, 2, 1, 2}}, // pshufb
+ {TTI::SK_Reverse, MVT::v16i8, {1, 2, 1, 2}}, // pshufb
{TTI::SK_Select, MVT::v8i16, {3, 3, 3, 3}}, // 2*pshufb + por
{TTI::SK_Select, MVT::v8f16, {3, 3, 3, 3}}, // 2*pshufb + por
@@ -2192,16 +2203,16 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Broadcast, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_Broadcast, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
{TTI::SK_Broadcast, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
- {TTI::SK_Broadcast, MVT::v8i16, {2, 2, 2, 2}}, // pshuflw + pshufd
- {TTI::SK_Broadcast, MVT::v8f16, {2, 2, 2, 2}}, // pshuflw + pshufd
- {TTI::SK_Broadcast, MVT::v16i8, {3, 3, 3, 3}}, // unpck + pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v8i16, {1, 2, 2, 2}}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v8f16, {1, 2, 2, 2}}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v16i8, {2, 3, 3, 4}}, // unpck + pshuflw + pshufd
{TTI::SK_Reverse, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_Reverse, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
{TTI::SK_Reverse, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
- {TTI::SK_Reverse, MVT::v8i16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
- {TTI::SK_Reverse, MVT::v8f16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
- {TTI::SK_Reverse, MVT::v16i8, {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw
+ {TTI::SK_Reverse, MVT::v8i16, {2, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
+ {TTI::SK_Reverse, MVT::v8f16, {2, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
+ {TTI::SK_Reverse, MVT::v16i8, {5, 6,11,11}}, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + packus
{TTI::SK_Select, MVT::v2i64, {1, 1, 1, 1}}, // movsd
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index 17c9833..d6afb8a 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -858,16 +858,15 @@ void RISCVISAInfo::updateImplication() {
StringRef ExtName = WorkList.pop_back_val();
auto Range = std::equal_range(std::begin(ImpliedExts),
std::end(ImpliedExts), ExtName);
- std::for_each(Range.first, Range.second,
- [&](const ImpliedExtsEntry &Implied) {
- const char *ImpliedExt = Implied.ImpliedExt;
- auto [It, Inserted] = Exts.try_emplace(ImpliedExt);
- if (!Inserted)
- return;
- auto Version = findDefaultVersion(ImpliedExt);
- It->second = *Version;
- WorkList.push_back(ImpliedExt);
- });
+ for (const ImpliedExtsEntry &Implied : llvm::make_range(Range)) {
+ const char *ImpliedExt = Implied.ImpliedExt;
+ auto [It, Inserted] = Exts.try_emplace(ImpliedExt);
+ if (!Inserted)
+ continue;
+ auto Version = findDefaultVersion(ImpliedExt);
+ It->second = *Version;
+ WorkList.push_back(ImpliedExt);
+ }
}
// Add Zcd if C and D are enabled.
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 4ca7444..e5c896f 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -451,6 +451,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["permlane16-swap"] = true;
Features["ashr-pk-insts"] = true;
Features["atomic-buffer-pk-add-bf16-inst"] = true;
+ Features["vmem-pref-insts"] = true;
Features["atomic-fadd-rtn-insts"] = true;
Features["atomic-buffer-global-pk-add-f16-insts"] = true;
Features["atomic-flat-pk-add-16-insts"] = true;
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index ee6651c..6acb0bc 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -277,6 +277,8 @@ StringRef Triple::getVendorTypeName(VendorType Kind) {
case PC: return "pc";
case SCEI: return "scei";
case SUSE: return "suse";
+ case Meta:
+ return "meta";
}
llvm_unreachable("Invalid VendorType!");
@@ -390,6 +392,8 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case OpenHOS: return "ohos";
case PAuthTest:
return "pauthtest";
+ case MTIA:
+ return "mtia";
case LLVM:
return "llvm";
case Mlibc:
@@ -677,6 +681,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
.Case("suse", Triple::SUSE)
.Case("oe", Triple::OpenEmbedded)
.Case("intel", Triple::Intel)
+ .Case("meta", Triple::Meta)
.Default(Triple::UnknownVendor);
}
@@ -780,6 +785,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
.StartsWith("pauthtest", Triple::PAuthTest)
.StartsWith("llvm", Triple::LLVM)
.StartsWith("mlibc", Triple::Mlibc)
+ .StartsWith("mtia", Triple::MTIA)
.Default(Triple::UnknownEnvironment);
}
diff --git a/llvm/lib/TextAPI/SymbolSet.cpp b/llvm/lib/TextAPI/SymbolSet.cpp
index 2e0b416..f21a061 100644
--- a/llvm/lib/TextAPI/SymbolSet.cpp
+++ b/llvm/lib/TextAPI/SymbolSet.cpp
@@ -11,6 +11,11 @@
using namespace llvm;
using namespace llvm::MachO;
+SymbolSet::~SymbolSet() {
+ for (auto &[Key, Sym] : Symbols)
+ Sym->~Symbol();
+}
+
Symbol *SymbolSet::addGlobalImpl(EncodeKind Kind, StringRef Name,
SymbolFlags Flags) {
Name = copyString(Name);
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 59ae057..ac93f748 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -85,6 +85,9 @@ static Intrinsic::ID NonOverloadedCoroIntrinsics[] = {
Intrinsic::coro_id_async,
Intrinsic::coro_id_retcon,
Intrinsic::coro_id_retcon_once,
+ Intrinsic::coro_noop,
+ Intrinsic::coro_prepare_async,
+ Intrinsic::coro_prepare_retcon,
Intrinsic::coro_promise,
Intrinsic::coro_resume,
Intrinsic::coro_save,
diff --git a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
index b3910c4..d895cd7 100644
--- a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
+++ b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
@@ -37,6 +37,16 @@
// memory that ends up in one of the runtime equivalents, since this can
// happen if e.g. a library that was compiled without interposition returns
// an allocation that can be validly passed to `free`.
+//
+// 3. MathFixup (required): Some accelerators might have an incomplete
+// implementation for the intrinsics used to implement some of the math
+// functions in <cmath> / their corresponding libcall lowerings. Since this
+// can vary quite significantly between accelerators, we replace calls to a
+// set of intrinsics / lib functions known to be problematic with calls to a
+//    HIPSTDPAR-specific forwarding layer, which gives a uniform interface for
+//    accelerators to implement in their own runtime components. This pass
+//    should run before AcceleratorCodeSelection so as to prevent the spurious
+//    removal of the HIPSTDPAR-specific forwarding functions.
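+//    For example, a call to the libm function erff is forwarded to
+//    __hipstdpar_erf_f32, and a call to the llvm.cos.f64 intrinsic becomes a
+//    call to __hipstdpar_cos_f64.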
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
@@ -49,6 +59,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -519,3 +530,110 @@ HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
return PreservedAnalyses::none();
}
+
+static constexpr std::pair<StringLiteral, StringLiteral> MathLibToHipStdPar[]{
+ {"acosh", "__hipstdpar_acosh_f64"},
+ {"acoshf", "__hipstdpar_acosh_f32"},
+ {"asinh", "__hipstdpar_asinh_f64"},
+ {"asinhf", "__hipstdpar_asinh_f32"},
+ {"atanh", "__hipstdpar_atanh_f64"},
+ {"atanhf", "__hipstdpar_atanh_f32"},
+ {"cbrt", "__hipstdpar_cbrt_f64"},
+ {"cbrtf", "__hipstdpar_cbrt_f32"},
+ {"erf", "__hipstdpar_erf_f64"},
+ {"erff", "__hipstdpar_erf_f32"},
+ {"erfc", "__hipstdpar_erfc_f64"},
+ {"erfcf", "__hipstdpar_erfc_f32"},
+ {"fdim", "__hipstdpar_fdim_f64"},
+ {"fdimf", "__hipstdpar_fdim_f32"},
+ {"expm1", "__hipstdpar_expm1_f64"},
+ {"expm1f", "__hipstdpar_expm1_f32"},
+ {"hypot", "__hipstdpar_hypot_f64"},
+ {"hypotf", "__hipstdpar_hypot_f32"},
+ {"ilogb", "__hipstdpar_ilogb_f64"},
+ {"ilogbf", "__hipstdpar_ilogb_f32"},
+ {"lgamma", "__hipstdpar_lgamma_f64"},
+ {"lgammaf", "__hipstdpar_lgamma_f32"},
+ {"log1p", "__hipstdpar_log1p_f64"},
+ {"log1pf", "__hipstdpar_log1p_f32"},
+ {"logb", "__hipstdpar_logb_f64"},
+ {"logbf", "__hipstdpar_logb_f32"},
+ {"nextafter", "__hipstdpar_nextafter_f64"},
+ {"nextafterf", "__hipstdpar_nextafter_f32"},
+ {"nexttoward", "__hipstdpar_nexttoward_f64"},
+ {"nexttowardf", "__hipstdpar_nexttoward_f32"},
+ {"remainder", "__hipstdpar_remainder_f64"},
+ {"remainderf", "__hipstdpar_remainder_f32"},
+ {"remquo", "__hipstdpar_remquo_f64"},
+ {"remquof", "__hipstdpar_remquo_f32"},
+ {"scalbln", "__hipstdpar_scalbln_f64"},
+ {"scalblnf", "__hipstdpar_scalbln_f32"},
+ {"scalbn", "__hipstdpar_scalbn_f64"},
+ {"scalbnf", "__hipstdpar_scalbn_f32"},
+ {"tgamma", "__hipstdpar_tgamma_f64"},
+ {"tgammaf", "__hipstdpar_tgamma_f32"}};
+
+PreservedAnalyses HipStdParMathFixupPass::run(Module &M,
+ ModuleAnalysisManager &) {
+ if (M.empty())
+ return PreservedAnalyses::all();
+
+ SmallVector<std::pair<Function *, std::string>> ToReplace;
+ for (auto &&F : M) {
+ if (!F.hasName())
+ continue;
+
+ StringRef N = F.getName();
+ Intrinsic::ID ID = F.getIntrinsicID();
+
+ switch (ID) {
+ case Intrinsic::not_intrinsic: {
+ auto It =
+ find_if(MathLibToHipStdPar, [&](auto &&M) { return M.first == N; });
+ if (It == std::cend(MathLibToHipStdPar))
+ continue;
+ ToReplace.emplace_back(&F, It->second);
+ break;
+ }
+ case Intrinsic::acos:
+ case Intrinsic::asin:
+ case Intrinsic::atan:
+ case Intrinsic::atan2:
+ case Intrinsic::cosh:
+ case Intrinsic::modf:
+ case Intrinsic::sinh:
+ case Intrinsic::tan:
+ case Intrinsic::tanh:
+ break;
+ default: {
+ if (F.getReturnType()->isDoubleTy()) {
+ switch (ID) {
+ case Intrinsic::cos:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::log:
+ case Intrinsic::log10:
+ case Intrinsic::log2:
+ case Intrinsic::pow:
+ case Intrinsic::sin:
+ break;
+ default:
+ continue;
+ }
+ break;
+ }
+ continue;
+ }
+ }
+
+ ToReplace.emplace_back(&F, N);
+ llvm::replace(ToReplace.back().second, '.', '_');
+ StringRef Prefix = "llvm";
+ ToReplace.back().second.replace(0, Prefix.size(), "__hipstdpar");
+ }
+ for (auto &&[F, NewF] : ToReplace)
+ F->replaceAllUsesWith(
+ M.getOrInsertFunction(NewF, F->getFunctionType()).getCallee());
+
+ return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index b803c97..2b392fe 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -97,6 +97,8 @@ STATISTIC(MissingAllocForContextId,
"Number of missing alloc nodes for context ids");
STATISTIC(SkippedCallsCloning,
"Number of calls skipped during cloning due to unexpected operand");
+STATISTIC(MismatchedCloneAssignments,
+ "Number of callsites assigned to call multiple non-matching clones");
static cl::opt<std::string> DotFilePathPrefix(
"memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
@@ -2060,6 +2062,20 @@ static bool isMemProfClone(const Function &F) {
return F.getName().contains(MemProfCloneSuffix);
}
+// Return the clone number of the given function by extracting it from the
+// memprof suffix. Assumes the caller has already confirmed it is a memprof
+// clone.
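+// For example, a clone named like "foo.memprof.2" yields clone number 2.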
+static unsigned getMemProfCloneNum(const Function &F) {
+ assert(isMemProfClone(F));
+ auto Pos = F.getName().find_last_of('.');
+ assert(Pos > 0);
+ unsigned CloneNo;
+ bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
+ assert(!Err);
+ (void)Err;
+ return CloneNo;
+}
+
std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
const Instruction *Call,
unsigned CloneNo) const {
@@ -2073,14 +2089,14 @@ std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
unsigned CloneNo) const {
auto VI = FSToVIMap.find(Func);
assert(VI != FSToVIMap.end());
+ std::string CallerName = getMemProfFuncName(VI->second.name(), CloneNo);
if (isa<AllocInfo *>(Call))
- return (VI->second.name() + " -> alloc").str();
+ return CallerName + " -> alloc";
else {
auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call);
- return (VI->second.name() + " -> " +
- getMemProfFuncName(Callsite->Callee.name(),
- Callsite->Clones[CloneNo]))
- .str();
+ return CallerName + " -> " +
+ getMemProfFuncName(Callsite->Callee.name(),
+ Callsite->Clones[CloneNo]);
}
}
@@ -3979,7 +3995,22 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
FuncInfo CalleeFunc) {
- if (CalleeFunc.cloneNo() > 0)
+ auto *CurF = cast<CallBase>(CallerCall.call())->getCalledFunction();
+ auto NewCalleeCloneNo = CalleeFunc.cloneNo();
+ if (isMemProfClone(*CurF)) {
+ // If we already assigned this callsite to call a specific non-default
+ // clone (i.e. not the original function which is clone 0), ensure that we
+ // aren't trying to now update it to call a different clone, which is
+ // indicative of a bug in the graph or function assignment.
+ auto CurCalleeCloneNo = getMemProfCloneNum(*CurF);
+ if (CurCalleeCloneNo != NewCalleeCloneNo) {
+ LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
+ << CurCalleeCloneNo << " now " << NewCalleeCloneNo
+ << "\n");
+ MismatchedCloneAssignments++;
+ }
+ }
+ if (NewCalleeCloneNo > 0)
cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
OREGetter(CallerCall.call()->getFunction())
.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
@@ -3995,7 +4026,19 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
assert(CI &&
"Caller cannot be an allocation which should not have profiled calls");
assert(CI->Clones.size() > CallerCall.cloneNo());
- CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
+ auto NewCalleeCloneNo = CalleeFunc.cloneNo();
+ auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
+ // If we already assigned this callsite to call a specific non-default
+ // clone (i.e. not the original function which is clone 0), ensure that we
+ // aren't trying to now update it to call a different clone, which is
+ // indicative of a bug in the graph or function assignment.
+ if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
+ LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
+ << CurCalleeCloneNo << " now " << NewCalleeCloneNo
+ << "\n");
+ MismatchedCloneAssignments++;
+ }
+ CurCalleeCloneNo = NewCalleeCloneNo;
}
// Update the debug information attached to NewFunc to use the clone Name. Note
@@ -4457,14 +4500,24 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;
};
+ // Information for a single clone of this Func.
+ struct FuncCloneInfo {
+ // The function clone.
+ FuncInfo FuncClone;
+ // Remappings of each call of interest (from original uncloned call to the
+ // corresponding cloned call in this function clone).
+ std::map<CallInfo, CallInfo> CallMap;
+ };
+
// Walk all functions for which we saw calls with memprof metadata, and handle
// cloning for each of its calls.
for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
FuncInfo OrigFunc(Func);
- // Map from each clone of OrigFunc to a map of remappings of each call of
- // interest (from original uncloned call to the corresponding cloned call in
- // that function clone).
- std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
+ // Map from each clone number of OrigFunc to information about that function
+ // clone (the function clone FuncInfo and call remappings). The index into
+ // the vector is the clone number, as function clones are created and
+ // numbered sequentially.
+ std::vector<FuncCloneInfo> FuncCloneInfos;
for (auto &Call : CallsWithMetadata) {
ContextNode *Node = getNodeForInst(Call);
// Skip call if we do not have a node for it (all uses of its stack ids
@@ -4488,8 +4541,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// Record the clone of callsite node assigned to this function clone.
FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
- assert(FuncClonesToCallMap.count(FuncClone));
- std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
+ assert(FuncCloneInfos.size() > FuncClone.cloneNo());
+ std::map<CallInfo, CallInfo> &CallMap =
+ FuncCloneInfos[FuncClone.cloneNo()].CallMap;
CallInfo CallClone(Call);
if (auto It = CallMap.find(Call); It != CallMap.end())
CallClone = It->second;
@@ -4528,10 +4582,10 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// than existing function clones, which would have been assigned to an
// earlier clone in the list (we assign callsite clones to function
// clones greedily).
- if (FuncClonesToCallMap.size() < NodeCloneCount) {
+ if (FuncCloneInfos.size() < NodeCloneCount) {
// If this is the first callsite copy, assign to original function.
if (NodeCloneCount == 1) {
- // Since FuncClonesToCallMap is empty in this case, no clones have
+ // Since FuncCloneInfos is empty in this case, no clones have
// been created for this function yet, and no callers should have
// been assigned a function clone for this callee node yet.
assert(llvm::none_of(
@@ -4540,7 +4594,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
}));
// Initialize with empty call map, assign Clone to original function
// and its callers, and skip to the next clone.
- FuncClonesToCallMap[OrigFunc] = {};
+ FuncCloneInfos.push_back({OrigFunc, {}});
AssignCallsiteCloneToFuncClone(
OrigFunc, Call, Clone,
AllocationCallToContextNodeMap.count(Call));
@@ -4572,14 +4626,14 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
}
// Clone function and save it along with the CallInfo map created
- // during cloning in the FuncClonesToCallMap.
+ // during cloning in the FuncCloneInfos.
std::map<CallInfo, CallInfo> NewCallMap;
- unsigned CloneNo = FuncClonesToCallMap.size();
+ unsigned CloneNo = FuncCloneInfos.size();
assert(CloneNo > 0 && "Clone 0 is the original function, which "
"should already exist in the map");
FuncInfo NewFuncClone = cloneFunctionForCallsite(
OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
- FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
+ FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
FunctionClonesAnalysis++;
Changed = true;
@@ -4681,7 +4735,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
CallInfo OrigCall(Callee->getOrigNode()->Call);
OrigCall.setCloneNo(0);
std::map<CallInfo, CallInfo> &CallMap =
- FuncClonesToCallMap[NewFuncClone];
+ FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
assert(CallMap.count(OrigCall));
CallInfo NewCall(CallMap[OrigCall]);
assert(NewCall);
@@ -4703,6 +4757,19 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// where the callers were assigned to different clones of a function.
}
+ auto FindFirstAvailFuncClone = [&]() {
+ // Find first function in FuncCloneInfos without an assigned
+ // clone of this callsite Node. We should always have one
+ // available at this point due to the earlier cloning when the
+ // FuncCloneInfos size was smaller than the clone number.
+ for (auto &CF : FuncCloneInfos) {
+ if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
+ return CF.FuncClone;
+ }
+ llvm_unreachable(
+ "Expected an available func clone for this callsite clone");
+ };
+
// See if we can use existing function clone. Walk through
// all caller edges to see if any have already been assigned to
// a clone of this callsite's function. If we can use it, do so. If not,
@@ -4819,16 +4886,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// clone of OrigFunc for another caller during this iteration over
// its caller edges.
if (!FuncCloneAssignedToCurCallsiteClone) {
- // Find first function in FuncClonesToCallMap without an assigned
- // clone of this callsite Node. We should always have one
- // available at this point due to the earlier cloning when the
- // FuncClonesToCallMap size was smaller than the clone number.
- for (auto &CF : FuncClonesToCallMap) {
- if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
- FuncCloneAssignedToCurCallsiteClone = CF.first;
- break;
- }
- }
+ FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
assert(FuncCloneAssignedToCurCallsiteClone);
// Assign Clone to FuncCloneAssignedToCurCallsiteClone
AssignCallsiteCloneToFuncClone(
@@ -4842,6 +4900,31 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
FuncCloneAssignedToCurCallsiteClone);
}
}
+ // If we didn't assign a function clone to this callsite clone yet, e.g.
+ // none of its callers has a non-null call, do the assignment here.
+ // We want to ensure that every callsite clone is assigned to some
+ // function clone, so that the call updates below work as expected.
+ // In particular if this is the original callsite, we want to ensure it
+ // is assigned to the original function, otherwise the original function
+ // will appear available for assignment to other callsite clones,
+ // leading to unintended effects. For one, the unknown and not updated
+ // callers will call into cloned paths leading to the wrong hints,
+ // because they still call the original function (clone 0). Also,
+ // because all callsites start out as being clone 0 by default, we can't
+ // easily distinguish between callsites explicitly assigned to clone 0
+ // vs those never assigned, which can lead to multiple updates of the
+ // calls when invoking updateCall below, with mismatched clone values.
+ // TODO: Add a flag to the callsite nodes or some other mechanism to
+ // better distinguish and identify callsite clones that are not getting
+ // assigned to function clones as expected.
+ if (!FuncCloneAssignedToCurCallsiteClone) {
+ FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
+ assert(FuncCloneAssignedToCurCallsiteClone &&
+ "No available func clone for this callsite clone");
+ AssignCallsiteCloneToFuncClone(
+ FuncCloneAssignedToCurCallsiteClone, Call, Clone,
+ /*IsAlloc=*/AllocationCallToContextNodeMap.contains(Call));
+ }
}
if (VerifyCCG) {
checkNode<DerivedCCG, FuncTy, CallTy>(Node);
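For readers following the FuncClonesToCallMap -> FuncCloneInfos rename above, here is a rough self-contained analogue of the container this hunk switches to. The member names (FuncClone, CallMap) and the clone-number indexing are taken from the hunk itself; the stand-in types below are assumptions of mine, the real code uses the pass's FuncInfo and CallInfo types.

#include <map>
#include <vector>

// Stand-ins for the pass's FuncInfo / CallInfo types (assumption).
using FuncInfo = const void *;
struct CallInfo {
  int Id = 0;
  unsigned CloneNo = 0;
  bool operator<(const CallInfo &O) const { return Id < O.Id; }
};

struct FuncCloneInfo {
  FuncInfo FuncClone;                   // function clone N
  std::map<CallInfo, CallInfo> CallMap; // call in the original -> call in clone N
};

// Indexed by clone number: entry 0 is the original function, so
// FuncCloneInfos.size() is the next clone number to hand out.
std::vector<FuncCloneInfo> FuncCloneInfos;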
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d88bc2c..1b78ace 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1830,10 +1830,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
// abs(-x) -> abs(x)
- // TODO: Copy nsw if it was present on the neg?
Value *X;
- if (match(IIOperand, m_Neg(m_Value(X))))
+ if (match(IIOperand, m_Neg(m_Value(X)))) {
+ if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
+ replaceOperand(*II, 1, Builder.getTrue());
return replaceOperand(*II, 0, X);
+ }
if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
return replaceOperand(*II, 0, X);
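A minimal standalone illustration (plain C++, not LLVM code, names mine) of why the flag propagation above is sound: when the inner negation carries nsw, x cannot be INT_MIN, so the abs may also be marked int_min_is_poison once the neg is stripped.

#include <cassert>
#include <climits>
#include <cstdlib>

// If -x is known not to wrap (the nsw case), then x != INT_MIN, so
// abs(-x) == abs(x) and treating INT_MIN as poison remains valid.
int abs_of_nsw_neg(int x) {
  assert(x != INT_MIN && "nsw on the negation rules this value out");
  return std::abs(-x); // the fold rewrites this to std::abs(x)
}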
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 033ef8b..a43a6ee 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -708,10 +708,14 @@ static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
all_equal(Shuf->getShuffleMask()) &&
- Shuf->getType() == Shuf->getOperand(0)->getType()) {
+ ElementCount::isKnownGE(Shuf->getType()->getElementCount(),
+ cast<VectorType>(Shuf->getOperand(0)->getType())
+ ->getElementCount())) {
// trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
// trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
- Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
+ Type *NewTruncTy = Shuf->getOperand(0)->getType()->getWithNewType(
+ Trunc.getType()->getScalarType());
+ Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), NewTruncTy);
return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
}
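For instance (my own example, not from the patch): trunc <8 x i32> %s to <8 x i16>, where %s is a splat shuffle of a <4 x i32> %x, previously did not match because the shuffle widened the element count; with the relaxed isKnownGE check it now becomes a splat shuffle of trunc <4 x i32> %x to <4 x i16>, since the new trunc is created with the source operand's element count.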
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 365a9b3..0be1034 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1502,8 +1502,7 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
// This is a non-terminator unreachable marker. Don't remove it.
if (isa<UndefValue>(Ptr)) {
// Remove guaranteed-to-transfer instructions before the marker.
- if (removeInstructionsBeforeUnreachable(SI))
- return &SI;
+ removeInstructionsBeforeUnreachable(SI);
// Remove all instructions after the marker and handle dead blocks this
// implies.
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 5849c3e..4e5a8d1 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -363,10 +363,10 @@ private:
void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
- bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
+ void instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
const DominatorTree &DT, const PostDominatorTree &PDT,
const LoopInfo &LI);
- bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
+ void instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
Value *getNextTagWithCall(IRBuilder<> &IRB);
Value *getStackBaseTag(IRBuilder<> &IRB);
Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo);
@@ -1418,7 +1418,7 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
}
}
-bool HWAddressSanitizer::instrumentLandingPads(
+void HWAddressSanitizer::instrumentLandingPads(
SmallVectorImpl<Instruction *> &LandingPadVec) {
for (auto *LP : LandingPadVec) {
IRBuilder<> IRB(LP->getNextNode());
@@ -1427,10 +1427,9 @@ bool HWAddressSanitizer::instrumentLandingPads(
{memtag::readRegister(
IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp" : "sp")});
}
- return true;
}
-bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
+void HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
Value *StackTag, Value *UARTag,
const DominatorTree &DT,
const PostDominatorTree &PDT,
@@ -1460,8 +1459,6 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
size_t Size = memtag::getAllocaSizeInBytes(*AI);
size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
- Value *AICast = IRB.CreatePointerCast(AI, PtrTy);
-
auto HandleLifetime = [&](IntrinsicInst *II) {
// Set the lifetime intrinsic to cover the whole alloca. This reduces the
// set of assumptions we need to make about the lifetime. Without this we
@@ -1474,14 +1471,13 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
// one set of start / end in any execution (i.e. the ends are not
// reachable from each other), so this will not cause any problems.
II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
- II->setArgOperand(1, AICast);
};
llvm::for_each(Info.LifetimeStart, HandleLifetime);
llvm::for_each(Info.LifetimeEnd, HandleLifetime);
- AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
+ AI->replaceUsesWithIf(Replacement, [AILong](const Use &U) {
auto *User = U.getUser();
- return User != AILong && User != AICast && !isa<LifetimeIntrinsic>(User);
+ return User != AILong && !isa<LifetimeIntrinsic>(User);
});
memtag::annotateDebugRecords(Info, retagMask(N));
@@ -1524,7 +1520,6 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
}
memtag::alignAndPadAlloca(Info, Mapping.getObjectAlignment());
}
- return true;
}
static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE,
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 854db0f..f451c2b 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -80,6 +80,27 @@ static cl::opt<unsigned>
ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
cl::desc("Skip Callsite up to this number for this compilation"));
+// ICP the candidate function even when only a declaration is present.
+static cl::opt<bool> ICPAllowDecls(
+ "icp-allow-decls", cl::init(false), cl::Hidden,
+ cl::desc("Promote the target candidate even when the defintion "
+ " is not available"));
+
+// ICP hot candidate functions only. When set to false, non-cold functions
+// (warm functions) can also be promoted.
+static cl::opt<bool>
+ ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden,
+ cl::desc("Promote the target candidate only if it is a "
+ "hot function. Otherwise, warm functions can "
+ "also be promoted"));
+
+// If one target cannot be ICP'd, proceed with the remaining targets instead
+// of exiting the callsite.
+static cl::opt<bool> ICPAllowCandidateSkip(
+ "icp-allow-candidate-skip", cl::init(false), cl::Hidden,
+ cl::desc("Continue with the remaining targets instead of exiting "
+ "when failing in a candidate"));
+
// Set if the pass is called in LTO optimization. The difference for LTO mode
// is the pass won't prefix the source module name to the internal linkage
// symbols.
@@ -330,6 +351,7 @@ private:
struct PromotionCandidate {
Function *const TargetFunction;
const uint64_t Count;
+ const uint32_t Index;
// The following fields only exists for promotion candidates with vtable
// information.
@@ -341,7 +363,8 @@ private:
VTableGUIDCountsMap VTableGUIDAndCounts;
SmallVector<Constant *> AddressPoints;
- PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
+ PromotionCandidate(Function *F, uint64_t C, uint32_t I)
+ : TargetFunction(F), Count(C), Index(I) {}
};
// Check if the indirect-call call site should be promoted. Return the number
@@ -356,12 +379,10 @@ private:
// Promote a list of targets for one indirect-call callsite by comparing
// indirect callee with functions. Return true if there are IR
// transformations and false otherwise.
- bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr,
- ArrayRef<PromotionCandidate> Candidates,
- uint64_t TotalCount,
- ArrayRef<InstrProfValueData> ICallProfDataRef,
- uint32_t NumCandidates,
- VTableGUIDCountsMap &VTableGUIDCounts);
+ bool tryToPromoteWithFuncCmp(
+ CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
+ uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+ uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
// Promote a list of targets for one indirect call by comparing vtables with
// functions. Return true if there are IR transformations and false
@@ -394,12 +415,15 @@ private:
Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
uint64_t AddressPointOffset);
- void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs,
+ void updateFuncValueProfiles(CallBase &CB,
+ MutableArrayRef<InstrProfValueData> VDs,
uint64_t Sum, uint32_t MaxMDCount);
void updateVPtrValueProfiles(Instruction *VPtr,
VTableGUIDCountsMap &VTableGUIDCounts);
+ bool isValidTarget(uint64_t, Function *, const CallBase &, uint64_t);
+
public:
IndirectCallPromoter(
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
@@ -419,6 +443,53 @@ public:
} // end anonymous namespace
+bool IndirectCallPromoter::isValidTarget(uint64_t Target,
+ Function *TargetFunction,
+ const CallBase &CB, uint64_t Count) {
+ // Don't promote if the symbol is not defined in the module. This avoids
+ // creating a reference to a symbol that doesn't exist in the module.
+ // This can happen when we compile with a sample profile collected from
+ // one binary but used for another, which may have profiled targets that
+ // aren't used in the new binary. We might have a declaration initially in
+ // the case where the symbol is globally dead in the binary and removed by
+ // ThinLTO.
+ using namespace ore;
+ if (TargetFunction == nullptr) {
+ LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
+ << "Cannot promote indirect call: target with md5sum "
+ << NV("target md5sum", Target)
+ << " not found (count=" << NV("Count", Count) << ")";
+ });
+ return false;
+ }
+ if (!ICPAllowDecls && TargetFunction->isDeclaration()) {
+ LLVM_DEBUG(dbgs() << " Not promote: target definition is not available\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
+ << "Do not promote indirect call: target with md5sum "
+ << NV("target md5sum", Target)
+ << " definition not available (count=" << ore::NV("Count", Count)
+ << ")";
+ });
+ return false;
+ }
+
+ const char *Reason = nullptr;
+ if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
+
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
+ << "Cannot promote indirect call to "
+ << NV("TargetFunction", TargetFunction)
+ << " (count=" << NV("Count", Count) << "): " << Reason;
+ });
+ return false;
+ }
+ return true;
+}
+
// Indirect-call promotion heuristic. The direct targets are sorted based on
// the count. Stop at the first target that is not promoted.
std::vector<IndirectCallPromoter::PromotionCandidate>
@@ -469,38 +540,15 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
break;
}
- // Don't promote if the symbol is not defined in the module. This avoids
- // creating a reference to a symbol that doesn't exist in the module
- // This can happen when we compile with a sample profile collected from
- // one binary but used for another, which may have profiled targets that
- // aren't used in the new binary. We might have a declaration initially in
- // the case where the symbol is globally dead in the binary and removed by
- // ThinLTO.
Function *TargetFunction = Symtab->getFunction(Target);
- if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
- LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
- << "Cannot promote indirect call: target with md5sum "
- << ore::NV("target md5sum", Target) << " not found";
- });
- break;
- }
-
- const char *Reason = nullptr;
- if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
- using namespace ore;
-
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
- << "Cannot promote indirect call to "
- << NV("TargetFunction", TargetFunction) << " with count of "
- << NV("Count", Count) << ": " << Reason;
- });
- break;
+ if (!isValidTarget(Target, TargetFunction, CB, Count)) {
+ if (ICPAllowCandidateSkip)
+ continue;
+ else
+ break;
}
- Ret.push_back(PromotionCandidate(TargetFunction, Count));
+ Ret.push_back(PromotionCandidate(TargetFunction, Count, I));
TotalCount -= Count;
}
return Ret;
@@ -642,7 +690,7 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
// Promote indirect-call to conditional direct-call for one callsite.
bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
- uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
+ uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
uint32_t NumPromoted = 0;
@@ -655,6 +703,8 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
NumOfPGOICallPromotion++;
NumPromoted++;
+ // Zero out the count; the entry will be dropped when the profile is re-annotated.
+ ICallProfDataRef[C.Index].Count = 0;
if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
continue;
@@ -679,21 +729,33 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
"Number of promoted functions should not be greater than the number "
"of values in profile metadata");
- // Update value profiles on the indirect call.
- updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
- NumCandidates);
+ updateFuncValueProfiles(CB, ICallProfDataRef, TotalCount, NumCandidates);
updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
return true;
}
void IndirectCallPromoter::updateFuncValueProfiles(
- CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
- uint32_t MaxMDCount) {
+ CallBase &CB, MutableArrayRef<InstrProfValueData> CallVDs,
+ uint64_t TotalCount, uint32_t MaxMDCount) {
// First clear the existing !prof.
CB.setMetadata(LLVMContext::MD_prof, nullptr);
+
+ // Sort value profiles by count in descending order.
+ llvm::stable_sort(CallVDs, [](const InstrProfValueData &LHS,
+ const InstrProfValueData &RHS) {
+ return LHS.Count > RHS.Count;
+ });
+ // Drop the <target-value, count> pair if count is zero.
+ ArrayRef<InstrProfValueData> VDs(
+ CallVDs.begin(),
+ llvm::upper_bound(CallVDs, 0U,
+ [](uint64_t Count, const InstrProfValueData &ProfData) {
+ return ProfData.Count <= Count;
+ }));
+
// Annotate the remaining value profiles if counter is not zero.
if (TotalCount != 0)
- annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
+ annotateValueSite(M, CB, VDs, TotalCount, IPVK_IndirectCallTarget,
MaxMDCount);
}
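A standalone sketch (plain C++ with a stand-in value-data type, mirroring the stable_sort/upper_bound logic above) of how the zeroed-out promoted entries end up dropped: sort descending by count, then cut the range at the first zero count.

#include <algorithm>
#include <cstdint>
#include <vector>

struct ValueData { uint64_t Value; uint64_t Count; }; // stand-in for InstrProfValueData

// Promoted targets had their Count zeroed earlier; they sort to the back and
// are excluded from the range that gets re-annotated.
std::vector<ValueData> sortAndDropZeroCounts(std::vector<ValueData> VDs) {
  std::stable_sort(VDs.begin(), VDs.end(),
                   [](const ValueData &L, const ValueData &R) {
                     return L.Count > R.Count;
                   });
  auto FirstZero = std::upper_bound(
      VDs.begin(), VDs.end(), uint64_t(0),
      [](uint64_t C, const ValueData &D) { return D.Count <= C; });
  VDs.erase(FirstZero, VDs.end());
  return VDs;
}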
@@ -726,7 +788,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
uint64_t TotalFuncCount, uint32_t NumCandidates,
MutableArrayRef<InstrProfValueData> ICallProfDataRef,
VTableGUIDCountsMap &VTableGUIDCounts) {
- SmallVector<uint64_t, 4> PromotedFuncCount;
+ SmallVector<std::pair<uint32_t, uint64_t>, 4> PromotedFuncCount;
for (const auto &Candidate : Candidates) {
for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
@@ -771,7 +833,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
return Remark;
});
- PromotedFuncCount.push_back(Candidate.Count);
+ PromotedFuncCount.push_back({Candidate.Index, Candidate.Count});
assert(TotalFuncCount >= Candidate.Count &&
"Within one prof metadata, total count is the sum of counts from "
@@ -792,22 +854,12 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
// used to load multiple virtual functions. The vtable profiles needs to be
// updated properly in that case (e.g, for each indirect call annotate both
// type profiles and function profiles in one !prof).
- for (size_t I = 0; I < PromotedFuncCount.size(); I++)
- ICallProfDataRef[I].Count -=
- std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count);
- // Sort value profiles by count in descending order.
- llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS,
- const InstrProfValueData &RHS) {
- return LHS.Count > RHS.Count;
- });
- // Drop the <target-value, count> pair if count is zero.
- ArrayRef<InstrProfValueData> VDs(
- ICallProfDataRef.begin(),
- llvm::upper_bound(ICallProfDataRef, 0U,
- [](uint64_t Count, const InstrProfValueData &ProfData) {
- return ProfData.Count <= Count;
- }));
- updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates);
+ for (size_t I = 0; I < PromotedFuncCount.size(); I++) {
+ uint32_t Index = PromotedFuncCount[I].first;
+ ICallProfDataRef[Index].Count -=
+ std::max(PromotedFuncCount[I].second, ICallProfDataRef[Index].Count);
+ }
+ updateFuncValueProfiles(CB, ICallProfDataRef, TotalFuncCount, NumCandidates);
updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
return true;
}
@@ -822,9 +874,22 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
uint64_t TotalCount;
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
CB, TotalCount, NumCandidates);
- if (!NumCandidates ||
- (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
+ if (!NumCandidates)
continue;
+ if (PSI && PSI->hasProfileSummary()) {
+ // Don't promote cold candidates.
+ if (PSI->isColdCount(TotalCount)) {
+ LLVM_DEBUG(dbgs() << "Don't promote the cold candidate: TotalCount="
+ << TotalCount << "\n");
+ continue;
+ }
+ // Only promote hot call sites if ICPAllowHotOnly is true.
+ if (ICPAllowHotOnly && !PSI->isHotCount(TotalCount)) {
+ LLVM_DEBUG(dbgs() << "Don't promote the non-hot candidate: TotalCount="
+ << TotalCount << "\n");
+ continue;
+ }
+ }
auto PromotionCandidates = getPromotionCandidatesForCallSite(
*CB, ICallProfDataRef, TotalCount, NumCandidates);
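Taken together (my reading of the new gating above): the defaults keep the previous behavior of promoting only hot call sites; passing -icp-allow-hot-only=false additionally allows warm call sites (not cold, but not necessarily hot), while call sites whose total count is cold are always skipped.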
diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index f6780c0..ce1d9f1 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -456,7 +456,7 @@ static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI,
if (DisableMemOPOPT)
return false;
- if (F.hasFnAttribute(Attribute::OptimizeForSize))
+ if (F.hasOptSize())
return false;
MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI);
MemOPSizeOpt.perform();
diff --git a/llvm/lib/Transforms/ObjCARC/CMakeLists.txt b/llvm/lib/Transforms/ObjCARC/CMakeLists.txt
index 80867db..4274667 100644
--- a/llvm/lib/Transforms/ObjCARC/CMakeLists.txt
+++ b/llvm/lib/Transforms/ObjCARC/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_component_library(LLVMObjCARCOpts
ObjCARC.cpp
ObjCARCOpts.cpp
ObjCARCExpand.cpp
- ObjCARCAPElim.cpp
ObjCARCContract.cpp
DependencyAnalysis.cpp
ProvenanceAnalysis.cpp
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
deleted file mode 100644
index dceb2eb..0000000
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-//===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines ObjC ARC optimizations. ARC stands for Automatic
-/// Reference Counting and is a system for managing reference counts for objects
-/// in Objective C.
-///
-/// This specific file implements optimizations which remove extraneous
-/// autorelease pools.
-///
-/// WARNING: This file knows about certain library functions. It recognizes them
-/// by name, and hardwires knowledge of their semantics.
-///
-/// WARNING: This file knows about how certain Objective-C library functions are
-/// used. Naive LLVM IR transformations which would otherwise be
-/// behavior-preserving may break these assumptions.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
-#include "llvm/Analysis/ObjCARCInstKind.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/ObjCARC.h"
-
-using namespace llvm;
-using namespace llvm::objcarc;
-
-#define DEBUG_TYPE "objc-arc-ap-elim"
-
-namespace {
-
-/// Interprocedurally determine if calls made by the given call site can
-/// possibly produce autoreleases.
-bool MayAutorelease(const CallBase &CB, unsigned Depth = 0) {
- if (const Function *Callee = CB.getCalledFunction()) {
- if (!Callee->hasExactDefinition())
- return true;
- for (const BasicBlock &BB : *Callee) {
- for (const Instruction &I : BB)
- if (const CallBase *JCB = dyn_cast<CallBase>(&I))
- // This recursion depth limit is arbitrary. It's just great
- // enough to cover known interesting testcases.
- if (Depth < 3 && !JCB->onlyReadsMemory() &&
- MayAutorelease(*JCB, Depth + 1))
- return true;
- }
- return false;
- }
-
- return true;
-}
-
-bool OptimizeBB(BasicBlock *BB) {
- bool Changed = false;
-
- Instruction *Push = nullptr;
- for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
- switch (GetBasicARCInstKind(&Inst)) {
- case ARCInstKind::AutoreleasepoolPush:
- Push = &Inst;
- break;
- case ARCInstKind::AutoreleasepoolPop:
- // If this pop matches a push and nothing in between can autorelease,
- // zap the pair.
- if (Push && cast<CallInst>(&Inst)->getArgOperand(0) == Push) {
- Changed = true;
- LLVM_DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop "
- "autorelease pair:\n"
- " Pop: "
- << Inst << "\n"
- << " Push: " << *Push
- << "\n");
- Inst.eraseFromParent();
- Push->eraseFromParent();
- }
- Push = nullptr;
- break;
- case ARCInstKind::CallOrUser:
- if (MayAutorelease(cast<CallBase>(Inst)))
- Push = nullptr;
- break;
- default:
- break;
- }
- }
-
- return Changed;
-}
-
-bool runImpl(Module &M) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!ModuleHasARC(M))
- return false;
- // Find the llvm.global_ctors variable, as the first step in
- // identifying the global constructors. In theory, unnecessary autorelease
- // pools could occur anywhere, but in practice it's pretty rare. Global
- // ctors are a place where autorelease pools get inserted automatically,
- // so it's pretty common for them to be unnecessary, and it's pretty
- // profitable to eliminate them.
- GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
- if (!GV)
- return false;
-
- assert(GV->hasDefinitiveInitializer() &&
- "llvm.global_ctors is uncooperative!");
-
- bool Changed = false;
-
- // Dig the constructor functions out of GV's initializer.
- ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
- for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
- OI != OE; ++OI) {
- Value *Op = *OI;
- // llvm.global_ctors is an array of three-field structs where the second
- // members are constructor functions.
- Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
- // If the user used a constructor function with the wrong signature and
- // it got bitcasted or whatever, look the other way.
- if (!F)
- continue;
- // Only look at function definitions.
- if (F->isDeclaration())
- continue;
- // Only look at functions with one basic block.
- if (std::next(F->begin()) != F->end())
- continue;
- // Ok, a single-block constructor function definition. Try to optimize it.
- Changed |= OptimizeBB(&F->front());
- }
-
- return Changed;
-}
-
-} // namespace
-
-PreservedAnalyses ObjCARCAPElimPass::run(Module &M, ModuleAnalysisManager &AM) {
- if (!runImpl(M))
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
-}
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index d6bd92d..b5eb647 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1176,6 +1176,28 @@ private:
return true;
}
+ /// This function fixes PHI nodes after fusion in \p SafeToSink.
+ /// \p SafeToSink instructions are the instructions that are to be moved past
+ /// the fused loop. Thus, the PHI nodes in \p SafeToSink should be updated to
+ /// receive values from the fused loop if they are currently taking values
+ /// from the latch of the first loop (i.e. FC0).
+ void fixPHINodes(ArrayRef<Instruction *> SafeToSink,
+ const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
+ for (Instruction *Inst : SafeToSink) {
+ // No update needed for non-PHI nodes.
+ PHINode *Phi = dyn_cast<PHINode>(Inst);
+ if (!Phi)
+ continue;
+ for (unsigned I = 0; I < Phi->getNumIncomingValues(); I++) {
+ if (Phi->getIncomingBlock(I) != FC0.Latch)
+ continue;
+ assert(FC1.Latch && "FC1 latch is not set");
+ Phi->setIncomingBlock(I, FC1.Latch);
+ }
+ }
+ }
+
/// Collect instructions in the \p FC1 Preheader that can be hoisted
/// to the \p FC0 Preheader or sunk into the \p FC1 Body
bool collectMovablePreheaderInsts(
@@ -1481,6 +1503,9 @@ private:
assert(I->getParent() == FC1.Preheader);
I->moveBefore(*FC1.ExitBlock, FC1.ExitBlock->getFirstInsertionPt());
}
+ // PHI nodes in SinkInsts need to be updated to receive values from the
+ // fused loop.
+ fixPHINodes(SinkInsts, FC0, FC1);
}
/// Determine if two fusion candidates have identical guards
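As an illustration (my own example): a PHI among the sunk instructions such as %p = phi [ %v, %FC0.Latch ], ... must be rewritten to take %v from FC1.Latch, because after fusion the sunk code past the fused loop is reached from FC1's latch rather than FC0's.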
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 70e9eee..08446cc 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -17,8 +17,8 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -70,6 +70,13 @@ namespace {
using LoopVector = SmallVector<Loop *, 8>;
+/// A list of direction vectors. Each entry represents a direction vector
+/// corresponding to one or more dependencies existing in the loop nest. The
+/// length of all direction vectors is equal and is N + 1, where N is the depth
+/// of the loop nest. The first N elements correspond to the dependency
+/// direction of each of the N loops. The last one indicates whether this
+/// entry is a forward dependency ('<') or not ('*'). The term "forward"
+/// aligns with what is defined in LoopAccessAnalysis.
// TODO: Check if we can use a sparse matrix here.
using CharMatrix = std::vector<std::vector<char>>;
@@ -126,11 +133,33 @@ static bool noDuplicateRulesAndIgnore(ArrayRef<RuleTy> Rules) {
static void printDepMatrix(CharMatrix &DepMatrix) {
for (auto &Row : DepMatrix) {
- for (auto D : Row)
+ // Drop the last element because it is a flag indicating whether this is
+ // a forward dependency or not, which doesn't affect the legality check.
+ for (char D : drop_end(Row))
LLVM_DEBUG(dbgs() << D << " ");
LLVM_DEBUG(dbgs() << "\n");
}
}
+
+/// Return true if \p Src appears before \p Dst in the same basic block.
+/// Precondition: \p Src and \p Dst are distinct instructions within the same
+/// basic block.
+static bool inThisOrder(const Instruction *Src, const Instruction *Dst) {
+ assert(Src->getParent() == Dst->getParent() && Src != Dst &&
+ "Expected Src and Dst to be different instructions in the same BB");
+
+ bool FoundSrc = false;
+ for (const Instruction &I : *(Src->getParent())) {
+ if (&I == Src) {
+ FoundSrc = true;
+ continue;
+ }
+ if (&I == Dst)
+ return FoundSrc;
+ }
+
+ llvm_unreachable("Dst not found");
+}
#endif
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
@@ -174,7 +203,10 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
return false;
}
ValueVector::iterator I, IE, J, JE;
- StringSet<> Seen;
+
+ // Manage direction vectors that are already seen. Map each direction vector
+ // to an index of DepMatrix at which it is stored.
+ StringMap<unsigned> Seen;
for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
for (J = I, JE = MemInstr.end(); J != JE; ++J) {
@@ -228,9 +260,49 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
Dep.push_back('I');
}
+ // Test whether the dependency is forward or not.
+ bool IsKnownForward = true;
+ if (Src->getParent() != Dst->getParent()) {
+ // In general, when Src and Dst are in different BBs, their execution
+ // order within a single iteration is not guaranteed. Treat
+ // conservatively as not-forward dependency in this case.
+ IsKnownForward = false;
+ } else {
+ // Src and Dst are in the same BB. If they are different
+ // instructions, Src should appear before Dst in the BB as they are
+ // stored to MemInstr in that order.
+ assert((Src == Dst || inThisOrder(Src, Dst)) &&
+ "Unexpected instructions");
+
+ // If the Dependence object is reversed (due to normalization), it
+ // represents the dependency from Dst to Src, meaning it is a backward
+ // dependency. Otherwise it should be a forward dependency.
+ bool IsReversed = D->getSrc() != Src;
+ if (IsReversed)
+ IsKnownForward = false;
+ }
+
+ // Initialize the last element. Assume forward dependencies only; it
+ // will be updated later if there is any non-forward dependency.
+ Dep.push_back('<');
+
+ // The last element should express the "summary" among one or more
+ // direction vectors whose first N elements are the same (where N is
+ // the depth of the loop nest). Hence we exclude the last element from
+ // the Seen map.
+ auto [Ite, Inserted] = Seen.try_emplace(
+ StringRef(Dep.data(), Dep.size() - 1), DepMatrix.size());
+
// Make sure we only add unique entries to the dependency matrix.
- if (Seen.insert(StringRef(Dep.data(), Dep.size())).second)
+ if (Inserted)
DepMatrix.push_back(Dep);
+
+ // If we cannot prove that this dependency is forward, change the last
+ // element of the corresponding entry. Since a `[... *]` dependency
+ // includes a `[... <]` dependency, we do not need to keep both; we
+ // change the existing entry instead.
+ if (!IsKnownForward)
+ DepMatrix[Ite->second].back() = '*';
}
}
}
@@ -281,11 +353,12 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
continue;
// Check if the direction vector is lexicographically positive (or zero)
- // for both before/after exchanged.
- if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size()) == false)
+ // for both before/after exchanged. Ignore the last element because it
+ // doesn't affect the legality.
+ if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size() - 1) == false)
return false;
std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
- if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size()) == false)
+ if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size() - 1) == false)
return false;
}
return true;
@@ -1334,22 +1407,35 @@ LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {
for (const auto &Dep : DepMatrix) {
char Dir = Dep[LoopId];
- if (Dir != 'I' && Dir != '=')
- return false;
+ char DepType = Dep.back();
+ assert((DepType == '<' || DepType == '*') &&
+ "Unexpected element in dependency vector");
+
+ // No dependency is carried by this loop.
+ if (Dir == '=' || Dir == 'I')
+ continue;
+
+ // DepType being '<' means that this direction vector represents a forward
+ // dependency. In principle, a loop with '<' direction can be vectorized in
+ // this case.
+ if (Dir == '<' && DepType == '<')
+ continue;
+
+ // We cannot prove that the loop is vectorizable.
+ return false;
}
return true;
}
std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
- // If the outer loop is not loop independent it is not profitable to move
- // this to inner position, since doing so would not enable inner loop
- // parallelism.
+ // If the outer loop cannot be vectorized, it is not profitable to move this
+ // to inner position.
if (!canVectorize(DepMatrix, OuterLoopId))
return false;
- // If inner loop has dependence and outer loop is loop independent then it is
- // profitable to interchange to enable inner loop parallelism.
+ // If the inner loop cannot be vectorized but the outer loop can be, then it
+ // is profitable to interchange to enable inner loop parallelism.
if (!canVectorize(DepMatrix, InnerLoopId))
return true;
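To make the new last-element handling concrete, here is a standalone mirror (plain C++, example rows mine) of the canVectorize check above:

#include <cassert>
#include <vector>

using CharMatrix = std::vector<std::vector<char>>;

static bool canVectorizeLoop(const CharMatrix &DepMatrix, unsigned LoopId) {
  for (const auto &Dep : DepMatrix) {
    char Dir = Dep[LoopId];
    char DepType = Dep.back();
    if (Dir == '=' || Dir == 'I')
      continue; // no dependency carried by this loop
    if (Dir == '<' && DepType == '<')
      continue; // loop-carried, but a known forward dependency
    return false; // cannot prove the loop is vectorizable
  }
  return true;
}

int main() {
  CharMatrix Forward = {{'<', '=', '<'}}; // outer-carried, forward
  CharMatrix Unknown = {{'<', '=', '*'}}; // outer-carried, not proven forward
  assert(canVectorizeLoop(Forward, 0) && canVectorizeLoop(Forward, 1));
  assert(!canVectorizeLoop(Unknown, 0) && canVectorizeLoop(Unknown, 1));
  return 0;
}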
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 9e318b0..e3ef9d8 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3785,7 +3785,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
// Ignore icmp instructions which are already being analyzed.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !U.getOperandNo();
- Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
+ Value *OtherOp = ICI->getOperand(OtherIdx);
if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
continue;
}
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 84d1c0b..9220abb 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1593,11 +1593,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
// since both llvm.lifetime.start and llvm.lifetime.end intrinsics
// practically fill all the bytes of the alloca with an undefined
// value, although conceptually marked as alive/dead.
- int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
- if (Size < 0 || Size == DestSize) {
- LifetimeMarkers.push_back(UI);
- continue;
- }
+ LifetimeMarkers.push_back(UI);
+ continue;
}
AAMetadataInstrs.insert(UI);
@@ -1614,9 +1611,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- // Check that dest has no Mod/Ref, from the alloca to the Store, except full
- // size lifetime intrinsics. And collect modref inst for the reachability
- // check.
+ // Check that dest has no Mod/Ref from the alloca to the Store, and collect
+ // the Mod/Ref instructions for the reachability check.
ModRefInfo DestModRef = ModRefInfo::NoModRef;
MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
SmallVector<BasicBlock *, 8> ReachabilityWorklist;
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index ced61cb..aae5d60 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -458,8 +458,10 @@ bool ScalarizerVisitor::visit(Function &F) {
Instruction *I = &*II;
bool Done = InstVisitor::visit(I);
++II;
- if (Done && I->getType()->isVoidTy())
+ if (Done && I->getType()->isVoidTy()) {
I->eraseFromParent();
+ Scalarized = true;
+ }
}
}
return finish();
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 9b40fc0..f6959ca2 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -2144,9 +2144,23 @@ void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName,
bool CurrentLoopValid, bool PartiallyInvariant,
bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
- // If we did a non-trivial unswitch, we have added new (cloned) loops.
- if (!NewLoops.empty())
+ auto RecordLoopAsUnswitched = [&](Loop *TargetLoop, StringRef Tag,
+ StringRef DisableTag) {
+ auto &Ctx = TargetLoop->getHeader()->getContext();
+ MDNode *DisableMD = MDNode::get(Ctx, MDString::get(Ctx, DisableTag));
+ MDNode *NewLoopID = makePostTransformationMetadata(
+ Ctx, TargetLoop->getLoopID(), {Tag}, {DisableMD});
+ TargetLoop->setLoopID(NewLoopID);
+ };
+
+ // If we performed a non-trivial unswitch, we have added new cloned loops.
+ // Mark such newly-created loops as visited.
+ if (!NewLoops.empty()) {
+ for (Loop *NL : NewLoops)
+ RecordLoopAsUnswitched(NL, "llvm.loop.unswitch.nontrivial",
+ "llvm.loop.unswitch.nontrivial.disable");
U.addSiblingLoops(NewLoops);
+ }
// If the current loop remains valid, we should revisit it to catch any
// other unswitch opportunities. Otherwise, we need to mark it as deleted.
@@ -2154,24 +2168,12 @@ void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName,
if (PartiallyInvariant) {
// Mark the new loop as partially unswitched, to avoid unswitching on
// the same condition again.
- auto &Context = L.getHeader()->getContext();
- MDNode *DisableUnswitchMD = MDNode::get(
- Context,
- MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
- MDNode *NewLoopID = makePostTransformationMetadata(
- Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
- {DisableUnswitchMD});
- L.setLoopID(NewLoopID);
+ RecordLoopAsUnswitched(&L, "llvm.loop.unswitch.partial",
+ "llvm.loop.unswitch.partial.disable");
} else if (InjectedCondition) {
// Do the same for injection of invariant conditions.
- auto &Context = L.getHeader()->getContext();
- MDNode *DisableUnswitchMD = MDNode::get(
- Context,
- MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
- MDNode *NewLoopID = makePostTransformationMetadata(
- Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
- {DisableUnswitchMD});
- L.setLoopID(NewLoopID);
+ RecordLoopAsUnswitched(&L, "llvm.loop.unswitch.injection",
+ "llvm.loop.unswitch.injection.disable");
} else
U.revisitCurrentLoop();
} else
@@ -2809,9 +2811,9 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
}
/// Cost multiplier is a way to limit potentially exponential behavior
-/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
-/// candidates available. Also accounting for the number of "sibling" loops with
-/// the idea to account for previous unswitches that already happened on this
+/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
+/// candidates available. Also consider the number of "sibling" loops with
+/// the idea of accounting for previous unswitches that already happened on this
/// cluster of loops. There was an attempt to keep this formula simple,
/// just enough to limit the worst case behavior. Even if it is not that simple
/// now it is still not an attempt to provide a detailed heuristic size
@@ -3507,8 +3509,9 @@ static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
IVConditionInfo PartialIVInfo;
Instruction *PartialIVCondBranch = nullptr;
- collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
- PartialIVCondBranch, L, LI, AA, MSSAU);
+ if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.nontrivial.disable"))
+ collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
+ PartialIVCondBranch, L, LI, AA, MSSAU);
if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
PartialIVCondBranch, L, DT, LI, AA,
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index a69d649..44e63a0 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -128,6 +129,7 @@ struct PredInfo {
using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
+using Val2BBMap = DenseMap<Value *, BasicBlock *>;
// A traits type that is intended to be used in graph algorithms. The graph
// traits starts at an entry node, and traverses the RegionNodes that are in
@@ -279,7 +281,7 @@ class StructurizeCFG {
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
Value *BoolPoison;
-
+ const TargetTransformInfo *TTI;
Function *Func;
Region *ParentRegion;
@@ -301,8 +303,12 @@ class StructurizeCFG {
PredMap LoopPreds;
BranchVector LoopConds;
+ Val2BBMap HoistedValues;
+
RegionNode *PrevNode;
+ void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
+
void orderNodes();
void analyzeLoops(RegionNode *N);
@@ -332,6 +338,8 @@ class StructurizeCFG {
void simplifyAffectedPhis();
+ void simplifyHoistedPhis();
+
DebugLoc killTerminator(BasicBlock *BB);
void changeExit(RegionNode *Node, BasicBlock *NewExit,
@@ -359,7 +367,7 @@ class StructurizeCFG {
public:
void init(Region *R);
- bool run(Region *R, DominatorTree *DT);
+ bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
bool makeUniformRegion(Region *R, UniformityInfo &UA);
};
@@ -385,8 +393,11 @@ public:
if (SCFG.makeUniformRegion(R, UA))
return false;
}
+ Function *F = R->getEntry()->getParent();
+ const TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return SCFG.run(R, DT);
+ return SCFG.run(R, DT, TTI);
}
StringRef getPassName() const override { return "Structurize control flow"; }
@@ -394,7 +405,9 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
AU.addRequired<UniformityInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
@@ -403,6 +416,34 @@ public:
} // end anonymous namespace
+/// Checks whether an instruction is a zero-cost instruction and that none of
+/// its operands is an instruction defined in \p BB. If so, the instruction
+/// can be coalesced once it is hoisted to the predecessor block, so this
+/// returns true.
+static bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
+ const TargetTransformInfo *TTI) {
+ if (I->getParent() != BB || isa<PHINode>(I))
+ return false;
+
+ // If the instruction is not a zero cost instruction, return false.
+ auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+ InstructionCost::CostType CostVal =
+ Cost.isValid()
+ ? Cost.getValue()
+ : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
+ if (CostVal != 0)
+ return false;
+
+ // Check if any operands are instructions defined in the same block.
+ for (auto &Op : I->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
+ if (OpI->getParent() == BB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
@@ -413,6 +454,39 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
+/// Structurization can introduce unnecessary VGPR copies due to register
+/// coalescing interference. For example, if the Else block has a zero-cost
+/// instruction and the Then block modifies the VGPR value, only one value is
+/// live at a time in merge block before structurization. After structurization,
+/// the coalescer may incorrectly treat the Then value as live in the Else block
+/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
+///
+/// This function examines phi nodes whose incoming values are zero-cost
+/// instructions in the Else block. It identifies such values that can be safely
+/// hoisted and moves them to the nearest common dominator of Then and Else
+/// blocks. A follow-up step (simplifyHoistedPhis, run after setPhiValues)
+/// assigns the hoisted values to the poison phi entries along the if→flow
+/// edge, aiding register coalescing
+/// and minimizing unnecessary live ranges.
+void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
+ BasicBlock *ThenBB) {
+
+ BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
+ BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
+
+ if (!ElseSucc || !CommonDominator)
+ return;
+ Instruction *Term = CommonDominator->getTerminator();
+ for (PHINode &Phi : ElseSucc->phis()) {
+ Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
+ auto *Inst = dyn_cast<Instruction>(ElseVal);
+ if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI))
+ continue;
+ Inst->removeFromParent();
+ Inst->insertInto(CommonDominator, Term->getIterator());
+ HoistedValues[Inst] = CommonDominator;
+ }
+}
+
/// Build up the general order of nodes, by performing a topological sort of the
/// parent region's nodes, while ensuring that there is no outer cycle node
/// between any two inner cycle nodes.
@@ -535,7 +609,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
BasicBlock *Other = Term->getSuccessor(!i);
if (Visited.count(Other) && !Loops.count(Other) &&
!Pred.count(Other) && !Pred.count(P)) {
-
+ hoistZeroCostElseBlockPhiValues(Succ, Other);
Pred[Other] = {BoolFalse, std::nullopt};
Pred[P] = {BoolTrue, std::nullopt};
continue;
@@ -891,6 +965,44 @@ void StructurizeCFG::setPhiValues() {
AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
}
+/// Updates PHI nodes after zero-cost instructions have been hoisted, by
+/// replacing poison entries on Flow nodes with the appropriate hoisted values.
+void StructurizeCFG::simplifyHoistedPhis() {
+ for (WeakVH VH : AffectedPhis) {
+ PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
+ if (!Phi || Phi->getNumIncomingValues() != 2)
+ continue;
+
+ for (int i = 0; i < 2; i++) {
+ Value *V = Phi->getIncomingValue(i);
+ auto BBIt = HoistedValues.find(V);
+
+ if (BBIt == HoistedValues.end())
+ continue;
+
+ Value *OtherV = Phi->getIncomingValue(!i);
+ PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
+ if (!OtherPhi)
+ continue;
+
+ int PoisonValBBIdx = -1;
+ for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
+ if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
+ continue;
+ PoisonValBBIdx = i;
+ break;
+ }
+ if (PoisonValBBIdx == -1 ||
+ !DT->dominates(BBIt->second,
+ OtherPhi->getIncomingBlock(PoisonValBBIdx)))
+ continue;
+
+ OtherPhi->setIncomingValue(PoisonValBBIdx, V);
+ Phi->setIncomingValue(i, OtherV);
+ }
+ }
+}
+
void StructurizeCFG::simplifyAffectedPhis() {
bool Changed;
do {
@@ -1283,12 +1395,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
}
/// Run the transformation for each region found
-bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
+bool StructurizeCFG::run(Region *R, DominatorTree *DT,
+ const TargetTransformInfo *TTI) {
if (R->isTopLevelRegion())
return false;
this->DT = DT;
-
+ this->TTI = TTI;
Func = R->getEntry()->getParent();
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
@@ -1300,6 +1413,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
insertConditions(false);
insertConditions(true);
setPhiValues();
+ simplifyHoistedPhis();
simplifyConditions();
simplifyAffectedPhis();
rebuildSSA();
@@ -1349,7 +1463,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
bool Changed = false;
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto &RI = AM.getResult<RegionInfoAnalysis>(F);
-
+ TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
UniformityInfo *UI = nullptr;
if (SkipUniformRegions)
UI = &AM.getResult<UniformityInfoAnalysis>(F);
@@ -1368,7 +1482,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
continue;
}
- Changed |= SCFG.run(R, DT);
+ Changed |= SCFG.run(R, DT, TTI);
}
if (!Changed)
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 9fe655e..fca09c6 100644
--- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -498,7 +498,7 @@ bool LibCallsShrinkWrap::perform(CallInst *CI) {
static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
DominatorTree *DT) {
- if (F.hasFnAttribute(Attribute::OptimizeForSize))
+ if (F.hasOptSize())
return false;
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
LibCallsShrinkWrap CCDCE(TLI, DTU);
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index ed08c0b..571fa11 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
@@ -42,6 +43,7 @@ cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
"controls the budget that is considered cheap (default = 4)"));
using namespace PatternMatch;
+using namespace SCEVPatternMatch;
PoisonFlags::PoisonFlags(const Instruction *I) {
NUW = false;
@@ -1224,6 +1226,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
}
Value *SCEVExpander::tryToReuseLCSSAPhi(const SCEVAddRecExpr *S) {
+ Type *STy = S->getType();
const Loop *L = S->getLoop();
BasicBlock *EB = L->getExitBlock();
if (!EB || !EB->getSinglePredecessor() ||
@@ -1231,11 +1234,36 @@ Value *SCEVExpander::tryToReuseLCSSAPhi(const SCEVAddRecExpr *S) {
return nullptr;
for (auto &PN : EB->phis()) {
- if (!SE.isSCEVable(PN.getType()) || PN.getType() != S->getType())
+ if (!SE.isSCEVable(PN.getType()))
continue;
- auto *ExitV = SE.getSCEV(&PN);
- if (S == ExitV)
- return &PN;
+ auto *ExitSCEV = SE.getSCEV(&PN);
+ if (!isa<SCEVAddRecExpr>(ExitSCEV))
+ continue;
+ Type *PhiTy = PN.getType();
+ if (STy->isIntegerTy() && PhiTy->isPointerTy())
+ ExitSCEV = SE.getPtrToIntExpr(ExitSCEV, STy);
+ else if (S->getType() != PN.getType())
+ continue;
+
+ // Check if we can re-use the existing PN, by adjusting it with an expanded
+ // offset, if the offset is simpler.
+ const SCEV *Diff = SE.getMinusSCEV(S, ExitSCEV);
+ const SCEV *Op = Diff;
+ match(Diff, m_scev_Mul(m_scev_AllOnes(), m_SCEV(Op)));
+ match(Op, m_scev_PtrToInt(m_SCEV(Op)));
+ if (!isa<SCEVConstant, SCEVUnknown>(Op))
+ continue;
+
+ assert(Diff->getType()->isIntegerTy() &&
+ "difference must be of integer type");
+ Value *DiffV = expand(Diff);
+ Value *BaseV = fixupLCSSAFormFor(&PN);
+ if (PhiTy->isPointerTy()) {
+ if (STy->isPointerTy())
+ return Builder.CreatePtrAdd(BaseV, DiffV);
+ BaseV = Builder.CreatePtrToInt(BaseV, DiffV->getType());
+ }
+ return Builder.CreateAdd(BaseV, DiffV);
}
return nullptr;
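A worked example of the reuse above (my notation, not from the patch): if S = {a,+,s} and the LCSSA phi PN corresponds to ExitSCEV = {b,+,s} over the same loop, then Diff = S - ExitSCEV = a - b is loop-invariant, so the exit value of S can be emitted as PN + (a - b) instead of being expanded from scratch; the m_scev_Mul/m_scev_PtrToInt matching merely strips a possible negation or ptrtoint wrapper so that plain constant or SCEVUnknown offsets qualify.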
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 99a96a8..7b7efb8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1363,11 +1363,15 @@ public:
TTI.hasActiveVectorLength() && !EnableVPlanNativePath;
if (EVLIsLegal)
return;
- // If for some reason EVL mode is unsupported, fallback to
- // DataWithoutLaneMask to try to vectorize the loop with folded tail
- // in a generic way.
- ChosenTailFoldingStyle = {TailFoldingStyle::DataWithoutLaneMask,
- TailFoldingStyle::DataWithoutLaneMask};
+ // If for some reason EVL mode is unsupported, fall back to a scalar epilogue
+ // if it's allowed, or DataWithoutLaneMask otherwise.
+ if (ScalarEpilogueStatus == CM_ScalarEpilogueAllowed ||
+ ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate)
+ ChosenTailFoldingStyle = {TailFoldingStyle::None, TailFoldingStyle::None};
+ else
+ ChosenTailFoldingStyle = {TailFoldingStyle::DataWithoutLaneMask,
+ TailFoldingStyle::DataWithoutLaneMask};
+
LLVM_DEBUG(
dbgs() << "LV: Preference for VP intrinsics indicated. Will "
"not try to generate VP Intrinsics "
@@ -2021,6 +2025,9 @@ public:
/// Retrieves the MemCheckCond and MemCheckBlock that were generated as IR
/// outside VPlan.
std::pair<Value *, BasicBlock *> getMemRuntimeChecks() {
+ using namespace llvm::PatternMatch;
+ if (MemRuntimeCheckCond && match(MemRuntimeCheckCond, m_ZeroInt()))
+ return {nullptr, nullptr};
return {MemRuntimeCheckCond, MemCheckBlock};
}
@@ -4497,19 +4504,17 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
Type *TCType = Legal->getWidestInductionType();
const SCEV *RemainingIterations = nullptr;
unsigned MaxTripCount = 0;
- if (MainLoopVF.isFixed()) {
- // TODO: extend to support scalable VFs.
- const SCEV *TC = vputils::getSCEVExprForVPValue(
- getPlanFor(MainLoopVF).getTripCount(), SE);
- assert(!isa<SCEVCouldNotCompute>(TC) &&
- "Trip count SCEV must be computable");
- RemainingIterations = SE.getURemExpr(
- TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
-
- // No iterations left to process in the epilogue.
- if (RemainingIterations->isZero())
- return Result;
+ const SCEV *TC =
+ vputils::getSCEVExprForVPValue(getPlanFor(MainLoopVF).getTripCount(), SE);
+ assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable");
+ RemainingIterations =
+ SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
+
+ // No iterations left to process in the epilogue.
+ if (RemainingIterations->isZero())
+ return Result;
+ if (MainLoopVF.isFixed()) {
MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
SE.getConstant(TCType, MaxTripCount))) {
@@ -7276,6 +7281,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPBasicBlock *VectorPH = cast<VPBasicBlock>(BestVPlan.getVectorPreheader());
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
+ VPlanTransforms::removeBranchOnConst(BestVPlan);
VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -10072,12 +10078,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Get user vectorization factor and interleave count.
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
- if (LVL.hasUncountableEarlyExit() && UserIC != 1) {
- UserIC = 1;
- reportVectorizationInfo("Interleaving not supported for loops "
- "with uncountable early exits",
- "InterleaveEarlyExitDisabled", ORE, L);
- }
// Plan how to best vectorize.
LVP.plan(UserVF, UserIC);
@@ -10095,9 +10095,20 @@ bool LoopVectorizePass::processLoop(Loop *L) {
unsigned SelectedIC = std::max(IC, UserIC);
// Optimistically generate runtime checks if they are needed. Drop them if
// they turn out to not be profitable.
- if (VF.Width.isVector() || SelectedIC > 1)
+ if (VF.Width.isVector() || SelectedIC > 1) {
Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
+ // Bail out early if either the SCEV or memory runtime checks are known to
+ // fail. In that case, the vector loop would never execute.
+ using namespace llvm::PatternMatch;
+ if (Checks.getSCEVChecks().first &&
+ match(Checks.getSCEVChecks().first, m_One()))
+ return false;
+ if (Checks.getMemRuntimeChecks().first &&
+ match(Checks.getMemRuntimeChecks().first, m_One()))
+ return false;
+ }
+
// Check if it is profitable to vectorize with runtime checks.
bool ForceVectorization =
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
@@ -10228,6 +10239,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1),
ElementCount::getFixed(1), IC, &CM, BFI, PSI, Checks, BestPlan);
+ // TODO: Move to general VPlan pipeline once epilogue loops are also
+ // supported.
+ VPlanTransforms::runPass(VPlanTransforms::materializeVectorTripCount,
+ BestPlan, VF.Width, IC, PSE);
+
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
ORE->emit([&]() {
@@ -10295,6 +10311,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
VF.MinProfitableTripCount, IC, &CM, BFI, PSI,
Checks, BestPlan);
+ // TODO: Move to general VPlan pipeline once epilogue loops are also
+ // supported.
+ VPlanTransforms::runPass(VPlanTransforms::materializeVectorTripCount,
+ BestPlan, VF.Width, IC, PSE);
+
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
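[Editor's sketch, not part of the patch] The LoopVectorize.cpp hunks above bail out when a runtime-check condition is a known constant. A minimal illustration of the pattern they match, using a hypothetical helper name:

  #include "llvm/IR/PatternMatch.h"
  #include "llvm/IR/Value.h"

  // A SCEV or memory runtime-check condition that folds to the constant i1
  // "true" means the check always fails, so the guarded vector loop can never
  // be entered and vectorization is abandoned.
  static bool checkKnownToFail(llvm::Value *CheckCond) {
    using namespace llvm::PatternMatch;
    return CheckCond && match(CheckCond, m_One());
  }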
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0adad5a..593868f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6861,7 +6861,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom,
return std::move(ResOrder);
}
if (TE.State == TreeEntry::StridedVectorize && !TopToBottom &&
- (!TE.UserTreeIndex ||
+ (!TE.UserTreeIndex || !TE.UserTreeIndex.UserTE->hasState() ||
!Instruction::isBinaryOp(TE.UserTreeIndex.UserTE->getOpcode())) &&
(TE.ReorderIndices.empty() || isReverseOrder(TE.ReorderIndices)))
return std::nullopt;
@@ -7175,7 +7175,8 @@ bool BoUpSLP::isProfitableToReorder() const {
// other nodes are phis or geps/binops, combined with phis, and/or single
// gather load node
bool HasPhis = false;
- if (VectorizableTree.front()->getOpcode() == Instruction::PHI &&
+ if (VectorizableTree.front()->hasState() &&
+ VectorizableTree.front()->getOpcode() == Instruction::PHI &&
VectorizableTree.front()->Scalars.size() == TinyVF &&
VectorizableTree.front()->getNumOperands() > PhiOpsLimit)
return false;
@@ -7999,7 +8000,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
}
Instruction *BoUpSLP::getRootEntryInstruction(const TreeEntry &Entry) const {
- if ((Entry.getOpcode() == Instruction::Store ||
+ if (Entry.hasState() &&
+ (Entry.getOpcode() == Instruction::Store ||
Entry.getOpcode() == Instruction::Load) &&
Entry.State == TreeEntry::StridedVectorize &&
!Entry.ReorderIndices.empty() && isReverseOrder(Entry.ReorderIndices))
@@ -10231,6 +10233,15 @@ public:
count_if(VL, [&](Value *V) { return S.isCopyableElement(V); });
if (CopyableNum < VL.size() / 2)
return S;
+ // Too many phi copyables - exit.
+ const unsigned Limit = VL.size() / 24;
+ if ((CopyableNum >= VL.size() - Limit ||
+ (CopyableNum >= VL.size() - 1 && VL.size() > 4) ||
+ CopyableNum >= MaxPHINumOperands) &&
+ all_of(VL, [&](Value *V) {
+ return isa<PHINode>(V) || !S.isCopyableElement(V);
+ }))
+ return InstructionsState::invalid();
// Check profitability if number of copyables > VL.size() / 2.
// 1. Reorder operands for better matching.
if (isCommutative(MainOp)) {
@@ -14483,7 +14494,8 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
// If the tree contains only phis, buildvectors, split nodes and
// small nodes with reuses, we can skip it.
- unsigned SingleStoreLoadNode = 0;
+ SmallVector<const TreeEntry *> StoreLoadNodes;
+ unsigned NumGathers = 0;
constexpr int LimitTreeSize = 36;
if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
all_of(VectorizableTree,
@@ -14491,9 +14503,11 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
if (!TE->isGather() && TE->hasState() &&
(TE->getOpcode() == Instruction::Load ||
TE->getOpcode() == Instruction::Store)) {
- ++SingleStoreLoadNode;
+ StoreLoadNodes.push_back(TE.get());
return true;
}
+ if (TE->isGather())
+ ++NumGathers;
return TE->State == TreeEntry::SplitVectorize ||
(TE->Idx == 0 && TE->Scalars.size() == 2 &&
TE->hasState() && TE->getOpcode() == Instruction::ICmp &&
@@ -14510,8 +14524,15 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
!TE->ReorderIndices.empty() || TE->isAltShuffle()) &&
TE->Scalars.size() == 2)));
}) &&
- (!SingleStoreLoadNode ||
- VectorizableTree.size() > LimitTreeSize * SingleStoreLoadNode))
+ (StoreLoadNodes.empty() ||
+ (VectorizableTree.size() > LimitTreeSize * StoreLoadNodes.size() &&
+ (NumGathers > 0 || none_of(StoreLoadNodes, [&](const TreeEntry *TE) {
+ return TE->getOpcode() == Instruction::Store ||
+ all_of(TE->Scalars, [&](Value *V) {
+ return !isa<LoadInst>(V) ||
+ areAllUsersVectorized(cast<Instruction>(V));
+ });
+ })))))
return true;
// We can vectorize the tree if its size is greater than or equal to the
@@ -15254,6 +15275,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
bool IsProfitablePHIUser =
(KeepScalar || (ScalarCost - ExtraCost <= TTI::TCC_Basic &&
VectorizableTree.front()->Scalars.size() > 2)) &&
+ VectorizableTree.front()->hasState() &&
VectorizableTree.front()->getOpcode() == Instruction::PHI &&
!Inst->hasNUsesOrMore(UsesLimit) &&
none_of(Inst->users(),
@@ -15704,7 +15726,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
const BasicBlock *TEInsertBlock = nullptr;
// Main node of PHI entries keeps the correct order of operands/incoming
// blocks.
- if (auto *PHI = dyn_cast<PHINode>(TEUseEI.UserTE->getMainOp());
+ if (auto *PHI = dyn_cast_or_null<PHINode>(
+ TEUseEI.UserTE->hasState() ? TEUseEI.UserTE->getMainOp() : nullptr);
PHI && TEUseEI.UserTE->State != TreeEntry::SplitVectorize) {
TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx);
TEInsertPt = TEInsertBlock->getTerminator();
@@ -15803,7 +15826,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
"Expected only single user of a gather node.");
const EdgeInfo &UseEI = TEPtr->UserTreeIndex;
- PHINode *UserPHI = UseEI.UserTE->State != TreeEntry::SplitVectorize
+ PHINode *UserPHI = (UseEI.UserTE->State != TreeEntry::SplitVectorize &&
+ UseEI.UserTE->hasState())
? dyn_cast<PHINode>(UseEI.UserTE->getMainOp())
: nullptr;
Instruction *InsertPt =
@@ -15816,7 +15840,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
TEUseEI.UserTE->isAltShuffle()) &&
all_of(TEUseEI.UserTE->Scalars, isUsedOutsideBlock)) {
if (UseEI.UserTE->State != TreeEntry::Vectorize ||
- (UseEI.UserTE->getOpcode() == Instruction::PHI &&
+ (UseEI.UserTE->hasState() &&
+ UseEI.UserTE->getOpcode() == Instruction::PHI &&
!UseEI.UserTE->isAltShuffle()) ||
!all_of(UseEI.UserTE->Scalars, isUsedOutsideBlock))
continue;
@@ -16438,24 +16463,31 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
// Get the basic block this bundle is in. All instructions in the bundle
// should be in this block (except for extractelement-like instructions with
// constant indices or gathered loads or copyables).
- auto *Front = E->getMainOp();
+ Instruction *Front;
+ unsigned Opcode;
+ if (E->hasState()) {
+ Front = E->getMainOp();
+ Opcode = E->getOpcode();
+ } else {
+ Front = cast<Instruction>(*find_if(E->Scalars, IsaPred<Instruction>));
+ Opcode = Front->getOpcode();
+ }
auto *BB = Front->getParent();
- assert(((GatheredLoadsEntriesFirst.has_value() &&
- E->getOpcode() == Instruction::Load && E->isGather() &&
- E->Idx < *GatheredLoadsEntriesFirst) ||
- E->State == TreeEntry::SplitVectorize || E->hasCopyableElements() ||
- all_of(E->Scalars,
- [=](Value *V) -> bool {
- if (E->getOpcode() == Instruction::GetElementPtr &&
- !isa<GetElementPtrInst>(V))
- return true;
- auto *I = dyn_cast<Instruction>(V);
- return !I || !E->getMatchingMainOpOrAltOp(I) ||
- I->getParent() == BB ||
- isVectorLikeInstWithConstOps(I);
- })) &&
- "Expected gathered loads or GEPs or instructions from same basic "
- "block.");
+ assert(
+ ((GatheredLoadsEntriesFirst.has_value() && Opcode == Instruction::Load &&
+ E->isGather() && E->Idx < *GatheredLoadsEntriesFirst) ||
+ E->State == TreeEntry::SplitVectorize || E->hasCopyableElements() ||
+ all_of(E->Scalars,
+ [=](Value *V) -> bool {
+ if (Opcode == Instruction::GetElementPtr &&
+ !isa<GetElementPtrInst>(V))
+ return true;
+ auto *I = dyn_cast<Instruction>(V);
+ return !I || !E->getMatchingMainOpOrAltOp(I) ||
+ I->getParent() == BB || isVectorLikeInstWithConstOps(I);
+ })) &&
+ "Expected gathered loads or GEPs or instructions from same basic "
+ "block.");
auto FindLastInst = [&]() {
Instruction *LastInst = Front;
@@ -16470,13 +16502,13 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
LastInst = I;
continue;
}
- assert(((E->getOpcode() == Instruction::GetElementPtr &&
+ assert(((Opcode == Instruction::GetElementPtr &&
!isa<GetElementPtrInst>(I)) ||
E->State == TreeEntry::SplitVectorize ||
(isVectorLikeInstWithConstOps(LastInst) &&
isVectorLikeInstWithConstOps(I)) ||
(GatheredLoadsEntriesFirst.has_value() &&
- E->getOpcode() == Instruction::Load && E->isGather() &&
+ Opcode == Instruction::Load && E->isGather() &&
E->Idx < *GatheredLoadsEntriesFirst)) &&
"Expected vector-like or non-GEP in GEP node insts only.");
if (!DT->isReachableFromEntry(LastInst->getParent())) {
@@ -16512,11 +16544,11 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
FirstInst = I;
continue;
}
- assert(((E->getOpcode() == Instruction::GetElementPtr &&
- !isa<GetElementPtrInst>(I)) ||
- (isVectorLikeInstWithConstOps(FirstInst) &&
- isVectorLikeInstWithConstOps(I))) &&
- "Expected vector-like or non-GEP in GEP node insts only.");
+ assert(((Opcode == Instruction::GetElementPtr &&
+ !isa<GetElementPtrInst>(I)) ||
+ (isVectorLikeInstWithConstOps(FirstInst) &&
+ isVectorLikeInstWithConstOps(I))) &&
+ "Expected vector-like or non-GEP in GEP node insts only.");
if (!DT->isReachableFromEntry(FirstInst->getParent())) {
FirstInst = I;
continue;
@@ -16554,7 +16586,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
// Set insertpoint for gathered loads to the very first load.
if (GatheredLoadsEntriesFirst.has_value() &&
E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
- E->getOpcode() == Instruction::Load) {
+ Opcode == Instruction::Load) {
Res = FindFirstInst();
EntryToLastInstruction.try_emplace(E, Res);
return *Res;
@@ -16586,7 +16618,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
};
const ScheduleBundle *Bundle = FindScheduleBundle(E);
if (!E->isGather() && !Bundle) {
- if ((E->getOpcode() == Instruction::GetElementPtr &&
+ if ((Opcode == Instruction::GetElementPtr &&
any_of(E->Scalars,
[](Value *V) {
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
@@ -21001,9 +21033,10 @@ void BoUpSLP::computeMinimumValueSizes() {
if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
SelectInst>(U) ||
isa<SIToFPInst, UIToFPInst>(U) ||
- !isa<CastInst, BinaryOperator, FreezeInst, PHINode,
- SelectInst>(UserTE->getMainOp()) ||
- isa<SIToFPInst, UIToFPInst>(UserTE->getMainOp()))
+ (UserTE->hasState() &&
+ (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+ SelectInst>(UserTE->getMainOp()) ||
+ isa<SIToFPInst, UIToFPInst>(UserTE->getMainOp()))))
return true;
unsigned UserTESz = DL->getTypeSizeInBits(
UserTE->Scalars.front()->getType());
@@ -21253,6 +21286,7 @@ void BoUpSLP::computeMinimumValueSizes() {
NodeIdx < VectorizableTree.size() &&
VectorizableTree[NodeIdx]->UserTreeIndex &&
VectorizableTree[NodeIdx]->UserTreeIndex.EdgeIdx == 0 &&
+ VectorizableTree[NodeIdx]->UserTreeIndex.UserTE->hasState() &&
VectorizableTree[NodeIdx]->UserTreeIndex.UserTE->getOpcode() ==
Instruction::Trunc &&
!VectorizableTree[NodeIdx]->UserTreeIndex.UserTE->isAltShuffle();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 40a5565..25b9616 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -962,7 +962,11 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
BackedgeTakenCount->setUnderlyingValue(TCMO);
}
- VectorTripCount.setUnderlyingValue(VectorTripCountV);
+ if (!VectorTripCount.getUnderlyingValue())
+ VectorTripCount.setUnderlyingValue(VectorTripCountV);
+ else
+ assert(VectorTripCount.getUnderlyingValue() == VectorTripCountV &&
+ "VectorTripCount set earlier must much VectorTripCountV");
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model VF * UF computation completely in VPlan.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 99fd97e..a5de593 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -906,10 +906,10 @@ template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
protected:
/// Return the VPValue operand containing the unroll part or null if there is
/// no such operand.
- VPValue *getUnrollPartOperand(VPUser &U) const;
+ VPValue *getUnrollPartOperand(const VPUser &U) const;
/// Return the unroll part.
- unsigned getUnrollPart(VPUser &U) const;
+ unsigned getUnrollPart(const VPUser &U) const;
};
/// Helper to manage IR metadata for recipes. It filters out metadata that
@@ -1012,6 +1012,10 @@ public:
ReductionStartVector,
// Creates a step vector starting from 0 to VF with a step of 1.
StepVector,
+ /// Extracts a single lane (first operand) from a set of vector operands.
+ /// The lane specifies an index into a vector formed by combining all vector
+ /// operands (all operands after the first one).
+ ExtractLane,
};
@@ -1662,6 +1666,8 @@ struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags,
VPSlotTracker &SlotTracker) const override;
#endif
+ unsigned getOpcode() const { return Instruction::Select; }
+
VPValue *getCond() const {
return getOperand(0);
}
@@ -1835,6 +1841,10 @@ public:
getGEPNoWrapFlags(), getDebugLoc());
}
+ /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
+ /// this is only accurate after the VPlan has been unrolled.
+ bool isFirstPart() const { return getUnrollPart(*this) == 0; }
+
/// Return the cost of this VPHeaderPHIRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override {
@@ -2302,14 +2312,15 @@ public:
/// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
/// be omitted (implied by passing an odd number of operands) in which case
/// all other incoming values are merged into it.
- VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
- : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
+ VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands, DebugLoc DL)
+ : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
assert(Operands.size() > 0 && "Expected at least one operand!");
}
VPBlendRecipe *clone() override {
SmallVector<VPValue *> Ops(operands());
- return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
+ return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()), Ops,
+ getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPBlendSC)
@@ -3484,7 +3495,7 @@ public:
/// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
/// this is only accurate after the VPlan has been unrolled.
- bool isPart0() { return getUnrollPart(*this) == 0; }
+ bool isPart0() const { return getUnrollPart(*this) == 0; }
VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
@@ -4054,6 +4065,10 @@ public:
/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }
+ LLVMContext &getContext() const {
+ return getScalarHeader()->getIRBasicBlock()->getContext();
+ }
+
void addVF(ElementCount VF) { VFs.insert(VF); }
void setVF(ElementCount VF) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 3499e65..16072f2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -110,6 +110,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
return SetResultTyFromOp();
+ case VPInstruction::ExtractLane:
+ return inferScalarType(R->getOperand(1));
case VPInstruction::FirstActiveLane:
return Type::getIntNTy(Ctx, 64);
case VPInstruction::ExtractLastElement:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 194874a..6c1f53b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -437,9 +437,12 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
// We are about to replace the branch to exit the region. Remove the original
// BranchOnCond, if there is any.
+ DebugLoc LatchDL = DL;
if (!LatchVPBB->empty() &&
- match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue())))
+ match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue()))) {
+ LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
LatchVPBB->getTerminator()->eraseFromParent();
+ }
VPBuilder Builder(LatchVPBB);
// Add a VPInstruction to increment the scalar canonical IV by VF * UF.
@@ -452,7 +455,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
// Add the BranchOnCount VPInstruction to the latch.
Builder.createNaryOp(VPInstruction::BranchOnCount,
- {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
+ {CanonicalIVIncrement, &Plan.getVectorTripCount()},
+ LatchDL);
}
void VPlanTransforms::prepareForVectorization(
@@ -462,28 +466,27 @@ void VPlanTransforms::prepareForVectorization(
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
- VPBlockBase *HeaderVPB = Plan.getEntry()->getSingleSuccessor();
- canonicalHeaderAndLatch(HeaderVPB, VPDT);
- VPBlockBase *LatchVPB = HeaderVPB->getPredecessors()[1];
+ auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
+ canonicalHeaderAndLatch(HeaderVPBB, VPDT);
+ auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
- // The canonical LatchVPB has the header block as last successor. If it has
+ // The canonical LatchVPBB has the header block as last successor. If it has
// another successor, this successor is an exit block - insert middle block on
// its edge. Otherwise, add middle block as another successor retaining header
// as last.
- if (LatchVPB->getNumSuccessors() == 2) {
- VPBlockBase *LatchExitVPB = LatchVPB->getSuccessors()[0];
- VPBlockUtils::insertOnEdge(LatchVPB, LatchExitVPB, MiddleVPBB);
+ if (LatchVPBB->getNumSuccessors() == 2) {
+ VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
+ VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
} else {
- VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
- LatchVPB->swapSuccessors();
+ VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
+ LatchVPBB->swapSuccessors();
}
- addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
- cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
+ addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
[[maybe_unused]] bool HandledUncountableEarlyExit = false;
// Disconnect all early exits from the loop leaving it with a single exit from
@@ -499,8 +502,7 @@ void VPlanTransforms::prepareForVectorization(
assert(!HandledUncountableEarlyExit &&
"can handle exactly one uncountable early exit");
handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
- cast<VPBasicBlock>(HeaderVPB),
- cast<VPBasicBlock>(LatchVPB), Range);
+ HeaderVPBB, LatchVPBB, Range);
HandledUncountableEarlyExit = true;
} else {
for (VPRecipeBase &R : EB->phis())
@@ -564,15 +566,15 @@ void VPlanTransforms::prepareForVectorization(
// the corresponding compare because they may have ended up with different
// line numbers and we want to avoid awkward line stepping while debugging.
// E.g., if the compare has got a line number inside the loop.
- DebugLoc LatchDL = TheLoop->getLoopLatch()->getTerminator()->getDebugLoc();
+ DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
VPBuilder Builder(MiddleVPBB);
VPValue *Cmp;
if (!RequiresScalarEpilogueCheck)
- Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
- IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ Cmp = Plan.getOrAddLiveIn(
+ ConstantInt::getFalse(IntegerType::getInt1Ty(Plan.getContext())));
else if (TailFolded)
- Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
- IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ Cmp = Plan.getOrAddLiveIn(
+ ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext())));
else
Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
&Plan.getVectorTripCount(), LatchDL, "cmp.n");
@@ -646,7 +648,7 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
.createNaryOp(VPInstruction::BranchOnCond, {CondVPV},
Plan.getCanonicalIV()->getDebugLoc());
if (AddBranchWeights) {
- MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
+ MDBuilder MDB(Plan.getContext());
MDNode *BranchWeights =
MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index f0cab79..3b3bbc3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -184,8 +184,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
VPValue *Cond = SI->getOperand(0);
VPBasicBlock *DefaultDst = cast<VPBasicBlock>(Src->getSuccessors()[0]);
MapVector<VPBasicBlock *, SmallVector<VPValue *>> Dst2Compares;
- for (const auto &[Idx, Succ] :
- enumerate(ArrayRef(Src->getSuccessors()).drop_front())) {
+ for (const auto &[Idx, Succ] : enumerate(drop_begin(Src->getSuccessors()))) {
VPBasicBlock *Dst = cast<VPBasicBlock>(Succ);
assert(!getEdgeMask(Src, Dst) && "Edge masks already created");
// Cases whose destination is the same as default are redundant and can
@@ -206,7 +205,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
// cases with destination == Dst are taken. Join the conditions for each
// case whose destination == Dst using an OR.
VPValue *Mask = Conds[0];
- for (VPValue *V : ArrayRef<VPValue *>(Conds).drop_front())
+ for (VPValue *V : drop_begin(Conds))
Mask = Builder.createOr(Mask, V);
if (SrcMask)
Mask = Builder.createLogicalAnd(SrcMask, Mask);
@@ -252,8 +251,9 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
}
OperandsWithMask.push_back(EdgeMask);
}
- PHINode *IRPhi = cast<PHINode>(PhiR->getUnderlyingValue());
- auto *Blend = new VPBlendRecipe(IRPhi, OperandsWithMask);
+ PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
+ auto *Blend =
+ new VPBlendRecipe(IRPhi, OperandsWithMask, PhiR->getDebugLoc());
Builder.insert(Blend);
PhiR->replaceAllUsesWith(Blend);
PhiR->eraseFromParent();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b2066ce..225658b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -413,20 +413,21 @@ void VPSingleDefRecipe::dump() const { VPDef::dump(); }
template <unsigned PartOpIdx>
VPValue *
-VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(VPUser &U) const {
+VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(const VPUser &U) const {
if (U.getNumOperands() == PartOpIdx + 1)
return U.getOperand(PartOpIdx);
return nullptr;
}
template <unsigned PartOpIdx>
-unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(VPUser &U) const {
+unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(const VPUser &U) const {
if (auto *UnrollPartOp = getUnrollPartOperand(U))
return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
return 0;
}
namespace llvm {
+template class VPUnrollPartAccessor<1>;
template class VPUnrollPartAccessor<2>;
template class VPUnrollPartAccessor<3>;
}
@@ -863,6 +864,31 @@ Value *VPInstruction::generate(VPTransformState &State) {
Res = Builder.CreateOr(Res, State.get(Op));
return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
}
+ case VPInstruction::ExtractLane: {
+ Value *LaneToExtract = State.get(getOperand(0), true);
+ Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
+ Value *Res = nullptr;
+ Value *RuntimeVF = getRuntimeVF(State.Builder, IdxTy, State.VF);
+
+ for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
+ Value *VectorStart =
+ Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
+ Value *VectorIdx = Idx == 1
+ ? LaneToExtract
+ : Builder.CreateSub(LaneToExtract, VectorStart);
+ Value *Ext = State.VF.isScalar()
+ ? State.get(getOperand(Idx))
+ : Builder.CreateExtractElement(
+ State.get(getOperand(Idx)), VectorIdx);
+ if (Res) {
+ Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
+ Res = Builder.CreateSelect(Cmp, Ext, Res);
+ } else {
+ Res = Ext;
+ }
+ }
+ return Res;
+ }
case VPInstruction::FirstActiveLane: {
if (getNumOperands() == 1) {
Value *Mask = State.get(getOperand(0));
@@ -921,7 +947,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
}
switch (getOpcode()) {
- case Instruction::ExtractElement: {
+ case Instruction::ExtractElement:
+ case VPInstruction::ExtractLane: {
// Add on the cost of extracting the element.
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
@@ -983,6 +1010,7 @@ bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractLastElement ||
getOpcode() == VPInstruction::ExtractPenultimateElement ||
getOpcode() == Instruction::ExtractElement ||
+ getOpcode() == VPInstruction::ExtractLane ||
getOpcode() == VPInstruction::FirstActiveLane ||
getOpcode() == VPInstruction::ComputeAnyOfResult ||
getOpcode() == VPInstruction::ComputeFindIVResult ||
@@ -1048,6 +1076,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::BuildVector:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::ExtractLane:
case VPInstruction::ExtractLastElement:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
@@ -1097,6 +1126,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindIVResult:
return Op == getOperand(1);
+ case VPInstruction::ExtractLane:
+ return Op == getOperand(0);
};
llvm_unreachable("switch should return");
}
@@ -1176,6 +1207,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::BuildVector:
O << "buildvector";
break;
+ case VPInstruction::ExtractLane:
+ O << "extract-lane";
+ break;
case VPInstruction::ExtractLastElement:
O << "extract-last-element";
break;
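[Editor's sketch, not part of the patch] The new ExtractLane opcode above selects one scalar out of the per-part vectors produced by unrolling. Its generate() code uses sub/icmp/select chains because VF may only be known at runtime; for a fixed VF, the index arithmetic it implements is just division and remainder, as in this hypothetical illustration:

  struct LanePos { unsigned Part, Elt; };

  // Map a global lane index across UF parts of width VF to the part that
  // holds it and the element index within that part's vector operand.
  static LanePos mapExtractLane(unsigned Lane, unsigned VF) {
    return {Lane / VF, Lane % VF}; // e.g. VF = 4, Lane = 5 -> part 1, element 1
  }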
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 5da43b6..8de05c1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -774,10 +774,10 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
using namespace VPlanPatternMatch;
VPValue *Incoming, *Mask;
- if (!match(Op, m_VPInstruction<Instruction::ExtractElement>(
- m_VPValue(Incoming),
+ if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>(
m_VPInstruction<VPInstruction::FirstActiveLane>(
- m_VPValue(Mask)))))
+ m_VPValue(Mask)),
+ m_VPValue(Incoming))))
return nullptr;
auto *WideIV = getOptimizableIVOf(Incoming);
@@ -997,7 +997,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
// InstSimplifyFolder.
if (TypeSwitch<VPRecipeBase *, bool>(&R)
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
- VPReplicateRecipe>([&](auto *I) {
+ VPReplicateRecipe, VPWidenSelectRecipe>([&](auto *I) {
const DataLayout &DL =
Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
Value *V = tryToFoldLiveIns(*I, I->getOpcode(), I->operands(), DL,
@@ -1094,6 +1094,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
return Def->replaceAllUsesWith(A);
+ if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(0))))
+ return Def->replaceAllUsesWith(R.getOperand(0) == A ? R.getOperand(1)
+ : R.getOperand(0));
+
if (match(Def, m_Not(m_VPValue(A)))) {
if (match(A, m_Not(m_VPValue(A))))
return Def->replaceAllUsesWith(A);
@@ -1172,6 +1176,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (!Plan->isUnrolled())
return;
+ // VPVectorPointer for part 0 can be replaced by their start pointer.
+ if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
+ if (VecPtr->isFirstPart()) {
+ VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
+ return;
+ }
+ }
+
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
// the first lane is demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
@@ -1307,8 +1319,9 @@ static void simplifyBlends(VPlan &Plan) {
OperandsWithMask.push_back(Blend->getMask(I));
}
- auto *NewBlend = new VPBlendRecipe(
- cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
+ auto *NewBlend =
+ new VPBlendRecipe(cast_or_null<PHINode>(Blend->getUnderlyingValue()),
+ OperandsWithMask, Blend->getDebugLoc());
NewBlend->insertBefore(&R);
VPValue *DeadMask = Blend->getMask(StartIndex);
@@ -1361,7 +1374,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
unsigned NewBitWidth =
ComputeBitWidth(TC->getValue(), BestVF.getKnownMinValue() * BestUF);
- LLVMContext &Ctx = Plan.getCanonicalIV()->getScalarType()->getContext();
+ LLVMContext &Ctx = Plan.getContext();
auto *NewIVTy = IntegerType::get(Ctx, NewBitWidth);
bool MadeChange = false;
@@ -1883,9 +1896,7 @@ void VPlanTransforms::truncateToMinimalBitwidths(
}
}
-/// Remove BranchOnCond recipes with true or false conditions together with
-/// removing dead edges to their successors.
-static void removeBranchOnConst(VPlan &Plan) {
+void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
@@ -1908,12 +1919,9 @@ static void removeBranchOnConst(VPlan &Plan) {
"There must be a single edge between VPBB and its successor");
// Values coming from VPBB into phi recipes of RemoveSucc are removed from
// these recipes.
- for (VPRecipeBase &R : RemovedSucc->phis()) {
- auto *Phi = cast<VPPhiAccessors>(&R);
- assert((!isa<VPIRPhi>(&R) || RemovedSucc->getNumPredecessors() == 1) &&
- "VPIRPhis must have a single predecessor");
- Phi->removeIncomingValueFor(VPBB);
- }
+ for (VPRecipeBase &R : RemovedSucc->phis())
+ cast<VPPhiAccessors>(&R)->removeIncomingValueFor(VPBB);
+
// Disconnect blocks and remove the terminator. RemovedSucc will be deleted
// automatically on VPlan destruction if it becomes unreachable.
VPBlockUtils::disconnectBlocks(VPBB, RemovedSucc);
@@ -2515,8 +2523,8 @@ void VPlanTransforms::createInterleaveGroups(
DL.getTypeAllocSize(getLoadStoreType(IRInsertPos)) *
IG->getIndex(IRInsertPos),
/*IsSigned=*/true);
- VPValue *OffsetVPV = Plan.getOrAddLiveIn(
- ConstantInt::get(IRInsertPos->getParent()->getContext(), -Offset));
+ VPValue *OffsetVPV =
+ Plan.getOrAddLiveIn(ConstantInt::get(Plan.getContext(), -Offset));
VPBuilder B(InsertPos);
Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
: B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
@@ -2842,7 +2850,7 @@ void VPlanTransforms::handleUncountableEarlyExit(
VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
"first.active.lane");
IncomingFromEarlyExit = EarlyExitB.createNaryOp(
- Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane},
+ VPInstruction::ExtractLane, {FirstActiveLane, IncomingFromEarlyExit},
nullptr, "early.exit.value");
ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
}
@@ -3093,6 +3101,29 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
}
}
+void VPlanTransforms::materializeVectorTripCount(
+ VPlan &Plan, ElementCount BestVF, unsigned BestUF,
+ PredicatedScalarEvolution &PSE) {
+ assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
+ assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
+
+ VPValue *TC = Plan.getTripCount();
+ // Skip cases for which the trip count may be non-trivial to materialize.
+ if (!Plan.hasScalarTail() ||
+ Plan.getMiddleBlock()->getSingleSuccessor() ==
+ Plan.getScalarPreheader() ||
+ !TC->isLiveIn())
+ return;
+ // Materialize the vector trip count for a constant trip count early if it
+ // can simply be computed as (Original TC / (VF * UF)) * (VF * UF).
+ ScalarEvolution &SE = *PSE.getSE();
+ auto *TCScev = SE.getSCEV(TC->getLiveInIRValue());
+ const SCEV *VFxUF = SE.getElementCount(TCScev->getType(), BestVF * BestUF);
+ auto VecTCScev = SE.getMulExpr(SE.getUDivExpr(TCScev, VFxUF), VFxUF);
+ if (auto *NewC = dyn_cast<SCEVConstant>(VecTCScev))
+ Plan.getVectorTripCount().setUnderlyingValue(NewC->getValue());
+}
+
/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
/// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
/// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding
@@ -3350,7 +3381,7 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
if (VF.isScalable() && VScaleForTuning.has_value())
VectorStep *= *VScaleForTuning;
assert(VectorStep > 0 && "trip count should not be zero");
- MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
+ MDBuilder MDB(Plan.getContext());
MDNode *BranchWeights =
MDB.createBranchWeights({1, VectorStep - 1}, /*IsExpected=*/false);
MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
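[Editor's sketch, not part of the patch] materializeVectorTripCount above folds the vector trip count when the original trip count is a live-in constant. With concrete numbers, and a hypothetical helper standing in for the SCEV computation:

  // Vector trip count = (TC / (VF * UF)) * (VF * UF); the remainder is left to
  // the scalar epilogue.
  static unsigned vectorTripCount(unsigned TC, unsigned VF, unsigned UF) {
    unsigned VFxUF = VF * UF;
    return (TC / VFxUF) * VFxUF; // e.g. TC = 1003, VF = 4, UF = 2 -> 1000
  }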
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index ab189f6..d5af6cd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -224,6 +224,10 @@ struct VPlanTransforms {
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
+ /// Remove BranchOnCond recipes with true or false conditions together with
+ /// removing dead edges to their successors.
+ static void removeBranchOnConst(VPlan &Plan);
+
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
@@ -234,6 +238,12 @@ struct VPlanTransforms {
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);
+ // Materialize the vector trip count for a constant trip count early if it
+ // can simply be computed as (Original TC / (VF * UF)) * (VF * UF).
+ static void materializeVectorTripCount(VPlan &Plan, ElementCount BestVF,
+ unsigned BestUF,
+ PredicatedScalarEvolution &PSE);
+
/// Try to convert a plan with interleave groups with VF elements to a plan
/// with the interleave groups replaced by wide loads and stores processing VF
/// elements, if all transformed interleave groups access the full vector
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index b89cd21..871e37e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -363,6 +363,13 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
continue;
}
VPValue *Op0;
+ if (match(&R, m_VPInstruction<VPInstruction::ExtractLane>(
+ m_VPValue(Op0), m_VPValue(Op1)))) {
+ addUniformForAllParts(cast<VPInstruction>(&R));
+ for (unsigned Part = 1; Part != UF; ++Part)
+ R.addOperand(getValueForPart(Op1, Part));
+ continue;
+ }
if (match(&R, m_VPInstruction<VPInstruction::ExtractLastElement>(
m_VPValue(Op0))) ||
match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 82adc34..6252f4f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3174,6 +3174,55 @@ bool VectorCombine::foldCastFromReductions(Instruction &I) {
return true;
}
+/// Returns true if this ShuffleVectorInst eventually feeds into a
+/// vector reduction intrinsic (e.g., vector_reduce_add) by only following
+/// chains of shuffles and binary operators (in any combination/order).
+/// The search is bounded by a small number of visited instructions (MaxVisited).
+static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI) {
+ constexpr unsigned MaxVisited = 32;
+ SmallPtrSet<Instruction *, 8> Visited;
+ SmallVector<Instruction *, 4> WorkList;
+ bool FoundReduction = false;
+
+ WorkList.push_back(SVI);
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.pop_back_val();
+ for (User *U : I->users()) {
+ auto *UI = cast<Instruction>(U);
+ if (!UI || !Visited.insert(UI).second)
+ continue;
+ if (Visited.size() > MaxVisited)
+ return false;
+ if (auto *II = dyn_cast<IntrinsicInst>(UI)) {
+ // More than one reduction reached
+ if (FoundReduction)
+ return false;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_umax:
+ FoundReduction = true;
+ continue;
+ default:
+ return false;
+ }
+ }
+
+ if (!isa<BinaryOperator>(UI) && !isa<ShuffleVectorInst>(UI))
+ return false;
+
+ WorkList.emplace_back(UI);
+ }
+ }
+ return FoundReduction;
+}
+
/// This method looks for groups of shuffles acting on binops, of the form:
/// %x = shuffle ...
/// %y = shuffle ...
@@ -3416,6 +3465,65 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, VT, Mask, CostKind);
};
+ unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
+ unsigned MaxVectorSize =
+ TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
+ unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
+ // When there are multiple shufflevector operations on the same input,
+ // especially when the vector length is larger than the register size,
+ // identical shuffle patterns may occur across different groups of elements.
+ // To avoid overestimating the cost by counting these repeated shuffles more
+ // than once, we only account for unique shuffle patterns. This adjustment
+ // prevents inflated costs in the cost model for wide vectors split into
+ // several register-sized groups.
+ std::set<SmallVector<int, 4>> UniqueShuffles;
+ auto AddShuffleMaskAdjustedCost = [&](InstructionCost C, ArrayRef<int> Mask) {
+ // Compute the cost for performing the shuffle over the full vector.
+ auto ShuffleCost =
+ TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, VT, Mask, CostKind);
+ unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
+ if (NumFullVectors < 2)
+ return C + ShuffleCost;
+ SmallVector<int, 4> SubShuffle(MaxElementsInVector);
+ unsigned NumUniqueGroups = 0;
+ unsigned NumGroups = Mask.size() / MaxElementsInVector;
+ // For each group of MaxElementsInVector contiguous elements,
+ // collect their shuffle pattern and insert into the set of unique patterns.
+ for (unsigned I = 0; I < NumFullVectors; ++I) {
+ for (unsigned J = 0; J < MaxElementsInVector; ++J)
+ SubShuffle[J] = Mask[MaxElementsInVector * I + J];
+ if (UniqueShuffles.insert(SubShuffle).second)
+ NumUniqueGroups += 1;
+ }
+ return C + ShuffleCost * NumUniqueGroups / NumGroups;
+ };
+ auto AddShuffleAdjustedCost = [&](InstructionCost C, Instruction *I) {
+ auto *SV = dyn_cast<ShuffleVectorInst>(I);
+ if (!SV)
+ return C;
+ SmallVector<int, 16> Mask;
+ SV->getShuffleMask(Mask);
+ return AddShuffleMaskAdjustedCost(C, Mask);
+ };
+ // Check that input consists of ShuffleVectors applied to the same input
+ auto AllShufflesHaveSameOperands =
+ [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
+ if (InputShuffles.size() < 2)
+ return false;
+ ShuffleVectorInst *FirstSV =
+ dyn_cast<ShuffleVectorInst>(*InputShuffles.begin());
+ if (!FirstSV)
+ return false;
+
+ Value *In0 = FirstSV->getOperand(0), *In1 = FirstSV->getOperand(1);
+ return std::all_of(
+ std::next(InputShuffles.begin()), InputShuffles.end(),
+ [&](Instruction *I) {
+ ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
+ return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
+ });
+ };
+
// Get the costs of the shuffles + binops before and after with the new
// shuffle masks.
InstructionCost CostBefore =
@@ -3423,8 +3531,14 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
TTI.getArithmeticInstrCost(Op1->getOpcode(), VT, CostKind);
CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
InstructionCost(0), AddShuffleCost);
- CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
- InstructionCost(0), AddShuffleCost);
+ if (AllShufflesHaveSameOperands(InputShuffles)) {
+ UniqueShuffles.clear();
+ CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
+ InstructionCost(0), AddShuffleAdjustedCost);
+ } else {
+ CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
+ InstructionCost(0), AddShuffleCost);
+ }
// The new binops will be unused for lanes past the used shuffle lengths.
// These types attempt to get the correct cost for that from the target.
@@ -3435,8 +3549,9 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
InstructionCost CostAfter =
TTI.getArithmeticInstrCost(Op0->getOpcode(), Op0SmallVT, CostKind) +
TTI.getArithmeticInstrCost(Op1->getOpcode(), Op1SmallVT, CostKind);
+ UniqueShuffles.clear();
CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
- InstructionCost(0), AddShuffleMaskCost);
+ InstructionCost(0), AddShuffleMaskAdjustedCost);
std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
CostAfter +=
std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
@@ -3445,7 +3560,8 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
LLVM_DEBUG(dbgs() << " CostBefore: " << CostBefore
<< " vs CostAfter: " << CostAfter << "\n");
- if (CostBefore <= CostAfter)
+ if (CostBefore < CostAfter ||
+ (CostBefore == CostAfter && !feedsIntoVectorReduction(SVI)))
return false;
// The cost model has passed, create the new instructions.
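[Editor's sketch, not part of the patch] The VectorCombine.cpp changes above charge repeated per-register shuffle patterns only once. The scaling applied by AddShuffleMaskAdjustedCost, restated as a hypothetical standalone helper: with i32 elements and a 128-bit register (4 lanes per group), a 16-lane mask forms 4 groups, and if only 2 distinct 4-lane sub-masks occur, 2/4 of the full shuffle cost is counted.

  static unsigned adjustedShuffleCost(unsigned FullShuffleCost,
                                      unsigned NumUniqueGroups,
                                      unsigned NumGroups) {
    return FullShuffleCost * NumUniqueGroups / NumGroups; // e.g. 8 * 2 / 4 = 4
  }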