Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Analysis/AliasAnalysis.cpp | 2
-rwxr-xr-x llvm/lib/Analysis/ConstantFolding.cpp | 44
-rw-r--r-- llvm/lib/Analysis/DependenceAnalysis.cpp | 22
-rw-r--r-- llvm/lib/Analysis/HashRecognize.cpp | 5
-rw-r--r-- llvm/lib/Analysis/ScalarEvolution.cpp | 248
-rw-r--r-- llvm/lib/Analysis/TargetTransformInfo.cpp | 6
-rw-r--r-- llvm/lib/BinaryFormat/Dwarf.cpp | 7
-rw-r--r-- llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp | 4
-rw-r--r-- llvm/lib/CAS/ActionCaches.cpp | 166
-rw-r--r-- llvm/lib/CAS/BuiltinCAS.cpp | 14
-rw-r--r-- llvm/lib/CAS/BuiltinCAS.h | 25
-rw-r--r-- llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp | 38
-rw-r--r-- llvm/lib/CAS/CMakeLists.txt | 3
-rw-r--r-- llvm/lib/CAS/InMemoryCAS.cpp | 8
-rw-r--r-- llvm/lib/CAS/ObjectStore.cpp | 93
-rw-r--r-- llvm/lib/CAS/OnDiskCAS.cpp | 211
-rw-r--r-- llvm/lib/CAS/OnDiskGraphDB.cpp | 34
-rw-r--r-- llvm/lib/CAS/OnDiskKeyValueDB.cpp | 21
-rw-r--r-- llvm/lib/CAS/UnifiedOnDiskCache.cpp | 613
-rw-r--r-- llvm/lib/CGData/OutlinedHashTreeRecord.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 5
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 19
-rw-r--r-- llvm/lib/CodeGen/CodeGenPrepare.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/MachineOperand.cpp | 8
-rw-r--r-- llvm/lib/CodeGen/MachineScheduler.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/MachineStableHash.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/RegAllocFast.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/RegisterCoalescer.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 28
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 20
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 8
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 25
-rw-r--r-- llvm/lib/CodeGenTypes/LowLevelType.cpp | 6
-rw-r--r-- llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h | 2
-rw-r--r-- llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h | 2
-rw-r--r-- llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp | 3
-rw-r--r-- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 28
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp | 2
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt | 4
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp | 370
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp | 1161
-rw-r--r-- llvm/lib/FileCheck/FileCheckImpl.h | 2
-rw-r--r-- llvm/lib/Frontend/Driver/CodeGenOptions.cpp | 4
-rw-r--r-- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2
-rw-r--r-- llvm/lib/IR/AutoUpgrade.cpp | 13
-rw-r--r-- llvm/lib/IR/ConstantsContext.h | 2
-rw-r--r-- llvm/lib/IR/ModuleSummaryIndex.cpp | 4
-rw-r--r-- llvm/lib/LTO/LTO.cpp | 40
-rw-r--r-- llvm/lib/MC/GOFFObjectWriter.cpp | 2
-rw-r--r-- llvm/lib/MC/MCDXContainerWriter.cpp | 2
-rw-r--r-- llvm/lib/MC/MCGOFFStreamer.cpp | 2
-rw-r--r-- llvm/lib/MC/MCParser/AsmLexer.cpp | 1
-rw-r--r-- llvm/lib/MC/MCParser/AsmParser.cpp | 10
-rw-r--r-- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 18
-rw-r--r-- llvm/lib/MC/MCParser/MasmParser.cpp | 16
-rw-r--r-- llvm/lib/ObjCopy/COFF/COFFWriter.h | 2
-rw-r--r-- llvm/lib/ObjCopy/ELF/ELFObject.h | 10
-rw-r--r-- llvm/lib/ObjCopy/MachO/MachOReader.h | 2
-rw-r--r-- llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h | 2
-rw-r--r-- llvm/lib/Object/ELF.cpp | 26
-rw-r--r-- llvm/lib/Object/WindowsMachineFlag.cpp | 4
-rw-r--r-- llvm/lib/ObjectYAML/ELFEmitter.cpp | 18
-rw-r--r-- llvm/lib/ObjectYAML/ELFYAML.cpp | 4
-rw-r--r-- llvm/lib/ObjectYAML/GOFFYAML.cpp | 2
-rw-r--r-- llvm/lib/Passes/StandardInstrumentations.cpp | 4
-rw-r--r-- llvm/lib/Remarks/RemarkFormat.cpp | 2
-rw-r--r-- llvm/lib/SandboxIR/Context.cpp | 2
-rw-r--r-- llvm/lib/Support/AArch64BuildAttributes.cpp | 4
-rw-r--r-- llvm/lib/Support/APFloat.cpp | 6
-rw-r--r-- llvm/lib/Support/BalancedPartitioning.cpp | 2
-rw-r--r-- llvm/lib/Support/BranchProbability.cpp | 9
-rw-r--r-- llvm/lib/Support/CommandLine.cpp | 12
-rw-r--r-- llvm/lib/Support/DAGDeltaAlgorithm.cpp | 16
-rw-r--r-- llvm/lib/Support/DynamicLibrary.cpp | 2
-rw-r--r-- llvm/lib/Support/StringRef.cpp | 5
-rw-r--r-- llvm/lib/Support/Timer.cpp | 2
-rw-r--r-- llvm/lib/Support/UnicodeNameToCodepoint.cpp | 8
-rw-r--r-- llvm/lib/Support/Windows/Signals.inc | 7
-rw-r--r-- llvm/lib/Support/raw_ostream.cpp | 11
-rw-r--r-- llvm/lib/Support/raw_socket_stream.cpp | 2
-rw-r--r-- llvm/lib/TableGen/TGLexer.cpp | 6
-rw-r--r-- llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp | 150
-rw-r--r-- llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h | 6
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 24
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.h | 6
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 19
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp | 84
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp | 21
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 37
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 34
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h | 8
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 28
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 90
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 91
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.h | 2
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrInfo.td | 6
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrVFP.td | 175
-rw-r--r-- llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 13
-rw-r--r-- llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 6
-rw-r--r-- llvm/lib/Target/CSKY/CSKYISelLowering.cpp | 74
-rw-r--r-- llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/DirectX/DirectXTargetMachine.cpp | 3
-rw-r--r-- llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp | 3
-rw-r--r-- llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp | 3
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp | 6
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepIICHVX.td | 132
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td | 391
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td | 116
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp | 8
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 2
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonPatterns.td | 13
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 3
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 2
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp | 6
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td | 1
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td | 1
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 22
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 31
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 42
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp | 2
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 2
-rw-r--r-- llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp | 6
-rw-r--r-- llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp | 2
-rw-r--r-- llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 2
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h | 2
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 129
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 1
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 118
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 21
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 2
-rw-r--r-- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2
-rw-r--r-- llvm/lib/Target/PowerPC/PPCInstrFuture.td | 8
-rw-r--r-- llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp | 2
-rw-r--r-- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 5
-rw-r--r-- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 2
-rw-r--r-- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 2
-rw-r--r-- llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 82
-rw-r--r-- llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 10
-rw-r--r-- llvm/lib/Target/RISCV/RISCVFeatures.td | 5
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 84
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp | 2
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 19
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 4
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 5
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 4
-rw-r--r-- llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 31
-rw-r--r-- llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp | 2
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 30
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 8
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp | 9
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 3
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 2
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 61
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 2
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h | 2
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h | 4
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h | 3
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h | 2
-rw-r--r-- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 80
-rw-r--r-- llvm/lib/Target/X86/AsmParser/X86Operand.h | 31
-rw-r--r-- llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 5
-rw-r--r-- llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 7
-rw-r--r-- llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp | 19
-rw-r--r-- llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h | 1
-rw-r--r-- llvm/lib/Target/X86/X86.td | 6
-rw-r--r-- llvm/lib/Target/X86/X86ExpandPseudo.cpp | 190
-rw-r--r-- llvm/lib/Target/X86/X86FastPreTileConfig.cpp | 40
-rw-r--r-- llvm/lib/Target/X86/X86FastTileConfig.cpp | 25
-rw-r--r-- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 78
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 378
-rw-r--r-- llvm/lib/Target/X86/X86InstrAMX.td | 208
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp | 13
-rw-r--r-- llvm/lib/Target/X86/X86InstrOperands.td | 7
-rw-r--r-- llvm/lib/Target/X86/X86InstrPredicates.td | 1
-rw-r--r-- llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp | 19
-rw-r--r-- llvm/lib/Target/X86/X86LowerAMXType.cpp | 203
-rw-r--r-- llvm/lib/Target/X86/X86PreTileConfig.cpp | 26
-rw-r--r-- llvm/lib/Target/X86/X86RegisterInfo.cpp | 70
-rw-r--r-- llvm/lib/Target/X86/X86RegisterInfo.td | 9
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 2
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.h | 2
-rw-r--r-- llvm/lib/Target/X86/X86TileConfig.cpp | 83
-rw-r--r-- llvm/lib/TargetParser/Host.cpp | 1
-rw-r--r-- llvm/lib/TargetParser/PPCTargetParser.cpp | 8
-rw-r--r-- llvm/lib/TargetParser/TargetDataLayout.cpp | 7
-rw-r--r-- llvm/lib/TargetParser/X86TargetParser.cpp | 3
-rw-r--r-- llvm/lib/TextAPI/BinaryReader/DylibReader.cpp | 2
-rw-r--r-- llvm/lib/TextAPI/RecordVisitor.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Coroutines/CoroCloner.h | 2
-rw-r--r-- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 4
-rw-r--r-- llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 4
-rw-r--r-- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 6
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 24
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 29
-rw-r--r-- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 20
-rw-r--r-- llvm/lib/Transforms/Instrumentation/MemProfUse.cpp | 35
-rw-r--r-- llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp | 143
-rw-r--r-- llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp | 3
-rw-r--r-- llvm/lib/Transforms/Scalar/GVNSink.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 85
-rw-r--r-- llvm/lib/Transforms/Scalar/LICM.cpp | 87
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 38
-rw-r--r-- llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 30
-rw-r--r-- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 20
-rw-r--r-- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 60
-rw-r--r-- llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 73
-rw-r--r-- llvm/lib/Transforms/Utils/ControlFlowUtils.cpp | 5
-rw-r--r-- llvm/lib/Transforms/Utils/FixIrreducible.cpp | 126
-rw-r--r-- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 59
-rw-r--r-- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 118
-rw-r--r-- llvm/lib/Transforms/Utils/LoopUtils.cpp | 51
-rw-r--r-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 37
-rw-r--r-- llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 77
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 27
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 57
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 42
-rw-r--r-- llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.h | 68
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 7
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 5
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 61
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 42
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanSLP.h | 3
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 341
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 19
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 75
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanUtils.h | 3
257 files changed, 6514 insertions, 2888 deletions
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index f2dc25f..26a5602 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -75,7 +75,7 @@ AAResults::AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {}
AAResults::AAResults(AAResults &&Arg)
: TLI(Arg.TLI), AAs(std::move(Arg.AAs)), AADeps(std::move(Arg.AADeps)) {}
-AAResults::~AAResults() {}
+AAResults::~AAResults() = default;
bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index e9e2e7d..da32542 100755
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2163,18 +2163,42 @@ Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
}
Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
- FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType());
- if (!VT)
- return nullptr;
-
- // This isn't strictly necessary, but handle the special/common case of zero:
- // all integer reductions of a zero input produce zero.
- if (isa<ConstantAggregateZero>(Op))
- return ConstantInt::get(VT->getElementType(), 0);
+ auto *OpVT = cast<VectorType>(Op->getType());
// This is the same as the underlying binops - poison propagates.
- if (isa<PoisonValue>(Op) || Op->containsPoisonElement())
- return PoisonValue::get(VT->getElementType());
+ if (Op->containsPoisonElement())
+ return PoisonValue::get(OpVT->getElementType());
+
+ // Shortcut non-accumulating reductions.
+ if (Constant *SplatVal = Op->getSplatValue()) {
+ switch (IID) {
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_umax:
+ return SplatVal;
+ case Intrinsic::vector_reduce_add:
+ if (SplatVal->isNullValue())
+ return SplatVal;
+ break;
+ case Intrinsic::vector_reduce_mul:
+ if (SplatVal->isNullValue() || SplatVal->isOneValue())
+ return SplatVal;
+ break;
+ case Intrinsic::vector_reduce_xor:
+ if (SplatVal->isNullValue())
+ return SplatVal;
+ if (OpVT->getElementCount().isKnownMultipleOf(2))
+ return Constant::getNullValue(OpVT->getElementType());
+ break;
+ }
+ }
+
+ FixedVectorType *VT = dyn_cast<FixedVectorType>(OpVT);
+ if (!VT)
+ return nullptr;
// TODO: Handle undef.
auto *EltC = dyn_cast_or_null<ConstantInt>(Op->getAggregateElement(0U));
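The xor shortcut added above rests on simple lane algebra. A minimal standalone sketch of that identity, assuming plain integers stand in for vector lanes (`reduceXorSplat` is a hypothetical helper, not LLVM code):

#include <cstdint>

// x ^ x == 0, so identical lanes cancel pairwise: an even lane count folds
// to zero, and an odd count leaves one copy of the splat value.
uint64_t reduceXorSplat(uint64_t SplatVal, unsigned NumElts) {
  return (NumElts % 2 == 0) ? 0 : SplatVal;
}

The diff only exploits the even case (isKnownMultipleOf(2)), which also covers scalable vectors; odd fixed counts fall through to the generic element-by-element folding.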
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 84ee8c0..11d8294 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -2854,14 +2854,18 @@ bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
banerjeeMIVtest(Src, Dst, Loops, Result);
}
-// Given a product, e.g., 10*X*Y, returns the first constant operand,
-// in this case 10. If there is no constant part, returns std::nullopt.
-static std::optional<APInt> getConstantPart(const SCEV *Expr) {
+/// Given a SCEVMulExpr, returns its first operand if its first operand is a
+/// constant and the product doesn't overflow in a signed sense. Otherwise,
+/// returns std::nullopt. For example, given (10 * X * Y)<nsw>, it returns 10.
+/// Notably, if it doesn't have nsw, the multiplication may overflow, and if
+/// so, it may not be a multiple of 10.
+static std::optional<APInt> getConstantCoefficient(const SCEV *Expr) {
if (const auto *Constant = dyn_cast<SCEVConstant>(Expr))
return Constant->getAPInt();
if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr))
if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0)))
- return Constant->getAPInt();
+ if (Product->hasNoSignedWrap())
+ return Constant->getAPInt();
return std::nullopt;
}
@@ -2887,7 +2891,7 @@ bool DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr,
if (AddRec->getLoop() == CurLoop) {
CurLoopCoeff = Step;
} else {
- std::optional<APInt> ConstCoeff = getConstantPart(Step);
+ std::optional<APInt> ConstCoeff = getConstantCoefficient(Step);
// If the coefficient is the product of a constant and other stuff, we can
// use the constant in the GCD computation.
@@ -2940,7 +2944,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
- std::optional<APInt> ConstCoeff = getConstantPart(Coeff);
+ std::optional<APInt> ConstCoeff = getConstantCoefficient(Coeff);
if (!ConstCoeff)
return false;
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs());
@@ -2958,7 +2962,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
- std::optional<APInt> ConstCoeff = getConstantPart(Coeff);
+ std::optional<APInt> ConstCoeff = getConstantCoefficient(Coeff);
if (!ConstCoeff)
return false;
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs());
@@ -2979,7 +2983,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
} else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
// Search for constant operand to participate in GCD;
// If none found; return false.
- std::optional<APInt> ConstOp = getConstantPart(Product);
+ std::optional<APInt> ConstOp = getConstantCoefficient(Product);
if (!ConstOp)
return false;
ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, ConstOp->abs());
@@ -3032,7 +3036,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
- std::optional<APInt> ConstCoeff = getConstantPart(Delta);
+ std::optional<APInt> ConstCoeff = getConstantCoefficient(Delta);
if (!ConstCoeff)
// The difference of the two coefficients might not be a product
// or constant, in which case we give up on this direction.
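A quick arithmetic check of the nsw caveat documented in the new comment, as a hedged standalone C++ snippet (unsigned wrap models the overflow; the real concern is signed wrap, which nsw rules out):

#include <cstdint>

int main() {
  // 10 * 26 = 260 wraps to 4 in 8 bits, and 4 is not a multiple of 10.
  uint8_t Wrapped = static_cast<uint8_t>(10 * 26); // 260 mod 256 == 4
  return Wrapped % 10; // 4, not 0
}

This is why the constant coefficient may only feed the GCD computation when the product carries nsw.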
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
index 4529123..8974ce5 100644
--- a/llvm/lib/Analysis/HashRecognize.cpp
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -468,8 +468,11 @@ std::variant<PolynomialInfo, StringRef> HashRecognize::recognizeCRC() const {
// Ensure that the PHIs have exactly two uses:
// the bit-shift, and the XOR (or a cast feeding into the XOR).
+ // Also ensure that the SimpleRecurrence's evolution doesn't have stray
+ // users.
if (!ConditionalRecurrence.Phi->hasNUses(2) ||
- !SimpleRecurrence.Phi->hasNUses(2))
+ !SimpleRecurrence.Phi->hasNUses(2) ||
+ SimpleRecurrence.BO->getUniqueUndroppableUser() != SimpleRecurrence.Phi)
return "Recurrences have stray uses";
// Check that the SelectInst ConditionalRecurrence.Step is conditional on
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 7597f3a..a31f17b 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -2424,10 +2424,10 @@ ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp(
// We're trying to construct a SCEV of type `Type' with `Ops' as operands and
// `OldFlags' as can't-wrap behavior. Infer a more aggressive set of
// can't-overflow flags for the operation if possible.
-static SCEV::NoWrapFlags
-StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
- const ArrayRef<const SCEV *> Ops,
- SCEV::NoWrapFlags Flags) {
+static SCEV::NoWrapFlags StrengthenNoWrapFlags(ScalarEvolution *SE,
+ SCEVTypes Type,
+ ArrayRef<const SCEV *> Ops,
+ SCEV::NoWrapFlags Flags) {
using namespace std::placeholders;
using OBO = OverflowingBinaryOperator;
@@ -2540,7 +2540,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
unsigned Idx = isa<SCEVConstant>(Ops[0]) ? 1 : 0;
// Delay expensive flag strengthening until necessary.
- auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) {
+ auto ComputeFlags = [this, OrigFlags](ArrayRef<const SCEV *> Ops) {
return StrengthenNoWrapFlags(this, scAddExpr, Ops, OrigFlags);
};
@@ -3125,7 +3125,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
return Folded;
// Delay expensive flag strengthening until necessary.
- auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) {
+ auto ComputeFlags = [this, OrigFlags](ArrayRef<const SCEV *> Ops) {
return StrengthenNoWrapFlags(this, scMulExpr, Ops, OrigFlags);
};
@@ -15510,6 +15510,78 @@ static const SCEV *getNextSCEVDivisibleByDivisor(const SCEV *Expr,
return SE.getConstant(*ExprVal + DivisorVal - Rem);
}
+static bool collectDivisibilityInformation(
+ ICmpInst::Predicate Predicate, const SCEV *LHS, const SCEV *RHS,
+ DenseMap<const SCEV *, const SCEV *> &DivInfo,
+ DenseMap<const SCEV *, APInt> &Multiples, ScalarEvolution &SE) {
+ // If we have LHS == 0, check if LHS is computing a property of some unknown
+ // SCEV %v which we can rewrite %v to express explicitly.
+ if (Predicate != CmpInst::ICMP_EQ || !match(RHS, m_scev_Zero()))
+ return false;
+ // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
+ // explicitly express that.
+ const SCEVUnknown *URemLHS = nullptr;
+ const SCEV *URemRHS = nullptr;
+ if (!match(LHS, m_scev_URem(m_SCEVUnknown(URemLHS), m_SCEV(URemRHS), SE)))
+ return false;
+
+ const SCEV *Multiple =
+ SE.getMulExpr(SE.getUDivExpr(URemLHS, URemRHS), URemRHS);
+ DivInfo[URemLHS] = Multiple;
+ if (auto *C = dyn_cast<SCEVConstant>(URemRHS))
+ Multiples[URemLHS] = C->getAPInt();
+ return true;
+}
+
+// Check if the condition is a divisibility guard (A % B == 0).
+static bool isDivisibilityGuard(const SCEV *LHS, const SCEV *RHS,
+ ScalarEvolution &SE) {
+ const SCEV *X, *Y;
+ return match(LHS, m_scev_URem(m_SCEV(X), m_SCEV(Y), SE)) && RHS->isZero();
+}
+
+// Apply divisibility by \p Divisor on MinMaxExpr with constant values,
+// recursively. This is done by aligning up/down the constant value to the
+// Divisor.
+static const SCEV *applyDivisibilityOnMinMaxExpr(const SCEV *MinMaxExpr,
+ APInt Divisor,
+ ScalarEvolution &SE) {
+ // Return true if \p Expr is a MinMax SCEV expression with a non-negative
+ // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS
+ // the non-constant operand and in \p LHS the constant operand.
+ auto IsMinMaxSCEVWithNonNegativeConstant =
+ [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS,
+ const SCEV *&RHS) {
+ if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) {
+ if (MinMax->getNumOperands() != 2)
+ return false;
+ if (auto *C = dyn_cast<SCEVConstant>(MinMax->getOperand(0))) {
+ if (C->getAPInt().isNegative())
+ return false;
+ SCTy = MinMax->getSCEVType();
+ LHS = MinMax->getOperand(0);
+ RHS = MinMax->getOperand(1);
+ return true;
+ }
+ }
+ return false;
+ };
+
+ const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr;
+ SCEVTypes SCTy;
+ if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS,
+ MinMaxRHS))
+ return MinMaxExpr;
+ auto IsMin = isa<SCEVSMinExpr>(MinMaxExpr) || isa<SCEVUMinExpr>(MinMaxExpr);
+ assert(SE.isKnownNonNegative(MinMaxLHS) && "Expected non-negative operand!");
+ auto *DivisibleExpr =
+ IsMin ? getPreviousSCEVDivisibleByDivisor(MinMaxLHS, Divisor, SE)
+ : getNextSCEVDivisibleByDivisor(MinMaxLHS, Divisor, SE);
+ SmallVector<const SCEV *> Ops = {
+ applyDivisibilityOnMinMaxExpr(MinMaxRHS, Divisor, SE), DivisibleExpr};
+ return SE.getMinMaxExpr(SCTy, Ops);
+}
+
void ScalarEvolution::LoopGuards::collectFromBlock(
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
const BasicBlock *Block, const BasicBlock *Pred,
@@ -15520,19 +15592,13 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
SmallVector<const SCEV *> ExprsToRewrite;
auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
const SCEV *RHS,
- DenseMap<const SCEV *, const SCEV *>
- &RewriteMap) {
+ DenseMap<const SCEV *, const SCEV *> &RewriteMap,
+ const LoopGuards &DivGuards) {
// WARNING: It is generally unsound to apply any wrap flags to the proposed
// replacement SCEV which isn't directly implied by the structure of that
// SCEV. In particular, using contextual facts to imply flags is *NOT*
// legal. See the scoping rules for flags in the header to understand why.
- // If LHS is a constant, apply information to the other expression.
- if (isa<SCEVConstant>(LHS)) {
- std::swap(LHS, RHS);
- Predicate = CmpInst::getSwappedPredicate(Predicate);
- }
-
// Check for a condition of the form (-C1 + X < C2). InstCombine will
// create this form when combining two checks of the form (X u< C2 + C1) and
// (X >=u C1).
@@ -15565,67 +15631,6 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
if (MatchRangeCheckIdiom())
return;
- // Return true if \p Expr is a MinMax SCEV expression with a non-negative
- // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS
- // the non-constant operand and in \p LHS the constant operand.
- auto IsMinMaxSCEVWithNonNegativeConstant =
- [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS,
- const SCEV *&RHS) {
- const APInt *C;
- SCTy = Expr->getSCEVType();
- return match(Expr, m_scev_MinMax(m_SCEV(LHS), m_SCEV(RHS))) &&
- match(LHS, m_scev_APInt(C)) && C->isNonNegative();
- };
-
- // Apply divisibilty by \p Divisor on MinMaxExpr with constant values,
- // recursively. This is done by aligning up/down the constant value to the
- // Divisor.
- std::function<const SCEV *(const SCEV *, const SCEV *)>
- ApplyDivisibiltyOnMinMaxExpr = [&](const SCEV *MinMaxExpr,
- const SCEV *Divisor) {
- auto *ConstDivisor = dyn_cast<SCEVConstant>(Divisor);
- if (!ConstDivisor)
- return MinMaxExpr;
- const APInt &DivisorVal = ConstDivisor->getAPInt();
-
- const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr;
- SCEVTypes SCTy;
- if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS,
- MinMaxRHS))
- return MinMaxExpr;
- auto IsMin =
- isa<SCEVSMinExpr>(MinMaxExpr) || isa<SCEVUMinExpr>(MinMaxExpr);
- assert(SE.isKnownNonNegative(MinMaxLHS) &&
- "Expected non-negative operand!");
- auto *DivisibleExpr =
- IsMin
- ? getPreviousSCEVDivisibleByDivisor(MinMaxLHS, DivisorVal, SE)
- : getNextSCEVDivisibleByDivisor(MinMaxLHS, DivisorVal, SE);
- SmallVector<const SCEV *> Ops = {
- ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr};
- return SE.getMinMaxExpr(SCTy, Ops);
- };
-
- // If we have LHS == 0, check if LHS is computing a property of some unknown
- // SCEV %v which we can rewrite %v to express explicitly.
- if (Predicate == CmpInst::ICMP_EQ && match(RHS, m_scev_Zero())) {
- // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
- // explicitly express that.
- const SCEVUnknown *URemLHS = nullptr;
- const SCEV *URemRHS = nullptr;
- if (match(LHS,
- m_scev_URem(m_SCEVUnknown(URemLHS), m_SCEV(URemRHS), SE))) {
- auto I = RewriteMap.find(URemLHS);
- const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : URemLHS;
- RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS);
- const auto *Multiple =
- SE.getMulExpr(SE.getUDivExpr(RewrittenLHS, URemRHS), URemRHS);
- RewriteMap[URemLHS] = Multiple;
- ExprsToRewrite.push_back(URemLHS);
- return;
- }
- }
-
// Do not apply information for constants or if RHS contains an AddRec.
if (isa<SCEVConstant>(LHS) || SE.containsAddRecurrence(RHS))
return;
@@ -15655,7 +15660,9 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
};
const SCEV *RewrittenLHS = GetMaybeRewritten(LHS);
- const APInt &DividesBy = SE.getConstantMultiple(RewrittenLHS);
+ // Apply divisibility information when computing the constant multiple.
+ const APInt &DividesBy =
+ SE.getConstantMultiple(DivGuards.rewrite(RewrittenLHS));
// Collect rewrites for LHS and its transitive operands based on the
// condition.
@@ -15670,31 +15677,31 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
// predicate.
const SCEV *One = SE.getOne(RHS->getType());
switch (Predicate) {
- case CmpInst::ICMP_ULT:
- if (RHS->getType()->isPointerTy())
- return;
- RHS = SE.getUMaxExpr(RHS, One);
- [[fallthrough]];
- case CmpInst::ICMP_SLT: {
- RHS = SE.getMinusSCEV(RHS, One);
- RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE);
- break;
- }
- case CmpInst::ICMP_UGT:
- case CmpInst::ICMP_SGT:
- RHS = SE.getAddExpr(RHS, One);
- RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE);
- break;
- case CmpInst::ICMP_ULE:
- case CmpInst::ICMP_SLE:
- RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE);
- break;
- case CmpInst::ICMP_UGE:
- case CmpInst::ICMP_SGE:
- RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE);
- break;
- default:
- break;
+ case CmpInst::ICMP_ULT:
+ if (RHS->getType()->isPointerTy())
+ return;
+ RHS = SE.getUMaxExpr(RHS, One);
+ [[fallthrough]];
+ case CmpInst::ICMP_SLT: {
+ RHS = SE.getMinusSCEV(RHS, One);
+ RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE);
+ break;
+ }
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_SGT:
+ RHS = SE.getAddExpr(RHS, One);
+ RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE);
+ break;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE);
+ break;
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE);
+ break;
+ default:
+ break;
}
SmallVector<const SCEV *, 16> Worklist(1, LHS);
@@ -15840,8 +15847,11 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
// Now apply the information from the collected conditions to
// Guards.RewriteMap. Conditions are processed in reverse order, so the
- // earliest conditions is processed first. This ensures the SCEVs with the
+ // earliest condition is processed first, except guards with divisibility
+ // information, which are moved to the back. This ensures the SCEVs with the
// shortest dependency chains are constructed first.
+ SmallVector<std::tuple<CmpInst::Predicate, const SCEV *, const SCEV *>>
+ GuardsToProcess;
for (auto [Term, EnterIfTrue] : reverse(Terms)) {
SmallVector<Value *, 8> Worklist;
SmallPtrSet<Value *, 8> Visited;
@@ -15856,7 +15866,14 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate();
const auto *LHS = SE.getSCEV(Cmp->getOperand(0));
const auto *RHS = SE.getSCEV(Cmp->getOperand(1));
- CollectCondition(Predicate, LHS, RHS, Guards.RewriteMap);
+ // If LHS is a constant, apply information to the other expression.
+ // TODO: If LHS is not a constant, check if using CompareSCEVComplexity
+ // can improve results.
+ if (isa<SCEVConstant>(LHS)) {
+ std::swap(LHS, RHS);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+ GuardsToProcess.emplace_back(Predicate, LHS, RHS);
continue;
}
@@ -15869,6 +15886,31 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
}
}
+ // Process divisibility guards in reverse order to populate DivGuards early.
+ DenseMap<const SCEV *, APInt> Multiples;
+ LoopGuards DivGuards(SE);
+ for (const auto &[Predicate, LHS, RHS] : GuardsToProcess) {
+ if (!isDivisibilityGuard(LHS, RHS, SE))
+ continue;
+ collectDivisibilityInformation(Predicate, LHS, RHS, DivGuards.RewriteMap,
+ Multiples, SE);
+ }
+
+ for (const auto &[Predicate, LHS, RHS] : GuardsToProcess)
+ CollectCondition(Predicate, LHS, RHS, Guards.RewriteMap, DivGuards);
+
+ // Apply divisibility information last. This ensures it is applied to the
+ // outermost expression after other rewrites for the given value.
+ for (const auto &[K, Divisor] : Multiples) {
+ const SCEV *DivisorSCEV = SE.getConstant(Divisor);
+ Guards.RewriteMap[K] =
+ SE.getMulExpr(SE.getUDivExpr(applyDivisibilityOnMinMaxExpr(
+ Guards.rewrite(K), Divisor, SE),
+ DivisorSCEV),
+ DivisorSCEV);
+ ExprsToRewrite.push_back(K);
+ }
+
// Let the rewriter preserve NUW/NSW flags if the unsigned/signed ranges of
// the replacement expressions are contained in the ranges of the replaced
// expressions.
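The divisibility rewrites above round min/max constants toward the divisor. A minimal standalone model of that rounding on plain integers (hypothetical helpers mirroring getPreviousSCEVDivisibleByDivisor and getNextSCEVDivisibleByDivisor):

#include <cstdint>

// A constant inside a min is rounded down to a multiple of Divisor; one
// inside a max is rounded up, matching the IsMin branch above.
uint64_t previousMultiple(uint64_t C, uint64_t Divisor) {
  return C - (C % Divisor);
}
uint64_t nextMultiple(uint64_t C, uint64_t Divisor) {
  uint64_t Rem = C % Divisor;
  return Rem == 0 ? C : C + (Divisor - Rem);
}
// e.g. with Divisor == 8: min(21, X) uses previousMultiple(21, 8) == 16,
// while max(21, X) uses nextMultiple(21, 8) == 24.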
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c47a1c1..0426ac7 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1353,9 +1353,9 @@ TargetTransformInfo::getInlineCallPenalty(const Function *F,
return TTIImpl->getInlineCallPenalty(F, Call, DefaultCallPenalty);
}
-bool TargetTransformInfo::areTypesABICompatible(
- const Function *Caller, const Function *Callee,
- const ArrayRef<Type *> &Types) const {
+bool TargetTransformInfo::areTypesABICompatible(const Function *Caller,
+ const Function *Callee,
+ ArrayRef<Type *> Types) const {
return TTIImpl->areTypesABICompatible(Caller, Callee, Types);
}
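For context on the signature change: ArrayRef is a non-owning (pointer, length) view, so passing it by value is as cheap as passing a reference and is the prevailing LLVM style; `const ArrayRef<T> &` only adds an indirection. A minimal illustration (hypothetical function name):

#include "llvm/ADT/ArrayRef.h"

// By-value ArrayRef copies only a pointer and a length.
int sumIllustration(llvm::ArrayRef<int> Values) {
  int Total = 0;
  for (int V : Values)
    Total += V;
  return Total;
}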
diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp
index 55fa2df..a6c7e6a 100644
--- a/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -1076,10 +1076,3 @@ StringRef (*const llvm::dwarf::EnumTraits<LineNumberOps>::StringFn)(unsigned) =
LNStandardString;
StringRef (*const llvm::dwarf::EnumTraits<Index>::StringFn)(unsigned) =
IndexString;
-
-constexpr char llvm::dwarf::EnumTraits<Attribute>::Type[];
-constexpr char llvm::dwarf::EnumTraits<Form>::Type[];
-constexpr char llvm::dwarf::EnumTraits<Index>::Type[];
-constexpr char llvm::dwarf::EnumTraits<Tag>::Type[];
-constexpr char llvm::dwarf::EnumTraits<LineNumberOps>::Type[];
-constexpr char llvm::dwarf::EnumTraits<LocationAtom>::Type[];
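The deleted definitions are redundant because, since C++17, a static constexpr data member is implicitly inline, so the in-class initializer is already a definition. A hedged sketch with a hypothetical traits struct (not the real EnumTraits):

struct ExampleTraits {
  static constexpr char Type[] = "DW_TAG"; // a definition since C++17
};
// Pre-C++17, a .cpp file also needed the out-of-line line that this
// change removes:
//   constexpr char ExampleTraits::Type[];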
diff --git a/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp b/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp
index 3de3dcc..80b421d 100644
--- a/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp
+++ b/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp
@@ -209,12 +209,12 @@ template <> struct CustomMappingTraits<MapDocNode> {
static void inputOne(IO &IO, StringRef Key, MapDocNode &M) {
ScalarDocNode KeyObj = M.getDocument()->getNode();
KeyObj.fromString(Key, "");
- IO.mapRequired(Key.str().c_str(), M.getMap()[KeyObj]);
+ IO.mapRequired(Key, M.getMap()[KeyObj]);
}
static void output(IO &IO, MapDocNode &M) {
for (auto I : M.getMap()) {
- IO.mapRequired(I.first.toString().c_str(), I.second);
+ IO.mapRequired(I.first.toString(), I.second);
}
}
};
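The StringRef overload of mapRequired sidesteps pointers into temporaries: `Key.str().c_str()` and `I.first.toString().c_str()` each point into a std::string that dies at the end of the full-expression, which dangles if the YAML IO layer retains the key beyond the call. A minimal standalone illustration of that hazard class:

#include <string>

// Returning c_str() of a local std::string leaves a dangling pointer; the
// same lifetime rule applies to the temporary created by toString()/str().
const char *danglingKey() {
  std::string Tmp = "key";
  return Tmp.c_str(); // invalid once Tmp is destroyed
}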
diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp
index 571c5b3..003c850 100644
--- a/llvm/lib/CAS/ActionCaches.cpp
+++ b/llvm/lib/CAS/ActionCaches.cpp
@@ -13,7 +13,11 @@
#include "BuiltinCAS.h"
#include "llvm/ADT/TrieRawHashMap.h"
#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/BLAKE3.h"
+#include "llvm/Support/Errc.h"
#define DEBUG_TYPE "cas-action-caches"
@@ -47,12 +51,54 @@ public:
Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
bool CanBeDistributed) const final;
+ Error validate() const final {
+ return createStringError("InMemoryActionCache doesn't support validate()");
+ }
+
private:
using DataT = CacheEntry<sizeof(HashType)>;
using InMemoryCacheT = ThreadSafeTrieRawHashMap<DataT, sizeof(HashType)>;
InMemoryCacheT Cache;
};
+
+/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB.
+class OnDiskActionCache final : public ActionCache {
+public:
+ Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
+ bool CanBeDistributed) final;
+ Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
+ bool CanBeDistributed) const final;
+
+ static Expected<std::unique_ptr<OnDiskActionCache>> create(StringRef Path);
+
+ Error validate() const final;
+
+private:
+ static StringRef getHashName() { return "BLAKE3"; }
+
+ OnDiskActionCache(std::unique_ptr<ondisk::OnDiskKeyValueDB> DB);
+
+ std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
+ using DataT = CacheEntry<sizeof(HashType)>;
+};
+
+/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide
+/// access to its ActionCache.
+class UnifiedOnDiskActionCache final : public ActionCache {
+public:
+ Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
+ bool CanBeDistributed) final;
+ Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
+ bool CanBeDistributed) const final;
+
+ UnifiedOnDiskActionCache(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+ Error validate() const final;
+
+private:
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
+};
} // end namespace
static Error createResultCachePoisonedError(ArrayRef<uint8_t> KeyHash,
@@ -99,3 +145,123 @@ std::unique_ptr<ActionCache> createInMemoryActionCache() {
}
} // namespace llvm::cas
+
+OnDiskActionCache::OnDiskActionCache(
+ std::unique_ptr<ondisk::OnDiskKeyValueDB> DB)
+ : ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
+ DB(std::move(DB)) {}
+
+Expected<std::unique_ptr<OnDiskActionCache>>
+OnDiskActionCache::create(StringRef AbsPath) {
+ std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
+ if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(),
+ sizeof(HashType), getHashName(),
+ sizeof(DataT))
+ .moveInto(DB))
+ return std::move(E);
+ return std::unique_ptr<OnDiskActionCache>(
+ new OnDiskActionCache(std::move(DB)));
+}
+
+Expected<std::optional<CASID>>
+OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
+ bool /*CanBeDistributed*/) const {
+ std::optional<ArrayRef<char>> Val;
+ if (Error E = DB->get(Key).moveInto(Val))
+ return std::move(E);
+ if (!Val)
+ return std::nullopt;
+ return CASID::create(&getContext(), toStringRef(*Val));
+}
+
+Error OnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result,
+ bool /*CanBeDistributed*/) {
+ auto ResultHash = Result.getHash();
+ ArrayRef Expected((const char *)ResultHash.data(), ResultHash.size());
+ ArrayRef<char> Observed;
+ if (Error E = DB->put(Key, Expected).moveInto(Observed))
+ return E;
+
+ if (Expected == Observed)
+ return Error::success();
+
+ return createResultCachePoisonedError(
+ Key, getContext(), Result,
+ ArrayRef((const uint8_t *)Observed.data(), Observed.size()));
+}
+
+Error OnDiskActionCache::validate() const {
+ // FIXME: without the matching CAS there is nothing we can check about the
+ // cached values. The hash size is already validated by the DB validator.
+ return DB->validate(nullptr);
+}
+
+UnifiedOnDiskActionCache::UnifiedOnDiskActionCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
+ : ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
+ UniDB(std::move(UniDB)) {}
+
+Expected<std::optional<CASID>>
+UnifiedOnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
+ bool /*CanBeDistributed*/) const {
+ std::optional<ArrayRef<char>> Val;
+ if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val))
+ return std::move(E);
+ if (!Val)
+ return std::nullopt;
+ auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val);
+ return CASID::create(&getContext(),
+ toStringRef(UniDB->getGraphDB().getDigest(ID)));
+}
+
+Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
+ const CASID &Result,
+ bool /*CanBeDistributed*/) {
+ auto Expected = UniDB->getGraphDB().getReference(Result.getHash());
+ if (LLVM_UNLIKELY(!Expected))
+ return Expected.takeError();
+
+ auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected);
+ std::optional<ArrayRef<char>> Observed;
+ if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed))
+ return E;
+
+ auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed);
+ if (*Expected == ObservedID)
+ return Error::success();
+
+ return createResultCachePoisonedError(
+ Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID));
+}
+
+Error UnifiedOnDiskActionCache::validate() const {
+ auto ValidateRef = [](FileOffset Offset, ArrayRef<char> Value) -> Error {
+ auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value);
+ auto formatError = [&](Twine Msg) {
+ return createStringError(
+ llvm::errc::illegal_byte_sequence,
+ "bad record at 0x" +
+ utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
+ Msg.str());
+ };
+ if (ID.getOpaqueData() == 0)
+ return formatError("zero is not a valid ref");
+ return Error::success();
+ };
+ return UniDB->getKeyValueDB().validate(ValidateRef);
+}
+
+Expected<std::unique_ptr<ActionCache>>
+cas::createOnDiskActionCache(StringRef Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+ return OnDiskActionCache::create(Path);
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
+
+std::unique_ptr<ActionCache>
+cas::builtin::createActionCacheFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
+ return std::make_unique<UnifiedOnDiskActionCache>(std::move(UniDB));
+}
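Both new caches implement the same put() contract: the database returns the value actually stored, and a mismatch with the value being written is reported as result-cache poisoning rather than silently overwritten. A hedged standalone model of that contract, with std::map standing in for the on-disk DB:

#include <map>
#include <optional>
#include <string>

// Returns std::nullopt on success, or the conflicting stored result when
// the key is already bound to a different value ("poisoned").
std::optional<std::string> putModel(std::map<std::string, std::string> &DB,
                                    const std::string &Key,
                                    const std::string &Result) {
  auto [It, Inserted] = DB.try_emplace(Key, Result);
  if (!Inserted && It->second != Result)
    return It->second; // poisoned: a different result is already cached
  return std::nullopt;
}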
diff --git a/llvm/lib/CAS/BuiltinCAS.cpp b/llvm/lib/CAS/BuiltinCAS.cpp
index 73646ad..e9bc6d8 100644
--- a/llvm/lib/CAS/BuiltinCAS.cpp
+++ b/llvm/lib/CAS/BuiltinCAS.cpp
@@ -9,6 +9,7 @@
#include "BuiltinCAS.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CAS/BuiltinObjectHasher.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
#include "llvm/Support/Process.h"
using namespace llvm;
@@ -68,7 +69,7 @@ Expected<ObjectRef> BuiltinCAS::store(ArrayRef<ObjectRef> Refs,
Refs, Data);
}
-Error BuiltinCAS::validate(const CASID &ID) {
+Error BuiltinCAS::validateObject(const CASID &ID) {
auto Ref = getReference(ID);
if (!Ref)
return createUnknownObjectError(ID);
@@ -92,3 +93,14 @@ Error BuiltinCAS::validate(const CASID &ID) {
return Error::success();
}
+
+Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>>
+cas::builtin::createBuiltinUnifiedOnDiskCache(StringRef Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+ return ondisk::UnifiedOnDiskCache::open(Path, /*SizeLimit=*/std::nullopt,
+ BuiltinCASContext::getHashName(),
+ sizeof(HashType));
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
diff --git a/llvm/lib/CAS/BuiltinCAS.h b/llvm/lib/CAS/BuiltinCAS.h
index 3b5374d..4d2de66 100644
--- a/llvm/lib/CAS/BuiltinCAS.h
+++ b/llvm/lib/CAS/BuiltinCAS.h
@@ -1,4 +1,4 @@
-//===- BuiltinCAS.h ---------------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,6 +15,9 @@
namespace llvm::cas {
class ActionCache;
+namespace ondisk {
+class UnifiedOnDiskCache;
+} // namespace ondisk
namespace builtin {
/// Common base class for builtin CAS implementations using the same CASContext.
@@ -65,9 +68,27 @@ public:
"corrupt storage");
}
- Error validate(const CASID &ID) final;
+ Error validateObject(const CASID &ID) final;
};
+/// Create a \p UnifiedOnDiskCache instance that uses \p BLAKE3 hashing.
+Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>>
+createBuiltinUnifiedOnDiskCache(StringRef Path);
+
+/// \param UniDB A \p UnifiedOnDiskCache instance from \p
+/// createBuiltinUnifiedOnDiskCache.
+std::unique_ptr<ObjectStore> createObjectStoreFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+/// \param UniDB A \p UnifiedOnDiskCache instance from \p
+/// createBuiltinUnifiedOnDiskCache.
+std::unique_ptr<ActionCache> createActionCacheFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+// FIXME: Proxy not portable. Maybe also error-prone?
+constexpr StringLiteral DefaultDirProxy = "/^llvm::cas::builtin::default";
+constexpr StringLiteral DefaultDir = "llvm.cas.builtin.default";
+
} // end namespace builtin
} // end namespace llvm::cas
diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
new file mode 100644
index 0000000..f3f6fa0
--- /dev/null
+++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "BuiltinCAS.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
+cas::createOnDiskUnifiedCASDatabases(StringRef Path) {
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
+ if (Error E = builtin::createBuiltinUnifiedOnDiskCache(Path).moveInto(UniDB))
+ return std::move(E);
+ auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB);
+ auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB));
+ return std::make_pair(std::move(CAS), std::move(AC));
+}
+
+Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
+ StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
+ std::optional<StringRef> LLVMCasBinary) {
+#if LLVM_ENABLE_ONDISK_CAS
+ return ondisk::UnifiedOnDiskCache::validateIfNeeded(
+ Path, builtin::BuiltinCASContext::getHashName(),
+ sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation,
+ LLVMCasBinary);
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
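A hedged usage sketch of the new entry point (`CASPath` and `openDatabases` are hypothetical; error handling follows the llvm::Expected idiom):

#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"

using namespace llvm;
using namespace llvm::cas;

Error openDatabases(StringRef CASPath) {
  auto DBs = createOnDiskUnifiedCASDatabases(CASPath);
  if (!DBs)
    return DBs.takeError();
  std::unique_ptr<ObjectStore> CAS = std::move(DBs->first);
  std::unique_ptr<ActionCache> Cache = std::move(DBs->second);
  // Both share the same underlying UnifiedOnDiskCache instance.
  return Error::success();
}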
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index a2f8c49..aad77dc 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -2,15 +2,18 @@ add_llvm_component_library(LLVMCAS
ActionCache.cpp
ActionCaches.cpp
BuiltinCAS.cpp
+ BuiltinUnifiedCASDatabases.cpp
DatabaseFile.cpp
InMemoryCAS.cpp
MappedFileRegionArena.cpp
ObjectStore.cpp
+ OnDiskCAS.cpp
OnDiskCommon.cpp
OnDiskDataAllocator.cpp
OnDiskGraphDB.cpp
OnDiskKeyValueDB.cpp
OnDiskTrieRawHashMap.cpp
+ UnifiedOnDiskCache.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS
diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp
index c63ee70d..2d4eedd 100644
--- a/llvm/lib/CAS/InMemoryCAS.cpp
+++ b/llvm/lib/CAS/InMemoryCAS.cpp
@@ -233,6 +233,12 @@ public:
return cast<InMemoryObject>(asInMemoryObject(Node)).getData();
}
+ void print(raw_ostream &OS) const final;
+
+ Error validate(bool CheckHash) const final {
+ return createStringError("InMemoryCAS doesn't support validate()");
+ }
+
InMemoryCAS() = default;
private:
@@ -271,6 +277,8 @@ ArrayRef<const InMemoryObject *> InMemoryObject::getRefs() const {
return cast<InMemoryInlineObject>(this)->getRefsImpl();
}
+void InMemoryCAS::print(raw_ostream &OS) const {}
+
Expected<ObjectRef>
InMemoryCAS::storeFromNullTerminatedRegion(ArrayRef<uint8_t> ComputedHash,
sys::fs::mapped_file_region Map) {
diff --git a/llvm/lib/CAS/ObjectStore.cpp b/llvm/lib/CAS/ObjectStore.cpp
index e0be50b..3110577 100644
--- a/llvm/lib/CAS/ObjectStore.cpp
+++ b/llvm/lib/CAS/ObjectStore.cpp
@@ -1,4 +1,4 @@
-//===- ObjectStore.cpp ------------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -12,7 +12,7 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <optional>
+#include <deque>
using namespace llvm;
using namespace llvm::cas;
@@ -21,6 +21,7 @@ void CASContext::anchor() {}
void ObjectStore::anchor() {}
LLVM_DUMP_METHOD void CASID::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void ObjectRef::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(dbgs()); }
@@ -141,7 +142,7 @@ Error ObjectStore::validateTree(ObjectRef Root) {
auto [I, Inserted] = ValidatedRefs.insert(Ref);
if (!Inserted)
continue; // already validated.
- if (Error E = validate(getID(Ref)))
+ if (Error E = validateObject(getID(Ref)))
return E;
Expected<ObjectHandle> Obj = load(Ref);
if (!Obj)
@@ -155,6 +156,92 @@ Error ObjectStore::validateTree(ObjectRef Root) {
return Error::success();
}
+Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream,
+ ObjectRef Other) {
+ // Copy the full CAS tree from upstream with depth-first ordering to ensure
+ // all the child nodes are available in downstream CAS before inserting
+ // current object. This uses a similar algorithm as
+ // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema
+ // so it can be used to import from any other ObjectStore regardless of the
+ // CAS schema.
+
+ // There is no work to do if importing from self.
+ if (this == &Upstream)
+ return Other;
+
+ /// Keeps track of the state of visitation for current node and all of its
+ /// parents. Upstream Cursor holds information only from upstream CAS.
+ struct UpstreamCursor {
+ ObjectRef Ref;
+ ObjectHandle Node;
+ size_t RefsCount;
+ std::deque<ObjectRef> Refs;
+ };
+ SmallVector<UpstreamCursor, 16> CursorStack;
+ /// PrimaryRefStack holds ObjectRefs in the current CAS, for nodes that were
+ /// either just stored here or that already existed in the current CAS.
+ SmallVector<ObjectRef, 128> PrimaryRefStack;
+ /// A map from upstream ObjectRef to current ObjectRef.
+ llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects;
+
+ auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) {
+ unsigned NumRefs = Upstream.getNumRefs(Node);
+ std::deque<ObjectRef> Refs;
+ for (unsigned I = 0; I < NumRefs; ++I)
+ Refs.push_back(Upstream.readRef(Node, I));
+
+ CursorStack.push_back({Ref, Node, NumRefs, std::move(Refs)});
+ };
+
+ auto UpstreamHandle = Upstream.load(Other);
+ if (!UpstreamHandle)
+ return UpstreamHandle.takeError();
+ enqueueNode(Other, *UpstreamHandle);
+
+ while (!CursorStack.empty()) {
+ UpstreamCursor &Cur = CursorStack.back();
+ if (Cur.Refs.empty()) {
+ // Copy the node data into the primary store.
+ // The top of \p PrimaryRefStack contains the new ObjectRefs for the
+ // current node's children.
+ assert(PrimaryRefStack.size() >= Cur.RefsCount);
+ auto Refs = ArrayRef(PrimaryRefStack)
+ .slice(PrimaryRefStack.size() - Cur.RefsCount);
+ auto NewNode = store(Refs, Upstream.getData(Cur.Node));
+ if (!NewNode)
+ return NewNode.takeError();
+
+ // Remove the current node and its IDs from the stack.
+ PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.RefsCount);
+ CursorStack.pop_back();
+
+ PrimaryRefStack.push_back(*NewNode);
+ CreatedObjects.try_emplace(Cur.Ref, *NewNode);
+ continue;
+ }
+
+ // Check if the node exists already.
+ auto CurrentID = Cur.Refs.front();
+ Cur.Refs.pop_front();
+ auto Ref = CreatedObjects.find(CurrentID);
+ if (Ref != CreatedObjects.end()) {
+ // If it already exists, just push the corresponding primary ref.
+ PrimaryRefStack.push_back(Ref->second);
+ continue;
+ }
+
+ // Load child.
+ auto PrimaryID = Upstream.load(CurrentID);
+ if (LLVM_UNLIKELY(!PrimaryID))
+ return PrimaryID.takeError();
+
+ enqueueNode(CurrentID, *PrimaryID);
+ }
+
+ assert(PrimaryRefStack.size() == 1);
+ return PrimaryRefStack.front();
+}
+
std::unique_ptr<MemoryBuffer>
ObjectProxy::getMemoryBuffer(StringRef Name,
bool RequiresNullTerminator) const {
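A hedged usage sketch of the new importObject() API: the call walks the tree rooted at `Root` depth-first, so children land in the destination store before their parents (`Local`, `Upstream`, `Root`, and `importTree` are assumed names):

#include "llvm/CAS/ObjectStore.h"

using namespace llvm;
using namespace llvm::cas;

Expected<ObjectRef> importTree(ObjectStore &Local, ObjectStore &Upstream,
                               ObjectRef Root) {
  // Copies Root and its transitive children; returns the ref in Local.
  // Importing from the same store is a no-op that returns Root itself.
  return Local.importObject(Upstream, Root);
}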
diff --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp
new file mode 100644
index 0000000..7d29f44
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskCAS.cpp
@@ -0,0 +1,211 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BuiltinCAS.h"
+#include "llvm/CAS/BuiltinCASContext.h"
+#include "llvm/CAS/BuiltinObjectHasher.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::builtin;
+
+namespace {
+
+class OnDiskCAS : public BuiltinCAS {
+public:
+ Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash,
+ ArrayRef<ObjectRef> Refs,
+ ArrayRef<char> Data) final;
+
+ Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) final;
+
+ CASID getID(ObjectRef Ref) const final;
+
+ std::optional<ObjectRef> getReference(const CASID &ID) const final;
+
+ Expected<bool> isMaterialized(ObjectRef Ref) const final;
+
+ ArrayRef<char> getDataConst(ObjectHandle Node) const final;
+
+ void print(raw_ostream &OS) const final;
+ Error validate(bool CheckHash) const final;
+
+ static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path);
+
+ OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
+ : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {}
+
+private:
+ ObjectHandle convertHandle(ondisk::ObjectHandle Node) const {
+ return makeObjectHandle(Node.getOpaqueData());
+ }
+
+ ondisk::ObjectHandle convertHandle(ObjectHandle Node) const {
+ return ondisk::ObjectHandle(Node.getInternalRef(*this));
+ }
+
+ ObjectRef convertRef(ondisk::ObjectID Ref) const {
+ return makeObjectRef(Ref.getOpaqueData());
+ }
+
+ ondisk::ObjectID convertRef(ObjectRef Ref) const {
+ return ondisk::ObjectID::fromOpaqueData(Ref.getInternalRef(*this));
+ }
+
+ size_t getNumRefs(ObjectHandle Node) const final {
+ auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+ return std::distance(RefsRange.begin(), RefsRange.end());
+ }
+
+ ObjectRef readRef(ObjectHandle Node, size_t I) const final {
+ auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+ return convertRef(RefsRange.begin()[I]);
+ }
+
+ Error forEachRef(ObjectHandle Node,
+ function_ref<Error(ObjectRef)> Callback) const final;
+
+ Error setSizeLimit(std::optional<uint64_t> SizeLimit) final;
+ Expected<std::optional<uint64_t>> getStorageSize() const final;
+ Error pruneStorageData() final;
+
+ OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB)
+ : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {}
+
+ std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB;
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UnifiedDB;
+ ondisk::OnDiskGraphDB *DB;
+};
+
+} // end anonymous namespace
+
+void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); }
+Error OnDiskCAS::validate(bool CheckHash) const {
+ auto Hasher = [](ArrayRef<ArrayRef<uint8_t>> Refs, ArrayRef<char> Data,
+ SmallVectorImpl<uint8_t> &Result) {
+ auto Hash = BuiltinObjectHasher<llvm::cas::builtin::HasherT>::hashObject(
+ Refs, Data);
+ Result.assign(Hash.begin(), Hash.end());
+ };
+
+ if (auto E = DB->validate(CheckHash, Hasher))
+ return E;
+
+ return Error::success();
+}
+
+CASID OnDiskCAS::getID(ObjectRef Ref) const {
+ ArrayRef<uint8_t> Hash = DB->getDigest(convertRef(Ref));
+ return CASID::create(&getContext(), toStringRef(Hash));
+}
+
+std::optional<ObjectRef> OnDiskCAS::getReference(const CASID &ID) const {
+ std::optional<ondisk::ObjectID> ObjID =
+ DB->getExistingReference(ID.getHash());
+ if (!ObjID)
+ return std::nullopt;
+ return convertRef(*ObjID);
+}
+
+Expected<bool> OnDiskCAS::isMaterialized(ObjectRef ExternalRef) const {
+ return DB->isMaterialized(convertRef(ExternalRef));
+}
+
+ArrayRef<char> OnDiskCAS::getDataConst(ObjectHandle Node) const {
+ return DB->getObjectData(convertHandle(Node));
+}
+
+Expected<std::optional<ObjectHandle>>
+OnDiskCAS::loadIfExists(ObjectRef ExternalRef) {
+ Expected<std::optional<ondisk::ObjectHandle>> ObjHnd =
+ DB->load(convertRef(ExternalRef));
+ if (!ObjHnd)
+ return ObjHnd.takeError();
+ if (!*ObjHnd)
+ return std::nullopt;
+ return convertHandle(**ObjHnd);
+}
+
+Expected<ObjectRef> OnDiskCAS::storeImpl(ArrayRef<uint8_t> ComputedHash,
+ ArrayRef<ObjectRef> Refs,
+ ArrayRef<char> Data) {
+ SmallVector<ondisk::ObjectID, 64> IDs;
+ IDs.reserve(Refs.size());
+ for (ObjectRef Ref : Refs) {
+ IDs.push_back(convertRef(Ref));
+ }
+
+ auto StoredID = DB->getReference(ComputedHash);
+ if (LLVM_UNLIKELY(!StoredID))
+ return StoredID.takeError();
+ if (Error E = DB->store(*StoredID, IDs, Data))
+ return std::move(E);
+ return convertRef(*StoredID);
+}
+
+Error OnDiskCAS::forEachRef(ObjectHandle Node,
+ function_ref<Error(ObjectRef)> Callback) const {
+ auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+ for (ondisk::ObjectID Ref : RefsRange) {
+ if (Error E = Callback(convertRef(Ref)))
+ return E;
+ }
+ return Error::success();
+}
+
+Error OnDiskCAS::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+ UnifiedDB->setSizeLimit(SizeLimit);
+ return Error::success();
+}
+
+Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const {
+ return UnifiedDB->getStorageSize();
+}
+
+Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); }
+
+Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) {
+ Expected<std::unique_ptr<ondisk::OnDiskGraphDB>> DB =
+ ondisk::OnDiskGraphDB::open(AbsPath, BuiltinCASContext::getHashName(),
+ sizeof(HashType));
+ if (!DB)
+ return DB.takeError();
+ return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(*DB)));
+}
+
+bool cas::isOnDiskCASEnabled() {
+#if LLVM_ENABLE_ONDISK_CAS
+ return true;
+#else
+ return false;
+#endif
+}
+
+Expected<std::unique_ptr<ObjectStore>> cas::createOnDiskCAS(const Twine &Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+ // FIXME: An absolute path isn't really good enough. Should open a directory
+ // and use openat() for files underneath.
+ SmallString<256> AbsPath;
+ Path.toVector(AbsPath);
+ sys::fs::make_absolute(AbsPath);
+
+ return OnDiskCAS::open(AbsPath);
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled");
+#endif /* LLVM_ENABLE_ONDISK_CAS */
+}
+
+std::unique_ptr<ObjectStore>
+cas::builtin::createObjectStoreFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
+ return std::make_unique<OnDiskCAS>(std::move(UniDB));
+}
diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp
index 64cbe9d..245b6fb 100644
--- a/llvm/lib/CAS/OnDiskGraphDB.cpp
+++ b/llvm/lib/CAS/OnDiskGraphDB.cpp
@@ -893,6 +893,10 @@ int64_t DataRecordHandle::getDataRelOffset() const {
}
Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
+ if (UpstreamDB) {
+ if (auto E = UpstreamDB->validate(Deep, Hasher))
+ return E;
+ }
return Index.validate([&](FileOffset Offset,
OnDiskTrieRawHashMap::ConstValueProxy Record)
-> Error {
@@ -1202,11 +1206,8 @@ OnDiskGraphDB::load(ObjectID ExternalRef) {
return I.takeError();
TrieRecord::Data Object = I->Ref.load();
- if (Object.SK == TrieRecord::StorageKind::Unknown) {
- if (!UpstreamDB)
- return std::nullopt;
+ if (Object.SK == TrieRecord::StorageKind::Unknown)
return faultInFromUpstream(ExternalRef);
- }
if (Object.SK == TrieRecord::StorageKind::DataPool)
return ObjectHandle::fromFileOffset(Object.Offset);
@@ -1286,8 +1287,10 @@ OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
TrieRecord::Data Object = I->Ref.load();
if (Object.SK != TrieRecord::StorageKind::Unknown)
return ObjectPresence::InPrimaryDB;
+
if (!CheckUpstream || !UpstreamDB)
return ObjectPresence::Missing;
+
std::optional<ObjectID> UpstreamID =
UpstreamDB->getExistingReference(getDigest(*I));
return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
@@ -1549,9 +1552,10 @@ unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const {
return std::max(IndexPercent, DataPercent);
}
-Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
- StringRef AbsPath, StringRef HashName, unsigned HashByteSize,
- std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) {
+Expected<std::unique_ptr<OnDiskGraphDB>>
+OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName,
+ unsigned HashByteSize, OnDiskGraphDB *UpstreamDB,
+ FaultInPolicy Policy) {
if (std::error_code EC = sys::fs::create_directories(AbsPath))
return createFileError(AbsPath, EC);
@@ -1604,18 +1608,15 @@ Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
"unexpected user header in '" + DataPoolPath +
"'");
- return std::unique_ptr<OnDiskGraphDB>(
- new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool),
- std::move(UpstreamDB), Policy));
+ return std::unique_ptr<OnDiskGraphDB>(new OnDiskGraphDB(
+ AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));
}
OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
OnDiskDataAllocator DataPool,
- std::unique_ptr<OnDiskGraphDB> UpstreamDB,
- FaultInPolicy Policy)
+ OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy)
: Index(std::move(Index)), DataPool(std::move(DataPool)),
- RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)),
- FIPolicy(Policy) {
+ RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {
/// Lifetime for "big" objects not in DataPool.
///
/// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
@@ -1638,7 +1639,6 @@ Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
// against the process dying during importing and leaving the database with an
// incomplete tree. Note that if the upstream has missing nodes then the tree
// will be copied with missing nodes as well, it won't be considered an error.
-
struct UpstreamCursor {
ObjectHandle Node;
size_t RefsCount;
@@ -1720,7 +1720,6 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
// Copy the node data into the primary store.
// FIXME: Use hard-link or cloning if the file-system supports it and data is
// stored into a separate file.
-
auto Data = UpstreamDB->getObjectData(UpstreamNode);
auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
SmallVector<ObjectID, 64> Refs;
@@ -1737,7 +1736,8 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
Expected<std::optional<ObjectHandle>>
OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
- assert(UpstreamDB);
+ if (!UpstreamDB)
+ return std::nullopt;
auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
if (LLVM_UNLIKELY(!UpstreamID))
diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
index 2186071..15656cb 100644
--- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp
+++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
@@ -20,6 +20,7 @@
#include "llvm/CAS/OnDiskKeyValueDB.h"
#include "OnDiskCommon.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Errc.h"
@@ -53,15 +54,21 @@ Expected<std::optional<ArrayRef<char>>>
OnDiskKeyValueDB::get(ArrayRef<uint8_t> Key) {
// Check the result cache.
OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key);
- if (!ActionP)
+ if (ActionP) {
+ assert(isAddrAligned(Align(8), ActionP->Data.data()));
+ return ActionP->Data;
+ }
+ if (!UnifiedCache || !UnifiedCache->UpstreamKVDB)
return std::nullopt;
- assert(isAddrAligned(Align(8), ActionP->Data.data()));
- return ActionP->Data;
+
+ // Try to fault in from upstream.
+ return UnifiedCache->faultInFromUpstreamKV(Key);
}
Expected<std::unique_ptr<OnDiskKeyValueDB>>
OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
- StringRef ValueName, size_t ValueSize) {
+ StringRef ValueName, size_t ValueSize,
+ UnifiedOnDiskCache *Cache) {
if (std::error_code EC = sys::fs::create_directories(Path))
return createFileError(Path, EC);
@@ -87,10 +94,14 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
return std::move(E);
return std::unique_ptr<OnDiskKeyValueDB>(
- new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache)));
+ new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache), Cache));
}
Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const {
+ if (UnifiedCache && UnifiedCache->UpstreamKVDB) {
+ if (auto E = UnifiedCache->UpstreamKVDB->validate(CheckValue))
+ return E;
+ }
return Cache.validate(
[&](FileOffset Offset,
OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error {
diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
new file mode 100644
index 0000000..ae9d818
--- /dev/null
+++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
@@ -0,0 +1,613 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one
+/// directory while also restricting storage growth with a scheme of chaining
+/// the two most recent directories (primary & upstream), where the primary
+/// "faults-in" data from the upstream one. When the primary (most recent)
+/// directory exceeds its intended limit a new empty directory becomes the
+/// primary one.
+///
+/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open
+/// receives) there are directories named like this:
+///
+/// 'v<version>.<x>'
+/// 'v<version>.<x+1>'
+/// 'v<version>.<x+2>'
+/// ...
+///
+/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and
+/// the part after the dot is an increasing integer. The primary directory is
+/// the one with the highest integer and the upstream one is the directory
+/// before it. For example, if the sub-directories contained are:
+///
+/// 'v1.5', 'v1.6', 'v1.7', 'v1.8'
+///
+/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are
+/// unused directories that can be safely deleted at any time and by any
+/// process.
+///
+/// Contained within the top-level directory is a file named "lock" which is
+/// used for processes to take shared or exclusive locks for the contents of the
+/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock
+/// for the top-level directory; when it closes, if the primary sub-directory
+/// exceeded its limit, it attempts to get an exclusive lock in order to create
+/// a new empty primary directory; if it can't get the exclusive lock it gives
+/// up and lets the next \p UnifiedOnDiskCache instance that closes attempt
+/// again.
+///
+/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a
+/// directory, by any process, the storage size in that directory will keep
+/// growing unrestricted. But the major benefit is that garbage-collection can
+/// be triggered on a directory concurrently, at any time and by any process,
+/// without affecting any active readers/writers in the same process or other
+/// processes.
+///
+/// The \c UnifiedOnDiskCache also provides validation and recovery on top of
+/// the underlying on-disk storage. The low-level storage is designed to remain
+/// coherent across regular process crashes, but may be invalid after power loss
+/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows
+/// validating the contents once per boot and can recover by marking invalid
+/// data for garbage collection.
+///
+/// The data recovery described above requires exclusive access to the CAS, and
+/// it is an error to attempt recovery if the CAS is open in any process/thread.
+/// In order to maximize backwards compatibility with tools that do not perform
+/// validation before opening the CAS, we do not attempt to get exclusive access
+/// until recovery is actually performed, meaning as long as the data is valid
+/// it will not conflict with concurrent use.
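+///
+/// A minimal usage sketch (the root path and hash configuration below are
+/// illustrative assumptions, not requirements of the API):
+///
+/// \code
+///   std::shared_ptr<UnifiedOnDiskCache> UniDB;
+///   if (Error E = UnifiedOnDiskCache::open("/path/to/cas",
+///                                          /*SizeLimit=*/std::nullopt,
+///                                          "blake3", /*HashByteSize=*/32)
+///                     .moveInto(UniDB))
+///     return E;
+///   OnDiskGraphDB &GraphDB = UniDB->getGraphDB();
+///   // ... store into / load from GraphDB and the key-value DB ...
+///   // Closing may chain a new primary directory if the size limit was hit.
+///   if (Error E = UniDB->close())
+///     return E;
+/// \endcode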
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "BuiltinCAS.h"
+#include "OnDiskCommon.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include <optional>
+
+#if __has_include(<sys/sysctl.h>)
+#include <sys/sysctl.h>
+#endif
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+/// FIXME: When the version of \p DBDirPrefix is bumped up we need to figure out
+/// how to handle the leftover sub-directories of the previous version, within
+/// the \p UnifiedOnDiskCache::collectGarbage function.
+static constexpr StringLiteral DBDirPrefix = "v1.";
+
+static constexpr StringLiteral ValidationFilename = "v1.validation";
+static constexpr StringLiteral CorruptPrefix = "corrupt.";
+
+ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef<char> Value) {
+ // Little-endian encoded.
+ assert(Value.size() == sizeof(uint64_t));
+ return ObjectID::fromOpaqueData(support::endian::read64le(Value.data()));
+}
+
+UnifiedOnDiskCache::ValueBytes
+UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) {
+ // Little-endian encoded.
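+ // For example, an ID with opaque value 0x0102 is stored as the bytes
+ // {0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}.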
+ UnifiedOnDiskCache::ValueBytes ValBytes;
+ static_assert(ValBytes.size() == sizeof(ID.getOpaqueData()));
+ support::endian::write64le(ValBytes.data(), ID.getOpaqueData());
+ return ValBytes;
+}
+
+Expected<std::optional<ArrayRef<char>>>
+UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef<uint8_t> Key) {
+ assert(UpstreamGraphDB);
+ assert(UpstreamKVDB);
+
+ std::optional<ArrayRef<char>> UpstreamValue;
+ if (Error E = UpstreamKVDB->get(Key).moveInto(UpstreamValue))
+ return std::move(E);
+ if (!UpstreamValue)
+ return std::nullopt;
+
+ // The value is the \p ObjectID in the context of the upstream
+ // \p OnDiskGraphDB instance. Translate it to the context of the primary
+ // \p OnDiskGraphDB instance.
+ ObjectID UpstreamID = getObjectIDFromValue(*UpstreamValue);
+ auto PrimaryID =
+ PrimaryGraphDB->getReference(UpstreamGraphDB->getDigest(UpstreamID));
+ if (LLVM_UNLIKELY(!PrimaryID))
+ return PrimaryID.takeError();
+ return PrimaryKVDB->put(Key, getValueFromObjectID(*PrimaryID));
+}
+
+/// \returns all the 'v<version>.<x>' names of sub-directories, sorted in
+/// ascending order of the integer after the dot. Corrupt directories, if
+/// included, will come first.
+static Expected<SmallVector<std::string, 4>>
+getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) {
+ struct DBDir {
+ uint64_t Order;
+ std::string Name;
+ };
+ SmallVector<DBDir> FoundDBDirs;
+
+ std::error_code EC;
+ for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE;
+ DirI.increment(EC)) {
+ if (DirI->type() != sys::fs::file_type::directory_file)
+ continue;
+ StringRef SubDir = sys::path::filename(DirI->path());
+ if (IncludeCorrupt && SubDir.starts_with(CorruptPrefix)) {
+ FoundDBDirs.push_back({0, std::string(SubDir)});
+ continue;
+ }
+ if (!SubDir.starts_with(DBDirPrefix))
+ continue;
+ uint64_t Order;
+ if (SubDir.substr(DBDirPrefix.size()).getAsInteger(10, Order))
+ return createStringError(inconvertibleErrorCode(),
+ "unexpected directory " + DirI->path());
+ FoundDBDirs.push_back({Order, std::string(SubDir)});
+ }
+ if (EC)
+ return createFileError(Path, EC);
+
+ llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool {
+ // Strict weak ordering ('<', not '<=') is required by llvm::sort.
+ return LHS.Order < RHS.Order;
+ });
+
+ SmallVector<std::string, 4> DBDirs;
+ for (DBDir &Dir : FoundDBDirs)
+ DBDirs.push_back(std::move(Dir.Name));
+ return DBDirs;
+}
+
+static Expected<SmallVector<std::string, 4>> getAllGarbageDirs(StringRef Path) {
+ auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true);
+ if (!DBDirs)
+ return DBDirs.takeError();
+
+ // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure
+ // out how to handle the leftover sub-directories of the previous version.
+
+ for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) {
+ StringRef Back(DBDirs->back());
+ if (Back.starts_with(CorruptPrefix))
+ break;
+ DBDirs->pop_back();
+ }
+ return *DBDirs;
+}
+
+/// Given a sub-directory name 'v<version>.<x>', writes the next name
+/// 'v<version>.<x+1>' to \p OS.
+static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) {
+ assert(DBDir.starts_with(DBDirPrefix));
+ uint64_t Count;
+ bool Failed = DBDir.substr(DBDirPrefix.size()).getAsInteger(10, Count);
+ assert(!Failed);
+ (void)Failed;
+ OS << DBDirPrefix << Count + 1;
+}
+
+static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath,
+ bool CheckHash) {
+ SmallVector<StringRef> Args{LLVMCasBinary, "-cas", RootPath, "-validate"};
+ if (CheckHash)
+ Args.push_back("-check-hash");
+
+ llvm::SmallString<128> StdErrPath;
+ int StdErrFD = -1;
+ if (std::error_code EC = sys::fs::createTemporaryFile(
+ "llvm-cas-validate-stderr", "txt", StdErrFD, StdErrPath,
+ llvm::sys::fs::OF_Text))
+ return createStringError(EC, "failed to create temporary file");
+ FileRemover OutputRemover(StdErrPath.c_str());
+
+ std::optional<llvm::StringRef> Redirects[] = {
+ {""}, // stdin = /dev/null
+ {""}, // stdout = /dev/null
+ StdErrPath.str(),
+ };
+
+ std::string ErrMsg;
+ int Result =
+ sys::ExecuteAndWait(LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects,
+ /*SecondsToWait=*/120, /*MemoryLimit=*/0, &ErrMsg);
+
+ if (Result == -1)
+ return createStringError("failed to exec " + join(Args, " ") + ": " +
+ ErrMsg);
+ if (Result != 0) {
+ llvm::SmallString<64> Err("cas contents invalid");
+ if (!ErrMsg.empty()) {
+ Err += ": ";
+ Err += ErrMsg;
+ }
+ auto StdErrBuf = MemoryBuffer::getFile(StdErrPath.c_str());
+ if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) {
+ Err += ": ";
+ Err += (*StdErrBuf)->getBuffer();
+ }
+ return createStringError(Err);
+ }
+ return Error::success();
+}
+
+static Error validateInProcess(StringRef RootPath, StringRef HashName,
+ unsigned HashByteSize, bool CheckHash) {
+ std::shared_ptr<UnifiedOnDiskCache> UniDB;
+ if (Error E = UnifiedOnDiskCache::open(RootPath, std::nullopt, HashName,
+ HashByteSize)
+ .moveInto(UniDB))
+ return E;
+ auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB);
+ if (Error E = CAS->validate(CheckHash))
+ return E;
+ auto Cache = builtin::createActionCacheFromUnifiedOnDiskCache(UniDB);
+ if (Error E = Cache->validate())
+ return E;
+ return Error::success();
+}
+
+static Expected<uint64_t> getBootTime() {
+#if __has_include(<sys/sysctl.h>) && defined(KERN_BOOTTIME)
+ struct timeval TV;
+ size_t TVLen = sizeof(TV);
+ int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME};
+ if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0)
+ return createStringError(llvm::errnoAsErrorCode(),
+ "failed to get boottime");
+ if (TVLen != sizeof(TV))
+ return createStringError("sysctl kern.boottime unexpected format");
+ return TV.tv_sec;
+#elif defined(__linux__)
+ // Use the mtime for /proc, which is recreated during system boot.
+ // We could also read /proc/stat and search for 'btime'.
+ sys::fs::file_status Status;
+ if (std::error_code EC = sys::fs::status("/proc", Status))
+ return createFileError("/proc", EC);
+ return Status.getLastModificationTime().time_since_epoch().count();
+#else
+ llvm::report_fatal_error("getBootTime unimplemented");
+#endif
+}
+
+Expected<ValidationResult> UnifiedOnDiskCache::validateIfNeeded(
+ StringRef RootPath, StringRef HashName, unsigned HashByteSize,
+ bool CheckHash, bool AllowRecovery, bool ForceValidation,
+ std::optional<StringRef> LLVMCasBinaryPath) {
+ if (std::error_code EC = sys::fs::create_directories(RootPath))
+ return createFileError(RootPath, EC);
+
+ SmallString<256> PathBuf(RootPath);
+ sys::path::append(PathBuf, ValidationFilename);
+ int FD = -1;
+ if (std::error_code EC = sys::fs::openFileForReadWrite(
+ PathBuf, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+ return createFileError(PathBuf, EC);
+ assert(FD != -1);
+
+ sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
+ auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(File); });
+
+ if (std::error_code EC = lockFileThreadSafe(FD, sys::fs::LockKind::Exclusive))
+ return createFileError(PathBuf, EC);
+ auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(FD); });
+
+ SmallString<8> Bytes;
+ if (Error E = sys::fs::readNativeFileToEOF(File, Bytes))
+ return createFileError(PathBuf, std::move(E));
+
+ uint64_t ValidationBootTime = 0;
+ if (!Bytes.empty() &&
+ StringRef(Bytes).trim().getAsInteger(10, ValidationBootTime))
+ return createFileError(PathBuf, errc::illegal_byte_sequence,
+ "expected integer");
+
+ static uint64_t BootTime = 0;
+ if (BootTime == 0)
+ if (Error E = getBootTime().moveInto(BootTime))
+ return std::move(E);
+
+ if (ValidationBootTime == BootTime && !ForceValidation)
+ return ValidationResult::Skipped;
+
+ // Validate!
+ bool NeedsRecovery = false;
+ if (Error E =
+ LLVMCasBinaryPath
+ ? validateOutOfProcess(*LLVMCasBinaryPath, RootPath, CheckHash)
+ : validateInProcess(RootPath, HashName, HashByteSize,
+ CheckHash)) {
+ if (AllowRecovery) {
+ consumeError(std::move(E));
+ NeedsRecovery = true;
+ } else {
+ return std::move(E);
+ }
+ }
+
+ if (NeedsRecovery) {
+ sys::path::remove_filename(PathBuf);
+ sys::path::append(PathBuf, "lock");
+
+ int LockFD = -1;
+ if (std::error_code EC = sys::fs::openFileForReadWrite(
+ PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+ return createFileError(PathBuf, EC);
+ sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+ auto CloseLock = make_scope_exit([&]() { sys::fs::closeFile(LockFile); });
+ if (std::error_code EC = tryLockFileThreadSafe(LockFD)) {
+ if (EC == std::errc::no_lock_available)
+ return createFileError(
+ PathBuf, EC,
+ "CAS validation requires exclusive access but CAS was in use");
+ return createFileError(PathBuf, EC);
+ }
+ auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+ auto DBDirs = getAllDBDirs(RootPath);
+ if (!DBDirs)
+ return DBDirs.takeError();
+
+ for (StringRef DBDir : *DBDirs) {
+ sys::path::remove_filename(PathBuf);
+ sys::path::append(PathBuf, DBDir);
+ std::error_code EC;
+ int Attempt = 0, MaxAttempts = 100;
+ SmallString<128> GCPath;
+ for (; Attempt < MaxAttempts; ++Attempt) {
+ GCPath.assign(RootPath);
+ sys::path::append(GCPath, CorruptPrefix + std::to_string(Attempt) +
+ "." + DBDir);
+ EC = sys::fs::rename(PathBuf, GCPath);
+ // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST.
+ if (EC != errc::directory_not_empty && EC != errc::file_exists)
+ break;
+ }
+ if (Attempt == MaxAttempts)
+ return createStringError(
+ EC, "rename " + PathBuf +
+ " failed: too many CAS directories awaiting pruning");
+ if (EC)
+ return createStringError(EC, "rename " + PathBuf + " to " + GCPath +
+ " failed: " + EC.message());
+ }
+ }
+
+ if (ValidationBootTime != BootTime) {
+ // Fix filename in case we have error to report.
+ sys::path::remove_filename(PathBuf);
+ sys::path::append(PathBuf, ValidationFilename);
+ if (std::error_code EC = sys::fs::resize_file(FD, 0))
+ return createFileError(PathBuf, EC);
+ raw_fd_ostream OS(FD, /*shouldClose=*/false);
+ OS.seek(0); // resize does not reset position
+ OS << BootTime << '\n';
+ if (OS.has_error())
+ return createFileError(PathBuf, OS.error());
+ }
+
+ return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid;
+}
+
+Expected<std::unique_ptr<UnifiedOnDiskCache>>
+UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit,
+ StringRef HashName, unsigned HashByteSize,
+ OnDiskGraphDB::FaultInPolicy FaultInPolicy) {
+ if (std::error_code EC = sys::fs::create_directories(RootPath))
+ return createFileError(RootPath, EC);
+
+ SmallString<256> PathBuf(RootPath);
+ sys::path::append(PathBuf, "lock");
+ int LockFD = -1;
+ if (std::error_code EC = sys::fs::openFileForReadWrite(
+ PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+ return createFileError(PathBuf, EC);
+ assert(LockFD != -1);
+ // Lock the directory with a shared lock, which prevents other processes from
+ // creating a new chain (essentially, while a \p UnifiedOnDiskCache instance
+ // holds a shared lock, the storage for the primary directory will grow
+ // unrestricted).
+ if (std::error_code EC =
+ lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared))
+ return createFileError(PathBuf, EC);
+
+ auto DBDirs = getAllDBDirs(RootPath);
+ if (!DBDirs)
+ return DBDirs.takeError();
+ if (DBDirs->empty())
+ DBDirs->push_back((Twine(DBDirPrefix) + "1").str());
+
+ assert(!DBDirs->empty());
+
+ // If there is only one directory, open the databases in it. If there are two
+ // or more directories, chain the two most recent, with the most recent being
+ // the primary one. The remaining directories are unused data that can be
+ // garbage-collected.
+ auto UniDB = std::unique_ptr<UnifiedOnDiskCache>(new UnifiedOnDiskCache());
+ std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
+ std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
+ if (DBDirs->size() > 1) {
+ StringRef UpstreamDir = *(DBDirs->end() - 2);
+ PathBuf = RootPath;
+ sys::path::append(PathBuf, UpstreamDir);
+ if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+ /*UpstreamDB=*/nullptr, FaultInPolicy)
+ .moveInto(UpstreamGraphDB))
+ return std::move(E);
+ if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+ /*ValueName=*/"objectid",
+ /*ValueSize=*/sizeof(uint64_t))
+ .moveInto(UpstreamKVDB))
+ return std::move(E);
+ }
+
+ StringRef PrimaryDir = *(DBDirs->end() - 1);
+ PathBuf = RootPath;
+ sys::path::append(PathBuf, PrimaryDir);
+ std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
+ if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+ UpstreamGraphDB.get(), FaultInPolicy)
+ .moveInto(PrimaryGraphDB))
+ return std::move(E);
+ std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
+ // \p UnifiedOnDiskCache does manual chaining for key-value requests,
+ // including an extra translation step of the value during fault-in.
+ if (Error E =
+ OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+ /*ValueName=*/"objectid",
+ /*ValueSize=*/sizeof(uint64_t), UniDB.get())
+ .moveInto(PrimaryKVDB))
+ return std::move(E);
+
+ UniDB->RootPath = RootPath;
+ UniDB->SizeLimit = SizeLimit.value_or(0);
+ UniDB->LockFD = LockFD;
+ UniDB->NeedsGarbageCollection = DBDirs->size() > 2;
+ UniDB->PrimaryDBDir = PrimaryDir;
+ UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB);
+ UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB);
+ UniDB->UpstreamKVDB = std::move(UpstreamKVDB);
+ UniDB->PrimaryKVDB = std::move(PrimaryKVDB);
+
+ return std::move(UniDB);
+}
+
+void UnifiedOnDiskCache::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+ this->SizeLimit = SizeLimit.value_or(0);
+}
+
+uint64_t UnifiedOnDiskCache::getStorageSize() const {
+ uint64_t TotalSize = getPrimaryStorageSize();
+ if (UpstreamGraphDB)
+ TotalSize += UpstreamGraphDB->getStorageSize();
+ if (UpstreamKVDB)
+ TotalSize += UpstreamKVDB->getStorageSize();
+ return TotalSize;
+}
+
+uint64_t UnifiedOnDiskCache::getPrimaryStorageSize() const {
+ return PrimaryGraphDB->getStorageSize() + PrimaryKVDB->getStorageSize();
+}
+
+bool UnifiedOnDiskCache::hasExceededSizeLimit() const {
+ uint64_t CurSizeLimit = SizeLimit;
+ if (!CurSizeLimit)
+ return false;
+
+ // If hard-limit utilization is beyond 85%, declare the limit exceeded so
+ // that clean-up is requested.
+ unsigned CurrentPercent =
+ std::max(PrimaryGraphDB->getHardStorageLimitUtilization(),
+ PrimaryKVDB->getHardStorageLimitUtilization());
+ if (CurrentPercent > 85)
+ return true;
+
+ // We allow each of the directories in the chain to reach up to half the
+ // intended size limit. Check whether the primary directory has exceeded half
+ // the limit or not, in order to decide whether we need to start a new chain.
+ //
+ // We could check the size limit against the sum of sizes of both the primary
+ // and upstream directories but then if the upstream is significantly larger
+ // than the intended limit, it would trigger a new chain to be created before
+ // the primary has reached its own limit. Essentially, in such a situation we
+ // prefer reclaiming the storage later in order to get more consistent
+ // cache-hit behavior.
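+ // For example, with a size limit of 8 GB, a new chain is only started once
+ // the primary directory alone exceeds 4 GB.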
+ return (CurSizeLimit / 2) < getPrimaryStorageSize();
+}
+
+Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
+ if (LockFD == -1)
+ return Error::success(); // already closed.
+ auto CloseLock = make_scope_exit([&]() {
+ assert(LockFD >= 0);
+ sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+ sys::fs::closeFile(LockFile);
+ LockFD = -1;
+ });
+
+ bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false;
+ UpstreamKVDB.reset();
+ PrimaryKVDB.reset();
+ UpstreamGraphDB.reset();
+ PrimaryGraphDB.reset();
+ if (std::error_code EC = unlockFileThreadSafe(LockFD))
+ return createFileError(RootPath, EC);
+
+ if (!ExceededSizeLimit)
+ return Error::success();
+
+ // The primary directory exceeded its intended size limit. Try to get an
+ // exclusive lock in order to create a new primary directory for next time
+ // this \p UnifiedOnDiskCache path is opened.
+
+ if (std::error_code EC = tryLockFileThreadSafe(
+ LockFD, std::chrono::milliseconds(0), sys::fs::LockKind::Exclusive)) {
+ if (EC == errc::no_lock_available)
+ return Error::success(); // couldn't get exclusive lock, give up.
+ return createFileError(RootPath, EC);
+ }
+ auto UnlockFile = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+ // Managed to get an exclusive lock which means there are no other open
+ // \p UnifiedOnDiskCache instances for the same path, so we can safely start a
+ // new primary directory. To start a new primary directory we just have to
+ // create a new empty directory with the next consecutive index; since this is
+ // an atomic operation we will leave the top-level directory in a consistent
+ // state even if the process dies during this code-path.
+
+ SmallString<256> PathBuf(RootPath);
+ raw_svector_ostream OS(PathBuf);
+ OS << sys::path::get_separator();
+ getNextDBDirName(PrimaryDBDir, OS);
+ if (std::error_code EC = sys::fs::create_directory(PathBuf))
+ return createFileError(PathBuf, EC);
+
+ NeedsGarbageCollection = true;
+ return Error::success();
+}
+
+UnifiedOnDiskCache::UnifiedOnDiskCache() = default;
+
+UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); }
+
+Error UnifiedOnDiskCache::collectGarbage(StringRef Path) {
+ auto DBDirs = getAllGarbageDirs(Path);
+ if (!DBDirs)
+ return DBDirs.takeError();
+
+ SmallString<256> PathBuf(Path);
+ for (StringRef UnusedSubDir : *DBDirs) {
+ sys::path::append(PathBuf, UnusedSubDir);
+ if (std::error_code EC = sys::fs::remove_directories(PathBuf))
+ return createFileError(PathBuf, EC);
+ sys::path::remove_filename(PathBuf);
+ }
+ return Error::success();
+}
+
+Error UnifiedOnDiskCache::collectGarbage() { return collectGarbage(RootPath); }
diff --git a/llvm/lib/CGData/OutlinedHashTreeRecord.cpp b/llvm/lib/CGData/OutlinedHashTreeRecord.cpp
index cc76063..2b6e2f0 100644
--- a/llvm/lib/CGData/OutlinedHashTreeRecord.cpp
+++ b/llvm/lib/CGData/OutlinedHashTreeRecord.cpp
@@ -37,7 +37,7 @@ template <> struct MappingTraits<HashNodeStable> {
template <> struct CustomMappingTraits<IdHashNodeStableMapTy> {
static void inputOne(IO &io, StringRef Key, IdHashNodeStableMapTy &V) {
HashNodeStable NodeStable;
- io.mapRequired(Key.str().c_str(), NodeStable);
+ io.mapRequired(Key, NodeStable);
unsigned Id;
if (Key.getAsInteger(0, Id)) {
io.setError("Id not an integer");
@@ -48,7 +48,7 @@ template <> struct CustomMappingTraits<IdHashNodeStableMapTy> {
static void output(IO &io, IdHashNodeStableMapTy &V) {
for (auto Iter = V.begin(); Iter != V.end(); ++Iter)
- io.mapRequired(utostr(Iter->first).c_str(), Iter->second);
+ io.mapRequired(utostr(Iter->first), Iter->second);
}
};
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index e5c85d5..1ea30d8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -745,11 +745,6 @@ void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const {
Asm->emitInt32(QualifiedNameHash);
}
-constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[];
-constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[];
-constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[];
-constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[];
-
#ifndef NDEBUG
void AppleAccelTableWriter::Header::print(raw_ostream &OS) const {
OS << "Magic: " << format("0x%x", Magic) << "\n"
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 8aa488f..f65d88a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1443,7 +1443,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
MF.hasBBSections() && NumMBBSectionRanges > 1,
// Use static_cast to avoid breakage of tests on windows.
static_cast<bool>(BBAddrMapSkipEmitBBEntries), HasCalls,
- static_cast<bool>(EmitBBHash)};
+ static_cast<bool>(EmitBBHash), false};
}
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 171fb83..98cdada 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -112,8 +112,7 @@ void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) {
/// to the first intersecting scope range if one exists.
static std::optional<ArrayRef<InsnRange>::iterator>
intersects(const MachineInstr *StartMI, const MachineInstr *EndMI,
- const ArrayRef<InsnRange> &Ranges,
- const InstructionOrdering &Ordering) {
+ ArrayRef<InsnRange> Ranges, const InstructionOrdering &Ordering) {
for (auto RangesI = Ranges.begin(), RangesE = Ranges.end();
RangesI != RangesE; ++RangesI) {
if (EndMI && Ordering.isBefore(EndMI, RangesI->first))
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 555c56f..b16e1315 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1120,7 +1120,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
constructMemberDIE(Buffer, DDTy);
}
} else if (auto *Property = dyn_cast<DIObjCProperty>(Element)) {
- DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer);
+ DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer, Property);
StringRef PropertyName = Property->getName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
if (Property->getType())
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index fbcd614..485b44ae 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -287,6 +287,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
}
continue;
}
+ case 'h': { // Basic block hash specifier.
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramPathAndClusterInfo.end())
+ continue;
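+ // Each value is expected to have the form '<BBID>:<Hash>', with the BBID
+ // in decimal and the hash in hexadecimal.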
+ for (auto BBIDHashStr : Values) {
+ auto [BBIDStr, HashStr] = BBIDHashStr.split(':');
+ unsigned long long BBID = 0, Hash = 0;
+ if (getAsUnsignedInteger(BBIDStr, 10, BBID))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ BBIDStr + "'");
+ if (getAsUnsignedInteger(HashStr, 16, Hash))
+ return createProfileParseError(
+ Twine("unsigned integer expected in hex format: '") + HashStr +
+ "'");
+ FI->second.BBHashes[BBID] = Hash;
+ }
+ continue;
+ }
default:
return createProfileParseError(Twine("invalid specifier: '") +
Twine(Specifier) + "'");
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 8ea1326..0309e22 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -368,7 +368,7 @@ class CodeGenPrepare {
std::unique_ptr<DominatorTree> DT;
public:
- CodeGenPrepare(){};
+ CodeGenPrepare() = default;
CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
/// If encounter huge function, we need to limit the build time.
bool IsHugeFunc = false;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9ace7d6..ec4d13f 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -589,8 +589,8 @@ bool CombinerHelper::matchCombineShuffleVector(
return true;
}
-void CombinerHelper::applyCombineShuffleVector(
- MachineInstr &MI, const ArrayRef<Register> Ops) const {
+void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
+ ArrayRef<Register> Ops) const {
Register DstReg = MI.getOperand(0).getReg();
Builder.setInsertPt(*MI.getParent(), MI);
Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index ca82857..5fab6ec 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1893,6 +1893,8 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_SSUBSAT:
case TargetOpcode::G_USUBSAT:
+ case TargetOpcode::G_SBFX:
+ case TargetOpcode::G_UBFX:
return false;
case TargetOpcode::G_SSHLSAT:
case TargetOpcode::G_USHLSAT:
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index bb9c76f..8c6d219 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -363,8 +363,9 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
case MachineOperand::MO_RegisterMask:
case MachineOperand::MO_RegisterLiveOut: {
// Shallow compare of the two RegMasks
- const uint32_t *RegMask = getRegMask();
- const uint32_t *OtherRegMask = Other.getRegMask();
+ const uint32_t *RegMask = isRegMask() ? getRegMask() : getRegLiveOut();
+ const uint32_t *OtherRegMask =
+ isRegMask() ? Other.getRegMask() : Other.getRegLiveOut();
if (RegMask == OtherRegMask)
return true;
@@ -434,7 +435,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
if (const MachineFunction *MF = getMFIfAvailable(MO)) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
- const uint32_t *RegMask = MO.getRegMask();
+ const uint32_t *RegMask =
+ MO.isRegMask() ? MO.getRegMask() : MO.getRegLiveOut();
std::vector<stable_hash> RegMaskHashes(RegMask, RegMask + RegMaskSize);
return hash_combine(MO.getType(), MO.getTargetFlags(),
stable_hash_combine(RegMaskHashes));
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 3ed1045..f18c051 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -334,7 +334,7 @@ public:
LiveIntervals &LIS;
};
- MachineSchedulerImpl() {}
+ MachineSchedulerImpl() = default;
// Migration only
void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }
@@ -358,7 +358,7 @@ public:
MachineLoopInfo &MLI;
AAResults &AA;
};
- PostMachineSchedulerImpl() {}
+ PostMachineSchedulerImpl() = default;
// Migration only
void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index 9d56696..6da708d 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -136,7 +136,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
unsigned RegMaskSize =
MachineOperand::getRegMaskSize(TRI->getNumRegs());
- const uint32_t *RegMask = MO.getRegMask();
+ const uint32_t *RegMask =
+ MO.isRegMask() ? MO.getRegMask() : MO.getRegLiveOut();
std::vector<llvm::stable_hash> RegMaskHashes(RegMask,
RegMask + RegMaskSize);
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 697b779..ec6ffd4 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -206,7 +206,7 @@ private:
bool Error = false; ///< Could not allocate.
explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
- explicit LiveReg() {}
+ explicit LiveReg() = default;
unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); }
};
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index e17a214b..38f6deb 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -378,7 +378,7 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
public:
// For legacy pass only.
- RegisterCoalescer() {}
+ RegisterCoalescer() = default;
RegisterCoalescer &operator=(RegisterCoalescer &&Other) = default;
RegisterCoalescer(LiveIntervals *LIS, SlotIndexes *SI,
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1ef5dc2..46c4bb8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2715,6 +2715,12 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
AddToWorklist(Add.getNode());
+ // We can't set InBounds even if both original ptradds were InBounds and
+ // NUW: SDAG usually represents pointers as integers, therefore, the
+ // matched pattern behaves as if it had implicit casts:
+ // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds x, y))), z)
+ // The outer inbounds ptradd might therefore rely on a provenance that x
+ // does not have.
return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
}
}
@@ -2740,6 +2746,12 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
// that.
SDNodeFlags Flags =
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+ // We can't set InBounds even if both original ptradds were InBounds and
+ // NUW: SDAG usually represents pointers as integers, therefore, the
+ // matched pattern behaves as if it had implicit casts:
+ // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds GA, v))), c)
+ // The outer inbounds ptradd might therefore rely on a provenance that
+ // GA does not have.
SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
AddToWorklist(Inner.getNode());
return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
@@ -2763,8 +2775,13 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
// If both additions in the original were NUW, reassociation preserves that.
- SDNodeFlags ReassocFlags =
- (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+ SDNodeFlags CommonFlags = N->getFlags() & N1->getFlags();
+ SDNodeFlags ReassocFlags = CommonFlags & SDNodeFlags::NoUnsignedWrap;
+ if (CommonFlags.hasNoUnsignedWrap()) {
+ // If both operations are NUW and the PTRADD is inbounds, the offsets are
+ // both non-negative, so the reassociated PTRADDs are also inbounds.
+ ReassocFlags |= N->getFlags() & SDNodeFlags::InBounds;
+ }
if (ZIsConstant != YIsConstant) {
if (YIsConstant)
@@ -9357,7 +9374,7 @@ static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
// Check if the bytes offsets we are looking at match with either big or
// little endian value loaded. Return true for big endian, false for little
// endian, and std::nullopt if match failed.
-static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
+static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,
int64_t FirstOffset) {
// The endian can be decided only when it is 2 bytes at least.
unsigned Width = ByteOffsets.size();
@@ -22743,7 +22760,10 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
} else {
- NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
+ // The original DAG loaded the entire vector from memory, so arithmetic
+ // within it must be inbounds.
+ NewPtr = TLI.getInboundsVectorElementPointer(DAG, Ptr, Value.getValueType(),
+ Idx);
}
return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index bf1abfe..58983cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1172,6 +1172,12 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FAKE_USE:
Res = SoftenFloatOp_FAKE_USE(N);
break;
+ case ISD::STACKMAP:
+ Res = SoftenFloatOp_STACKMAP(N, OpNo);
+ break;
+ case ISD::PATCHPOINT:
+ Res = SoftenFloatOp_PATCHPOINT(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -1512,6 +1518,20 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FAKE_USE(SDNode *N) {
N->getOperand(0), Op1);
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_STACKMAP(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
+ SmallVector<SDValue> NewOps(N->ops());
+ NewOps[OpNo] = GetSoftenedFloat(NewOps[OpNo]);
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_PATCHPOINT(SDNode *N, unsigned OpNo) {
+ assert(OpNo >= 7);
+ SmallVector<SDValue> NewOps(N->ops());
+ NewOps[OpNo] = GetSoftenedFloat(NewOps[OpNo]);
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
//===----------------------------------------------------------------------===//
// Float Result Expansion
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b1776ea..44e5a18 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2871,18 +2871,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
SmallVector<SDValue> NewOps(N->ops());
- SDValue Operand = N->getOperand(OpNo);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType());
- NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand);
+ NewOps[OpNo] = GetPromotedInteger(NewOps[OpNo]);
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) {
assert(OpNo >= 7);
SmallVector<SDValue> NewOps(N->ops());
- SDValue Operand = N->getOperand(OpNo);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType());
- NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand);
+ NewOps[OpNo] = GetPromotedInteger(NewOps[OpNo]);
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 9656a30..ede522e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -658,6 +658,8 @@ private:
SDValue SoftenFloatOp_ATOMIC_STORE(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
SDValue SoftenFloatOp_FAKE_USE(SDNode *N);
+ SDValue SoftenFloatOp_STACKMAP(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_PATCHPOINT(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Float Expansion Support: LegalizeFloatTypes.cpp
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index da4e409..9bdf822 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10668,19 +10668,20 @@ static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
DAG.getConstant(MaxIndex, dl, IdxVT));
}
-SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
- SDValue VecPtr, EVT VecVT,
- SDValue Index) const {
+SDValue
+TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
+ EVT VecVT, SDValue Index,
+ const SDNodeFlags PtrArithFlags) const {
return getVectorSubVecPointer(
DAG, VecPtr, VecVT,
EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
- Index);
+ Index, PtrArithFlags);
}
-SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
- SDValue VecPtr, EVT VecVT,
- EVT SubVecVT,
- SDValue Index) const {
+SDValue
+TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr,
+ EVT VecVT, EVT SubVecVT, SDValue Index,
+ const SDNodeFlags PtrArithFlags) const {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
@@ -10704,7 +10705,7 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
- return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
+ return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
}
//===----------------------------------------------------------------------===//
@@ -12382,8 +12383,10 @@ SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
!IsFast)
return SDValue();
- SDValue NewPtr =
- getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
+ // The original DAG loaded the entire vector from memory, so arithmetic
+ // within it must be inbounds.
+ SDValue NewPtr = getInboundsVectorElementPointer(
+ DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
// We are replacing a vector load with a scalar load. The new load must have
// identical memory op ordering to the original.
diff --git a/llvm/lib/CodeGenTypes/LowLevelType.cpp b/llvm/lib/CodeGenTypes/LowLevelType.cpp
index 4785f26..92b7fad 100644
--- a/llvm/lib/CodeGenTypes/LowLevelType.cpp
+++ b/llvm/lib/CodeGenTypes/LowLevelType.cpp
@@ -54,9 +54,3 @@ LLVM_DUMP_METHOD void LLT::dump() const {
dbgs() << '\n';
}
#endif
-
-const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::VectorScalableFieldInfo;
diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h
index 84757ae..970abdc 100644
--- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h
+++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h
@@ -28,7 +28,7 @@ using MacroOffset2UnitMapTy = DenseMap<uint64_t, DwarfUnit *>;
/// Base class for all Dwarf units(Compile unit/Type table unit).
class DwarfUnit : public OutputSections {
public:
- virtual ~DwarfUnit() {}
+ virtual ~DwarfUnit() = default;
DwarfUnit(LinkingGlobalData &GlobalData, unsigned ID,
StringRef ClangModuleName)
: OutputSections(GlobalData), ID(ID), ClangModuleName(ClangModuleName),
diff --git a/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h b/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h
index f67536e..8ccb4a5 100644
--- a/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h
+++ b/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h
@@ -22,7 +22,7 @@ class StringEntryToDwarfStringPoolEntryMap {
public:
StringEntryToDwarfStringPoolEntryMap(LinkingGlobalData &GlobalData)
: GlobalData(GlobalData) {}
- ~StringEntryToDwarfStringPoolEntryMap() {}
+ ~StringEntryToDwarfStringPoolEntryMap() = default;
/// Create DwarfStringPoolEntry for specified StringEntry if necessary.
/// Initialize DwarfStringPoolEntry with initial values.
diff --git a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index 6c23ba8..23ab534 100644
--- a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -102,7 +102,8 @@ std::optional<CVType> LazyRandomTypeCollection::tryGetType(TypeIndex Index) {
return std::nullopt;
}
- assert(contains(Index));
+ if (!contains(Index))
+ return std::nullopt;
return Records[Index.toArrayIndex()].Type;
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index db5cc37..6c78ef0 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -129,6 +129,25 @@ prettyLanguageVersionString(const DWARFAttribute &AttrValue,
static_cast<SourceLanguageName>(*LName), *LVersion);
}
+static llvm::Expected<llvm::StringRef>
+getApplePropertyName(const DWARFDie &PropDIE) {
+ if (!PropDIE)
+ return llvm::createStringError("invalid DIE");
+
+ if (PropDIE.getTag() != DW_TAG_APPLE_property)
+ return llvm::createStringError("not referencing a DW_TAG_APPLE_property");
+
+ auto PropNameForm = PropDIE.find(DW_AT_APPLE_property_name);
+ if (!PropNameForm)
+ return "";
+
+ auto NameOrErr = PropNameForm->getAsCString();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+
+ return *NameOrErr;
+}
+
static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
const DWARFAttribute &AttrValue, unsigned Indent,
DIDumpOptions DumpOpts) {
@@ -233,6 +252,15 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
Die.getAttributeValueAsReferencedDie(FormValue).getName(
DINameKind::LinkageName))
OS << Space << "\"" << Name << '\"';
+ } else if (Attr == DW_AT_APPLE_property) {
+ auto PropDIE = Die.getAttributeValueAsReferencedDie(FormValue);
+ if (auto PropNameOrErr = getApplePropertyName(PropDIE))
+ OS << Space << "\"" << *PropNameOrErr << '\"';
+ else
+ DumpOpts.RecoverableErrorHandler(createStringError(
+ errc::invalid_argument,
+ llvm::formatv("decoding DW_AT_APPLE_property_name: {0}",
+ toString(PropNameOrErr.takeError()))));
} else if (Attr == DW_AT_type || Attr == DW_AT_containing_type) {
DWARFDie D = resolveReferencedType(Die, FormValue);
if (D && !D.isNULL()) {
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
index 7e606c6a..4e7db82 100644
--- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -27,7 +27,7 @@
namespace llvm {
namespace orc {
-MemoryMapper::~MemoryMapper() {}
+MemoryMapper::~MemoryMapper() = default;
InProcessMemoryMapper::InProcessMemoryMapper(size_t PageSize)
: PageSize(PageSize) {}
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
index 9275586..ca8192b 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
@@ -16,9 +16,11 @@ add_llvm_component_library(LLVMOrcTargetProcess
ExecutorSharedMemoryMapperService.cpp
DefaultHostBootstrapValues.cpp
ExecutorResolver.cpp
+ LibraryResolver.cpp
JITLoaderGDB.cpp
JITLoaderPerf.cpp
JITLoaderVTune.cpp
+ LibraryScanner.cpp
OrcRTBootstrap.cpp
RegisterEHFrames.cpp
SimpleExecutorDylibManager.cpp
@@ -36,6 +38,8 @@ add_llvm_component_library(LLVMOrcTargetProcess
LINK_COMPONENTS
${intel_jit_profiling}
+ BinaryFormat
+ Object
OrcShared
Support
TargetParser
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp
new file mode 100644
index 0000000..35da82a
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp
@@ -0,0 +1,370 @@
+//===- LibraryResolver.cpp - Library Resolution of Unresolved Symbols ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Library resolution implementation for unresolved symbols.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h"
+
+#include "llvm/ADT/StringSet.h"
+
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+
+#include <mutex>
+#include <thread>
+
+#define DEBUG_TYPE "orc-resolver"
+
+namespace llvm::orc {
+
+LibraryResolver::LibraryResolver(const LibraryResolver::Setup &S)
+ : LibPathCache(S.Cache ? S.Cache : std::make_shared<LibraryPathCache>()),
+ LibPathResolver(S.PResolver
+ ? S.PResolver
+ : std::make_shared<PathResolver>(LibPathCache)),
+ ScanHelper(S.BasePaths, LibPathCache, LibPathResolver),
+ FB(S.FilterBuilder), LibMgr(),
+ ShouldScanCall(S.ShouldScanCall ? S.ShouldScanCall
+ : [](StringRef) -> bool { return true; }),
+ scanBatchSize(S.ScanBatchSize) {
+
+ if (ScanHelper.getAllUnits().empty()) {
+ LLVM_DEBUG(dbgs() << "Warning: No base paths provided for scanning.\n");
+ }
+}
+
+std::unique_ptr<LibraryResolutionDriver>
+LibraryResolutionDriver::create(const LibraryResolver::Setup &S) {
+ auto LR = std::make_unique<LibraryResolver>(S);
+ return std::unique_ptr<LibraryResolutionDriver>(
+ new LibraryResolutionDriver(std::move(LR)));
+}
+
+void LibraryResolutionDriver::addScanPath(const std::string &Path, PathType K) {
+ LR->ScanHelper.addBasePath(Path, K);
+}
+
+bool LibraryResolutionDriver::markLibraryLoaded(StringRef Path) {
+ auto Lib = LR->LibMgr.getLibrary(Path);
+ if (!Lib)
+ return false;
+
+ Lib->setState(LibraryManager::LibState::Loaded);
+
+ return true;
+}
+
+bool LibraryResolutionDriver::markLibraryUnLoaded(StringRef Path) {
+ auto Lib = LR->LibMgr.getLibrary(Path);
+ if (!Lib)
+ return false;
+
+ Lib->setState(LibraryManager::LibState::Unloaded);
+
+ return true;
+}
+
+void LibraryResolutionDriver::resolveSymbols(
+ std::vector<std::string> Syms,
+ LibraryResolver::OnSearchComplete OnCompletion,
+ const SearchConfig &Config) {
+ LR->searchSymbolsInLibraries(Syms, std::move(OnCompletion), Config);
+}
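+
+// Illustrative driver round-trip (a sketch, not exercised by this patch; the
+// literal paths and symbol name are hypothetical, and SearchConfig is assumed
+// to be default-constructible):
+//
+//   LibraryResolver::Setup S;
+//   S.BasePaths = {"/usr/lib", "/opt/app/lib"};
+//   auto Driver = LibraryResolutionDriver::create(S);
+//   Driver->addScanPath("/opt/app/plugins", PathType::User);
+//   SearchConfig Config;
+//   Driver->resolveSymbols({"_ZN3app4initEv"},
+//                          [](SymbolQuery &Q) {
+//                            // Inspect Q.getAllResults() here.
+//                          },
+//                          Config);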
+
+static bool shouldIgnoreSymbol(const object::SymbolRef &Sym,
+ uint32_t IgnoreFlags) {
+ Expected<uint32_t> FlagsOrErr = Sym.getFlags();
+ if (!FlagsOrErr) {
+ consumeError(FlagsOrErr.takeError());
+ return true;
+ }
+
+ uint32_t Flags = *FlagsOrErr;
+
+ using Filter = SymbolEnumeratorOptions;
+ if ((IgnoreFlags & Filter::IgnoreUndefined) &&
+ (Flags & object::SymbolRef::SF_Undefined))
+ return true;
+ if ((IgnoreFlags & Filter::IgnoreIndirect) &&
+ (Flags & object::SymbolRef::SF_Indirect))
+ return true;
+ if ((IgnoreFlags & Filter::IgnoreWeak) &&
+ (Flags & object::SymbolRef::SF_Weak))
+ return true;
+
+ return false;
+}
+
+bool SymbolEnumerator::enumerateSymbols(StringRef Path, OnEachSymbolFn OnEach,
+ const SymbolEnumeratorOptions &Opts) {
+ if (Path.empty())
+ return false;
+
+ ObjectFileLoader ObjLoader(Path);
+
+ auto ObjOrErr = ObjLoader.getObjectFile();
+ if (!ObjOrErr) {
+ std::string ErrMsg;
+ handleAllErrors(ObjOrErr.takeError(),
+ [&](const ErrorInfoBase &EIB) { ErrMsg = EIB.message(); });
+ LLVM_DEBUG(dbgs() << "Failed loading object file: " << Path
+ << "\nError: " << ErrMsg << "\n");
+ return false;
+ }
+
+ object::ObjectFile *Obj = &ObjOrErr.get();
+
+ auto processSymbolRange =
+ [&](object::ObjectFile::symbol_iterator_range Range) -> EnumerateResult {
+ for (const auto &Sym : Range) {
+ if (shouldIgnoreSymbol(Sym, Opts.FilterFlags))
+ continue;
+
+ auto NameOrErr = Sym.getName();
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
+ continue;
+ }
+
+ StringRef Name = *NameOrErr;
+ if (Name.empty())
+ continue;
+
+ EnumerateResult Res = OnEach(Name);
+ if (Res != EnumerateResult::Continue)
+ return Res;
+ }
+ return EnumerateResult::Continue;
+ };
+
+ EnumerateResult Res = processSymbolRange(Obj->symbols());
+ if (Res != EnumerateResult::Continue)
+ return Res == EnumerateResult::Stop;
+
+ if (Obj->isELF()) {
+ const auto *ElfObj = cast<object::ELFObjectFileBase>(Obj);
+ Res = processSymbolRange(ElfObj->getDynamicSymbolIterators());
+ if (Res != EnumerateResult::Continue)
+ return Res == EnumerateResult::Stop;
+ } else if (Obj->isCOFF()) {
+ const auto *CoffObj = cast<object::COFFObjectFile>(Obj);
+ for (auto I = CoffObj->export_directory_begin(),
+ E = CoffObj->export_directory_end();
+ I != E; ++I) {
+ StringRef Name;
+ if (I->getSymbolName(Name))
+ continue;
+ if (Name.empty())
+ continue;
+
+      EnumerateResult R = OnEach(Name);
+      if (R != EnumerateResult::Continue)
+        return R == EnumerateResult::Stop;
+ }
+  } else if (Obj->isMachO()) {
+    // TODO: walk Mach-O-specific tables (e.g. the exports trie) if the
+    // generic symbol table enumeration above proves insufficient.
+  }
+
+ return true;
+}
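+
+// Example (sketch): collecting every symbol name from one library; the path
+// below is hypothetical.
+//
+//   std::vector<std::string> Names;
+//   SymbolEnumeratorOptions Opts;
+//   SymbolEnumerator::enumerateSymbols(
+//       "/usr/lib/libz.so",
+//       [&](StringRef Sym) {
+//         Names.push_back(Sym.str());
+//         return EnumerateResult::Continue;
+//       },
+//       Opts);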
+
+class SymbolSearchContext {
+public:
+ SymbolSearchContext(SymbolQuery &Q) : Q(Q) {}
+
+ bool hasSearched(LibraryInfo *Lib) const { return Searched.count(Lib); }
+
+ void markSearched(LibraryInfo *Lib) { Searched.insert(Lib); }
+
+ inline bool allResolved() const { return Q.allResolved(); }
+
+ SymbolQuery &query() { return Q; }
+
+private:
+ SymbolQuery &Q;
+ DenseSet<LibraryInfo *> Searched;
+};
+
+void LibraryResolver::resolveSymbolsInLibrary(
+ LibraryInfo &Lib, SymbolQuery &UnresolvedSymbols,
+ const SymbolEnumeratorOptions &Opts) {
+ LLVM_DEBUG(dbgs() << "Checking unresolved symbols "
+ << " in library : " << Lib.getFileName() << "\n";);
+ StringSet<> DiscoveredSymbols;
+
+ if (!UnresolvedSymbols.hasUnresolved()) {
+ LLVM_DEBUG(dbgs() << "Skipping library: " << Lib.getFullPath()
+ << " — unresolved symbols exist.\n";);
+ return;
+ }
+
+ bool HasEnumerated = false;
+ auto enumerateSymbolsIfNeeded = [&]() {
+ if (HasEnumerated)
+ return;
+
+ HasEnumerated = true;
+
+ LLVM_DEBUG(dbgs() << "Enumerating symbols in library: " << Lib.getFullPath()
+ << "\n";);
+ SymbolEnumerator::enumerateSymbols(
+ Lib.getFullPath(),
+ [&](StringRef sym) {
+ DiscoveredSymbols.insert(sym);
+ return EnumerateResult::Continue;
+ },
+ Opts);
+
+ if (DiscoveredSymbols.empty()) {
+ LLVM_DEBUG(dbgs() << " No symbols and remove library : "
+ << Lib.getFullPath() << "\n";);
+ LibMgr.removeLibrary(Lib.getFullPath());
+ return;
+ }
+ };
+
+ if (!Lib.hasFilter()) {
+ LLVM_DEBUG(dbgs() << "Building filter for library: " << Lib.getFullPath()
+ << "\n";);
+ enumerateSymbolsIfNeeded();
+ SmallVector<StringRef> SymbolVec;
+ SymbolVec.reserve(DiscoveredSymbols.size());
+ for (const auto &KV : DiscoveredSymbols)
+ SymbolVec.push_back(KV.first());
+
+ Lib.ensureFilterBuilt(FB, SymbolVec);
+ LLVM_DEBUG({
+ dbgs() << "DiscoveredSymbols : " << DiscoveredSymbols.size() << "\n";
+ for (const auto &KV : DiscoveredSymbols)
+ dbgs() << "DiscoveredSymbols : " << KV.first() << "\n";
+ });
+ }
+
+ const auto &Unresolved = UnresolvedSymbols.getUnresolvedSymbols();
+ bool HadAnySym = false;
+ LLVM_DEBUG(dbgs() << "Total unresolved symbols : " << Unresolved.size()
+ << "\n";);
+ for (const auto &Sym : Unresolved) {
+ if (Lib.mayContain(Sym)) {
+ LLVM_DEBUG(dbgs() << "Checking symbol '" << Sym
+ << "' in library: " << Lib.getFullPath() << "\n";);
+ enumerateSymbolsIfNeeded();
+ if (DiscoveredSymbols.count(Sym) > 0) {
+ LLVM_DEBUG(dbgs() << " Resolved symbol: " << Sym
+ << " in library: " << Lib.getFullPath() << "\n";);
+ UnresolvedSymbols.resolve(Sym, Lib.getFullPath());
+ HadAnySym = true;
+ }
+ }
+ }
+
+ using LibraryState = LibraryManager::LibState;
+ if (HadAnySym && Lib.getState() != LibraryState::Loaded)
+ Lib.setState(LibraryState::Queried);
+}
+
+void LibraryResolver::searchSymbolsInLibraries(
+ std::vector<std::string> &SymbolList, OnSearchComplete OnComplete,
+ const SearchConfig &Config) {
+ SymbolQuery Q(SymbolList);
+
+ using LibraryState = LibraryManager::LibState;
+ using LibraryType = PathType;
+ auto tryResolveFrom = [&](LibraryState S, LibraryType K) {
+ LLVM_DEBUG(dbgs() << "Trying resolve from state=" << static_cast<int>(S)
+ << " type=" << static_cast<int>(K) << "\n";);
+
+ SymbolSearchContext Ctx(Q);
+ while (!Ctx.allResolved()) {
+
+ for (auto &Lib : LibMgr.getView(S, K)) {
+ if (Ctx.hasSearched(Lib.get()))
+ continue;
+
+        // TODO: consider searching libraries asynchronously.
+ resolveSymbolsInLibrary(*Lib, Ctx.query(), Config.Options);
+ Ctx.markSearched(Lib.get());
+
+ if (Ctx.allResolved())
+ return;
+ }
+
+ if (Ctx.allResolved())
+ return;
+
+ if (!scanLibrariesIfNeeded(K, scanBatchSize))
+ break; // no more new libs to scan
+ }
+ };
+
+ for (const auto &[St, Ty] : Config.Policy.Plan) {
+ tryResolveFrom(St, Ty);
+ if (Q.allResolved())
+ break;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Search complete.\n";
+ for (const auto &r : Q.getAllResults())
+ dbgs() << "Resolved Symbol:" << r->Name << " -> " << r->ResolvedLibPath
+ << "\n";
+ });
+
+ OnComplete(Q);
+}
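+
+// A search plan is an ordered list of (library-state, path-type) stages. For
+// example (illustrative; exact member spelling per the header), a policy that
+// consults loaded user libraries before scanning unloaded ones:
+//
+//   Config.Policy.Plan = {
+//       {LibraryManager::LibState::Loaded, PathType::User},
+//       {LibraryManager::LibState::Unloaded, PathType::User},
+//       {LibraryManager::LibState::Unloaded, PathType::System}};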
+
+bool LibraryResolver::scanLibrariesIfNeeded(PathType PK, size_t BatchSize) {
+ LLVM_DEBUG(dbgs() << "LibraryResolver::scanLibrariesIfNeeded: Scanning for "
+ << (PK == PathType::User ? "User" : "System")
+ << " libraries\n";);
+ if (!ScanHelper.leftToScan(PK))
+ return false;
+
+ LibraryScanner Scanner(ScanHelper, LibMgr, ShouldScanCall);
+ Scanner.scanNext(PK, BatchSize);
+ return true;
+}
+
+bool LibraryResolver::symbolExistsInLibrary(const LibraryInfo &Lib,
+ StringRef SymName,
+ std::vector<std::string> *AllSyms) {
+ SymbolEnumeratorOptions Opts;
+ return symbolExistsInLibrary(Lib, SymName, AllSyms, Opts);
+}
+
+bool LibraryResolver::symbolExistsInLibrary(
+ const LibraryInfo &Lib, StringRef SymName,
+ std::vector<std::string> *AllSyms, const SymbolEnumeratorOptions &Opts) {
+ bool Found = false;
+
+ SymbolEnumerator::enumerateSymbols(
+ Lib.getFullPath(),
+ [&](StringRef Sym) {
+ if (AllSyms)
+ AllSyms->emplace_back(Sym.str());
+
+ if (Sym == SymName) {
+ Found = true;
+ }
+
+ return EnumerateResult::Continue;
+ },
+ Opts);
+
+ return Found;
+}
+
+} // end namespace llvm::orc
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp
new file mode 100644
index 0000000..d93f686
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp
@@ -0,0 +1,1161 @@
+//===- LibraryScanner.cpp - Library Scanning Implementation ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h"
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
+
+#ifdef LLVM_ON_UNIX
+#include <sys/stat.h>
+#include <unistd.h>
+#endif // LLVM_ON_UNIX
+
+#ifdef __APPLE__
+#include <sys/stat.h>
+#undef LC_LOAD_DYLIB
+#undef LC_RPATH
+#endif // __APPLE__
+
+#define DEBUG_TYPE "orc-scanner"
+
+namespace llvm::orc {
+
+void handleError(Error Err, StringRef Context = "") {
+  consumeError(handleErrors(std::move(Err), [&](const ErrorInfoBase &EIB) {
+    dbgs() << "LLVM Error";
+    if (!Context.empty())
+      dbgs() << " [" << Context << "]";
+ dbgs() << ": " << EIB.message() << "\n";
+ }));
+}
+
+bool ObjectFileLoader::isArchitectureCompatible(const object::ObjectFile &Obj) {
+ Triple HostTriple(sys::getDefaultTargetTriple());
+ Triple ObjTriple = Obj.makeTriple();
+
+ LLVM_DEBUG({
+ dbgs() << "Host triple: " << HostTriple.str()
+ << ", Object triple: " << ObjTriple.str() << "\n";
+ });
+
+ if (ObjTriple.getArch() != Triple::UnknownArch &&
+ HostTriple.getArch() != ObjTriple.getArch())
+ return false;
+
+ if (ObjTriple.getOS() != Triple::UnknownOS &&
+ HostTriple.getOS() != ObjTriple.getOS())
+ return false;
+
+ if (ObjTriple.getEnvironment() != Triple::UnknownEnvironment &&
+ HostTriple.getEnvironment() != Triple::UnknownEnvironment &&
+ HostTriple.getEnvironment() != ObjTriple.getEnvironment())
+ return false;
+
+ return true;
+}
+
+Expected<object::OwningBinary<object::ObjectFile>>
+ObjectFileLoader::loadObjectFileWithOwnership(StringRef FilePath) {
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: Attempting to open file " << FilePath
+ << "\n";);
+ auto BinOrErr = object::createBinary(FilePath);
+ if (!BinOrErr) {
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: Failed to open file " << FilePath
+ << "\n";);
+ return BinOrErr.takeError();
+ }
+
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: Successfully opened file " << FilePath
+ << "\n";);
+
+ auto OwningBin = BinOrErr->takeBinary();
+ object::Binary *Bin = OwningBin.first.get();
+
+ if (Bin->isArchive()) {
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: File is an archive, not supported: "
+ << FilePath << "\n";);
+ return createStringError(std::errc::invalid_argument,
+ "Archive files are not supported: %s",
+ FilePath.str().c_str());
+ }
+
+#if defined(__APPLE__)
+ if (auto *UB = dyn_cast<object::MachOUniversalBinary>(Bin)) {
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: Detected Mach-O universal binary: "
+ << FilePath << "\n";);
+ for (auto ObjForArch : UB->objects()) {
+ auto ObjOrErr = ObjForArch.getAsObjectFile();
+ if (!ObjOrErr) {
+ LLVM_DEBUG(
+ dbgs()
+ << "ObjectFileLoader: Skipping invalid architecture slice\n";);
+
+ consumeError(ObjOrErr.takeError());
+ continue;
+ }
+
+ std::unique_ptr<object::ObjectFile> Obj = std::move(ObjOrErr.get());
+ if (isArchitectureCompatible(*Obj)) {
+ LLVM_DEBUG(
+ dbgs() << "ObjectFileLoader: Found compatible object slice\n";);
+
+ return object::OwningBinary<object::ObjectFile>(
+ std::move(Obj), std::move(OwningBin.second));
+
+ } else {
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: Incompatible architecture "
+ "slice skipped\n";);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: No compatible slices found in "
+ "universal binary\n";);
+ return createStringError(inconvertibleErrorCode(),
+ "No compatible object found in fat binary: %s",
+ FilePath.str().c_str());
+ }
+#endif
+
+ auto ObjOrErr =
+ object::ObjectFile::createObjectFile(Bin->getMemoryBufferRef());
+ if (!ObjOrErr) {
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: Failed to create object file\n";);
+ return ObjOrErr.takeError();
+ }
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: Detected object file\n";);
+
+ std::unique_ptr<object::ObjectFile> Obj = std::move(*ObjOrErr);
+ if (!isArchitectureCompatible(*Obj)) {
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: Incompatible architecture: "
+ << FilePath << "\n";);
+ return createStringError(inconvertibleErrorCode(),
+ "Incompatible object file: %s",
+ FilePath.str().c_str());
+ }
+
+ LLVM_DEBUG(dbgs() << "ObjectFileLoader: Object file is compatible\n";);
+
+ return object::OwningBinary<object::ObjectFile>(std::move(Obj),
+ std::move(OwningBin.second));
+}
+
+template <class ELFT>
+bool isELFSharedLibrary(const object::ELFFile<ELFT> &ELFObj) {
+ if (ELFObj.getHeader().e_type != ELF::ET_DYN)
+ return false;
+
+ auto PHOrErr = ELFObj.program_headers();
+ if (!PHOrErr) {
+ consumeError(PHOrErr.takeError());
+ return true;
+ }
+
+ for (auto Phdr : *PHOrErr) {
+ if (Phdr.p_type == ELF::PT_INTERP)
+ return false;
+ }
+
+ return true;
+}
+
+bool isSharedLibraryObject(object::ObjectFile &Obj) {
+ if (Obj.isELF()) {
+ if (auto *ELF32LE = dyn_cast<object::ELF32LEObjectFile>(&Obj))
+ return isELFSharedLibrary(ELF32LE->getELFFile());
+ if (auto *ELF64LE = dyn_cast<object::ELF64LEObjectFile>(&Obj))
+ return isELFSharedLibrary(ELF64LE->getELFFile());
+ if (auto *ELF32BE = dyn_cast<object::ELF32BEObjectFile>(&Obj))
+ return isELFSharedLibrary(ELF32BE->getELFFile());
+ if (auto *ELF64BE = dyn_cast<object::ELF64BEObjectFile>(&Obj))
+ return isELFSharedLibrary(ELF64BE->getELFFile());
+ } else if (Obj.isMachO()) {
+ const object::MachOObjectFile *MachO =
+ dyn_cast<object::MachOObjectFile>(&Obj);
+ if (!MachO) {
+ LLVM_DEBUG(dbgs() << "Failed to cast to MachOObjectFile.\n";);
+ return false;
+ }
+ LLVM_DEBUG({
+ bool Result =
+ MachO->getHeader().filetype == MachO::HeaderFileType::MH_DYLIB;
+ dbgs() << "Mach-O filetype: " << MachO->getHeader().filetype
+ << " (MH_DYLIB == " << MachO::HeaderFileType::MH_DYLIB
+ << "), shared: " << Result << "\n";
+ });
+
+ return MachO->getHeader().filetype == MachO::HeaderFileType::MH_DYLIB;
+ } else if (Obj.isCOFF()) {
+ const object::COFFObjectFile *coff = dyn_cast<object::COFFObjectFile>(&Obj);
+ if (!coff)
+ return false;
+ return coff->getCharacteristics() & COFF::IMAGE_FILE_DLL;
+ } else {
+ LLVM_DEBUG(dbgs() << "Binary is not an ObjectFile.\n";);
+ }
+
+ return false;
+}
+
+bool DylibPathValidator::isSharedLibrary(StringRef Path) {
+ LLVM_DEBUG(dbgs() << "Checking if path is a shared library: " << Path
+ << "\n";);
+
+ auto FileType = sys::fs::get_file_type(Path, /*Follow*/ true);
+ if (FileType != sys::fs::file_type::regular_file) {
+ LLVM_DEBUG(dbgs() << "File type is not a regular file for path: " << Path
+ << "\n";);
+ return false;
+ }
+
+ file_magic MagicCode;
+ identify_magic(Path, MagicCode);
+
+ // Skip archives.
+ if (MagicCode == file_magic::archive)
+ return false;
+
+ // Universal binary handling.
+#if defined(__APPLE__)
+ if (MagicCode == file_magic::macho_universal_binary) {
+ ObjectFileLoader ObjLoader(Path);
+ auto ObjOrErr = ObjLoader.getObjectFile();
+ if (!ObjOrErr) {
+ consumeError(ObjOrErr.takeError());
+ return false;
+ }
+ return isSharedLibraryObject(ObjOrErr.get());
+ }
+#endif
+
+ // Object file inspection for PE/COFF, ELF, and Mach-O
+ bool NeedsObjectInspection =
+#if defined(_WIN32)
+ (MagicCode == file_magic::pecoff_executable);
+#elif defined(__APPLE__)
+ (MagicCode == file_magic::macho_fixed_virtual_memory_shared_lib ||
+ MagicCode == file_magic::macho_dynamically_linked_shared_lib ||
+ MagicCode == file_magic::macho_dynamically_linked_shared_lib_stub);
+#elif defined(LLVM_ON_UNIX)
+#ifdef __CYGWIN__
+ (MagicCode == file_magic::pecoff_executable);
+#else
+ (MagicCode == file_magic::elf_shared_object);
+#endif
+#else
+#error "Unsupported platform."
+#endif
+
+ if (NeedsObjectInspection) {
+ ObjectFileLoader ObjLoader(Path);
+ auto ObjOrErr = ObjLoader.getObjectFile();
+ if (!ObjOrErr) {
+ consumeError(ObjOrErr.takeError());
+ return false;
+ }
+ return isSharedLibraryObject(ObjOrErr.get());
+ }
+
+ LLVM_DEBUG(dbgs() << "Path is not identified as a shared library: " << Path
+ << "\n";);
+ return false;
+}
+
+void DylibSubstitutor::configure(StringRef LoaderPath) {
+ SmallString<512> ExecPath(sys::fs::getMainExecutable(nullptr, nullptr));
+ sys::path::remove_filename(ExecPath);
+
+ SmallString<512> LoaderDir;
+ if (LoaderPath.empty()) {
+ LoaderDir = ExecPath;
+ } else {
+ LoaderDir = LoaderPath.str();
+ if (!sys::fs::is_directory(LoaderPath))
+ sys::path::remove_filename(LoaderDir);
+ }
+
+#ifdef __APPLE__
+ Placeholders["@loader_path"] = std::string(LoaderDir);
+ Placeholders["@executable_path"] = std::string(ExecPath);
+#else
+ Placeholders["$origin"] = std::string(LoaderDir);
+#endif
+}
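+
+// For example, after configure("/opt/app/lib/libhost.dylib") on macOS:
+//   "@loader_path/libdep.dylib"     -> "/opt/app/lib/libdep.dylib"
+//   "@executable_path/libdep.dylib" -> "<executable dir>/libdep.dylib"
+// and on ELF platforms "$origin/libdep.so" -> "/opt/app/lib/libdep.so"
+// (paths are illustrative).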
+
+std::optional<std::string>
+SearchPathResolver::resolve(StringRef Stem, const DylibSubstitutor &Subst,
+ DylibPathValidator &Validator) const {
+ for (const auto &SP : Paths) {
+ std::string Base = Subst.substitute(SP);
+
+ SmallString<512> FullPath(Base);
+ if (!PlaceholderPrefix.empty() &&
+ Stem.starts_with_insensitive(PlaceholderPrefix))
+ FullPath.append(Stem.drop_front(PlaceholderPrefix.size()));
+ else
+ sys::path::append(FullPath, Stem);
+
+ LLVM_DEBUG(dbgs() << "SearchPathResolver::resolve FullPath = " << FullPath
+ << "\n";);
+
+ if (auto Valid = Validator.validate(FullPath.str()))
+ return Valid;
+ }
+
+ return std::nullopt;
+}
+
+std::optional<std::string>
+DylibResolverImpl::tryWithExtensions(StringRef LibStem) const {
+ LLVM_DEBUG(dbgs() << "tryWithExtensions: baseName = " << LibStem << "\n";);
+ SmallVector<SmallString<256>, 8> Candidates;
+
+ // Add extensions by platform
+#if defined(__APPLE__)
+ Candidates.emplace_back(LibStem);
+ Candidates.back() += ".dylib";
+#elif defined(_WIN32)
+ Candidates.emplace_back(LibStem);
+ Candidates.back() += ".dll";
+#else
+ Candidates.emplace_back(LibStem);
+ Candidates.back() += ".so";
+#endif
+
+ // Optionally try "lib" prefix if not already there
+ StringRef FileName = sys::path::filename(LibStem);
+ StringRef Base = sys::path::parent_path(LibStem);
+ if (!FileName.starts_with("lib")) {
+    SmallString<256> WithPrefix(Base);
+    // sys::path::append inserts a separator when WithPrefix is non-empty.
+    sys::path::append(WithPrefix, "lib" + FileName.str());
+
+#if defined(__APPLE__)
+ WithPrefix += ".dylib";
+#elif defined(_WIN32)
+ WithPrefix += ".dll";
+#else
+ WithPrefix += ".so";
+#endif
+
+ Candidates.push_back(std::move(WithPrefix));
+ }
+
+ LLVM_DEBUG({
+ dbgs() << " Candidates to try:\n";
+ for (const auto &C : Candidates)
+ dbgs() << " " << C << "\n";
+ });
+
+ // Try all variants using tryAllPaths
+ for (const auto &Name : Candidates) {
+
+ LLVM_DEBUG(dbgs() << " Trying candidate: " << Name << "\n";);
+
+ for (const auto &R : Resolvers) {
+ if (auto Res = R.resolve(Name, Substitutor, Validator))
+ return Res;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " -> No candidate Resolved.\n";);
+
+ return std::nullopt;
+}
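+
+// For instance, on Linux the stem "z" yields the candidates "z.so" and
+// "libz.so"; on macOS the same stem yields "z.dylib" and "libz.dylib".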
+
+std::optional<std::string>
+DylibResolverImpl::resolve(StringRef LibStem, bool VariateLibStem) const {
+ LLVM_DEBUG(dbgs() << "Resolving library stem: " << LibStem << "\n";);
+
+  // If it is an absolute path, don't iterate over the search paths.
+ if (sys::path::is_absolute(LibStem)) {
+ LLVM_DEBUG(dbgs() << " -> Absolute path detected.\n";);
+ return Validator.validate(LibStem);
+ }
+
+ if (!LibStem.starts_with_insensitive("@rpath")) {
+ if (auto norm = Validator.validate(Substitutor.substitute(LibStem))) {
+ LLVM_DEBUG(dbgs() << " -> Resolved after substitution: " << *norm
+ << "\n";);
+
+ return norm;
+ }
+ }
+
+ for (const auto &R : Resolvers) {
+ LLVM_DEBUG(dbgs() << " -> Resolving via search path ... \n";);
+ if (auto Result = R.resolve(LibStem, Substitutor, Validator)) {
+ LLVM_DEBUG(dbgs() << " -> Resolved via search path: " << *Result
+ << "\n";);
+
+ return Result;
+ }
+ }
+
+ // Expand libStem with paths, extensions, etc.
+ // std::string foundName;
+ if (VariateLibStem) {
+ LLVM_DEBUG(dbgs() << " -> Trying with extensions...\n";);
+
+ if (auto Norm = tryWithExtensions(LibStem)) {
+ LLVM_DEBUG(dbgs() << " -> Resolved via tryWithExtensions: " << *Norm
+ << "\n";);
+
+ return Norm;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " -> Could not resolve: " << LibStem << "\n";);
+
+ return std::nullopt;
+}
+
+#ifndef _WIN32
+mode_t PathResolver::lstatCached(StringRef Path) {
+  // If already cached - return cached result.
+ if (auto Cache = LibPathCache->read_lstat(Path))
+ return *Cache;
+
+ // Not cached: perform lstat and store
+ struct stat buf{};
+ mode_t st_mode = (lstat(Path.str().c_str(), &buf) == -1) ? 0 : buf.st_mode;
+
+ LibPathCache->insert_lstat(Path, st_mode);
+
+ return st_mode;
+}
+
+std::optional<std::string> PathResolver::readlinkCached(StringRef Path) {
+  // If already cached - return cached result.
+ if (auto Cache = LibPathCache->read_link(Path))
+ return Cache;
+
+ // If result not in cache - call system function and cache result
+  char buf[PATH_MAX];
+  ssize_t len;
+  // Leave room for the terminator: readlink() does not NUL-terminate.
+  if ((len = readlink(Path.str().c_str(), buf, sizeof(buf) - 1)) != -1) {
+    buf[len] = '\0';
+ std::string s(buf);
+ LibPathCache->insert_link(Path, s);
+ return s;
+ }
+ return std::nullopt;
+}
+
+void createComponent(StringRef Path, StringRef BasePath, bool BaseIsResolved,
+ SmallVector<StringRef, 16> &Component) {
+ StringRef Separator = sys::path::get_separator();
+ if (!BaseIsResolved) {
+ if (Path[0] == '~' &&
+ (Path.size() == 1 || sys::path::is_separator(Path[1]))) {
+ static SmallString<128> HomeP;
+ if (HomeP.str().empty())
+ sys::path::home_directory(HomeP);
+ StringRef(HomeP).split(Component, Separator, /*MaxSplit*/ -1,
+ /*KeepEmpty*/ false);
+ } else if (BasePath.empty()) {
+ static SmallString<256> CurrentPath;
+ if (CurrentPath.str().empty())
+ sys::fs::current_path(CurrentPath);
+ StringRef(CurrentPath)
+ .split(Component, Separator, /*MaxSplit*/ -1, /*KeepEmpty*/ false);
+ } else {
+ BasePath.split(Component, Separator, /*MaxSplit*/ -1,
+ /*KeepEmpty*/ false);
+ }
+ }
+
+ Path.split(Component, Separator, /*MaxSplit*/ -1, /*KeepEmpty*/ false);
+}
+
+void normalizePathSegments(SmallVector<StringRef, 16> &PathParts) {
+ SmallVector<StringRef, 16> NormalizedPath;
+ for (auto &Part : PathParts) {
+ if (Part == ".") {
+ continue;
+ } else if (Part == "..") {
+ if (!NormalizedPath.empty() && NormalizedPath.back() != "..") {
+ NormalizedPath.pop_back();
+ } else {
+ NormalizedPath.push_back("..");
+ }
+ } else {
+ NormalizedPath.push_back(Part);
+ }
+ }
+ PathParts.swap(NormalizedPath);
+}
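+
+// e.g. {"usr", "lib", "..", "local", ".", "bin"} normalizes to
+// {"usr", "local", "bin"}; leading ".." components that cannot be collapsed
+// are preserved.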
+#endif
+
+std::optional<std::string> PathResolver::realpathCached(StringRef Path,
+ std::error_code &EC,
+ StringRef Base,
+ bool BaseIsResolved,
+ long SymLoopLevel) {
+ EC.clear();
+
+ if (Path.empty()) {
+ EC = std::make_error_code(std::errc::no_such_file_or_directory);
+ LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Empty path\n";);
+
+ return std::nullopt;
+ }
+
+ if (SymLoopLevel <= 0) {
+ EC = std::make_error_code(std::errc::too_many_symbolic_link_levels);
+ LLVM_DEBUG(
+ dbgs() << "PathResolver::realpathCached: Too many Symlink levels: "
+ << Path << "\n";);
+
+ return std::nullopt;
+ }
+
+  // If already cached - return cached result.
+ bool isRelative = sys::path::is_relative(Path);
+ if (!isRelative) {
+ if (auto Cached = LibPathCache->read_realpath(Path)) {
+ EC = Cached->ErrnoCode;
+ if (EC) {
+ LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Cached (error) for "
+ << Path << "\n";);
+ } else {
+ LLVM_DEBUG(
+ dbgs() << "PathResolver::realpathCached: Cached (success) for "
+ << Path << " => " << Cached->canonicalPath << "\n";);
+ }
+ return Cached->canonicalPath.empty()
+ ? std::nullopt
+ : std::make_optional(Cached->canonicalPath);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Resolving path: " << Path
+ << "\n";);
+
+ // If result not in cache - call system function and cache result
+
+ StringRef Separator(sys::path::get_separator());
+ SmallString<256> Resolved(Separator);
+#ifndef _WIN32
+ SmallVector<StringRef, 16> Components;
+
+ if (isRelative) {
+ if (BaseIsResolved) {
+ Resolved.assign(Base);
+ LLVM_DEBUG(dbgs() << " Using Resolved base: " << Base << "\n";);
+ }
+ createComponent(Path, Base, BaseIsResolved, Components);
+ } else {
+ Path.split(Components, Separator, /*MaxSplit*/ -1, /*KeepEmpty*/ false);
+ }
+
+ normalizePathSegments(Components);
+ LLVM_DEBUG({
+ for (auto &C : Components)
+ dbgs() << " " << C << " ";
+
+ dbgs() << "\n";
+ });
+
+ // Handle path list items
+ for (const auto &Component : Components) {
+ if (Component == ".")
+ continue;
+ if (Component == "..") {
+ // collapse "a/b/../c" to "a/c"
+ size_t S = Resolved.rfind(Separator);
+ if (S != llvm::StringRef::npos)
+ Resolved.resize(S);
+ if (Resolved.empty())
+ Resolved = Separator;
+ continue;
+ }
+
+ size_t oldSize = Resolved.size();
+ sys::path::append(Resolved, Component);
+ const char *ResolvedPath = Resolved.c_str();
+ LLVM_DEBUG(dbgs() << " Processing Component: " << Component << " => "
+ << ResolvedPath << "\n";);
+ mode_t st_mode = lstatCached(ResolvedPath);
+
+ if (S_ISLNK(st_mode)) {
+ LLVM_DEBUG(dbgs() << " Found symlink: " << ResolvedPath << "\n";);
+
+ auto SymlinkOpt = readlinkCached(ResolvedPath);
+ if (!SymlinkOpt) {
+ EC = std::make_error_code(std::errc::no_such_file_or_directory);
+ LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{"", EC});
+ LLVM_DEBUG(dbgs() << " Failed to read symlink: " << ResolvedPath
+ << "\n";);
+
+ return std::nullopt;
+ }
+
+ StringRef Symlink = *SymlinkOpt;
+ LLVM_DEBUG(dbgs() << " Symlink points to: " << Symlink << "\n";);
+
+ std::string resolvedBase = "";
+ if (sys::path::is_relative(Symlink)) {
+ Resolved.resize(oldSize);
+ resolvedBase = Resolved.str().str();
+ }
+
+ auto RealSymlink =
+ realpathCached(Symlink, EC, resolvedBase,
+ /*BaseIsResolved=*/true, SymLoopLevel - 1);
+ if (!RealSymlink) {
+ LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{"", EC});
+ LLVM_DEBUG(dbgs() << " Failed to resolve symlink target: " << Symlink
+ << "\n";);
+
+ return std::nullopt;
+ }
+
+ Resolved.assign(*RealSymlink);
+ LLVM_DEBUG(dbgs() << " Symlink Resolved to: " << Resolved << "\n";);
+
+ } else if (st_mode == 0) {
+ EC = std::make_error_code(std::errc::no_such_file_or_directory);
+ LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{"", EC});
+ LLVM_DEBUG(dbgs() << " Component does not exist: " << ResolvedPath
+ << "\n";);
+
+ return std::nullopt;
+ }
+ }
+#else
+ EC = sys::fs::real_path(Path, Resolved); // Windows fallback
+#endif
+
+ std::string Canonical = Resolved.str().str();
+ {
+ LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{
+ Canonical,
+ std::error_code() // success
+ });
+ }
+ LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Final Resolved: " << Path
+ << " => " << Canonical << "\n";);
+ return Canonical;
+}
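+
+// For example, if /opt/lib is a symlink to /usr/lib (illustrative paths, with
+// PR being a PathResolver instance):
+//   std::error_code EC;
+//   PR.realpathCached("/opt/lib/./libz.so", EC); // -> "/usr/lib/libz.so"
+// and the lstat/readlink results for each component are cached for reuse.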
+
+void LibraryScanHelper::addBasePath(const std::string &Path, PathType K) {
+ std::error_code EC;
+ std::string Canon = resolveCanonical(Path, EC);
+ if (EC) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LibraryScanHelper::addBasePath: Failed to canonicalize path: "
+ << Path << "\n";);
+ return;
+ }
+ std::unique_lock<std::shared_mutex> Lock(Mtx);
+ if (LibSearchPaths.count(Canon)) {
+ LLVM_DEBUG(dbgs() << "LibraryScanHelper::addBasePath: Already added: "
+ << Canon << "\n";);
+ return;
+ }
+ K = K == PathType::Unknown ? classifyKind(Canon) : K;
+ auto SP = std::make_shared<LibrarySearchPath>(Canon, K);
+ LibSearchPaths[Canon] = SP;
+
+ if (K == PathType::User) {
+ LLVM_DEBUG(dbgs() << "LibraryScanHelper::addBasePath: Added User path: "
+ << Canon << "\n";);
+ UnscannedUsr.push_back(StringRef(SP->BasePath));
+ } else {
+ LLVM_DEBUG(dbgs() << "LibraryScanHelper::addBasePath: Added System path: "
+ << Canon << "\n";);
+ UnscannedSys.push_back(StringRef(SP->BasePath));
+ }
+}
+
+std::vector<std::shared_ptr<LibrarySearchPath>>
+LibraryScanHelper::getNextBatch(PathType K, size_t BatchSize) {
+ std::vector<std::shared_ptr<LibrarySearchPath>> Result;
+ auto &Queue = (K == PathType::User) ? UnscannedUsr : UnscannedSys;
+
+ std::unique_lock<std::shared_mutex> Lock(Mtx);
+
+ while (!Queue.empty() && (BatchSize == 0 || Result.size() < BatchSize)) {
+ StringRef Base = Queue.front();
+ auto It = LibSearchPaths.find(Base);
+ if (It != LibSearchPaths.end()) {
+ auto &SP = It->second;
+ ScanState Expected = ScanState::NotScanned;
+ if (SP->State.compare_exchange_strong(Expected, ScanState::Scanning)) {
+ Result.push_back(SP);
+ }
+ }
+ Queue.pop_front();
+ }
+
+ return Result;
+}
+
+bool LibraryScanHelper::isTrackedBasePath(StringRef Path) const {
+ std::error_code EC;
+ std::string Canon = resolveCanonical(Path, EC);
+ if (EC)
+ return false;
+
+ std::shared_lock<std::shared_mutex> Lock(Mtx);
+ return LibSearchPaths.count(Canon) > 0;
+}
+
+bool LibraryScanHelper::leftToScan(PathType K) const {
+ std::shared_lock<std::shared_mutex> Lock(Mtx);
+ for (const auto &KV : LibSearchPaths) {
+ const auto &SP = KV.second;
+ if (SP->Kind == K && SP->State == ScanState::NotScanned)
+ return true;
+ }
+ return false;
+}
+
+void LibraryScanHelper::resetToScan() {
+  // The loop below mutates the unscanned queues, so take an exclusive lock.
+  std::unique_lock<std::shared_mutex> Lock(Mtx);
+
+ for (auto &[_, SP] : LibSearchPaths) {
+ ScanState Expected = ScanState::Scanned;
+
+ if (!SP->State.compare_exchange_strong(Expected, ScanState::NotScanned))
+ continue;
+
+ auto &TargetList =
+ (SP->Kind == PathType::User) ? UnscannedUsr : UnscannedSys;
+ TargetList.emplace_back(SP->BasePath);
+ }
+}
+
+std::vector<std::shared_ptr<LibrarySearchPath>>
+LibraryScanHelper::getAllUnits() const {
+ std::shared_lock<std::shared_mutex> Lock(Mtx);
+ std::vector<std::shared_ptr<LibrarySearchPath>> Result;
+ Result.reserve(LibSearchPaths.size());
+ for (const auto &[_, SP] : LibSearchPaths) {
+ Result.push_back(SP);
+ }
+ return Result;
+}
+
+std::string LibraryScanHelper::resolveCanonical(StringRef Path,
+ std::error_code &EC) const {
+ auto Canon = LibPathResolver->resolve(Path, EC);
+ return EC ? Path.str() : *Canon;
+}
+
+PathType LibraryScanHelper::classifyKind(StringRef Path) const {
+ // Detect home directory
+ const char *Home = getenv("HOME");
+  if (Home && Path.starts_with(Home))
+ return PathType::User;
+
+ static const std::array<std::string, 5> UserPrefixes = {
+ "/usr/local", // often used by users for manual installs
+ "/opt/homebrew", // common on macOS
+ "/opt/local", // MacPorts
+ "/home", // Linux home dirs
+ "/Users", // macOS user dirs
+ };
+
+ for (const auto &Prefix : UserPrefixes) {
+    if (Path.starts_with(Prefix))
+ return PathType::User;
+ }
+
+ return PathType::System;
+}
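+
+// e.g. "/home/alice/libs" and "/opt/homebrew/lib" classify as PathType::User,
+// while "/usr/lib" (absent a matching $HOME prefix) classifies as
+// PathType::System.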
+
+Expected<LibraryDepsInfo> parseMachODeps(const object::MachOObjectFile &Obj) {
+ LibraryDepsInfo Libdeps;
+ LLVM_DEBUG(dbgs() << "Parsing Mach-O dependencies...\n";);
+ for (const auto &Command : Obj.load_commands()) {
+ switch (Command.C.cmd) {
+ case MachO::LC_LOAD_DYLIB: {
+ MachO::dylib_command dylibCmd = Obj.getDylibIDLoadCommand(Command);
+ const char *name = Command.Ptr + dylibCmd.dylib.name;
+ Libdeps.addDep(name);
+ LLVM_DEBUG(dbgs() << " Found LC_LOAD_DYLIB: " << name << "\n";);
+ } break;
+ case MachO::LC_LOAD_WEAK_DYLIB:
+ case MachO::LC_REEXPORT_DYLIB:
+ case MachO::LC_LOAD_UPWARD_DYLIB:
+ case MachO::LC_LAZY_LOAD_DYLIB:
+ break;
+ case MachO::LC_RPATH: {
+ // Extract RPATH
+ MachO::rpath_command rpathCmd = Obj.getRpathCommand(Command);
+ const char *rpath = Command.Ptr + rpathCmd.path;
+ LLVM_DEBUG(dbgs() << " Found LC_RPATH: " << rpath << "\n";);
+
+ SmallVector<StringRef, 4> RawPaths;
+ SplitString(StringRef(rpath), RawPaths,
+ sys::EnvPathSeparator == ':' ? ":" : ";");
+
+ for (const auto &raw : RawPaths) {
+ Libdeps.addRPath(raw.str()); // Convert to std::string
+ LLVM_DEBUG(dbgs() << " Parsed RPATH entry: " << raw << "\n";);
+ }
+ break;
+ }
+ }
+ }
+
+ return Expected<LibraryDepsInfo>(std::move(Libdeps));
+}
+
+template <class ELFT>
+static Expected<StringRef> getDynamicStrTab(const object::ELFFile<ELFT> &Elf) {
+ auto DynamicEntriesOrError = Elf.dynamicEntries();
+ if (!DynamicEntriesOrError)
+ return DynamicEntriesOrError.takeError();
+
+ for (const typename ELFT::Dyn &Dyn : *DynamicEntriesOrError) {
+ if (Dyn.d_tag == ELF::DT_STRTAB) {
+ auto MappedAddrOrError = Elf.toMappedAddr(Dyn.getPtr());
+ if (!MappedAddrOrError)
+ return MappedAddrOrError.takeError();
+ return StringRef(reinterpret_cast<const char *>(*MappedAddrOrError));
+ }
+ }
+
+ // If the dynamic segment is not present, we fall back on the sections.
+ auto SectionsOrError = Elf.sections();
+ if (!SectionsOrError)
+ return SectionsOrError.takeError();
+
+ for (const typename ELFT::Shdr &Sec : *SectionsOrError) {
+ if (Sec.sh_type == ELF::SHT_DYNSYM)
+ return Elf.getStringTableForSymtab(Sec);
+ }
+
+ return make_error<StringError>("dynamic string table not found",
+ inconvertibleErrorCode());
+}
+
+template <typename ELFT>
+Expected<LibraryDepsInfo> parseELF(const object::ELFFile<ELFT> &Elf) {
+ LibraryDepsInfo Deps;
+ Expected<StringRef> StrTabOrErr = getDynamicStrTab(Elf);
+ if (!StrTabOrErr)
+ return StrTabOrErr.takeError();
+
+ const char *Data = StrTabOrErr->data();
+
+ auto DynamicEntriesOrError = Elf.dynamicEntries();
+ if (!DynamicEntriesOrError) {
+ return DynamicEntriesOrError.takeError();
+ }
+
+ for (const typename ELFT::Dyn &Dyn : *DynamicEntriesOrError) {
+ switch (Dyn.d_tag) {
+ case ELF::DT_NEEDED:
+ Deps.addDep(Data + Dyn.d_un.d_val);
+ break;
+ case ELF::DT_RPATH: {
+ SmallVector<StringRef, 4> RawPaths;
+ SplitString(Data + Dyn.d_un.d_val, RawPaths,
+ sys::EnvPathSeparator == ':' ? ":" : ";");
+ for (const auto &raw : RawPaths)
+ Deps.addRPath(raw.str());
+ break;
+ }
+ case ELF::DT_RUNPATH: {
+ SmallVector<StringRef, 4> RawPaths;
+ SplitString(Data + Dyn.d_un.d_val, RawPaths,
+ sys::EnvPathSeparator == ':' ? ":" : ";");
+ for (const auto &raw : RawPaths)
+ Deps.addRunPath(raw.str());
+ break;
+ }
+ case ELF::DT_FLAGS_1:
+      // Record whether this is a position-independent executable (PIE).
+ if (Dyn.d_un.d_val & ELF::DF_1_PIE)
+ Deps.isPIE = true;
+ break;
+    // DT_NULL, DT_AUXILIARY, and DT_FILTER entries are intentionally ignored.
+ default:
+ break;
+ }
+ }
+
+ return Expected<LibraryDepsInfo>(std::move(Deps));
+}
+
+Expected<LibraryDepsInfo> parseELFDeps(const object::ELFObjectFileBase &Obj) {
+ using namespace object;
+ LLVM_DEBUG(dbgs() << "parseELFDeps: Detected ELF object\n";);
+ if (const auto *ELF = dyn_cast<ELF32LEObjectFile>(&Obj))
+ return parseELF(ELF->getELFFile());
+ else if (const auto *ELF = dyn_cast<ELF32BEObjectFile>(&Obj))
+ return parseELF(ELF->getELFFile());
+ else if (const auto *ELF = dyn_cast<ELF64LEObjectFile>(&Obj))
+ return parseELF(ELF->getELFFile());
+ else if (const auto *ELF = dyn_cast<ELF64BEObjectFile>(&Obj))
+ return parseELF(ELF->getELFFile());
+
+ LLVM_DEBUG(dbgs() << "parseELFDeps: Unknown ELF format\n";);
+ return createStringError(std::errc::not_supported, "Unknown ELF format");
+}
+
+Expected<LibraryDepsInfo> LibraryScanner::extractDeps(StringRef FilePath) {
+ LLVM_DEBUG(dbgs() << "extractDeps: Attempting to open file " << FilePath
+ << "\n";);
+
+ ObjectFileLoader ObjLoader(FilePath);
+ auto ObjOrErr = ObjLoader.getObjectFile();
+ if (!ObjOrErr) {
+ LLVM_DEBUG(dbgs() << "extractDeps: Failed to open " << FilePath << "\n";);
+ return ObjOrErr.takeError();
+ }
+
+ object::ObjectFile *Obj = &ObjOrErr.get();
+
+ if (auto *elfObj = dyn_cast<object::ELFObjectFileBase>(Obj)) {
+ LLVM_DEBUG(dbgs() << "extractDeps: File " << FilePath
+ << " is an ELF object\n";);
+
+ return parseELFDeps(*elfObj);
+ }
+
+ if (auto *macho = dyn_cast<object::MachOObjectFile>(Obj)) {
+ LLVM_DEBUG(dbgs() << "extractDeps: File " << FilePath
+ << " is a Mach-O object\n";);
+ return parseMachODeps(*macho);
+ }
+
+ if (Obj->isCOFF()) {
+ // TODO: COFF support
+ return LibraryDepsInfo();
+ }
+
+ LLVM_DEBUG(dbgs() << "extractDeps: Unsupported binary format for file "
+ << FilePath << "\n";);
+ return createStringError(inconvertibleErrorCode(),
+ "Unsupported binary format: %s",
+ FilePath.str().c_str());
+}
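+
+// For a typical ELF shared object the returned LibraryDepsInfo might hold
+// (illustrative values):
+//   deps    = {"libc.so.6", "libm.so.6"}  // DT_NEEDED entries
+//   rpath   = {"$ORIGIN/../lib"}          // DT_RPATH entries
+//   runPath = {}                          // DT_RUNPATH entries
+//   isPIE   = false                       // DT_FLAGS_1 & DF_1_PIE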
+
+std::optional<std::string> LibraryScanner::shouldScan(StringRef FilePath) {
+ std::error_code EC;
+
+ LLVM_DEBUG(dbgs() << "[shouldScan] Checking: " << FilePath << "\n";);
+
+ // [1] Check file existence early
+ if (!sys::fs::exists(FilePath)) {
+ LLVM_DEBUG(dbgs() << " -> Skipped: file does not exist.\n";);
+
+ return std::nullopt;
+ }
+
+ // [2] Resolve to canonical path
+ auto CanonicalPathOpt = ScanHelper.resolve(FilePath, EC);
+ if (EC || !CanonicalPathOpt) {
+ LLVM_DEBUG(dbgs() << " -> Skipped: failed to resolve path (EC="
+ << EC.message() << ").\n";);
+
+ return std::nullopt;
+ }
+
+ const std::string &CanonicalPath = *CanonicalPathOpt;
+ LLVM_DEBUG(dbgs() << " -> Canonical path: " << CanonicalPath << "\n");
+
+  // [3] Skip directories.
+ if (sys::fs::is_directory(CanonicalPath)) {
+ LLVM_DEBUG(dbgs() << " -> Skipped: path is a directory.\n";);
+
+ return std::nullopt;
+ }
+
+ // [4] Skip if it's not a shared library.
+ if (!DylibPathValidator::isSharedLibrary(CanonicalPath)) {
+ LLVM_DEBUG(dbgs() << " -> Skipped: not a shared library.\n";);
+ return std::nullopt;
+ }
+
+ // [5] Skip if we've already seen this path (via cache)
+ if (ScanHelper.hasSeenOrMark(CanonicalPath)) {
+ LLVM_DEBUG(dbgs() << " -> Skipped: already seen.\n";);
+
+ return std::nullopt;
+ }
+
+ // [6] Already tracked in LibraryManager?
+ if (LibMgr.hasLibrary(CanonicalPath)) {
+ LLVM_DEBUG(dbgs() << " -> Skipped: already tracked by LibraryManager.\n";);
+
+ return std::nullopt;
+ }
+
+ // [7] Run user-defined hook (default: always true)
+ if (!ShouldScanCall(CanonicalPath)) {
+ LLVM_DEBUG(dbgs() << " -> Skipped: user-defined hook rejected.\n";);
+
+ return std::nullopt;
+ }
+
+ LLVM_DEBUG(dbgs() << " -> Accepted: ready to scan " << CanonicalPath
+ << "\n";);
+ return CanonicalPath;
+}
+
+void LibraryScanner::handleLibrary(StringRef FilePath, PathType K, int level) {
+ LLVM_DEBUG(dbgs() << "LibraryScanner::handleLibrary: Scanning: " << FilePath
+ << ", level=" << level << "\n";);
+ auto CanonPathOpt = shouldScan(FilePath);
+ if (!CanonPathOpt) {
+ LLVM_DEBUG(dbgs() << " Skipped (shouldScan returned false): " << FilePath
+ << "\n";);
+
+ return;
+ }
+ const std::string CanonicalPath = *CanonPathOpt;
+
+ auto DepsOrErr = extractDeps(CanonicalPath);
+ if (!DepsOrErr) {
+ LLVM_DEBUG(dbgs() << " Failed to extract deps for: " << CanonicalPath
+ << "\n";);
+ handleError(DepsOrErr.takeError());
+ return;
+ }
+
+ LibraryDepsInfo &Deps = *DepsOrErr;
+
+ LLVM_DEBUG({
+ dbgs() << " Found deps : \n";
+ for (const auto &dep : Deps.deps)
+ dbgs() << " : " << dep << "\n";
+ dbgs() << " Found @rpath : " << Deps.rpath.size() << "\n";
+ for (const auto &r : Deps.rpath)
+ dbgs() << " : " << r << "\n";
+ dbgs() << " Found @runpath : \n";
+ for (const auto &r : Deps.runPath)
+ dbgs() << " : " << r << "\n";
+ });
+
+ if (Deps.isPIE && level == 0) {
+ LLVM_DEBUG(dbgs() << " Skipped PIE executable at top level: "
+ << CanonicalPath << "\n";);
+
+ return;
+ }
+
+ bool Added = LibMgr.addLibrary(CanonicalPath, K);
+ if (!Added) {
+ LLVM_DEBUG(dbgs() << " Already added: " << CanonicalPath << "\n";);
+ return;
+ }
+
+ // Heuristic 1: No RPATH/RUNPATH, skip deps
+ if (Deps.rpath.empty() && Deps.runPath.empty()) {
+ LLVM_DEBUG(
+ dbgs() << "LibraryScanner::handleLibrary: Skipping deps (Heuristic1): "
+ << CanonicalPath << "\n";);
+ return;
+ }
+
+ // Heuristic 2: All RPATH and RUNPATH already tracked
+ auto allTracked = [&](const auto &Paths) {
+ LLVM_DEBUG(dbgs() << " Checking : " << Paths.size() << "\n";);
+ return std::all_of(Paths.begin(), Paths.end(), [&](StringRef P) {
+ LLVM_DEBUG(dbgs() << " Checking isTrackedBasePath : " << P << "\n";);
+ return ScanHelper.isTrackedBasePath(
+ DylibResolver::resolvelinkerFlag(P, CanonicalPath));
+ });
+ };
+
+ if (allTracked(Deps.rpath) && allTracked(Deps.runPath)) {
+ LLVM_DEBUG(
+ dbgs() << "LibraryScanner::handleLibrary: Skipping deps (Heuristic2): "
+ << CanonicalPath << "\n";);
+ return;
+ }
+
+ DylibPathValidator Validator(ScanHelper.getPathResolver());
+ DylibResolver Resolver(Validator);
+ Resolver.configure(CanonicalPath,
+ {{Deps.rpath, SearchPathType::RPath},
+ {ScanHelper.getSearchPaths(), SearchPathType::UsrOrSys},
+ {Deps.runPath, SearchPathType::RunPath}});
+ for (StringRef Dep : Deps.deps) {
+ LLVM_DEBUG(dbgs() << " Resolving dep: " << Dep << "\n";);
+ auto DepFullOpt = Resolver.resolve(Dep);
+ if (!DepFullOpt) {
+ LLVM_DEBUG(dbgs() << " Failed to resolve dep: " << Dep << "\n";);
+
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << " Resolved dep to: " << *DepFullOpt << "\n";);
+
+ handleLibrary(*DepFullOpt, K, level + 1);
+ }
+}
+
+void LibraryScanner::scanBaseDir(std::shared_ptr<LibrarySearchPath> SP) {
+ if (!sys::fs::is_directory(SP->BasePath) || SP->BasePath.empty()) {
+ LLVM_DEBUG(
+ dbgs() << "LibraryScanner::scanBaseDir: Invalid or empty basePath: "
+ << SP->BasePath << "\n";);
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "LibraryScanner::scanBaseDir: Scanning directory: "
+ << SP->BasePath << "\n";);
+ std::error_code EC;
+
+ SP->State.store(ScanState::Scanning);
+
+ for (sys::fs::directory_iterator It(SP->BasePath, EC), end; It != end && !EC;
+ It.increment(EC)) {
+ auto Entry = *It;
+ if (!Entry.status())
+ continue;
+
+ auto Status = *Entry.status();
+ if (sys::fs::is_regular_file(Status) || sys::fs::is_symlink_file(Status)) {
+ LLVM_DEBUG(dbgs() << " Found file: " << Entry.path() << "\n";);
+      // TODO: consider scanning directory entries asynchronously.
+ handleLibrary(Entry.path(), SP->Kind);
+ }
+ }
+
+ SP->State.store(ScanState::Scanned);
+}
+
+void LibraryScanner::scanNext(PathType K, size_t BatchSize) {
+ LLVM_DEBUG(dbgs() << "LibraryScanner::scanNext: Scanning next batch of size "
+ << BatchSize << " for kind "
+ << (K == PathType::User ? "User" : "System") << "\n";);
+
+ auto SearchPaths = ScanHelper.getNextBatch(K, BatchSize);
+ for (auto &SP : SearchPaths) {
+ LLVM_DEBUG(dbgs() << " Scanning unit with basePath: " << SP->BasePath
+ << "\n";);
+
+ scanBaseDir(SP);
+ }
+}
+
+} // end namespace llvm::orc
diff --git a/llvm/lib/FileCheck/FileCheckImpl.h b/llvm/lib/FileCheck/FileCheckImpl.h
index a08502e..5851cfc 100644
--- a/llvm/lib/FileCheck/FileCheckImpl.h
+++ b/llvm/lib/FileCheck/FileCheckImpl.h
@@ -528,7 +528,7 @@ public:
SMRange getRange() const { return Range; }
static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg,
- SMRange Range = std::nullopt) {
+ SMRange Range = {}) {
return make_error<ErrorDiagnostic>(
SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg), Range);
}
diff --git a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
index df88490..b546e81 100644
--- a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
+++ b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
@@ -12,7 +12,6 @@
#include "llvm/TargetParser/Triple.h"
namespace llvm {
-extern llvm::cl::opt<bool> DebugInfoCorrelate;
extern llvm::cl::opt<llvm::InstrProfCorrelator::ProfCorrelatorKind>
ProfileCorrelate;
} // namespace llvm
@@ -64,8 +63,7 @@ TargetLibraryInfoImpl *createTLII(const llvm::Triple &TargetTriple,
}
std::string getDefaultProfileGenName() {
- return llvm::DebugInfoCorrelate ||
- llvm::ProfileCorrelate != InstrProfCorrelator::NONE
+ return llvm::ProfileCorrelate != InstrProfCorrelator::NONE
? "default_%m.proflite"
: "default_%m.profraw";
}
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 0e5926f..fff9a81 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -528,7 +528,7 @@ void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
Value *Version = Builder.getInt32(OMP_KERNEL_ARG_VERSION);
Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
auto Int32Ty = Type::getInt32Ty(Builder.getContext());
- constexpr const size_t MaxDim = 3;
+ constexpr size_t MaxDim = 3;
Value *ZeroArray = Constant::getNullValue(ArrayType::get(Int32Ty, MaxDim));
Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index b838e36..58b7ddd 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -730,7 +730,7 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
// (arm|aarch64).neon.bfdot.*'.
Intrinsic::ID ID =
StringSwitch<Intrinsic::ID>(Name)
- .Cases("v2f32.v8i8", "v4f32.v16i8",
+ .Cases({"v2f32.v8i8", "v4f32.v16i8"},
IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
: (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
.Default(Intrinsic::not_intrinsic);
@@ -1456,7 +1456,7 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
if (F->arg_size() == 1) {
Intrinsic::ID IID =
StringSwitch<Intrinsic::ID>(Name)
- .Cases("brev32", "brev64", Intrinsic::bitreverse)
+ .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
.Case("clz.i", Intrinsic::ctlz)
.Case("popc.i", Intrinsic::ctpop)
.Default(Intrinsic::not_intrinsic);
@@ -1504,6 +1504,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
else if (Name.consume_front("fabs."))
// nvvm.fabs.{f,ftz.f,d}
Expand = Name == "f" || Name == "ftz.f" || Name == "d";
+ else if (Name.consume_front("ex2.approx."))
+ // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
+ Expand =
+ Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
else if (Name.consume_front("max.") || Name.consume_front("min."))
// nvvm.{min,max}.{i,ii,ui,ull}
Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
@@ -2550,6 +2554,11 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
: Intrinsic::nvvm_fabs;
Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
+ } else if (Name.consume_front("ex2.approx.")) {
+ // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
+ Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
+ : Intrinsic::nvvm_ex2_approx;
+ Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
} else if (Name.starts_with("atomic.load.add.f32.p") ||
Name.starts_with("atomic.load.add.f64.p")) {
Value *Ptr = CI->getArgOperand(0);
diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h
index 51fb40b..e3e8d89 100644
--- a/llvm/lib/IR/ConstantsContext.h
+++ b/llvm/lib/IR/ConstantsContext.h
@@ -535,7 +535,7 @@ struct ConstantPtrAuthKeyType {
unsigned getHash() const { return hash_combine_range(Operands); }
- using TypeClass = typename ConstantInfo<ConstantPtrAuth>::TypeClass;
+ using TypeClass = ConstantInfo<ConstantPtrAuth>::TypeClass;
ConstantPtrAuth *create(TypeClass *Ty) const {
return new ConstantPtrAuth(Operands[0], cast<ConstantInt>(Operands[1]),
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index 62fd62c..3394754 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -34,8 +34,6 @@ static cl::opt<bool> ImportConstantsWithRefs(
"import-constants-with-refs", cl::init(true), cl::Hidden,
cl::desc("Import constant global variables with references"));
-constexpr uint32_t FunctionSummary::ParamAccess::RangeWidth;
-
FunctionSummary FunctionSummary::ExternalNode =
FunctionSummary::makeDummyFunctionSummary(
SmallVector<FunctionSummary::EdgeTy, 0>());
@@ -88,8 +86,6 @@ std::pair<unsigned, unsigned> FunctionSummary::specialRefCounts() const {
return {RORefCnt, WORefCnt};
}
-constexpr uint64_t ModuleSummaryIndex::BitcodeSummaryVersion;
-
uint64_t ModuleSummaryIndex::getFlags() const {
uint64_t Flags = 0;
// Flags & 0x4 is reserved. DO NOT REUSE.
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index b618222..23be42f 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1076,63 +1076,59 @@ Expected<ArrayRef<SymbolResolution>>
LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
ArrayRef<SymbolResolution> Res) {
llvm::TimeTraceScope timeScope("LTO add thin LTO");
+ const auto BMID = BM.getModuleIdentifier();
ArrayRef<SymbolResolution> ResTmp = Res;
for (const InputFile::Symbol &Sym : Syms) {
assert(!ResTmp.empty());
const SymbolResolution &R = ResTmp.consume_front();
- if (!Sym.getIRName().empty()) {
+ if (!Sym.getIRName().empty() && R.Prevailing) {
auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
GlobalValue::getGlobalIdentifier(Sym.getIRName(),
GlobalValue::ExternalLinkage, ""));
- if (R.Prevailing)
- ThinLTO.setPrevailingModuleForGUID(GUID, BM.getModuleIdentifier());
+ ThinLTO.setPrevailingModuleForGUID(GUID, BMID);
}
}
- if (Error Err =
- BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(),
- [&](GlobalValue::GUID GUID) {
- return ThinLTO.isPrevailingModuleForGUID(
- GUID, BM.getModuleIdentifier());
- }))
+ if (Error Err = BM.readSummary(
+ ThinLTO.CombinedIndex, BMID, [&](GlobalValue::GUID GUID) {
+ return ThinLTO.isPrevailingModuleForGUID(GUID, BMID);
+ }))
return Err;
- LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n");
+ LLVM_DEBUG(dbgs() << "Module " << BMID << "\n");
for (const InputFile::Symbol &Sym : Syms) {
assert(!Res.empty());
const SymbolResolution &R = Res.consume_front();
- if (!Sym.getIRName().empty()) {
+ if (!Sym.getIRName().empty() &&
+ (R.Prevailing || R.FinalDefinitionInLinkageUnit)) {
auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
GlobalValue::getGlobalIdentifier(Sym.getIRName(),
GlobalValue::ExternalLinkage, ""));
if (R.Prevailing) {
- assert(
- ThinLTO.isPrevailingModuleForGUID(GUID, BM.getModuleIdentifier()));
+ assert(ThinLTO.isPrevailingModuleForGUID(GUID, BMID));
// For linker redefined symbols (via --wrap or --defsym) we want to
// switch the linkage to `weak` to prevent IPOs from happening.
// Find the summary in the module for this very GV and record the new
// linkage so that we can switch it when we import the GV.
if (R.LinkerRedefined)
- if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(
- GUID, BM.getModuleIdentifier()))
+ if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(GUID, BMID))
S->setLinkage(GlobalValue::WeakAnyLinkage);
}
// If the linker resolved the symbol to a local definition then mark it
// as local in the summary for the module we are adding.
if (R.FinalDefinitionInLinkageUnit) {
- if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(
- GUID, BM.getModuleIdentifier())) {
+ if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(GUID, BMID)) {
S->setDSOLocal(true);
}
}
}
}
- if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second)
+ if (!ThinLTO.ModuleMap.insert({BMID, BM}).second)
return make_error<StringError>(
"Expected at most one ThinLTO module per bitcode file",
inconvertibleErrorCode());
@@ -1143,10 +1139,10 @@ LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
// This is a fuzzy name matching where only modules with name containing the
// specified switch values are going to be compiled.
for (const std::string &Name : Conf.ThinLTOModulesToCompile) {
- if (BM.getModuleIdentifier().contains(Name)) {
- ThinLTO.ModulesToCompile->insert({BM.getModuleIdentifier(), BM});
- LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BM.getModuleIdentifier()
- << " to compile\n");
+ if (BMID.contains(Name)) {
+ ThinLTO.ModulesToCompile->insert({BMID, BM});
+ LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BMID << " to compile\n");
+ break;
}
}
}
diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp
index 71bd397..a3eaaa7 100644
--- a/llvm/lib/MC/GOFFObjectWriter.cpp
+++ b/llvm/lib/MC/GOFFObjectWriter.cpp
@@ -520,7 +520,7 @@ GOFFObjectWriter::GOFFObjectWriter(
std::unique_ptr<MCGOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
: TargetObjectWriter(std::move(MOTW)), OS(OS) {}
-GOFFObjectWriter::~GOFFObjectWriter() {}
+GOFFObjectWriter::~GOFFObjectWriter() = default;
uint64_t GOFFObjectWriter::writeObject() {
uint64_t Size = GOFFWriter(OS, *Asm).writeObject();
diff --git a/llvm/lib/MC/MCDXContainerWriter.cpp b/llvm/lib/MC/MCDXContainerWriter.cpp
index 5eda039..ebed411 100644
--- a/llvm/lib/MC/MCDXContainerWriter.cpp
+++ b/llvm/lib/MC/MCDXContainerWriter.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
-MCDXContainerTargetWriter::~MCDXContainerTargetWriter() {}
+MCDXContainerTargetWriter::~MCDXContainerTargetWriter() = default;
uint64_t DXContainerObjectWriter::writeObject() {
auto &Asm = *this->Asm;
diff --git a/llvm/lib/MC/MCGOFFStreamer.cpp b/llvm/lib/MC/MCGOFFStreamer.cpp
index 8b228db..ad6397b 100644
--- a/llvm/lib/MC/MCGOFFStreamer.cpp
+++ b/llvm/lib/MC/MCGOFFStreamer.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
-MCGOFFStreamer::~MCGOFFStreamer() {}
+MCGOFFStreamer::~MCGOFFStreamer() = default;
GOFFObjectWriter &MCGOFFStreamer::getWriter() {
return static_cast<GOFFObjectWriter &>(getAssembler().getWriter());
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index a6188f0..1af4a29 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SaveAndRestore.h"
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index dd1bc2b..3c9ab8e 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -228,11 +228,9 @@ public:
AssemblerDialect = i;
}
- void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
- bool Warning(SMLoc L, const Twine &Msg,
- SMRange Range = std::nullopt) override;
- bool printError(SMLoc L, const Twine &Msg,
- SMRange Range = std::nullopt) override;
+ void Note(SMLoc L, const Twine &Msg, SMRange Range = {}) override;
+ bool Warning(SMLoc L, const Twine &Msg, SMRange Range = {}) override;
+ bool printError(SMLoc L, const Twine &Msg, SMRange Range = {}) override;
const AsmToken &Lex() override;
@@ -312,7 +310,7 @@ private:
void printMacroInstantiations();
void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
- SMRange Range = std::nullopt) const {
+ SMRange Range = {}) const {
ArrayRef<SMRange> Ranges(Range);
SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
}
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 1a3752f..911d92c 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -695,15 +695,15 @@ bool ELFAsmParser::parseDirectivePrevious(StringRef DirName, SMLoc) {
static MCSymbolAttr MCAttrForString(StringRef Type) {
return StringSwitch<MCSymbolAttr>(Type)
- .Cases("STT_FUNC", "function", MCSA_ELF_TypeFunction)
- .Cases("STT_OBJECT", "object", MCSA_ELF_TypeObject)
- .Cases("STT_TLS", "tls_object", MCSA_ELF_TypeTLS)
- .Cases("STT_COMMON", "common", MCSA_ELF_TypeCommon)
- .Cases("STT_NOTYPE", "notype", MCSA_ELF_TypeNoType)
- .Cases("STT_GNU_IFUNC", "gnu_indirect_function",
- MCSA_ELF_TypeIndFunction)
- .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
- .Default(MCSA_Invalid);
+ .Cases({"STT_FUNC", "function"}, MCSA_ELF_TypeFunction)
+ .Cases({"STT_OBJECT", "object"}, MCSA_ELF_TypeObject)
+ .Cases({"STT_TLS", "tls_object"}, MCSA_ELF_TypeTLS)
+ .Cases({"STT_COMMON", "common"}, MCSA_ELF_TypeCommon)
+ .Cases({"STT_NOTYPE", "notype"}, MCSA_ELF_TypeNoType)
+ .Cases({"STT_GNU_IFUNC", "gnu_indirect_function"},
+ MCSA_ELF_TypeIndFunction)
+ .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Default(MCSA_Invalid);
}
/// parseDirectiveELFType
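A minimal sketch of the brace-list Cases overload these hunks migrate to, assuming the post-change StringSwitch signature that takes an initializer list of keys:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class SymKind { Function, Object, Unknown };

SymKind classify(llvm::StringRef S) {
  return llvm::StringSwitch<SymKind>(S)
      .Cases({"STT_FUNC", "function"}, SymKind::Function) // alias group
      .Cases({"STT_OBJECT", "object"}, SymKind::Object)
      .Default(SymKind::Unknown);
}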
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 8a8f111..3a85770 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -483,11 +483,9 @@ public:
AssemblerDialect = i;
}
- void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
- bool Warning(SMLoc L, const Twine &Msg,
- SMRange Range = std::nullopt) override;
- bool printError(SMLoc L, const Twine &Msg,
- SMRange Range = std::nullopt) override;
+ void Note(SMLoc L, const Twine &Msg, SMRange Range = {}) override;
+ bool Warning(SMLoc L, const Twine &Msg, SMRange Range = {}) override;
+ bool printError(SMLoc L, const Twine &Msg, SMRange Range = {}) override;
enum ExpandKind { ExpandMacros, DoNotExpandMacros };
const AsmToken &Lex(ExpandKind ExpandNextToken);
@@ -592,7 +590,7 @@ private:
bool expandStatement(SMLoc Loc);
void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
- SMRange Range = std::nullopt) const {
+ SMRange Range = {}) const {
ArrayRef<SMRange> Ranges(Range);
SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
}
@@ -5325,10 +5323,10 @@ void MasmParser::initializeDirectiveKindMap() {
bool MasmParser::isMacroLikeDirective() {
if (getLexer().is(AsmToken::Identifier)) {
bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
- .CasesLower("repeat", "rept", true)
+ .CasesLower({"repeat", "rept"}, true)
.CaseLower("while", true)
- .CasesLower("for", "irp", true)
- .CasesLower("forc", "irpc", true)
+ .CasesLower({"for", "irp"}, true)
+ .CasesLower({"forc", "irpc"}, true)
.Default(false);
if (IsMacroLike)
return true;
diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.h b/llvm/lib/ObjCopy/COFF/COFFWriter.h
index 66d7f01..3ee0e06 100644
--- a/llvm/lib/ObjCopy/COFF/COFFWriter.h
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.h
@@ -50,7 +50,7 @@ class COFFWriter {
Expected<uint32_t> virtualAddressToFileAddress(uint32_t RVA);
public:
- virtual ~COFFWriter() {}
+ virtual ~COFFWriter() = default;
Error write();
COFFWriter(Object &Obj, raw_ostream &Out)
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 4f6473f..2783ef27 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -134,7 +134,7 @@ private:
using Elf_Sym = typename ELFT::Sym;
public:
- ~ELFSectionWriter() override {}
+ ~ELFSectionWriter() override = default;
Error visit(const SymbolTableSection &Sec) override;
Error visit(const RelocationSection &Sec) override;
Error visit(const GnuDebugLinkSection &Sec) override;
@@ -180,7 +180,7 @@ public:
class BinarySectionWriter : public SectionWriter {
public:
- ~BinarySectionWriter() override {}
+ ~BinarySectionWriter() override = default;
Error visit(const SymbolTableSection &Sec) override;
Error visit(const RelocationSection &Sec) override;
@@ -346,7 +346,7 @@ private:
size_t totalSize() const;
public:
- ~ELFWriter() override {}
+ ~ELFWriter() override = default;
bool WriteSectionHeaders;
// For --only-keep-debug, select an alternative section/segment layout
@@ -367,7 +367,7 @@ private:
uint64_t TotalSize = 0;
public:
- ~BinaryWriter() override {}
+ ~BinaryWriter() override = default;
Error finalize() override;
Error write() override;
BinaryWriter(Object &Obj, raw_ostream &Out, const CommonConfig &Config)
@@ -784,7 +784,7 @@ private:
SymbolTableSection *Symbols = nullptr;
public:
- ~SectionIndexSection() override {}
+ ~SectionIndexSection() override = default;
void addIndex(uint32_t Index) {
assert(Size > 0);
Indexes.push_back(Index);
diff --git a/llvm/lib/ObjCopy/MachO/MachOReader.h b/llvm/lib/ObjCopy/MachO/MachOReader.h
index e315e6fd..940ba4c 100644
--- a/llvm/lib/ObjCopy/MachO/MachOReader.h
+++ b/llvm/lib/ObjCopy/MachO/MachOReader.h
@@ -23,7 +23,7 @@ namespace macho {
// raw binaries and regular MachO object files.
class Reader {
public:
- virtual ~Reader(){};
+ virtual ~Reader() = default;
virtual Expected<std::unique_ptr<Object>> create() const = 0;
};
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
index 8620548..47639ad 100644
--- a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
@@ -20,7 +20,7 @@ namespace xcoff {
class XCOFFWriter {
public:
- virtual ~XCOFFWriter() {}
+ virtual ~XCOFFWriter() = default;
XCOFFWriter(Object &Obj, raw_ostream &Out) : Obj(Obj), Out(Out) {}
Error write();
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 6da97f9..354c51d 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -831,17 +831,17 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
};
uint8_t Version = 0;
- uint8_t Feature = 0;
+ uint16_t Feature = 0;
BBAddrMap::Features FeatEnable{};
while (!ULEBSizeErr && !MetadataDecodeErr && Cur &&
Cur.tell() < Content.size()) {
Version = Data.getU8(Cur);
if (!Cur)
break;
- if (Version < 2 || Version > 4)
+ if (Version < 2 || Version > 5)
return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " +
Twine(static_cast<int>(Version)));
- Feature = Data.getU8(Cur); // Feature byte
+ Feature = Version < 5 ? Data.getU8(Cur) : Data.getU16(Cur);
if (!Cur)
break;
auto FeatEnableOrErr = BBAddrMap::Features::decode(Feature);
@@ -858,6 +858,11 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
"basic block hash feature is enabled: version = " +
Twine(static_cast<int>(Version)) +
" feature = " + Twine(static_cast<int>(Feature)));
+ if (FeatEnable.PostLinkCfg && Version < 5)
+ return createError("version should be >= 5 for SHT_LLVM_BB_ADDR_MAP when "
+ "post link cfg feature is enabled: version = " +
+ Twine(static_cast<int>(Version)) +
+ " feature = " + Twine(static_cast<int>(Feature)));
uint32_t NumBlocksInBBRange = 0;
uint32_t NumBBRanges = 1;
typename ELFFile<ELFT>::uintX_t RangeBaseAddress = 0;
@@ -946,6 +951,10 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
uint64_t BBF = FeatEnable.BBFreq
? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)
: 0;
+ uint32_t PostLinkBBFreq =
+ FeatEnable.PostLinkCfg
+ ? readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr)
+ : 0;
// Branch probability
llvm::SmallVector<PGOAnalysisMap::PGOBBEntry::SuccessorEntry, 2>
@@ -955,13 +964,20 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
for (uint64_t I = 0; I < SuccCount; ++I) {
uint32_t BBID = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
uint32_t BrProb = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+ uint32_t PostLinkFreq =
+ FeatEnable.PostLinkCfg
+ ? readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr)
+ : 0;
+
if (PGOAnalyses)
- Successors.push_back({BBID, BranchProbability::getRaw(BrProb)});
+ Successors.push_back(
+ {BBID, BranchProbability::getRaw(BrProb), PostLinkFreq});
}
}
if (PGOAnalyses)
- PGOBBEntries.push_back({BlockFrequency(BBF), std::move(Successors)});
+ PGOBBEntries.push_back(
+ {BlockFrequency(BBF), PostLinkBBFreq, std::move(Successors)});
}
if (PGOAnalyses)
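The header decode after this change, as a self-contained sketch with a plain byte cursor instead of DataExtractor (little-endian assumed here; the real reader follows the ELF file's byte order):

#include <cstddef>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

// SHT_LLVM_BB_ADDR_MAP entry header: a u8 version, then a u8 feature mask
// before version 5 and a u16 from version 5 on.
std::optional<std::pair<uint8_t, uint16_t>>
readEntryHeader(const std::vector<uint8_t> &Buf, size_t &Pos) {
  if (Pos >= Buf.size())
    return std::nullopt;
  uint8_t Version = Buf[Pos++];
  if (Version < 2 || Version > 5)
    return std::nullopt; // unsupported version
  if (Version < 5) {
    if (Pos >= Buf.size())
      return std::nullopt;
    return std::make_pair(Version, uint16_t(Buf[Pos++]));
  }
  if (Pos + 2 > Buf.size())
    return std::nullopt;
  uint16_t Feature = uint16_t(Buf[Pos]) | uint16_t(Buf[Pos + 1]) << 8;
  Pos += 2;
  return std::make_pair(Version, Feature);
}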
diff --git a/llvm/lib/Object/WindowsMachineFlag.cpp b/llvm/lib/Object/WindowsMachineFlag.cpp
index caf357e8..14c14f6 100644
--- a/llvm/lib/Object/WindowsMachineFlag.cpp
+++ b/llvm/lib/Object/WindowsMachineFlag.cpp
@@ -23,8 +23,8 @@ using namespace llvm;
COFF::MachineTypes llvm::getMachineType(StringRef S) {
// Flags must be a superset of Microsoft lib.exe /machine flags.
return StringSwitch<COFF::MachineTypes>(S.lower())
- .Cases("x64", "amd64", COFF::IMAGE_FILE_MACHINE_AMD64)
- .Cases("x86", "i386", COFF::IMAGE_FILE_MACHINE_I386)
+ .Cases({"x64", "amd64"}, COFF::IMAGE_FILE_MACHINE_AMD64)
+ .Cases({"x86", "i386"}, COFF::IMAGE_FILE_MACHINE_I386)
.Case("arm", COFF::IMAGE_FILE_MACHINE_ARMNT)
.Case("arm64", COFF::IMAGE_FILE_MACHINE_ARM64)
.Case("arm64ec", COFF::IMAGE_FILE_MACHINE_ARM64EC)
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index 8b75fbe..8530785 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1465,13 +1465,19 @@ void ELFState<ELFT>::writeSectionContent(
for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) {
// Write version and feature values.
if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) {
- if (E.Version > 4)
+ if (E.Version > 5)
WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: "
<< static_cast<int>(E.Version)
<< "; encoding using the most recent version";
CBA.write(E.Version);
- CBA.write(E.Feature);
- SHeader.sh_size += 2;
+ SHeader.sh_size += 1;
+ if (E.Version < 5) {
+ CBA.write(static_cast<uint8_t>(E.Feature));
+ SHeader.sh_size += 1;
+ } else {
+ CBA.write<uint16_t>(E.Feature, ELFT::Endianness);
+ SHeader.sh_size += 2;
+ }
}
auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(E.Feature);
bool MultiBBRangeFeatureEnabled = false;
@@ -1556,11 +1562,15 @@ void ELFState<ELFT>::writeSectionContent(
for (const auto &PGOBBE : PGOBBEntries) {
if (PGOBBE.BBFreq)
SHeader.sh_size += CBA.writeULEB128(*PGOBBE.BBFreq);
+ if (FeatureOrErr->PostLinkCfg || PGOBBE.PostLinkBBFreq.has_value())
+ SHeader.sh_size += CBA.writeULEB128(PGOBBE.PostLinkBBFreq.value_or(0));
if (PGOBBE.Successors) {
SHeader.sh_size += CBA.writeULEB128(PGOBBE.Successors->size());
- for (const auto &[ID, BrProb] : *PGOBBE.Successors) {
+ for (const auto &[ID, BrProb, PostLinkBrFreq] : *PGOBBE.Successors) {
SHeader.sh_size += CBA.writeULEB128(ID);
SHeader.sh_size += CBA.writeULEB128(BrProb);
+ if (FeatureOrErr->PostLinkCfg || PostLinkBrFreq.has_value())
+ SHeader.sh_size += CBA.writeULEB128(PostLinkBrFreq.value_or(0));
}
}
}
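The emitter writes the new frequencies with writeULEB128, defaulting absent optionals to 0 when the feature is enabled. A standalone sketch of that encoding step:

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// Appends a ULEB128-encoded value and returns the number of bytes written,
// mirroring how the emitter accumulates sh_size.
size_t writeULEB128(std::vector<uint8_t> &Out, uint64_t Value) {
  size_t N = 0;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
    ++N;
  } while (Value);
  return N;
}

// Absent optional frequencies are emitted as 0, matching the
// "feature on || value present" condition above.
size_t writeOptFreq(std::vector<uint8_t> &Out, bool FeatureOn,
                    std::optional<uint32_t> Freq) {
  if (!FeatureOn && !Freq.has_value())
    return 0;
  return writeULEB128(Out, Freq.value_or(0));
}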
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index f8a84b0..e5e5fc2 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -1886,7 +1886,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry>::mapping(
IO &IO, ELFYAML::BBAddrMapEntry &E) {
assert(IO.getContext() && "The IO context is not initialized");
IO.mapRequired("Version", E.Version);
- IO.mapOptional("Feature", E.Feature, Hex8(0));
+ IO.mapOptional("Feature", E.Feature, Hex16(0));
IO.mapOptional("NumBBRanges", E.NumBBRanges);
IO.mapOptional("BBRanges", E.BBRanges);
}
@@ -1920,6 +1920,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry::PGOBBEntry>::mapping(
IO &IO, ELFYAML::PGOAnalysisMapEntry::PGOBBEntry &E) {
assert(IO.getContext() && "The IO context is not initialized");
IO.mapOptional("BBFreq", E.BBFreq);
+ IO.mapOptional("PostLinkBBFreq", E.PostLinkBBFreq);
IO.mapOptional("Successors", E.Successors);
}
@@ -1929,6 +1930,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry::PGOBBEntry::SuccessorEntry>::
assert(IO.getContext() && "The IO context is not initialized");
IO.mapRequired("ID", E.ID);
IO.mapRequired("BrProb", E.BrProb);
+ IO.mapOptional("PostLinkBrFreq", E.PostLinkBrFreq);
}
void MappingTraits<ELFYAML::GnuHashHeader>::mapping(IO &IO,
diff --git a/llvm/lib/ObjectYAML/GOFFYAML.cpp b/llvm/lib/ObjectYAML/GOFFYAML.cpp
index 60bc1f7..ecd7fb6 100644
--- a/llvm/lib/ObjectYAML/GOFFYAML.cpp
+++ b/llvm/lib/ObjectYAML/GOFFYAML.cpp
@@ -15,7 +15,7 @@
namespace llvm {
namespace GOFFYAML {
-Object::Object() {}
+Object::Object() = default;
} // namespace GOFFYAML
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 7290a86..6b7e980 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -537,7 +537,7 @@ void IRChangedPrinter::handleAfter(StringRef PassID, std::string &Name,
Out << "*** IR Dump After " << PassID << " on " << Name << " ***\n" << After;
}
-IRChangedTester::~IRChangedTester() {}
+IRChangedTester::~IRChangedTester() = default;
void IRChangedTester::registerCallbacks(PassInstrumentationCallbacks &PIC) {
if (TestChanged != "")
@@ -1566,7 +1566,7 @@ void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
TextChangeReporter<IRDataT<EmptyData>>::registerRequiredCallbacks(PIC);
}
-TimeProfilingPassesHandler::TimeProfilingPassesHandler() {}
+TimeProfilingPassesHandler::TimeProfilingPassesHandler() = default;
void TimeProfilingPassesHandler::registerCallbacks(
PassInstrumentationCallbacks &PIC) {
diff --git a/llvm/lib/Remarks/RemarkFormat.cpp b/llvm/lib/Remarks/RemarkFormat.cpp
index 1c52e35..f9fd4af 100644
--- a/llvm/lib/Remarks/RemarkFormat.cpp
+++ b/llvm/lib/Remarks/RemarkFormat.cpp
@@ -19,7 +19,7 @@ using namespace llvm::remarks;
Expected<Format> llvm::remarks::parseFormat(StringRef FormatStr) {
auto Result = StringSwitch<Format>(FormatStr)
- .Cases("", "yaml", Format::YAML)
+ .Cases({"", "yaml"}, Format::YAML)
.Case("bitstream", Format::Bitstream)
.Default(Format::Unknown);
diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp
index fb6ff62..6f5d072 100644
--- a/llvm/lib/SandboxIR/Context.cpp
+++ b/llvm/lib/SandboxIR/Context.cpp
@@ -637,7 +637,7 @@ Context::Context(LLVMContext &LLVMCtx)
: LLVMCtx(LLVMCtx), IRTracker(*this),
LLVMIRBuilder(LLVMCtx, ConstantFolder()) {}
-Context::~Context() {}
+Context::~Context() = default;
void Context::clear() {
// TODO: Ideally we should clear only function-scope objects, and keep global
diff --git a/llvm/lib/Support/AArch64BuildAttributes.cpp b/llvm/lib/Support/AArch64BuildAttributes.cpp
index 4a6b2fd..be4d1f1 100644
--- a/llvm/lib/Support/AArch64BuildAttributes.cpp
+++ b/llvm/lib/Support/AArch64BuildAttributes.cpp
@@ -67,8 +67,8 @@ StringRef AArch64BuildAttributes::getTypeStr(unsigned Type) {
}
SubsectionType AArch64BuildAttributes::getTypeID(StringRef Type) {
return StringSwitch<SubsectionType>(Type)
- .Cases("uleb128", "ULEB128", ULEB128)
- .Cases("ntbs", "NTBS", NTBS)
+ .Cases({"uleb128", "ULEB128"}, ULEB128)
+ .Cases({"ntbs", "NTBS"}, NTBS)
.Default(TYPE_NOT_FOUND);
}
StringRef AArch64BuildAttributes::getSubsectionTypeUnknownError() {
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index e21cf8e..e2645fa 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -269,12 +269,6 @@ bool APFloatBase::isRepresentableBy(const fltSemantics &A,
A.precision <= B.precision;
}
-constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
-constexpr RoundingMode APFloatBase::rmTowardPositive;
-constexpr RoundingMode APFloatBase::rmTowardNegative;
-constexpr RoundingMode APFloatBase::rmTowardZero;
-constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
-
/* A tight upper bound on number of parts required to hold the value
pow(5, power) is
diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp
index 1914f4c..d859abd 100644
--- a/llvm/lib/Support/BalancedPartitioning.cpp
+++ b/llvm/lib/Support/BalancedPartitioning.cpp
@@ -231,7 +231,7 @@ unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,
}
// Compute move gains
- typedef std::pair<float, BPFunctionNode *> GainPair;
+ using GainPair = std::pair<float, BPFunctionNode *>;
std::vector<GainPair> Gains;
for (auto &N : Nodes) {
bool FromLeftToRight = (N.Bucket == LeftBucket);
diff --git a/llvm/lib/Support/BranchProbability.cpp b/llvm/lib/Support/BranchProbability.cpp
index e376344..143e58a 100644
--- a/llvm/lib/Support/BranchProbability.cpp
+++ b/llvm/lib/Support/BranchProbability.cpp
@@ -20,8 +20,6 @@
using namespace llvm;
-constexpr uint32_t BranchProbability::D;
-
raw_ostream &BranchProbability::print(raw_ostream &OS) const {
if (isUnknown())
return OS << "?%";
@@ -111,3 +109,10 @@ uint64_t BranchProbability::scale(uint64_t Num) const {
uint64_t BranchProbability::scaleByInverse(uint64_t Num) const {
return ::scale<0>(Num, D, N);
}
+
+BranchProbability BranchProbability::pow(unsigned N) const {
+ BranchProbability Res = BranchProbability::getOne();
+ for (unsigned I = 0; I < N; ++I)
+ Res *= *this;
+ return Res;
+}
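The new pow() performs N multiplications; exponentiation by squaring would need only O(log N). Since BranchProbability rounds on every multiply, the two orders can differ in the low bits, which may be why the simple loop was chosen. A sketch of the squaring form with a plain double standing in for BranchProbability:

// powBySquaring(p, 3) ~ probability of three independent events in a row.
static double powBySquaring(double P, unsigned N) {
  double Res = 1.0;
  while (N) {
    if (N & 1)
      Res *= P;
    P *= P;
    N >>= 1;
  }
  return Res;
}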
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 9491ec0..dab8bee 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -382,7 +382,7 @@ public:
RegisteredSubCommands.erase(sub);
}
- iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator>
+ iterator_range<SmallPtrSet<SubCommand *, 4>::iterator>
getRegisteredSubcommands() {
return make_range(RegisteredSubCommands.begin(),
RegisteredSubCommands.end());
@@ -2343,10 +2343,10 @@ namespace {
class HelpPrinter {
protected:
const bool ShowHidden;
- typedef SmallVector<std::pair<const char *, Option *>, 128>
- StrOptionPairVector;
- typedef SmallVector<std::pair<const char *, SubCommand *>, 128>
- StrSubCommandPairVector;
+ using StrOptionPairVector =
+ SmallVector<std::pair<const char *, Option *>, 128>;
+ using StrSubCommandPairVector =
+ SmallVector<std::pair<const char *, SubCommand *>, 128>;
// Print the options. Opts is assumed to be alphabetically sorted.
virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) {
for (const auto &Opt : Opts)
@@ -2830,7 +2830,7 @@ StringMap<Option *> &cl::getRegisteredOptions(SubCommand &Sub) {
return Sub.OptionsMap;
}
-iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator>
+iterator_range<SmallPtrSet<SubCommand *, 4>::iterator>
cl::getRegisteredSubcommands() {
return GlobalParser->getRegisteredSubcommands();
}
diff --git a/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/llvm/lib/Support/DAGDeltaAlgorithm.cpp
index 98153647..3bfae14 100644
--- a/llvm/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/llvm/lib/Support/DAGDeltaAlgorithm.cpp
@@ -47,16 +47,16 @@ class DAGDeltaAlgorithmImpl {
friend class DeltaActiveSetHelper;
public:
- typedef DAGDeltaAlgorithm::change_ty change_ty;
- typedef DAGDeltaAlgorithm::changeset_ty changeset_ty;
- typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty;
- typedef DAGDeltaAlgorithm::edge_ty edge_ty;
+ using change_ty = DAGDeltaAlgorithm::change_ty;
+ using changeset_ty = DAGDeltaAlgorithm::changeset_ty;
+ using changesetlist_ty = DAGDeltaAlgorithm::changesetlist_ty;
+ using edge_ty = DAGDeltaAlgorithm::edge_ty;
private:
- typedef std::vector<change_ty>::iterator pred_iterator_ty;
- typedef std::vector<change_ty>::iterator succ_iterator_ty;
- typedef std::set<change_ty>::iterator pred_closure_iterator_ty;
- typedef std::set<change_ty>::iterator succ_closure_iterator_ty;
+ using pred_iterator_ty = std::vector<change_ty>::iterator;
+ using succ_iterator_ty = std::vector<change_ty>::iterator;
+ using pred_closure_iterator_ty = std::set<change_ty>::iterator;
+ using succ_closure_iterator_ty = std::set<change_ty>::iterator;
DAGDeltaAlgorithm &DDA;
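These typedef → using conversions are behavior-neutral: 'using' reads left-to-right and, unlike typedef, extends to alias templates. A minimal illustration:

#include <type_traits>
#include <vector>

typedef std::vector<int>::iterator OldIter;
using NewIter = std::vector<int>::iterator; // same type, clearer syntax
static_assert(std::is_same_v<OldIter, NewIter>);

// Alias templates are the one form typedef cannot express:
template <typename T> using Vec = std::vector<T>;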
diff --git a/llvm/lib/Support/DynamicLibrary.cpp b/llvm/lib/Support/DynamicLibrary.cpp
index f1c15c0..61566d3 100644
--- a/llvm/lib/Support/DynamicLibrary.cpp
+++ b/llvm/lib/Support/DynamicLibrary.cpp
@@ -23,7 +23,7 @@ using namespace llvm::sys;
// All methods for HandleSet should be used holding SymbolsMutex.
class DynamicLibrary::HandleSet {
- typedef std::vector<void *> HandleList;
+ using HandleList = std::vector<void *>;
HandleList Handles;
void *Process = &Invalid;
diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index b6a2f8a..2e8fba8 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -17,11 +17,6 @@
using namespace llvm;
-// MSVC emits references to this into the translation units which reference it.
-#ifndef _MSC_VER
-constexpr size_t StringRef::npos;
-#endif
-
// strncasecmp() is not available on non-POSIX systems, so define an
// alternative function here.
static int ascii_strncasecmp(StringRef LHS, StringRef RHS) {
diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp
index 9d45096..b08f508 100644
--- a/llvm/lib/Support/Timer.cpp
+++ b/llvm/lib/Support/Timer.cpp
@@ -207,7 +207,7 @@ void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
namespace {
-typedef StringMap<Timer> Name2TimerMap;
+using Name2TimerMap = StringMap<Timer>;
class Name2PairMap {
StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map;
diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
index 6f8e091..8f0d24e 100644
--- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp
+++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
@@ -251,10 +251,10 @@ constexpr const char *const HangulSyllables[][3] = {
// Unicode 15.0
// 3.12 Conjoining Jamo Behavior Common constants
-constexpr const char32_t SBase = 0xAC00;
-constexpr const uint32_t LCount = 19;
-constexpr const uint32_t VCount = 21;
-constexpr const uint32_t TCount = 28;
+constexpr char32_t SBase = 0xAC00;
+constexpr uint32_t LCount = 19;
+constexpr uint32_t VCount = 21;
+constexpr uint32_t TCount = 28;
static std::size_t findSyllable(StringRef Name, bool Strict,
char &PreviousInName, int &Pos, int Column) {
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index 648d6a5..da68994 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -421,8 +421,13 @@ bool sys::RemoveFileOnSignal(StringRef Filename, std::string *ErrMsg) {
return true;
}
- if (FilesToRemove == NULL)
+ if (FilesToRemove == NULL) {
FilesToRemove = new std::vector<std::string>;
+ std::atexit([]() {
+ delete FilesToRemove;
+ FilesToRemove = NULL;
+ });
+ }
FilesToRemove->push_back(std::string(Filename));
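The fix above pairs the lazy allocation with an atexit cleanup so the vector no longer leaks. The same pattern in a self-contained sketch (hypothetical names; a capture-less lambda decays to the void(*)() that std::atexit requires):

#include <cstdlib>
#include <string>
#include <vector>

static std::vector<std::string> *PendingFiles = nullptr;

void rememberFile(const std::string &Path) {
  if (!PendingFiles) {
    PendingFiles = new std::vector<std::string>;
    std::atexit([] {
      delete PendingFiles;
      PendingFiles = nullptr;
    });
  }
  PendingFiles->push_back(Path);
}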
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 07b9989..d6f27fb 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -61,17 +61,6 @@
using namespace llvm;
-constexpr raw_ostream::Colors raw_ostream::BLACK;
-constexpr raw_ostream::Colors raw_ostream::RED;
-constexpr raw_ostream::Colors raw_ostream::GREEN;
-constexpr raw_ostream::Colors raw_ostream::YELLOW;
-constexpr raw_ostream::Colors raw_ostream::BLUE;
-constexpr raw_ostream::Colors raw_ostream::MAGENTA;
-constexpr raw_ostream::Colors raw_ostream::CYAN;
-constexpr raw_ostream::Colors raw_ostream::WHITE;
-constexpr raw_ostream::Colors raw_ostream::SAVEDCOLOR;
-constexpr raw_ostream::Colors raw_ostream::RESET;
-
raw_ostream::~raw_ostream() {
// raw_ostream's subclasses should take care to flush the buffer
// in their destructors.
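The deleted definitions here (and in APFloat.cpp, BranchProbability.cpp, and StringRef.cpp above) date from C++14: since C++17 a constexpr static data member is implicitly inline, so the in-class declaration is already a definition. Sketch:

#include <cstddef>

struct Table {
  static constexpr size_t Size = 64; // C++17: implicitly inline definition
};

// ODR-use links fine without any 'constexpr size_t Table::Size;' in a .cpp.
const size_t *sizeAddr() { return &Table::Size; }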
diff --git a/llvm/lib/Support/raw_socket_stream.cpp b/llvm/lib/Support/raw_socket_stream.cpp
index 3b510d3..f716317 100644
--- a/llvm/lib/Support/raw_socket_stream.cpp
+++ b/llvm/lib/Support/raw_socket_stream.cpp
@@ -332,7 +332,7 @@ ListeningSocket::~ListeningSocket() {
raw_socket_stream::raw_socket_stream(int SocketFD)
: raw_fd_stream(SocketFD, true) {}
-raw_socket_stream::~raw_socket_stream() {}
+raw_socket_stream::~raw_socket_stream() = default;
Expected<std::unique_ptr<raw_socket_stream>>
raw_socket_stream::createConnectedUnix(StringRef SocketPath) {
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index 30eae6e..e8e6469 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -682,8 +682,10 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("instances", tgtok::XInstances)
.Case("substr", tgtok::XSubstr)
.Case("find", tgtok::XFind)
- .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
- .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
+ .Cases({"setdagop", "setop"},
+ tgtok::XSetDagOp) // !setop is deprecated.
+ .Cases({"getdagop", "getop"},
+ tgtok::XGetDagOp) // !getop is deprecated.
.Case("setdagopname", tgtok::XSetDagOpName)
.Case("getdagopname", tgtok::XGetDagOpName)
.Case("getdagarg", tgtok::XGetDagArg)
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 7e03b97..45b7120 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -370,6 +370,22 @@ SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {
{ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
}
+SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations(
+ SVEFrameSizes const &SVE) {
+ StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
+ StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
+ StackOffset AfterPPRs = {};
+ if (SVELayout == SVEStackLayout::Split) {
+ BeforePPRs = SVE.PPR.CalleeSavesSize;
+ // If there are no ZPR CSRs, place all local allocations after the ZPRs.
+ if (SVE.ZPR.CalleeSavesSize)
+ AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
+ else
+ AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
+ }
+ return {BeforePPRs, AfterPPRs, AfterZPRs};
+}
+
struct SVEPartitions {
struct {
MachineBasicBlock::iterator Begin, End;
@@ -687,16 +703,19 @@ void AArch64PrologueEmitter::emitPrologue() {
// All of the remaining stack allocations are for locals.
determineLocalsStackSize(NumBytes, PrologueSaveSize);
+ auto [PPR, ZPR] = getSVEStackFrameSizes();
+ SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
+
MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
+ assert(!SVEAllocs.AfterPPRs &&
+ "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
// If we're doing SVE saves first, we need to immediately allocate space
// for fixed objects, then space for the SVE callee saves.
//
// Windows unwind requires that the scalable size is a multiple of 16;
// that's handled when the callee-saved size is computed.
- auto SaveSize =
- StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
- StackOffset::getFixed(FixedObject);
+ auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
/*FollowupAllocs=*/true);
NumBytes -= FixedObject;
@@ -764,12 +783,11 @@ void AArch64PrologueEmitter::emitPrologue() {
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
- auto [PPR, ZPR] = getSVEStackFrameSizes();
- StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
+ SVEAllocs.AfterZPRs += NonSVELocalsSize;
+
StackOffset CFAOffset =
StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
-
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
// Allocate space for the callee saves and PPR locals (if any).
if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
@@ -780,31 +798,23 @@ void AArch64PrologueEmitter::emitPrologue() {
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(AfterSVESavesI);
- StackOffset AllocateBeforePPRs = SVECalleeSavesSize;
- StackOffset AllocateAfterPPRs = PPR.LocalsSize;
- if (SVELayout == SVEStackLayout::Split) {
- AllocateBeforePPRs = PPR.CalleeSavesSize;
- AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize;
- }
- allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs,
+ allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || AllocateAfterPPRs ||
- ZPR.LocalsSize || NonSVELocalsSize);
- CFAOffset += AllocateBeforePPRs;
+ MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
+ SVEAllocs.AfterZPRs);
+ CFAOffset += SVEAllocs.BeforePPRs;
assert(PPRRange.End == ZPRRange.Begin &&
"Expected ZPR callee saves after PPR locals");
- allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs,
+ allocateStackSpace(PPRRange.End, RealignmentPadding, SVEAllocs.AfterPPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || ZPR.LocalsSize ||
- NonSVELocalsSize);
- CFAOffset += AllocateAfterPPRs;
+ MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
+ CFAOffset += SVEAllocs.AfterPPRs;
} else {
assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord);
- // Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been
- // allocated (and separate PPR locals are not supported, all SVE locals,
- // both PPR and ZPR, are within the ZPR locals area).
- assert(!PPR.LocalsSize && "Unexpected PPR locals!");
- CFAOffset += SVECalleeSavesSize;
+ // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
+ // already been allocated. PPR locals (included in AfterPPRs) are not
+ // supported (asserted above).
+ CFAOffset += SVEAllocs.BeforePPRs;
}
// Allocate space for the rest of the frame including ZPR locals. Align the
@@ -815,9 +825,9 @@ void AArch64PrologueEmitter::emitPrologue() {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
// correct value here, as NumBytes also includes padding bytes, which
// shouldn't be counted here.
- allocateStackSpace(
- AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize,
- EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects());
+ allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects());
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -1472,27 +1482,26 @@ void AArch64EpilogueEmitter::emitEpilogue() {
assert(NumBytes >= 0 && "Negative stack allocation size!?");
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
- StackOffset SVEStackSize =
- SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
+ SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin;
- MachineBasicBlock::iterator RestoreEnd = PPRRange.End;
// Deallocate the SVE area.
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
- StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize;
+ assert(!SVEAllocs.AfterPPRs &&
+ "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
// If the callee-save area is before FP, restoring the FP implicitly
- // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
+ // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
// explicitly.
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
- SVELocalsSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI);
}
// Deallocate callee-save SVE registers.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI);
} else if (AFI->hasSVEStackSize()) {
// If we have stack realignment or variable-sized objects we must use the FP
// to restore SVE callee saves (as there is an unknown amount of
@@ -1524,46 +1533,33 @@ void AArch64EpilogueEmitter::emitEpilogue() {
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
-SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);
} else if (BaseForSVEDealloc == AArch64::SP) {
- auto CFAOffset =
- SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
-
- if (SVECalleeSavesSize) {
- // Deallocate the non-SVE locals first before we can deallocate (and
- // restore callee saves) from the SVE area.
- auto NonSVELocals = StackOffset::getFixed(NumBytes);
- emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
- NonSVELocals, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
- CFAOffset -= NonSVELocals;
- NumBytes = 0;
- }
-
- if (ZPR.LocalsSize) {
- emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
- ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
- CFAOffset -= ZPR.LocalsSize;
+ auto NonSVELocals = StackOffset::getFixed(NumBytes);
+ auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +
+ SVEAllocs.totalSize();
+
+ if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
+ // Deallocate non-SVE locals now. This is needed to reach the SVE callee
+ // saves, but may also allow combining stack hazard bumps for split SVE.
+ SVEAllocs.AfterZPRs += NonSVELocals;
+ NumBytes -= NonSVELocals.getFixed();
}
-
- StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize;
- if (SVELayout == SVEStackLayout::Split &&
- (PPR.LocalsSize || ZPR.CalleeSavesSize)) {
- assert(PPRRange.Begin == ZPRRange.End &&
- "Expected PPR restores after ZPR");
- emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
- PPR.LocalsSize + ZPR.CalleeSavesSize, TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI,
- &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
- CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize;
- SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize;
- }
-
- // If split SVE is on, this dealloc PPRs, otherwise, deallocs ZPRs + PPRs:
- if (SVECalleeSavesToDealloc)
- emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
- SVECalleeSavesToDealloc, TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI,
- &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
+ // To deallocate the SVE stack, adjust by the allocations in reverse.
+ emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ CFAOffset);
+ CFAOffset -= SVEAllocs.AfterZPRs;
+ assert(PPRRange.Begin == ZPRRange.End &&
+ "Expected PPR restores after ZPR");
+ emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.AfterPPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ CFAOffset);
+ CFAOffset -= SVEAllocs.AfterPPRs;
+ emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ CFAOffset);
}
if (EmitCFI)
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
index bccadda..6e0e283 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
@@ -33,6 +33,11 @@ struct SVEFrameSizes {
} PPR, ZPR;
};
+struct SVEStackAllocations {
+ StackOffset BeforePPRs, AfterPPRs, AfterZPRs;
+ StackOffset totalSize() const { return BeforePPRs + AfterPPRs + AfterZPRs; }
+};
+
class AArch64PrologueEpilogueCommon {
public:
AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -66,6 +71,7 @@ protected:
bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;
SVEFrameSizes getSVEStackFrameSizes() const;
+ SVEStackAllocations getSVEStackAllocations(SVEFrameSizes const &);
MachineFunction &MF;
MachineBasicBlock &MBB;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index fede586..5b5565a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -308,9 +308,9 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
}
-bool AArch64TTIImpl::areTypesABICompatible(
- const Function *Caller, const Function *Callee,
- const ArrayRef<Type *> &Types) const {
+bool AArch64TTIImpl::areTypesABICompatible(const Function *Caller,
+ const Function *Callee,
+ ArrayRef<Type *> Types) const {
if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
return false;
@@ -1032,6 +1032,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
break;
}
+ case Intrinsic::experimental_vector_extract_last_active:
+ if (ST->isSVEorStreamingSVEAvailable()) {
+ auto [LegalCost, _] = getTypeLegalizationCost(ICA.getArgTypes()[0]);
+ // This should turn into chained clastb instructions.
+ return LegalCost;
+ }
+ break;
default:
break;
}
@@ -2220,7 +2227,7 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
return std::nullopt;
}
-template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
+template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,
bool MergeIntoAddendOp) {
@@ -6650,10 +6657,15 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
Ops.push_back(&Ext->getOperandUse(0));
Ops.push_back(&Op);
- if (isa<SExtInst>(Ext))
+ if (isa<SExtInst>(Ext)) {
NumSExts++;
- else
+ } else {
NumZExts++;
+ // A zext(a) is also a sext(zext(a)) if we extend by more than 2x.
+ if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
+ I->getType()->getScalarSizeInBits())
+ NumSExts++;
+ }
continue;
}
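An exhaustive check of the comment above for i8 -> i16 -> i32: zext(i8->i16) leaves the i16 sign bit clear, so the second extension gives the same bits whether it is a zext or a sext:

#include <cassert>
#include <cstdint>

void checkZextIsAlsoSext() {
  for (unsigned V = 0; V < 256; ++V) {
    uint16_t Z16 = static_cast<uint8_t>(V);  // zext i8 -> i16, top bit 0
    uint32_t ViaZext = Z16;                  // zext i16 -> i32
    uint32_t ViaSext =
        static_cast<uint32_t>(static_cast<int16_t>(Z16)); // sext i16 -> i32
    assert(ViaZext == ViaSext);
  }
}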
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index fe2e849..b39546a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -84,7 +84,7 @@ public:
const Function *Callee) const override;
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
- const ArrayRef<Type *> &Types) const override;
+ ArrayRef<Type *> Types) const override;
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
unsigned DefaultCallPenalty) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index cd8b249..67042b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -69,7 +69,7 @@ FunctionPass *createAMDGPUPreloadKernArgPrologLegacyPass();
ModulePass *createAMDGPUPreloadKernelArgumentsLegacyPass(const TargetMachine *);
struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
- AMDGPUSimplifyLibCallsPass() {}
+ AMDGPUSimplifyLibCallsPass() = default;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
@@ -371,13 +371,13 @@ public:
class AMDGPUAnnotateUniformValuesPass
: public PassInfoMixin<AMDGPUAnnotateUniformValuesPass> {
public:
- AMDGPUAnnotateUniformValuesPass() {}
+ AMDGPUAnnotateUniformValuesPass() = default;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
public:
- SIModeRegisterPass() {}
+ SIModeRegisterPass() = default;
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 1064e57..dad94b8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -96,7 +96,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {
}
struct KernArgPreloadDescriptor : public ArgDescriptor {
- KernArgPreloadDescriptor() {}
+ KernArgPreloadDescriptor() = default;
SmallVector<MCRegister> Regs;
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 9907c88f..8669978 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1555,7 +1555,7 @@ private:
AMDGPU::ClusterDimsAttr Attr;
- static constexpr const char AttrName[] = "amdgpu-cluster-dims";
+ static constexpr char AttrName[] = "amdgpu-cluster-dims";
};
AAAMDGPUClusterDims &
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9ce1224..0c97741 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -221,12 +221,21 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
const DebugLoc &DL = I.getDebugLoc();
MachineBasicBlock *BB = I.getParent();
+ Register VCCReg = I.getOperand(1).getReg();
+ MachineInstr *Cmp;
+
+ // Set SCC as a side effect with S_CMP or S_OR.
+ if (STI.hasScalarCompareEq64()) {
+ unsigned CmpOpc =
+ STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
+ Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0);
+ } else {
+ Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
+ .addReg(VCCReg)
+ .addReg(VCCReg);
+ }
- unsigned CmpOpc =
- STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
- MachineInstr *Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc))
- .addReg(I.getOperand(1).getReg())
- .addImm(0);
if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI))
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index 1e6589e..d7d0292 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -58,6 +58,8 @@ class AMDGPULowerVGPREncoding {
static constexpr unsigned BitsPerField = 2;
static constexpr unsigned NumFields = 4;
static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;
+ static constexpr unsigned ModeWidth = NumFields * BitsPerField;
+ static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
using ModeType = PackedVector<unsigned, BitsPerField,
std::bitset<BitsPerField * NumFields>>;
@@ -82,12 +84,12 @@ private:
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
+ // Current basic block.
+ MachineBasicBlock *MBB;
+
/// Most recent s_set_* instruction.
MachineInstr *MostRecentModeSet;
- /// Whether the current mode is known.
- bool CurrentModeKnown;
-
/// Current mode bits.
ModeTy CurrentMode;
@@ -108,10 +110,13 @@ private:
MachineInstr *Clause;
/// Insert a mode change before \p I. \returns true if the mode was changed.
- bool setMode(ModeTy NewMode, ModeTy Mask, MachineInstr *I);
+ bool setMode(ModeTy NewMode, ModeTy Mask,
+ MachineBasicBlock::instr_iterator I);
/// Reset mode to default.
- void resetMode(MachineInstr *I) { setMode(ModeTy(), ModeTy::fullMask(), I); }
+ void resetMode(MachineBasicBlock::instr_iterator I) {
+ setMode(ModeTy(), ModeTy::fullMask(), I);
+ }
/// If \p MO references VGPRs, return the MSBs. Otherwise, return nullopt.
std::optional<unsigned> getMSBs(const MachineOperand &MO) const;
@@ -130,38 +135,43 @@ private:
/// Check if an instruction \p I is within a clause and return a suitable
/// iterator to insert a mode change. It may also modify the S_CLAUSE
/// instruction to extend it or drop the clause if it cannot be adjusted.
- MachineInstr *handleClause(MachineInstr *I);
+ MachineBasicBlock::instr_iterator
+ handleClause(MachineBasicBlock::instr_iterator I);
};
bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
- MachineInstr *I) {
+ MachineBasicBlock::instr_iterator I) {
assert((NewMode.raw_bits() & ~Mask.raw_bits()).none());
- if (CurrentModeKnown) {
- auto Delta = NewMode.raw_bits() ^ CurrentMode.raw_bits();
+ auto Delta = NewMode.raw_bits() ^ CurrentMode.raw_bits();
- if ((Delta & Mask.raw_bits()).none()) {
- CurrentMask |= Mask;
- return false;
- }
+ if ((Delta & Mask.raw_bits()).none()) {
+ CurrentMask |= Mask;
+ return false;
+ }
- if (MostRecentModeSet && (Delta & CurrentMask.raw_bits()).none()) {
- CurrentMode |= NewMode;
- CurrentMask |= Mask;
+ if (MostRecentModeSet && (Delta & CurrentMask.raw_bits()).none()) {
+ CurrentMode |= NewMode;
+ CurrentMask |= Mask;
- MostRecentModeSet->getOperand(0).setImm(CurrentMode);
- return true;
- }
+ MachineOperand &Op = MostRecentModeSet->getOperand(0);
+
+ // Carry old mode bits from the existing instruction.
+ int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
+
+ Op.setImm(CurrentMode | OldModeBits);
+ return true;
}
+ // Record previous mode into high 8 bits of the immediate.
+ int64_t OldModeBits = CurrentMode << ModeWidth;
+
I = handleClause(I);
- MostRecentModeSet =
- BuildMI(*I->getParent(), I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
- .addImm(NewMode);
+ MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
+ .addImm(NewMode | OldModeBits);
CurrentMode = NewMode;
CurrentMask = Mask;
- CurrentModeKnown = true;
return true;
}
@@ -233,21 +243,22 @@ bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
if (Ops.first) {
ModeTy NewMode, Mask;
computeMode(NewMode, Mask, MI, Ops.first, Ops.second);
- return setMode(NewMode, Mask, &MI);
+ return setMode(NewMode, Mask, MI.getIterator());
}
assert(!TII->hasVGPRUses(MI) || MI.isMetaInstruction() || MI.isPseudo());
return false;
}
-MachineInstr *AMDGPULowerVGPREncoding::handleClause(MachineInstr *I) {
+MachineBasicBlock::instr_iterator
+AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
if (!ClauseRemaining)
return I;
// A clause cannot start with a special instruction; place it right before
// the clause.
if (ClauseRemaining == ClauseLen) {
- I = Clause->getPrevNode();
+ I = Clause->getPrevNode()->getIterator();
assert(I->isBundle());
return I;
}
@@ -284,9 +295,9 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
ClauseLen = ClauseRemaining = 0;
CurrentMode.reset();
CurrentMask.reset();
- CurrentModeKnown = true;
for (auto &MBB : MF) {
MostRecentModeSet = nullptr;
+ this->MBB = &MBB;
for (auto &MI : llvm::make_early_inc_range(MBB.instrs())) {
if (MI.isMetaInstruction())
@@ -294,17 +305,16 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
if (MI.isTerminator() || MI.isCall()) {
if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
- MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
+ MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED)
CurrentMode.reset();
- CurrentModeKnown = true;
- } else
- resetMode(&MI);
+ else
+ resetMode(MI.getIterator());
continue;
}
if (MI.isInlineAsm()) {
if (TII->hasVGPRUses(MI))
- resetMode(&MI);
+ resetMode(MI.getIterator());
continue;
}
@@ -323,14 +333,8 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
--ClauseRemaining;
}
- // If we're falling through to a block that has at least one other
- // predecessor, we no longer know the mode.
- MachineBasicBlock *Next = MBB.getNextNode();
- if (Next && Next->pred_size() >= 2 &&
- llvm::is_contained(Next->predecessors(), &MBB)) {
- if (CurrentMode.raw_bits().any())
- CurrentModeKnown = false;
- }
+ // Reset the mode if we are falling through.
+ resetMode(MBB.instr_end());
}
return Changed;
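A sketch of the S_SET_VGPR_MSB immediate layout this pass now maintains — an assumption pieced together from this patch (and the `& 0xff` in the AMDGPUMCInstLower.cpp printer change below), not a documented format: four 2-bit MSB fields packed in the low byte, with the previous mode carried in bits 8..15:

#include <cstdint>

struct VgprMsbFields {
  unsigned Src0, Src1, Src2, Dst; // each holds a 2-bit value
};

// New mode goes in the low 8 bits; the prior mode is saved alongside it.
uint16_t packModes(uint8_t NewMode, uint8_t OldMode) {
  return uint16_t(NewMode) | uint16_t(OldMode) << 8;
}

VgprMsbFields decodeCurrent(uint16_t Imm) {
  unsigned V = Imm & 0xff; // same masking as the comment printer below
  return {V & 3, (V >> 2) & 3, (V >> 4) & 3, (V >> 6) & 3};
}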
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 680e7eb..844649ebb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -412,7 +412,7 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
*OutStreamer);
if (isVerbose() && MI->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
- unsigned V = MI->getOperand(0).getImm();
+ unsigned V = MI->getOperand(0).getImm() & 0xff;
OutStreamer->AddComment(
" msbs: dst=" + Twine(V >> 6) + " src0=" + Twine(V & 3) +
" src1=" + Twine((V >> 2) & 3) + " src2=" + Twine((V >> 4) & 3));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index cf2ab825..a3be0f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -48,7 +48,7 @@ private:
FuncInfoMap FIM;
public:
- AMDGPUPerfHintAnalysis() {}
+ AMDGPUPerfHintAnalysis() = default;
// OldPM
bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index e187959..907f830 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
@@ -34,9 +35,17 @@
using namespace llvm;
using namespace AMDGPU;
+using namespace llvm::MIPatternMatch;
namespace {
+// AMDGPU-specific pattern matchers
+template <typename SrcTy>
+inline UnaryOp_match<SrcTy, AMDGPU::G_AMDGPU_READANYLANE>
+m_GAMDGPUReadAnyLane(const SrcTy &Src) {
+ return UnaryOp_match<SrcTy, AMDGPU::G_AMDGPU_READANYLANE>(Src);
+}
+
class AMDGPURegBankLegalize : public MachineFunctionPass {
public:
static char ID;
@@ -160,10 +169,18 @@ AMDGPURegBankLegalizeCombiner::tryMatchRALFromUnmerge(Register Src) {
Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
// Src = G_AMDGPU_READANYLANE RALSrc
- auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
- if (RAL)
+ Register RALSrc;
+ if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
return RALSrc;
+ // TruncSrc = G_AMDGPU_READANYLANE RALSrc
+ // AextSrc = G_TRUNC TruncSrc
+ // Src = G_ANYEXT AextSrc
+ if (mi_match(Src, MRI,
+ m_GAnyExt(m_GTrunc(m_GAMDGPUReadAnyLane(m_Reg(RALSrc)))))) {
+ return RALSrc;
+ }
+
// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 5407566..dc8fa7f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -500,6 +500,16 @@ void RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
MI.eraseFromParent();
}
+void RegBankLegalizeHelper::lowerUnpackAExt(MachineInstr &MI) {
+ auto [Op1Lo, Op1Hi] = unpackAExt(MI.getOperand(1).getReg());
+ auto [Op2Lo, Op2Hi] = unpackAExt(MI.getOperand(2).getReg());
+ auto ResLo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Lo, Op2Lo});
+ auto ResHi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Hi, Op2Hi});
+ B.buildBuildVectorTrunc(MI.getOperand(0).getReg(),
+ {ResLo.getReg(0), ResHi.getReg(0)});
+ MI.eraseFromParent();
+}
+
static bool isSignedBFE(MachineInstr &MI) {
if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -616,6 +626,23 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
MI.eraseFromParent();
}
+void RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ assert(MRI.getType(Dst) == V2S16);
+ auto [Op1Lo32, Op1Hi32] = unpackAExt(MI.getOperand(1).getReg());
+ auto [Op2Lo32, Op2Hi32] = unpackAExt(MI.getOperand(2).getReg());
+ unsigned Opc = MI.getOpcode();
+ auto Flags = MI.getFlags();
+ auto Op1Lo = B.buildTrunc(SgprRB_S16, Op1Lo32);
+ auto Op1Hi = B.buildTrunc(SgprRB_S16, Op1Hi32);
+ auto Op2Lo = B.buildTrunc(SgprRB_S16, Op2Lo32);
+ auto Op2Hi = B.buildTrunc(SgprRB_S16, Op2Hi32);
+ auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo}, Flags);
+ auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi}, Flags);
+ B.buildMergeLikeInstr(Dst, {Lo, Hi});
+ MI.eraseFromParent();
+}
+
void RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -688,6 +715,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
return lowerUnpackBitShift(MI);
case UnpackMinMax:
return lowerUnpackMinMax(MI);
+ case ScalarizeToS16:
+ return lowerSplitTo16(MI);
case Ext32To64: {
const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
MachineInstrBuilder Hi;
@@ -804,6 +833,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
}
break;
}
+ case UnpackAExt:
+ return lowerUnpackAExt(MI);
case WidenMMOToS32:
return widenMMOToS32(cast<GAnyLoad>(MI));
}
@@ -837,6 +868,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
return LLT::scalar(32);
case Sgpr64:
case Vgpr64:
+ case UniInVgprS64:
return LLT::scalar(64);
case Sgpr128:
case Vgpr128:
@@ -960,6 +992,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
case UniInVcc:
case UniInVgprS16:
case UniInVgprS32:
+ case UniInVgprS64:
case UniInVgprV2S16:
case UniInVgprV4S32:
case UniInVgprB32:
@@ -1092,6 +1125,7 @@ void RegBankLegalizeHelper::applyMappingDst(
break;
}
case UniInVgprS32:
+ case UniInVgprS64:
case UniInVgprV2S16:
case UniInVgprV4S32: {
assert(Ty == getTyFromID(MethodIDs[OpIdx]));
@@ -1120,7 +1154,8 @@ void RegBankLegalizeHelper::applyMappingDst(
assert(RB == SgprRB);
Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
Op.setReg(NewDst);
- B.buildTrunc(Reg, NewDst);
+ if (!MRI.use_empty(Reg))
+ B.buildTrunc(Reg, NewDst);
break;
}
case InvalidMapping: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index d937815..e7598f8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -72,6 +72,7 @@ class RegBankLegalizeHelper {
static constexpr LLT P6 = LLT::pointer(6, 32);
MachineRegisterInfo::VRegAttrs SgprRB_S32 = {SgprRB, S32};
+ MachineRegisterInfo::VRegAttrs SgprRB_S16 = {SgprRB, S16};
MachineRegisterInfo::VRegAttrs VgprRB_S32 = {VgprRB, S32};
MachineRegisterInfo::VRegAttrs VccRB_S1 = {VccRB, S1};
@@ -121,9 +122,11 @@ private:
void lowerV_BFE(MachineInstr &MI);
void lowerS_BFE(MachineInstr &MI);
void lowerSplitTo32(MachineInstr &MI);
+ void lowerSplitTo16(MachineInstr &MI);
void lowerSplitTo32Select(MachineInstr &MI);
void lowerSplitTo32SExtInReg(MachineInstr &MI);
void lowerUnpackMinMax(MachineInstr &MI);
+ void lowerUnpackAExt(MachineInstr &MI);
};
} // end namespace AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index a67b12a..1e5885a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -202,7 +202,7 @@ bool PredicateMapping::match(const MachineInstr &MI,
return true;
}
-SetOfRulesForOpcode::SetOfRulesForOpcode() {}
+SetOfRulesForOpcode::SetOfRulesForOpcode() = default;
SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
: FastTypes(FastTypes) {}
@@ -470,7 +470,19 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
- .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+ .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
+ .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
+ .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});
+
+ addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
+ .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
+ .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
+
+ addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
+ .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
+ .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
@@ -901,14 +913,28 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
- addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}});
+ addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
+
+ addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
+ .Uni(S64, {{Sgpr64}, {}});
bool hasSALUFloat = ST->hasSALUFloatInsts();
addRulesForGOpcs({G_FADD}, Standard)
+ .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
+ .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
+ .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
- .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+ .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
+ .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
+ .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
+ .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
+ hasSALUFloat)
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
+ .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
+ .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}});
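Note how the uniform G_FADD cases split on hasSALUFloat: with SALU float instructions the uniform s16/s32/v2s16 adds stay on the SGPR bank (Sgpr16, Sgpr32, and SgprV2S16 via ScalarizeToS16), while without them the add is computed in VGPRs and the uniform result is read back (the UniInVgpr* mappings); the s64 and v2s32 adds always go through VGPRs.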
addRulesForGOpcs({G_FPTOUI})
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 93e0efd..e6df5d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -92,8 +92,10 @@ enum UniformityLLTOpPredicateID {
V4S32,
UniV2S16,
+ UniV2S32,
DivV2S16,
+ DivV2S32,
// B types
B32,
@@ -178,7 +180,9 @@ enum RegBankLLTMappingApplyID {
UniInVcc,
UniInVgprS16,
UniInVgprS32,
+ UniInVgprS64,
UniInVgprV2S16,
+ UniInVgprV2S32,
UniInVgprV4S32,
UniInVgprB32,
UniInVgprB64,
@@ -217,13 +221,15 @@ enum LoweringMethodID {
V_BFE,
VgprToVccCopy,
SplitTo32,
+ ScalarizeToS16,
SplitTo32Select,
SplitTo32SExtInReg,
Ext32To64,
UniCstExt,
SplitLoad,
WidenLoad,
- WidenMMOToS32
+ WidenMMOToS32,
+ UnpackAExt
};
enum FastRulesTypes {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 75a94ac..b87b54f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -816,7 +816,7 @@ parseAMDGPUAtomicOptimizerStrategy(StringRef Params) {
Params.consume_front("strategy=");
auto Result = StringSwitch<std::optional<ScanOptions>>(Params)
.Case("dpp", ScanOptions::DPP)
- .Cases("iterative", "", ScanOptions::Iterative)
+ .Cases({"iterative", ""}, ScanOptions::Iterative)
.Case("none", ScanOptions::None)
.Default(std::nullopt);
if (Result)
@@ -1315,6 +1315,9 @@ void AMDGPUPassConfig::addIRPasses() {
isPassEnabled(EnableImageIntrinsicOptimizer))
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
+ if (EnableUniformIntrinsicCombine)
+ addPass(createAMDGPUUniformIntrinsicCombineLegacyPass());
+
// This can be disabled by passing ::Disable here or on the command line
// with --expand-variadics-override=disable.
addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
@@ -2066,6 +2069,8 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
if (isPassEnabled(EnableImageIntrinsicOptimizer))
addPass(AMDGPUImageIntrinsicOptimizerPass(TM));
+ if (EnableUniformIntrinsicCombine)
+ addPass(AMDGPUUniformIntrinsicCombinePass());
// This can be disabled by passing ::Disable here or on the command line
// with --expand-variadics-override=disable.
addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 733c5d5..ddf9a24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -225,8 +225,8 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
if (DummyReturnBB == nullptr) {
- DummyReturnBB = BasicBlock::Create(F.getContext(),
- "DummyReturnBlock", &F);
+ DummyReturnBB =
+ BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
Type *RetTy = F.getReturnType();
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 61c5dcd..ded2f5a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -54,7 +54,7 @@ public:
bool CullSGPRHazardsAtMemWait;
unsigned CullSGPRHazardsMemWaitThreshold;
- AMDGPUWaitSGPRHazards() {}
+ AMDGPUWaitSGPRHazards() = default;
// Return the numeric ID 0-127 for a given SGPR.
static std::optional<unsigned> sgprNumber(Register Reg,
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 975781f..f357981 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -183,7 +183,7 @@ class ScheduleMetrics {
unsigned BubbleCycles;
public:
- ScheduleMetrics() {}
+ ScheduleMetrics() = default;
ScheduleMetrics(unsigned L, unsigned BC)
: ScheduleLength(L), BubbleCycles(BC) {}
unsigned getLength() const { return ScheduleLength; }
@@ -217,7 +217,7 @@ class RegionPressureMap {
bool IsLiveOut;
public:
- RegionPressureMap() {}
+ RegionPressureMap() = default;
RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
: DAG(GCNDAG), IsLiveOut(LiveOut) {}
// Build the Instr->LiveReg and RegionIdx->Instr maps
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 013cfeb..28b4da8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -168,7 +168,7 @@ bool AMDGPUMCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
void AMDGPUMCInstrAnalysis::updateState(const MCInst &Inst, uint64_t Addr) {
if (Inst.getOpcode() == AMDGPU::S_SET_VGPR_MSB_gfx12)
- VgprMSBs = Inst.getOperand(0).getImm();
+ VgprMSBs = Inst.getOperand(0).getImm() & 0xff;
else if (isTerminator(Inst))
VgprMSBs = 0;
}
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index d950131..65dce74 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -2116,8 +2116,10 @@ class VIMAGE_TENSOR_Real <bits<8> op, VIMAGE_TENSOR_Pseudo ps, string opName = p
let vaddr2 = !if(ps.UpTo2D, !cast<int>(SGPR_NULL_gfx11plus.HWEncoding), ?);
let vaddr3 = !if(ps.UpTo2D, !cast<int>(SGPR_NULL_gfx11plus.HWEncoding), ?);
+ // Set VADDR4 to NULL
+ let vaddr4 = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+
// Set to 0 based on SPG.
- let vaddr4 = 0;
let rsrc = 0;
let vdata = 0;
let d16 = 0;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b34ab2a..8bb2808 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7035,9 +7035,15 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
SDLoc SL(N);
if (Src.getOpcode() == ISD::SETCC) {
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ // Need to expand bfloat to float for comparison (setcc).
+ if (Op0.getValueType() == MVT::bf16) {
+ Op0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op0);
+ Op1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op1);
+ }
// (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...)
- return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0),
- Src.getOperand(1), Src.getOperand(2));
+ return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Op0, Op1, Src.getOperand(2));
}
if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
// (ballot 0) -> 0
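Worked example of the bf16 fix above (hypothetical operands, olt chosen for illustration): (ballot (setcc bf16 %a, %b, olt)) now lowers as (AMDGPUISD::SETCC (fp_extend %a to f32), (fp_extend %b to f32), olt), i.e. both compare operands are widened to f32 before the divergent-lane test, since the comparison cannot be formed directly on bfloat here.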
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 6dcbced..b7fa899 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1288,18 +1288,38 @@ void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
}
void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) {
+  // On entry to a block with multiple predecessors, there may be pending
+  // SMEM and VMEM events active at the same time. In such cases, only
+  // clear one pending event group at a time.
+ auto applyPendingXcntGroup = [this](unsigned E) {
+ unsigned LowerBound = getScoreLB(X_CNT);
+ applyWaitcnt(X_CNT, 0);
+ PendingEvents |= (1 << E);
+ setScoreLB(X_CNT, LowerBound);
+ };
+
// Wait on XCNT is redundant if we are already waiting for a load to complete.
// SMEM can return out of order, so only omit XCNT wait if we are waiting till
// zero.
- if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP))
- return applyWaitcnt(X_CNT, 0);
+ if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) {
+ if (hasPendingEvent(VMEM_GROUP))
+ applyPendingXcntGroup(VMEM_GROUP);
+ else
+ applyWaitcnt(X_CNT, 0);
+ return;
+ }
// If we have a pending store we cannot optimize XCnt because we do not wait
// for stores. VMEM loads return in order, so if we only have loads, XCnt is
// decremented to the same number as LOADCnt.
if (Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) &&
- !hasPendingEvent(STORE_CNT))
- return applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt));
+ !hasPendingEvent(STORE_CNT)) {
+ if (hasPendingEvent(SMEM_GROUP))
+ applyPendingXcntGroup(SMEM_GROUP);
+ else
+ applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt));
+ return;
+ }
applyWaitcnt(X_CNT, Wait.XCnt);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d930a21..45f5919 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6153,7 +6153,7 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
// information.
if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&
MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) {
- constexpr const AMDGPU::OpName OpNames[] = {
+ constexpr AMDGPU::OpName OpNames[] = {
AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
for (auto [I, OpName] : enumerate(OpNames)) {
@@ -6215,8 +6215,8 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(
const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
const MachineOperand *MO) const {
- constexpr const unsigned NumOps = 3;
- constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
+ constexpr unsigned NumOps = 3;
+ constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
AMDGPU::OpName::src0, AMDGPU::OpName::src1,
AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
@@ -10618,6 +10618,42 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
return false;
}
+// SCC is already valid after SCCValid.
+// SCCRedefine will redefine SCC to the same value already available after
+// SCCValid. If there are no intervening SCC conflicts, delete SCCRedefine
+// and update kill/dead flags if necessary.
+static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
+ const SIRegisterInfo &RI) {
+ MachineInstr *KillsSCC = nullptr;
+ for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
+ SCCRedefine->getIterator())) {
+ if (MI.modifiesRegister(AMDGPU::SCC, &RI))
+ return false;
+ if (MI.killsRegister(AMDGPU::SCC, &RI))
+ KillsSCC = &MI;
+ }
+ if (MachineOperand *SccDef =
+ SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
+ SccDef->setIsDead(false);
+ if (KillsSCC)
+ KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
+ SCCRedefine->eraseFromParent();
+ return true;
+}
+
+static bool foldableSelect(const MachineInstr &Def) {
+ if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
+ Def.getOpcode() != AMDGPU::S_CSELECT_B64)
+ return false;
+ bool Op1IsNonZeroImm =
+ Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
+ bool Op2IsZeroImm =
+ Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
+ if (!Op1IsNonZeroImm || !Op2IsZeroImm)
+ return false;
+ return true;
+}
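For illustration, the shape handled by foldableSelect (simplified, hypothetical MIR):

  %sel:sreg_32 = S_CSELECT_B32 1, 0, implicit $scc
  S_CMP_LG_U32 %sel, 0, implicit-def $scc

The compare merely recomputes the SCC condition that selected between the non-zero immediate and 0, so optimizeSCC erases it, clears any intervening SCC kill flags, and lets later users consume the SCC value that is still live.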
+
bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
Register SrcReg2, int64_t CmpMask,
int64_t CmpValue,
@@ -10637,19 +10673,6 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (!Def || Def->getParent() != CmpInstr.getParent())
return false;
- const auto foldableSelect = [](MachineInstr *Def) -> bool {
- if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
- Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
- bool Op1IsNonZeroImm =
- Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
- bool Op2IsZeroImm =
- Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
- if (Op1IsNonZeroImm && Op2IsZeroImm)
- return true;
- }
- return false;
- };
-
// For S_OP that set SCC = DST!=0, do the transformation
//
// s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
@@ -10660,24 +10683,12 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
//
// s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
// imm), 0)
- if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
+ if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(*Def))
return false;
- MachineInstr *KillsSCC = nullptr;
- for (MachineInstr &MI :
- make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
- if (MI.modifiesRegister(AMDGPU::SCC, &RI))
- return false;
- if (MI.killsRegister(AMDGPU::SCC, &RI))
- KillsSCC = &MI;
- }
+ if (!optimizeSCC(Def, &CmpInstr, RI))
+ return false;
- if (MachineOperand *SccDef =
- Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
- SccDef->setIsDead(false);
- if (KillsSCC)
- KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
- CmpInstr.eraseFromParent();
return true;
};
@@ -10755,21 +10766,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
return false;
- MachineInstr *KillsSCC = nullptr;
- for (MachineInstr &MI :
- make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
- if (MI.modifiesRegister(AMDGPU::SCC, &RI))
- return false;
- if (MI.killsRegister(AMDGPU::SCC, &RI))
- KillsSCC = &MI;
- }
-
- MachineOperand *SccDef =
- Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
- SccDef->setIsDead(false);
- if (KillsSCC)
- KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
- CmpInstr.eraseFromParent();
+ if (!optimizeSCC(Def, &CmpInstr, RI))
+ return false;
if (!MRI->use_nodbg_empty(DefReg)) {
assert(!IsReversedCC);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fdba454..6b06534 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -601,10 +601,20 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
- if (!Subtarget->hasVFP2Base())
+ if (!Subtarget->hasVFP2Base()) {
setAllExpand(MVT::f32);
- if (!Subtarget->hasFP64())
+ } else {
+ for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
+ setOperationAction(Op, MVT::f32, Legal);
+ }
+ if (!Subtarget->hasFP64()) {
setAllExpand(MVT::f64);
+ } else {
+ for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
+ setOperationAction(Op, MVT::f64, Legal);
+ }
}
if (Subtarget->hasFullFP16()) {
@@ -1281,12 +1291,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, LibCall);
+ setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, LibCall);
}
// fp16 is a special v7 extension that adds f16 <-> f32 conversions.
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, LibCall);
+ setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, LibCall);
}
// Strict floating-point comparisons need custom lowering.
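Taken together with the constructor changes above: where the VFP hardware exists, the basic strict operations (STRICT_FADD, STRICT_FSUB, and so on) are marked Legal and select directly onto the VFP instructions (whose patterns switch to any_fadd and friends in ARMInstrVFP.td below), while the strict f16 conversions are routed to library calls whenever the corresponding hardware conversion is absent.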
@@ -1333,31 +1347,42 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
}
// FP16 ops often need to be promoted to library calls.
+ // clang-format off
if (Subtarget->hasFullFP16()) {
- setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
- setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FTAN, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FPOWI, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FEXP10, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::LRINT, MVT::f16, Expand);
setOperationAction(ISD::LROUND, MVT::f16, Expand);
-
- setOperationAction(ISD::FROUND, MVT::f16, Legal);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
- setOperationAction(ISD::FRINT, MVT::f16, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
- setOperationAction(ISD::FCEIL, MVT::f16, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
+
+ for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
+ ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
+ ISD::FSINCOSPI, ISD::FMODF, ISD::FACOS,
+ ISD::FASIN, ISD::FATAN, ISD::FATAN2,
+ ISD::FCOSH, ISD::FSINH, ISD::FTANH,
+ ISD::FTAN, ISD::FEXP, ISD::FEXP2,
+ ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW,
+ ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN,
+ ISD::STRICT_FACOS, ISD::STRICT_FASIN, ISD::STRICT_FATAN,
+ ISD::STRICT_FATAN2, ISD::STRICT_FCOSH, ISD::STRICT_FSINH,
+ ISD::STRICT_FTANH, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
+ ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10,
+ ISD::STRICT_FTAN}) {
+ setOperationAction(Op, MVT::f16, Promote);
+ }
+
+  // Round-to-integer operations need custom lowering for fp16, as Promote
+  // doesn't work because the result type is integer.
+ for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT})
+ setOperationAction(Op, MVT::f16, Custom);
+
+ for (auto Op : {ISD::FROUND, ISD::FROUNDEVEN, ISD::FTRUNC,
+ ISD::FNEARBYINT, ISD::FRINT, ISD::FFLOOR,
+ ISD::FCEIL, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
+ ISD::STRICT_FTRUNC, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
+ ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL}) {
+ setOperationAction(Op, MVT::f16, Legal);
+ }
+ // clang-format on
}
if (Subtarget->hasNEON()) {
@@ -10725,6 +10750,19 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerCMP(Op, DAG);
case ISD::ABS:
return LowerABS(Op, DAG);
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT: {
+ assert((Op.getOperand(1).getValueType() == MVT::f16 ||
+ Op.getOperand(1).getValueType() == MVT::bf16) &&
+ "Expected custom lowering of rounding operations only for f16");
+ SDLoc DL(Op);
+ SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
+ {Op.getOperand(0), Op.getOperand(1)});
+ return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
+ {Ext.getValue(1), Ext.getValue(0)});
+ }
}
}
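Note the chain threading in the new strict-rounding case: STRICT_FP_EXTEND yields the widened value as result 0 and the updated chain as result 1, and the rebuilt node takes the updated chain (Ext.getValue(1)) first and the extended f32 value (Ext.getValue(0)) second, so FP-exception ordering survives the extension.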
@@ -22071,6 +22109,11 @@ bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
ScalarTy->isIntegerTy(32));
}
+ArrayRef<MCPhysReg> ARMTargetLowering::getRoundingControlRegisters() const {
+ static const MCPhysReg RCRegs[] = {ARM::FPSCR_RM};
+ return RCRegs;
+}
+
Value *ARMTargetLowering::createComplexDeinterleavingIR(
IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 357d2c5..bf3438b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -1009,6 +1009,8 @@ class VectorType;
bool isUnsupportedFloatingType(EVT VT) const;
+ ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
+
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue Flags, SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 10d4cd5..f7176a6 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -473,15 +473,15 @@ def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs)>;
// An 'fmul' node with a single use.
let HasOneUse = 1 in
-def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs)>;
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (any_fmul node:$lhs, node:$rhs)>;
// An 'fadd' node which checks for single non-hazardous use.
-def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(any_fadd node:$lhs, node:$rhs),[{
return hasNoVMLxHazardUse(N);
}]>;
// An 'fsub' node which checks for single non-hazardous use.
-def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(any_fsub node:$lhs, node:$rhs),[{
return hasNoVMLxHazardUse(N);
}]>;
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 6771106..e2cc97b 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -439,14 +439,14 @@ let TwoOperandAliasConstraint = "$Dn = $Dd", mayRaiseFPException = 1, Uses = [FP
def VADDD : ADbI<0b11100, 0b11, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>,
+ [(set DPR:$Dd, (any_fadd DPR:$Dn, (f64 DPR:$Dm)))]>,
Sched<[WriteFPALU64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VADDS : ASbIn<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>,
+ [(set SPR:$Sd, (any_fadd SPR:$Sn, SPR:$Sm))]>,
Sched<[WriteFPALU32]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -457,21 +457,21 @@ let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FP
def VADDH : AHbI<0b11100, 0b11, 0, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
- [(set (f16 HPR:$Sd), (fadd (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
+ [(set (f16 HPR:$Sd), (any_fadd (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>,
+ [(set DPR:$Dd, (any_fsub DPR:$Dn, (f64 DPR:$Dm)))]>,
Sched<[WriteFPALU64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>,
+ [(set SPR:$Sd, (any_fsub SPR:$Sn, SPR:$Sm))]>,
Sched<[WriteFPALU32]>{
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -482,42 +482,42 @@ let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FP
def VSUBH : AHbI<0b11100, 0b11, 1, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
- [(set (f16 HPR:$Sd), (fsub (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
+ [(set (f16 HPR:$Sd), (any_fsub (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>,
+ [(set DPR:$Dd, (any_fdiv DPR:$Dn, (f64 DPR:$Dm)))]>,
Sched<[WriteFPDIV64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VDIVS : ASbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>,
+ [(set SPR:$Sd, (any_fdiv SPR:$Sn, SPR:$Sm))]>,
Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VDIVH : AHbI<0b11101, 0b00, 0, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
- [(set (f16 HPR:$Sd), (fdiv (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
+ [(set (f16 HPR:$Sd), (any_fdiv (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VMULD : ADbI<0b11100, 0b10, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>,
+ [(set DPR:$Dd, (any_fmul DPR:$Dn, (f64 DPR:$Dm)))]>,
Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VMULS : ASbIn<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>,
+ [(set SPR:$Sd, (any_fmul SPR:$Sn, SPR:$Sm))]>,
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -528,21 +528,21 @@ let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FP
def VMULH : AHbI<0b11100, 0b10, 0, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
- [(set (f16 HPR:$Sd), (fmul (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
+ [(set (f16 HPR:$Sd), (any_fmul (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
let TwoOperandAliasConstraint = "$Dn = $Dd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>,
+ [(set DPR:$Dd, (fneg (any_fmul DPR:$Dn, (f64 DPR:$Dm))))]>,
Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VNMULS : ASbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>,
+ [(set SPR:$Sd, (fneg (any_fmul SPR:$Sn, SPR:$Sm)))]>,
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -553,7 +553,7 @@ let TwoOperandAliasConstraint = "$Sn = $Sd", mayRaiseFPException = 1, Uses = [FP
def VNMULH : AHbI<0b11100, 0b10, 1, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
- [(set (f16 HPR:$Sd), (fneg (fmul (f16 HPR:$Sn), (f16 HPR:$Sm))))]>,
+ [(set (f16 HPR:$Sd), (fneg (any_fmul (f16 HPR:$Sn), (f16 HPR:$Sm))))]>,
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
multiclass vsel_inst<string op, bits<2> opc, int CC> {
@@ -587,7 +587,7 @@ defm VSELGE : vsel_inst<"ge", 0b10, 10>;
defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
defm VSELVS : vsel_inst<"vs", 0b01, 6>;
-multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
+multiclass vmaxmin_inst<string op, bit opc, PatFrags SD> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
isUnpredicable = 1, mayRaiseFPException = 1 in {
def H : AHbInp<0b11101, 0b00, opc,
@@ -610,8 +610,8 @@ multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
}
}
-defm VFP_VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
-defm VFP_VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
+defm VFP_VMAXNM : vmaxmin_inst<"vmaxnm", 0, any_fmaxnum>;
+defm VFP_VMINNM : vmaxmin_inst<"vminnm", 1, any_fminnum>;
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
@@ -746,7 +746,7 @@ let mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", "",
- [(set DPR:$Dd, (fpextend SPR:$Sm))]>,
+ [(set DPR:$Dd, (any_fpextend SPR:$Sm))]>,
Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Dd;
@@ -766,7 +766,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
let mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", "",
- [(set SPR:$Sd, (fpround DPR:$Dm))]>,
+ [(set SPR:$Sd, (any_fpround DPR:$Dm))]>,
Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Sd;
@@ -796,7 +796,7 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
-def : FP16Pat<(f32 (fpextend (f16 HPR:$Sm))),
+def : FP16Pat<(f32 (any_fpextend (f16 HPR:$Sm))),
(VCVTBHS (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>;
def : FP16Pat<(f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
@@ -808,16 +808,16 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
-def : FP16Pat<(f16 (fpround SPR:$Sm)),
+def : FP16Pat<(f16 (any_fpround SPR:$Sm)),
(COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>;
def : FP16Pat<(fp_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>;
-def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane),
+def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane),
(v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
(VCVTBSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)),
SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
-def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane),
+def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane),
(v4f16 (INSERT_SUBREG (v4f16 DPR:$src1),
(VCVTBSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)),
SPR:$src2),
@@ -830,9 +830,9 @@ def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
-def : FP16Pat<(f32 (fpextend (extractelt (v8f16 MQPR:$src), imm_odd:$lane))),
+def : FP16Pat<(f32 (any_fpextend (extractelt (v8f16 MQPR:$src), imm_odd:$lane))),
(VCVTTHS (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane)))>;
-def : FP16Pat<(f32 (fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))),
+def : FP16Pat<(f32 (any_fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))),
(VCVTTHS (EXTRACT_SUBREG
(v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
(SSubReg_f16_reg imm_odd:$lane)))>;
@@ -844,12 +844,12 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sda,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
-def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane),
+def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_odd:$lane),
(v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
(VCVTTSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)),
SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
-def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane),
+def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_odd:$lane),
(v4f16 (INSERT_SUBREG (v4f16 DPR:$src1),
(VCVTTSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)),
SPR:$src2),
@@ -872,7 +872,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
let hasSideEffects = 0;
}
-def : FullFP16Pat<(f64 (fpextend (f16 HPR:$Sm))),
+def : FullFP16Pat<(f64 (any_fpextend (f16 HPR:$Sm))),
(VCVTBHD (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
@@ -898,7 +898,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
let hasSideEffects = 0;
}
-def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
+def : FullFP16Pat<(f16 (any_fpround DPR:$Dm)),
(COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>,
Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
@@ -1007,41 +1007,41 @@ multiclass vcvt_inst<string opc, bits<2> rm,
let Predicates = [HasFPARMv8] in {
let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (fp_to_sint (node (f16 HPR:$a)))),
+ def : Pat<(i32 (any_fp_to_sint (node (f16 HPR:$a)))),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"SH") (f16 HPR:$a)),
GPR)>;
- def : Pat<(i32 (fp_to_uint (node (f16 HPR:$a)))),
+ def : Pat<(i32 (any_fp_to_uint (node (f16 HPR:$a)))),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"UH") (f16 HPR:$a)),
GPR)>;
}
- def : Pat<(i32 (fp_to_sint (node SPR:$a))),
+ def : Pat<(i32 (any_fp_to_sint (node SPR:$a))),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"SS") SPR:$a),
GPR)>;
- def : Pat<(i32 (fp_to_uint (node SPR:$a))),
+ def : Pat<(i32 (any_fp_to_uint (node SPR:$a))),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"US") SPR:$a),
GPR)>;
}
let Predicates = [HasFPARMv8, HasDPVFP] in {
- def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))),
+ def : Pat<(i32 (any_fp_to_sint (node (f64 DPR:$a)))),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"SD") DPR:$a),
GPR)>;
- def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))),
+ def : Pat<(i32 (any_fp_to_uint (node (f64 DPR:$a)))),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"UD") DPR:$a),
GPR)>;
}
}
-defm VCVTA : vcvt_inst<"a", 0b00, fround>;
+defm VCVTA : vcvt_inst<"a", 0b00, any_fround>;
defm VCVTN : vcvt_inst<"n", 0b01>;
-defm VCVTP : vcvt_inst<"p", 0b10, fceil>;
-defm VCVTM : vcvt_inst<"m", 0b11, ffloor>;
+defm VCVTP : vcvt_inst<"p", 0b10, any_fceil>;
+defm VCVTM : vcvt_inst<"m", 0b11, any_ffloor>;
def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
@@ -1103,9 +1103,9 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node,
Requires<[HasFPARMv8,HasDPVFP]>;
}
-defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc, [], 0>;
-defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint, [FPSCR_RM], 0>;
-defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint, [FPSCR_RM], 1>;
+defm VRINTZ : vrint_inst_zrx<"z", 0, 1, any_ftrunc, [], 0>;
+defm VRINTR : vrint_inst_zrx<"r", 0, 0, any_fnearbyint, [FPSCR_RM], 0>;
+defm VRINTX : vrint_inst_zrx<"x", 1, 0, any_frint, [FPSCR_RM], 1>;
multiclass vrint_inst_anpm<string opc, bits<2> rm,
SDPatternOperator node = null_frag> {
@@ -1145,30 +1145,31 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm,
Requires<[HasFPARMv8,HasDPVFP]>;
}
-defm VRINTA : vrint_inst_anpm<"a", 0b00, fround>;
-defm VRINTN : vrint_inst_anpm<"n", 0b01, froundeven>;
-defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
-defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
+defm VRINTA : vrint_inst_anpm<"a", 0b00, any_fround>;
+defm VRINTN : vrint_inst_anpm<"n", 0b01, any_froundeven>;
+defm VRINTP : vrint_inst_anpm<"p", 0b10, any_fceil>;
+defm VRINTM : vrint_inst_anpm<"m", 0b11, any_ffloor>;
+
let mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", "",
- [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
+ [(set DPR:$Dd, (any_fsqrt (f64 DPR:$Dm)))]>,
Sched<[WriteFPSQRT64]>;
let mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", "",
- [(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
+ [(set SPR:$Sd, (any_fsqrt SPR:$Sm))]>,
Sched<[WriteFPSQRT32]>;
let mayRaiseFPException = 1, Uses = [FPSCR_RM] in
def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs HPR:$Sd), (ins HPR:$Sm),
IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm",
- [(set (f16 HPR:$Sd), (fsqrt (f16 HPR:$Sm)))]>;
+ [(set (f16 HPR:$Sd), (any_fsqrt (f16 HPR:$Sm)))]>;
let hasSideEffects = 0 in {
let isMoveReg = 1 in {
@@ -1509,10 +1510,10 @@ def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
}
let Predicates=[HasVFP2, HasDPVFP] in {
- def : VFPPat<(f64 (sint_to_fp GPR:$a)),
+ def : VFPPat<(f64 (any_sint_to_fp GPR:$a)),
(VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
- def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
+ def : VFPPat<(f64 (any_sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VSITOD (VLDRS addrmode5:$a))>;
}
@@ -1529,10 +1530,10 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
let D = VFPNeonA8Domain;
}
-def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
+def : VFPNoNEONPat<(f32 (any_sint_to_fp GPR:$a)),
(VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (any_sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VSITOS (VLDRS addrmode5:$a))>;
let mayRaiseFPException = 1 in
@@ -1545,7 +1546,7 @@ def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
let isUnpredicable = 1;
}
-def : VFPNoNEONPat<(f16 (sint_to_fp GPR:$a)),
+def : VFPNoNEONPat<(f16 (any_sint_to_fp GPR:$a)),
(VSITOH (COPY_TO_REGCLASS GPR:$a, SPR))>;
let mayRaiseFPException = 1 in
@@ -1558,10 +1559,10 @@ def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
}
let Predicates=[HasVFP2, HasDPVFP] in {
- def : VFPPat<(f64 (uint_to_fp GPR:$a)),
+ def : VFPPat<(f64 (any_uint_to_fp GPR:$a)),
(VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
- def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
+ def : VFPPat<(f64 (any_uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VUITOD (VLDRS addrmode5:$a))>;
}
@@ -1578,10 +1579,10 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
let D = VFPNeonA8Domain;
}
-def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
+def : VFPNoNEONPat<(f32 (any_uint_to_fp GPR:$a)),
(VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (any_uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VUITOS (VLDRS addrmode5:$a))>;
let mayRaiseFPException = 1 in
@@ -1594,7 +1595,7 @@ def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
let isUnpredicable = 1;
}
-def : VFPNoNEONPat<(f16 (uint_to_fp GPR:$a)),
+def : VFPNoNEONPat<(f16 (any_uint_to_fp GPR:$a)),
(VUITOH (COPY_TO_REGCLASS GPR:$a, SPR))>;
// FP -> Int:
@@ -1669,12 +1670,12 @@ def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
}
let Predicates=[HasVFP2, HasDPVFP] in {
- def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
+ def : VFPPat<(i32 (any_fp_to_sint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
def : VFPPat<(i32 (fp_to_sint_sat (f64 DPR:$a), i32)),
(COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
- def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+ def : VFPPat<(alignedstore32 (i32 (any_fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f64 DPR:$a), i32)), addrmode5:$ptr),
(VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
@@ -1693,12 +1694,12 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
let D = VFPNeonA8Domain;
}
-def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
+def : VFPNoNEONPat<(i32 (any_fp_to_sint SPR:$a)),
(COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
def : VFPPat<(i32 (fp_to_sint_sat SPR:$a, i32)),
(COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
-def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
+def : VFPNoNEONPat<(alignedstore32 (i32 (any_fp_to_sint (f32 SPR:$a))),
addrmode5:$ptr),
(VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f32 SPR:$a), i32)),
@@ -1715,7 +1716,7 @@ def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
let isUnpredicable = 1;
}
-def : VFPNoNEONPat<(i32 (fp_to_sint (f16 HPR:$a))),
+def : VFPNoNEONPat<(i32 (any_fp_to_sint (f16 HPR:$a))),
(COPY_TO_REGCLASS (VTOSIZH (f16 HPR:$a)), GPR)>;
def : VFPPat<(i32 (fp_to_sint_sat (f16 HPR:$a), i32)),
(COPY_TO_REGCLASS (VTOSIZH (f16 HPR:$a)), GPR)>;
@@ -1730,12 +1731,12 @@ def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
}
let Predicates=[HasVFP2, HasDPVFP] in {
- def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
+ def : VFPPat<(i32 (any_fp_to_uint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
def : VFPPat<(i32 (fp_to_uint_sat (f64 DPR:$a), i32)),
(COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
- def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+ def : VFPPat<(alignedstore32 (i32 (any_fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f64 DPR:$a), i32)), addrmode5:$ptr),
(VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
@@ -1754,12 +1755,12 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
let D = VFPNeonA8Domain;
}
-def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
+def : VFPNoNEONPat<(i32 (any_fp_to_uint SPR:$a)),
(COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
def : VFPPat<(i32 (fp_to_uint_sat SPR:$a, i32)),
(COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
-def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
+def : VFPNoNEONPat<(alignedstore32 (i32 (any_fp_to_uint (f32 SPR:$a))),
addrmode5:$ptr),
(VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f32 SPR:$a), i32)),
@@ -1776,7 +1777,7 @@ def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
let isUnpredicable = 1;
}
-def : VFPNoNEONPat<(i32 (fp_to_uint (f16 HPR:$a))),
+def : VFPNoNEONPat<(i32 (any_fp_to_uint (f16 HPR:$a))),
(COPY_TO_REGCLASS (VTOUIZH (f16 HPR:$a)), GPR)>;
def : VFPPat<(i32 (fp_to_uint_sat (f16 HPR:$a), i32)),
(COPY_TO_REGCLASS (VTOUIZH (f16 HPR:$a)), GPR)>;
@@ -2320,13 +2321,13 @@ def : Pat<(fadd_mlx HPR:$dstin, (fmul_su (f16 HPR:$a), HPR:$b)),
// Match @llvm.fma.* intrinsics
// (fma x, y, z) -> (vfms z, x, y)
-def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
+def : Pat<(f64 (any_fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
+def : Pat<(f32 (any_fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, (f16 HPR:$Sdin))),
+def : Pat<(f16 (any_fma HPR:$Sn, HPR:$Sm, (f16 HPR:$Sdin))),
(VFMAH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
@@ -2375,13 +2376,13 @@ def : Pat<(fsub_mlx HPR:$dstin, (fmul_su (f16 HPR:$a), HPR:$b)),
// Match @llvm.fma.* intrinsics
// (fma (fneg x), y, z) -> (vfms z, x, y)
-def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
+def : Pat<(f64 (any_fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
+def : Pat<(f32 (any_fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
+def : Pat<(f16 (any_fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
(VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
@@ -2427,23 +2428,23 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
// Match @llvm.fma.* intrinsics
// (fneg (fma x, y, z)) -> (vfnma z, x, y)
-def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
+def : Pat<(fneg (any_fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
+def : Pat<(fneg (any_fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 (f16 HPR:$Sdin)))),
+def : Pat<(fneg (any_fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 (f16 HPR:$Sdin)))),
(VFNMAH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
-def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
+def : Pat<(f64 (any_fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
+def : Pat<(f32 (any_fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (fneg (f16 HPR:$Sdin)))),
+def : Pat<(f16 (any_fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (fneg (f16 HPR:$Sdin)))),
(VFNMAH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
@@ -2488,23 +2489,23 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
// Match @llvm.fma.* intrinsics
// (fma x, y, (fneg z)) -> (vfnms z, x, y))
-def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
+def : Pat<(f64 (any_fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
+def : Pat<(f32 (any_fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(f16 (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (fneg (f16 HPR:$Sdin)))),
+def : Pat<(f16 (any_fma (f16 HPR:$Sn), (f16 HPR:$Sm), (fneg (f16 HPR:$Sdin)))),
(VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
-def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
+def : Pat<(fneg (f64 (any_fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
+def : Pat<(fneg (f32 (any_fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))),
+def : Pat<(fneg (f16 (any_fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))),
(VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index ebfa593..bf7c962f 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -47,9 +47,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
// Only use a specialized AEABI function if the default version of this
// Libcall is an AEABI function.
- if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
- return SDValue();
-
+ //
// Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
// able to translate memset to memclr and use the value to index the function
// name array.
@@ -61,12 +59,21 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
} AEABILibcall;
switch (LC) {
case RTLIB::MEMCPY:
+ if (TLI->getLibcallImpl(LC) != RTLIB::impl___aeabi_memcpy)
+ return SDValue();
+
AEABILibcall = AEABI_MEMCPY;
break;
case RTLIB::MEMMOVE:
+ if (TLI->getLibcallImpl(LC) != RTLIB::impl___aeabi_memmove)
+ return SDValue();
+
AEABILibcall = AEABI_MEMMOVE;
break;
case RTLIB::MEMSET:
+ if (TLI->getLibcallImpl(LC) != RTLIB::impl___aeabi_memset)
+ return SDValue();
+
AEABILibcall = AEABI_MEMSET;
if (isNullConstant(Src))
AEABILibcall = AEABI_MEMCLR;
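Each case now consults the configured libcall implementation directly: the specialized path is taken only when getLibcallImpl returns the matching RTLIB::impl___aeabi_* entry, so a target whose MEMCPY resolves to a plain memcpy implementation returns SDValue() and falls back to the generic lowering, rather than relying on the old "__aeabi" name-prefix check.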
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f60660b..1bb670d 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -426,15 +426,15 @@ class ARMAsmParser : public MCTargetAsmParser {
VPTState.CurPosition = ~0U;
}
- void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) {
+ void Note(SMLoc L, const Twine &Msg, SMRange Range = {}) {
return getParser().Note(L, Msg, Range);
}
- bool Warning(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) {
+ bool Warning(SMLoc L, const Twine &Msg, SMRange Range = {}) {
return getParser().Warning(L, Msg, Range);
}
- bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) {
+ bool Error(SMLoc L, const Twine &Msg, SMRange Range = {}) {
return getParser().Error(L, Msg, Range);
}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index e5b4f6e..08f196b 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -884,13 +884,13 @@ CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
.Case("{t4}", CSKY::R20)
.Case("{t5}", CSKY::R21)
.Case("{t6}", CSKY::R22)
- .Cases("{t7}", "{fp}", CSKY::R23)
- .Cases("{t8}", "{top}", CSKY::R24)
- .Cases("{t9}", "{bsp}", CSKY::R25)
+ .Cases({"{t7}", "{fp}"}, CSKY::R23)
+ .Cases({"{t8}", "{top}"}, CSKY::R24)
+ .Cases({"{t9}", "{bsp}"}, CSKY::R25)
.Case("{r26}", CSKY::R26)
.Case("{r27}", CSKY::R27)
- .Cases("{gb}", "{rgb}", "{rdb}", CSKY::R28)
- .Cases("{tb}", "{rtb}", CSKY::R29)
+ .Cases({"{gb}", "{rgb}", "{rdb}"}, CSKY::R28)
+ .Cases({"{tb}", "{rtb}"}, CSKY::R29)
.Case("{svbr}", CSKY::R30)
.Case("{tls}", CSKY::R31)
.Default(CSKY::NoRegister);
@@ -907,38 +907,38 @@ CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// use the ABI names in register constraint lists.
if (Subtarget.useHardFloat()) {
unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
- .Cases("{fr0}", "{vr0}", CSKY::F0_32)
- .Cases("{fr1}", "{vr1}", CSKY::F1_32)
- .Cases("{fr2}", "{vr2}", CSKY::F2_32)
- .Cases("{fr3}", "{vr3}", CSKY::F3_32)
- .Cases("{fr4}", "{vr4}", CSKY::F4_32)
- .Cases("{fr5}", "{vr5}", CSKY::F5_32)
- .Cases("{fr6}", "{vr6}", CSKY::F6_32)
- .Cases("{fr7}", "{vr7}", CSKY::F7_32)
- .Cases("{fr8}", "{vr8}", CSKY::F8_32)
- .Cases("{fr9}", "{vr9}", CSKY::F9_32)
- .Cases("{fr10}", "{vr10}", CSKY::F10_32)
- .Cases("{fr11}", "{vr11}", CSKY::F11_32)
- .Cases("{fr12}", "{vr12}", CSKY::F12_32)
- .Cases("{fr13}", "{vr13}", CSKY::F13_32)
- .Cases("{fr14}", "{vr14}", CSKY::F14_32)
- .Cases("{fr15}", "{vr15}", CSKY::F15_32)
- .Cases("{fr16}", "{vr16}", CSKY::F16_32)
- .Cases("{fr17}", "{vr17}", CSKY::F17_32)
- .Cases("{fr18}", "{vr18}", CSKY::F18_32)
- .Cases("{fr19}", "{vr19}", CSKY::F19_32)
- .Cases("{fr20}", "{vr20}", CSKY::F20_32)
- .Cases("{fr21}", "{vr21}", CSKY::F21_32)
- .Cases("{fr22}", "{vr22}", CSKY::F22_32)
- .Cases("{fr23}", "{vr23}", CSKY::F23_32)
- .Cases("{fr24}", "{vr24}", CSKY::F24_32)
- .Cases("{fr25}", "{vr25}", CSKY::F25_32)
- .Cases("{fr26}", "{vr26}", CSKY::F26_32)
- .Cases("{fr27}", "{vr27}", CSKY::F27_32)
- .Cases("{fr28}", "{vr28}", CSKY::F28_32)
- .Cases("{fr29}", "{vr29}", CSKY::F29_32)
- .Cases("{fr30}", "{vr30}", CSKY::F30_32)
- .Cases("{fr31}", "{vr31}", CSKY::F31_32)
+ .Cases({"{fr0}", "{vr0}"}, CSKY::F0_32)
+ .Cases({"{fr1}", "{vr1}"}, CSKY::F1_32)
+ .Cases({"{fr2}", "{vr2}"}, CSKY::F2_32)
+ .Cases({"{fr3}", "{vr3}"}, CSKY::F3_32)
+ .Cases({"{fr4}", "{vr4}"}, CSKY::F4_32)
+ .Cases({"{fr5}", "{vr5}"}, CSKY::F5_32)
+ .Cases({"{fr6}", "{vr6}"}, CSKY::F6_32)
+ .Cases({"{fr7}", "{vr7}"}, CSKY::F7_32)
+ .Cases({"{fr8}", "{vr8}"}, CSKY::F8_32)
+ .Cases({"{fr9}", "{vr9}"}, CSKY::F9_32)
+ .Cases({"{fr10}", "{vr10}"}, CSKY::F10_32)
+ .Cases({"{fr11}", "{vr11}"}, CSKY::F11_32)
+ .Cases({"{fr12}", "{vr12}"}, CSKY::F12_32)
+ .Cases({"{fr13}", "{vr13}"}, CSKY::F13_32)
+ .Cases({"{fr14}", "{vr14}"}, CSKY::F14_32)
+ .Cases({"{fr15}", "{vr15}"}, CSKY::F15_32)
+ .Cases({"{fr16}", "{vr16}"}, CSKY::F16_32)
+ .Cases({"{fr17}", "{vr17}"}, CSKY::F17_32)
+ .Cases({"{fr18}", "{vr18}"}, CSKY::F18_32)
+ .Cases({"{fr19}", "{vr19}"}, CSKY::F19_32)
+ .Cases({"{fr20}", "{vr20}"}, CSKY::F20_32)
+ .Cases({"{fr21}", "{vr21}"}, CSKY::F21_32)
+ .Cases({"{fr22}", "{vr22}"}, CSKY::F22_32)
+ .Cases({"{fr23}", "{vr23}"}, CSKY::F23_32)
+ .Cases({"{fr24}", "{vr24}"}, CSKY::F24_32)
+ .Cases({"{fr25}", "{vr25}"}, CSKY::F25_32)
+ .Cases({"{fr26}", "{vr26}"}, CSKY::F26_32)
+ .Cases({"{fr27}", "{vr27}"}, CSKY::F27_32)
+ .Cases({"{fr28}", "{vr28}"}, CSKY::F28_32)
+ .Cases({"{fr29}", "{vr29}"}, CSKY::F29_32)
+ .Cases({"{fr30}", "{vr30}"}, CSKY::F30_32)
+ .Cases({"{fr31}", "{vr31}"}, CSKY::F31_32)
.Default(CSKY::NoRegister);
if (FReg != CSKY::NoRegister) {
assert(CSKY::F0_32 <= FReg && FReg <= CSKY::F31_32 && "Unknown fp-reg");
diff --git a/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp b/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp
index 15def36..b6bbb20 100644
--- a/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp
+++ b/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp
@@ -52,6 +52,7 @@ void DXILAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
emitGlobalConstant(GV->getDataLayout(), GV->getInitializer());
}
-extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXAsmPrinter() {
+extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
+LLVMInitializeDirectXAsmPrinter() {
RegisterAsmPrinter<DXILAsmPrinter> X(getTheDirectXTarget());
}
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index bcf8440..84b1a31 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -53,7 +53,8 @@
using namespace llvm;
-extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
+extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
+LLVMInitializeDirectXTarget() {
RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
auto *PR = PassRegistry::getPassRegistry();
initializeDXILIntrinsicExpansionLegacyPass(*PR);
diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
index 9a14c01..62ad014 100644
--- a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
+++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
@@ -132,7 +132,8 @@ static MCRegisterInfo *createDirectXMCRegisterInfo(const Triple &Triple) {
static MCInstrInfo *createDirectXMCInstrInfo() { return new MCInstrInfo(); }
-extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTargetMC() {
+extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
+LLVMInitializeDirectXTargetMC() {
Target &T = getTheDirectXTarget();
RegisterMCAsmInfo<DirectXMCAsmInfo> X(T);
TargetRegistry::RegisterMCInstrInfo(T, createDirectXMCInstrInfo);
diff --git a/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp
index ae01626..934bd1b 100644
--- a/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp
+++ b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp
@@ -24,7 +24,8 @@ Target &getTheDirectXTarget() {
using namespace llvm;
-extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTargetInfo() {
+extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
+LLVMInitializeDirectXTargetInfo() {
RegisterTarget<Triple::dxil, /*HasJIT=*/false> X(
getTheDirectXTarget(), "dxil", "DirectX Intermediate Language", "DXIL");
}
diff --git a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp
index 3b810d0..79863e1 100644
--- a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp
@@ -34,7 +34,7 @@ class HexagonCopyHoisting : public MachineFunctionPass {
public:
static char ID;
- HexagonCopyHoisting() : MachineFunctionPass(ID), MFN(nullptr), MRI(nullptr) {}
+ HexagonCopyHoisting() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "Hexagon Copy Hoisting"; }
@@ -56,8 +56,8 @@ public:
void moveCopyInstr(MachineBasicBlock *DestBB,
std::pair<Register, Register> Key, MachineInstr *MI);
- MachineFunction *MFN;
- MachineRegisterInfo *MRI;
+ MachineFunction *MFN = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
std::vector<DenseMap<std::pair<Register, Register>, MachineInstr *>>
CopyMIList;
};
diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
index f4e36fa7..e661c94 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -26,6 +26,7 @@ def tc_20a4bbec : InstrItinClass;
def tc_227864f7 : InstrItinClass;
def tc_257f6f7c : InstrItinClass;
def tc_26a377fe : InstrItinClass;
+def tc_2a698a03 : InstrItinClass;
def tc_2b4c548e : InstrItinClass;
def tc_2c745bb8 : InstrItinClass;
def tc_2d4051cd : InstrItinClass;
@@ -52,6 +53,7 @@ def tc_561aaa58 : InstrItinClass;
def tc_56c4f9fe : InstrItinClass;
def tc_56e64202 : InstrItinClass;
def tc_58d21193 : InstrItinClass;
+def tc_57a4709c : InstrItinClass;
def tc_5bf8afbb : InstrItinClass;
def tc_5cdf8c84 : InstrItinClass;
def tc_61bf7c03 : InstrItinClass;
@@ -220,6 +222,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -356,6 +363,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -812,6 +824,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -948,6 +965,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -1404,6 +1426,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -1540,6 +1567,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -1996,6 +2028,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -2132,6 +2169,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -2588,6 +2630,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -2724,6 +2771,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -3180,6 +3232,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -3316,6 +3373,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -3772,6 +3834,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -3908,6 +3975,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -4364,6 +4436,11 @@ class DepHVXItinV69 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -4500,6 +4577,11 @@ class DepHVXItinV69 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -4956,6 +5038,11 @@ class DepHVXItinV71 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -5092,6 +5179,11 @@ class DepHVXItinV71 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -5548,6 +5640,11 @@ class DepHVXItinV73 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -5684,6 +5781,11 @@ class DepHVXItinV73 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -6140,6 +6242,11 @@ class DepHVXItinV75 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -6276,6 +6383,11 @@ class DepHVXItinV75 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -6732,6 +6844,11 @@ class DepHVXItinV79 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -6868,6 +6985,11 @@ class DepHVXItinV79 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
@@ -7324,6 +7446,11 @@ class DepHVXItinV81 {
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
[HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
@@ -7460,6 +7587,11 @@ class DepHVXItinV81 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_57a4709c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 2],
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index f8f1c2a..b188134 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -29939,6 +29939,58 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vabs_qf16_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = vabs($Vu32.hf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabs_qf16_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = vabs($Vu32.qf16)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabs_qf32_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = vabs($Vu32.qf32)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabs_qf32_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = vabs($Vu32.sf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vabs_sf : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
@@ -31302,6 +31354,21 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_valign4 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = valign4($Vu32,$Vv32,$Rt8)",
+tc_57a4709c, TypeCVI_VA>, Enc_a30110, Requires<[UseHVXV81]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_valignb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
@@ -32583,6 +32650,32 @@ let isCVI = 1;
let hasHvxTmp = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vconv_bf_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxWR:$Vuu32),
+"$Vd32.bf = $Vuu32.qf32",
+tc_2a698a03, TypeCVI_VS>, Enc_a33d04, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_f8_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.f8 = $Vu32.qf16",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vconv_h_hf : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
@@ -32596,6 +32689,19 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vconv_h_hf_rnd : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.h = $Vu32.hf:rnd",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vconv_hf_h : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
@@ -32635,6 +32741,71 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vconv_qf16_f8 : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32),
+"$Vdd32.qf16 = $Vu32.f8",
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_qf16_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = $Vu32.hf",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_qf16_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = $Vu32.qf16",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_qf32_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = $Vu32.qf32",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_qf32_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = $Vu32.sf",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vconv_sf_qf32 : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
@@ -33720,6 +33891,122 @@ let isHVXALU2SRC = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_veqhf : HInst<
+(outs HvxQR:$Qd4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.hf,$Vv32.hf)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b000111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqhf_and : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.hf,$Vv32.hf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b000111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqhf_or : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.hf,$Vv32.hf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b010111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isAccumulator = 1;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqhf_xor : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.hf,$Vv32.hf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b100111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqsf : HInst<
+(outs HvxQR:$Qd4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.sf,$Vv32.sf)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b000011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqsf_and : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.sf,$Vv32.sf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b000011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqsf_or : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.sf,$Vv32.sf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b010011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isAccumulator = 1;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqsf_xor : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.sf,$Vv32.sf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-2} = 0b100011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
def V6_veqw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -34538,6 +34825,58 @@ let Inst{31-24} = 0b00011110;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vilog2_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.w = vilog2($Vu32.hf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vilog2_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.w = vilog2($Vu32.qf16)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vilog2_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.w = vilog2($Vu32.qf32)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vilog2_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.w = vilog2($Vu32.sf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vinsertwr : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, IntRegs:$Rt32),
@@ -37170,6 +37509,58 @@ let isCVI = 1;
let isHVXALU = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vneg_qf16_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = vneg($Vu32.hf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vneg_qf16_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf16 = vneg($Vu32.qf16)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vneg_qf32_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = vneg($Vu32.qf32)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vneg_qf32_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.qf32 = vneg($Vu32.sf)",
+tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vnormamth : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index 23f4b3a..c11483b 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -3830,6 +3830,122 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2),
// V81 HVX Instructions.
+def: Pat<(int_hexagon_V6_vabs_qf16_hf HvxVR:$src1),
+ (V6_vabs_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf16_hf_128B HvxVR:$src1),
+ (V6_vabs_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf16_qf16 HvxVR:$src1),
+ (V6_vabs_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf16_qf16_128B HvxVR:$src1),
+ (V6_vabs_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf32_qf32 HvxVR:$src1),
+ (V6_vabs_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf32_qf32_128B HvxVR:$src1),
+ (V6_vabs_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf32_sf HvxVR:$src1),
+ (V6_vabs_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vabs_qf32_sf_128B HvxVR:$src1),
+ (V6_vabs_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[UseHVXV81, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valign4_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[UseHVXV81, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vconv_bf_qf32 HvxWR:$src1),
+ (V6_vconv_bf_qf32 HvxWR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_bf_qf32_128B HvxWR:$src1),
+ (V6_vconv_bf_qf32 HvxWR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_f8_qf16 HvxVR:$src1),
+ (V6_vconv_f8_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_f8_qf16_128B HvxVR:$src1),
+ (V6_vconv_f8_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_h_hf_rnd HvxVR:$src1),
+ (V6_vconv_h_hf_rnd HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vconv_h_hf_rnd_128B HvxVR:$src1),
+ (V6_vconv_h_hf_rnd HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_f8 HvxVR:$src1),
+ (V6_vconv_qf16_f8 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_f8_128B HvxVR:$src1),
+ (V6_vconv_qf16_f8 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_hf HvxVR:$src1),
+ (V6_vconv_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_hf_128B HvxVR:$src1),
+ (V6_vconv_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_qf16 HvxVR:$src1),
+ (V6_vconv_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf16_qf16_128B HvxVR:$src1),
+ (V6_vconv_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf32_qf32 HvxVR:$src1),
+ (V6_vconv_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf32_qf32_128B HvxVR:$src1),
+ (V6_vconv_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf32_sf HvxVR:$src1),
+ (V6_vconv_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_qf32_sf_128B HvxVR:$src1),
+ (V6_vconv_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf HvxVR:$src1, HvxVR:$src2),
+ (V6_veqhf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqhf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqhf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf HvxVR:$src1, HvxVR:$src2),
+ (V6_veqsf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqsf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_veqsf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_hf HvxVR:$src1),
+ (V6_vilog2_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_hf_128B HvxVR:$src1),
+ (V6_vilog2_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_qf16 HvxVR:$src1),
+ (V6_vilog2_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_qf16_128B HvxVR:$src1),
+ (V6_vilog2_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_qf32 HvxVR:$src1),
+ (V6_vilog2_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_qf32_128B HvxVR:$src1),
+ (V6_vilog2_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_sf HvxVR:$src1),
+ (V6_vilog2_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vilog2_sf_128B HvxVR:$src1),
+ (V6_vilog2_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf16_hf HvxVR:$src1),
+ (V6_vneg_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf16_hf_128B HvxVR:$src1),
+ (V6_vneg_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf16_qf16 HvxVR:$src1),
+ (V6_vneg_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf16_qf16_128B HvxVR:$src1),
+ (V6_vneg_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf32_qf32 HvxVR:$src1),
+ (V6_vneg_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf32_qf32_128B HvxVR:$src1),
+ (V6_vneg_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf32_sf HvxVR:$src1),
+ (V6_vneg_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vneg_qf32_sf_128B HvxVR:$src1),
+ (V6_vneg_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2),
(V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2),
diff --git a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
index 93418f7..a10c937 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
@@ -34,13 +34,13 @@ STATISTIC(HexagonNumStoreAbsConversions,
namespace {
class HexagonGenMemAbsolute : public MachineFunctionPass {
- const HexagonInstrInfo *TII;
- MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
+ const HexagonInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
public:
static char ID;
- HexagonGenMemAbsolute() : MachineFunctionPass(ID), TII(0), MRI(0), TRI(0) {}
+ HexagonGenMemAbsolute() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "Hexagon Generate Load/Store Set Absolute Address Instruction";
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 7ee280d..eadf020 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -1815,7 +1815,7 @@ struct WeightedLeaf {
int Weight;
int InsertionOrder;
- WeightedLeaf() {}
+ WeightedLeaf() = default;
WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 85ce944..e40dbd2 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -3434,6 +3434,19 @@ let AddedComplexity = 100 in {
(C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>;
}
+multiclass FloatClass<SDPatternOperator IntOp, InstHexagon MI,
+ PatFrag RegPred> {
+ let AddedComplexity = 100 in {
+ def: Pat<(i1 (seteq (IntOp RegPred:$Rs, u5_0ImmPred_timm:$u5), 0)),
+ (C2_not (MI RegPred:$Rs, u5_0ImmPred_timm:$u5))>;
+ def: Pat<(i1 (setne (IntOp RegPred:$Rs, u5_0ImmPred_timm:$u5), 0)),
+ (MI RegPred:$Rs, u5_0ImmPred_timm:$u5)>;
+ }
+}
+
+defm : FloatClass<int_hexagon_F2_sfclass, F2_sfclass, F32>;
+defm : FloatClass<int_hexagon_F2_dfclass, F2_dfclass, F64>;
+
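The FloatClass multiclass above folds an integer comparison of the sfclass/dfclass intrinsic result against zero into the predicate the instruction already produces, inserting C2_not only for the == 0 case. A sketch of source code that should reach these patterns, assuming the usual __builtin_HEXAGON_* builtin naming for int_hexagon_F2_sfclass (the mask value is an arbitrary example):

    // (sfclass(x, mask) != 0) selects to just p = sfclass(x, #mask);
    // (sfclass(x, mask) == 0) selects to its C2_not negation.
    bool inClass(float x) {
      return __builtin_HEXAGON_F2_sfclass(x, /*u5 class mask=*/3) != 0;
    }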
def: Pat<(int_hexagon_instrprof_custom (HexagonAtPcrel tglobaladdr:$addr), u32_0ImmPred:$I),
(PS_call_instrprof_custom tglobaladdr:$addr, imm:$I)>;
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 1637b91..d19920c 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -612,6 +612,9 @@ let Predicates = [UseHVX] in {
(V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>;
def: Pat<(VecQ32 (trunc HVI32:$Vs)),
(V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>;
+ def: Pat<(VecQ16 (trunc HWI32:$Vss)),
+ (Combineq (VecQ32 (V6_vandvrt (HiVec $Vss), (ToI32 0x01010101))),
+ (VecQ32 (V6_vandvrt (LoVec $Vss), (ToI32 0x01010101))))>;
}
let Predicates = [UseHVX] in {
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index b9cdd6a..ce2de75 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -544,7 +544,7 @@ int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
if (!hasV60Ops())
return Latency;
- auto &QII = static_cast<const HexagonInstrInfo &>(*getInstrInfo());
+ const HexagonInstrInfo &QII = *getInstrInfo();
// BSB scheduling.
if (QII.isHVXVec(SrcInst) || useBSBScheduling())
Latency = (Latency + 1) >> 1;
diff --git a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
index 71bdfc66..5a85f34 100644
--- a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
@@ -43,7 +43,7 @@ namespace {
class HexagonTfrCleanup : public MachineFunctionPass {
public:
static char ID;
- HexagonTfrCleanup() : MachineFunctionPass(ID), HII(0), TRI(0) {}
+ HexagonTfrCleanup() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "Hexagon TFR Cleanup"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -52,8 +52,8 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- const HexagonInstrInfo *HII;
- const TargetRegisterInfo *TRI;
+ const HexagonInstrInfo *HII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
typedef DenseMap<unsigned, uint64_t> ImmediateMap;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 690dd73..e86b21c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -365,6 +365,7 @@ def : Pat<(f32 (uint_to_fp (i64 (sexti32 (i64 GPR:$src))))),
// FP Rounding
let Predicates = [HasBasicF, IsLA64] in {
def : PatFpr<frint, FRINT_S, FPR32>;
+def : PatFpr<flog2, FLOGB_S, FPR32>;
} // Predicates = [HasBasicF, IsLA64]
let Predicates = [HasBasicF, IsLA32] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index daefbaa..2e88254 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -348,6 +348,7 @@ def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>;
// FP Rounding
let Predicates = [HasBasicD, IsLA64] in {
def : PatFpr<frint, FRINT_D, FPR64>;
+def : PatFpr<flog2, FLOGB_D, FPR64>;
} // Predicates = [HasBasicD, IsLA64]
/// Pseudo-instructions needed for the soft-float ABI with LA32D
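With the PatFpr<flog2, FLOGB_{S/D}> patterns above and ISD::FLOG2 marked Legal in LoongArchISelLowering.cpp below, llvm.log2.* should now select directly to flogb.s/flogb.d on LA64 instead of becoming a libcall. A hedged example of code expected to hit this path (assuming -fno-math-errno so the builtin lowers to the llvm.log2 intrinsic):

    // Expected to compile to a single flogb.s on loongarch64 with HasBasicF,
    // rather than a call to log2f (an inference from the patterns above).
    float log2_s(float x) { return __builtin_log2f(x); }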
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6..fe700e1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -244,8 +244,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_BF16, MVT::f32,
Subtarget.isSoftFPABI() ? LibCall : Custom);
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
+ }
if (!Subtarget.hasBasicD()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
@@ -291,8 +293,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_BF16, MVT::f64,
Subtarget.isSoftFPABI() ? LibCall : Custom);
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f64, Legal);
+ }
}
// Set operations for 'LSX' feature.
@@ -362,10 +366,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Legal);
+ setOperationAction(ISD::FLOG2, VT, Legal);
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
}
setOperationAction(ISD::CTPOP, GRLenVT, Legal);
setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
@@ -443,10 +454,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Legal);
+ setOperationAction(ISD::FLOG2, VT, Legal);
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 613dea6..b502b056 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1558,6 +1558,10 @@ defm : PatXrXrF<fmul, "XVFMUL">;
// XVFDIV_{S/D}
defm : PatXrXrF<fdiv, "XVFDIV">;
+// XVFMAX_{S/D}, XVFMIN_{S/D}
+defm : PatXrXrF<fmaxnum, "XVFMAX">;
+defm : PatXrXrF<fminnum, "XVFMIN">;
+
// XVFMADD_{S/D}
def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
(XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
@@ -1593,6 +1597,9 @@ def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa),
// XVFSQRT_{S/D}
defm : PatXrF<fsqrt, "XVFSQRT">;
+// XVFLOGB_{S/D}
+defm : PatXrF<flog2, "XVFLOGB">;
+
// XVRECIP_{S/D}
def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj),
(XVFRECIP_S v8f32:$xj)>;
@@ -2024,6 +2031,24 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
(XVFTINTRZ_LU_D v4f64:$vj)),
sub_128)>;
+// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU}
+defm : VAvgPat<sra, "XVAVG_B", v32i8>;
+defm : VAvgPat<sra, "XVAVG_H", v16i16>;
+defm : VAvgPat<sra, "XVAVG_W", v8i32>;
+defm : VAvgPat<sra, "XVAVG_D", v4i64>;
+defm : VAvgPat<srl, "XVAVG_BU", v32i8>;
+defm : VAvgPat<srl, "XVAVG_HU", v16i16>;
+defm : VAvgPat<srl, "XVAVG_WU", v8i32>;
+defm : VAvgPat<srl, "XVAVG_DU", v4i64>;
+defm : VAvgrPat<sra, "XVAVGR_B", v32i8>;
+defm : VAvgrPat<sra, "XVAVGR_H", v16i16>;
+defm : VAvgrPat<sra, "XVAVGR_W", v8i32>;
+defm : VAvgrPat<sra, "XVAVGR_D", v4i64>;
+defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>;
+defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>;
+defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>;
+defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>;
+
// abs
def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>;
def : Pat<(abs v16i16:$xj), (XVSIGNCOV_H v16i16:$xj, v16i16:$xj)>;
@@ -2403,6 +2428,12 @@ def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm),
(XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>;
+// Vector floating-point conversion
+defm : PatXrF<fceil, "XVFRINTRP">;
+defm : PatXrF<ffloor, "XVFRINTRM">;
+defm : PatXrF<ftrunc, "XVFRINTRZ">;
+defm : PatXrF<froundeven, "XVFRINTRNE">;
+
// load
def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm),
(XVLD GPR:$rj, (to_valid_timm timm:$imm))>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 4619c6b..6b74a4b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1518,6 +1518,18 @@ multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
}
}
+multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
+multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
+ (vt (vsplat_imm_eq_1)))),
+ (vt (vsplat_imm_eq_1))),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
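VAvgPat and VAvgrPat above recognize the truncating and rounding average idioms, with sra matching the signed instructions and srl the unsigned ones. The per-element semantics, sketched on 32-bit unsigned elements (the signed forms are the same shape with an arithmetic shift):

    #include <cstdint>

    // Truncating average: the VAvgPat shape, (a + b) >> 1.   (vavg.wu)
    uint32_t avg_wu(uint32_t a, uint32_t b)  { return (a + b) >> 1; }
    // Rounding average: the VAvgrPat shape, (a + b + 1) >> 1. (vavgr.wu)
    uint32_t avgr_wu(uint32_t a, uint32_t b) { return (a + b + 1) >> 1; }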
let Predicates = [HasExtLSX] in {
// VADD_{B/H/W/D}
@@ -1748,6 +1760,10 @@ defm : PatVrVrF<fmul, "VFMUL">;
// VFDIV_{S/D}
defm : PatVrVrF<fdiv, "VFDIV">;
+// VFMAX_{S/D}, VFMIN_{S/D}
+defm : PatVrVrF<fmaxnum, "VFMAX">;
+defm : PatVrVrF<fminnum, "VFMIN">;
+
// VFMADD_{S/D}
def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),
(VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
@@ -1783,6 +1799,9 @@ def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va),
// VFSQRT_{S/D}
defm : PatVrF<fsqrt, "VFSQRT">;
+// VFLOGB_{S/D}
+defm : PatVrF<flog2, "VFLOGB">;
+
// VFRECIP_{S/D}
def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj),
(VFRECIP_S v4f32:$vj)>;
@@ -2154,6 +2173,24 @@ def : Pat<(f32 f32imm_vldi:$in),
def : Pat<(f64 f64imm_vldi:$in),
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
+// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU}
+defm : VAvgPat<sra, "VAVG_B", v16i8>;
+defm : VAvgPat<sra, "VAVG_H", v8i16>;
+defm : VAvgPat<sra, "VAVG_W", v4i32>;
+defm : VAvgPat<sra, "VAVG_D", v2i64>;
+defm : VAvgPat<srl, "VAVG_BU", v16i8>;
+defm : VAvgPat<srl, "VAVG_HU", v8i16>;
+defm : VAvgPat<srl, "VAVG_WU", v4i32>;
+defm : VAvgPat<srl, "VAVG_DU", v2i64>;
+defm : VAvgrPat<sra, "VAVGR_B", v16i8>;
+defm : VAvgrPat<sra, "VAVGR_H", v8i16>;
+defm : VAvgrPat<sra, "VAVGR_W", v4i32>;
+defm : VAvgrPat<sra, "VAVGR_D", v2i64>;
+defm : VAvgrPat<srl, "VAVGR_BU", v16i8>;
+defm : VAvgrPat<srl, "VAVGR_HU", v8i16>;
+defm : VAvgrPat<srl, "VAVGR_WU", v4i32>;
+defm : VAvgrPat<srl, "VAVGR_DU", v2i64>;
+
// abs
def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>;
def : Pat<(abs v8i16:$vj), (VSIGNCOV_H v8i16:$vj, v8i16:$vj)>;
@@ -2519,6 +2556,11 @@ def : Pat<(f64 (froundeven FPR64:$fj)),
(f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
+defm : PatVrF<fceil, "VFRINTRP">;
+defm : PatVrF<ffloor, "VFRINTRM">;
+defm : PatVrF<ftrunc, "VFRINTRZ">;
+defm : PatVrF<froundeven, "VFRINTRNE">;
+
// load
def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm),
(VLD GPR:$rj, (to_valid_timm timm:$imm))>;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
index 7d54565..6d69af5 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -39,7 +39,7 @@ LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)
: MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,
/*HasRelocationAddend=*/true) {}
-LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {}
+LoongArchELFObjectWriter::~LoongArchELFObjectWriter() = default;
unsigned LoongArchELFObjectWriter::getRelocType(const MCFixup &Fixup,
const MCValue &Target,
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index f0e2bc4..08fa51d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -38,7 +38,7 @@ public:
LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
: Ctx(ctx), MCII(MCII) {}
- ~LoongArchMCCodeEmitter() override {}
+ ~LoongArchMCCodeEmitter() override = default;
void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index e37f3a66..fb5cd5c2 100644
--- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -690,9 +690,9 @@ bool M68kAsmParser::parseRegisterName(MCRegister &RegNo, SMLoc Loc,
} else {
// Floating point control register.
RegNo = StringSwitch<unsigned>(RegisterNameLower)
- .Cases("fpc", "fpcr", M68k::FPC)
- .Cases("fps", "fpsr", M68k::FPS)
- .Cases("fpi", "fpiar", M68k::FPIAR)
+ .Cases({"fpc", "fpcr"}, M68k::FPC)
+ .Cases({"fps", "fpsr"}, M68k::FPS)
+ .Cases({"fpi", "fpiar"}, M68k::FPIAR)
.Default(M68k::NoRegister);
assert(RegNo != M68k::NoRegister &&
"Unrecognized FP control register name");
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
index fe83dc6..51bafe4 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
@@ -49,7 +49,7 @@ public:
M68kAsmBackend(const Target &T, const MCSubtargetInfo &STI)
: MCAsmBackend(llvm::endianness::big),
Allows32BitBranch(llvm::StringSwitch<bool>(STI.getCPU())
- .CasesLower("m68020", "m68030", "m68040", true)
+ .CasesLower({"m68020", "m68030", "m68040"}, true)
.Default(false)) {}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &,
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 97379d7..f588e56 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -6176,7 +6176,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
CC = StringSwitch<unsigned>(Name)
.Case("zero", 0)
- .Cases("at", "AT", 1)
+ .Cases({"at", "AT"}, 1)
.Case("a0", 4)
.Case("a1", 5)
.Case("a2", 6)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
index caef8fe7..b832b82 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
@@ -20,7 +20,7 @@ class MemoryLocation;
class NVPTXAAResult : public AAResultBase {
public:
- NVPTXAAResult() {}
+ NVPTXAAResult() = default;
NVPTXAAResult(NVPTXAAResult &&Arg) : AAResultBase(std::move(Arg)) {}
/// Handle invalidation events from the new pass manager.
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 7e7ee75..c667a09 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1871,17 +1871,6 @@ bool NVPTXScopes::empty() const { return Scopes.size() == 0; }
(is_ch ? (CP_ASYNC_BULK_TENSOR_OPCODE(RED, dim, mode, is_s32, _CH)) \
: (CP_ASYNC_BULK_TENSOR_OPCODE(RED, dim, mode, is_s32, )))
-#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(dim, mode, is_mc, is_ch, is_s32) \
- [&]() -> auto { \
- if (is_mc && is_ch) \
- return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _MC_CH); \
- if (is_ch) \
- return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _CH); \
- if (is_mc) \
- return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, _MC); \
- return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, ); \
- }()
-
static unsigned GetCpAsyncBulkTensorS2GReductionOpcode(size_t Dim,
bool IsShared32,
bool IsCacheHint,
@@ -1925,112 +1914,6 @@ static unsigned GetCpAsyncBulkTensorS2GReductionOpcode(size_t Dim,
}
}
-static unsigned GetCpAsyncBulkTensorG2SOpcode(size_t Dim, bool IsShared32,
- bool IsMultiCast,
- bool IsCacheHint, bool IsIm2Col) {
- if (IsIm2Col) {
- switch (Dim) {
- case 3:
- return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(3D, IM2COL, IsMultiCast,
- IsCacheHint, IsShared32);
- case 4:
- return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(4D, IM2COL, IsMultiCast,
- IsCacheHint, IsShared32);
- case 5:
- return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(5D, IM2COL, IsMultiCast,
- IsCacheHint, IsShared32);
- default:
- llvm_unreachable("Invalid Dimension in im2col mode for "
- "GetCpAsyncBulkTensorG2SOpcode.");
- }
- } else {
- switch (Dim) {
- case 1:
- return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(1D, TILE, IsMultiCast,
- IsCacheHint, IsShared32);
- case 2:
- return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(2D, TILE, IsMultiCast,
- IsCacheHint, IsShared32);
- case 3:
- return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(3D, TILE, IsMultiCast,
- IsCacheHint, IsShared32);
- case 4:
- return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(4D, TILE, IsMultiCast,
- IsCacheHint, IsShared32);
- case 5:
- return GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(5D, TILE, IsMultiCast,
- IsCacheHint, IsShared32);
- default:
- llvm_unreachable(
- "Invalid Dimension in tile mode for GetCpAsyncBulkTensorG2SOpcode.");
- }
- }
-}
-
-static size_t GetDimsFromIntrinsic(unsigned IID) {
- switch (IID) {
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
- case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d:
- return 3;
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
- case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d:
- return 4;
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
- case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d:
- return 5;
- default:
- llvm_unreachable("Invalid im2col intrinsic in GetDimsFromIntrinsic.");
- }
-}
-
-void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorG2SCommon(SDNode *N,
- bool IsIm2Col) {
- // We have {Chain, Intrinsic-ID} followed by the actual intrinsic args:
- // {dst, mbar, src, dims{d0...dN}, im2col_offsets{dims-2}
- // multicast, cache_hint,
- // multicast_flag, cache_hint_flag, cta_group_flag}
- // NumOperands = {Chain, IID} + {Actual intrinsic args}
- // = {2} + {8 + dims + im2col_offsets}
- size_t NumOps = N->getNumOperands();
- size_t NumDims = IsIm2Col ? GetDimsFromIntrinsic(N->getConstantOperandVal(1))
- : (NumOps - 10);
- // Offsets is always 'NumDims - 2' and only for im2col mode
- size_t NumOffsets = IsIm2Col ? (NumDims - 2) : 0;
- bool IsCacheHint = N->getConstantOperandVal(NumOps - 2) == 1;
- bool IsMultiCast = N->getConstantOperandVal(NumOps - 3) == 1;
- size_t NumBaseArgs = NumDims + NumOffsets + 3; // for {dst, mbar, src}
- size_t MultiCastIdx = NumBaseArgs + 2; // for Chain and IID
-
- unsigned CTAGroupVal = N->getConstantOperandVal(NumOps - 1);
- if ((CTAGroupVal > 0) && !Subtarget->hasCpAsyncBulkTensorCTAGroupSupport())
- report_fatal_error(
- formatv("CpAsyncBulkTensorG2S cta_group::1/2 is not supported on sm_{}",
- Subtarget->getSmVersion()));
-
- SDLoc DL(N);
- SmallVector<SDValue, 8> Ops(N->ops().slice(2, NumBaseArgs));
-
- // Push MultiCast operand, if available
- if (IsMultiCast)
- Ops.push_back(N->getOperand(MultiCastIdx));
-
- // Push CacheHint operand, if available
- if (IsCacheHint)
- Ops.push_back(N->getOperand(MultiCastIdx + 1));
-
- // Flag for CTA Group
- Ops.push_back(getI32Imm(CTAGroupVal, DL));
-
- // Finally, the chain operand
- Ops.push_back(N->getOperand(0));
-
- bool IsShared32 =
- CurDAG->getDataLayout().getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32;
- unsigned Opcode = GetCpAsyncBulkTensorG2SOpcode(
- NumDims, IsShared32, IsMultiCast, IsCacheHint, IsIm2Col);
- ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops));
-}
-
void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorReduceCommon(SDNode *N,
unsigned RedOp,
bool IsIm2Col) {
@@ -2175,18 +2058,6 @@ bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) {
switch (IID) {
default:
return false;
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d:
- SelectCpAsyncBulkTensorG2SCommon(N);
- return true;
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
- SelectCpAsyncBulkTensorG2SCommon(N, /*IsIm2Col=*/true);
- return true;
case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d:
case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d:
case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index c912e70..1cb579b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -86,7 +86,6 @@ private:
bool tryEXTRACT_VECTOR_ELEMENT(SDNode *N);
void SelectV2I64toI128(SDNode *N);
void SelectI128toV2I64(SDNode *N);
- void SelectCpAsyncBulkTensorG2SCommon(SDNode *N, bool IsIm2Col = false);
void SelectCpAsyncBulkTensorReduceCommon(SDNode *N, unsigned RedOp,
bool IsIm2Col = false);
void SelectTcgen05Ld(SDNode *N, bool hasOffset = false);
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index dfde0cc..b260221 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -139,7 +139,6 @@ def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
def hasDotInstructions : Predicate<"Subtarget->hasDotInstructions()">;
def hasTcgen05Instructions : Predicate<"Subtarget->hasTcgen05Instructions()">;
def hasTcgen05MMAScaleInputDImm : Predicate<"Subtarget->hasTcgen05MMAScaleInputDImm()">;
-def hasTMACTAGroupSupport : Predicate<"Subtarget->hasCpAsyncBulkTensorCTAGroupSupport()">;
def hasF32x2Instructions : Predicate<"Subtarget->hasF32x2Instructions()">;
class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index c923f0e..50827bd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -599,75 +599,15 @@ class TMA_IM2COL_UTIL<int dim, string mode> {
string base_str = !interleave(!foreach(i, !range(offsets), "$im2col" # i), ", ");
}
-// From Global to Shared memory (G2S)
-class G2S_STRINGS<int dim, string mode, bit mc, bit ch, bit is_shared32 = 0> {
- string prefix = "cp.async.bulk.tensor";
- string dir = "shared::cluster.global";
- string completion = "mbarrier::complete_tx::bytes";
- string inst_name = prefix
- # "." # dim # "d"
- # "." # dir
- # "." # mode
- # "." # completion
- # !if(mc, ".multicast::cluster", "")
- # !if(ch, ".L2::cache_hint", "");
- string intr_name = "CP_ASYNC_BULK_TENSOR_G2S_"
- # dim # "D"
- # !if(is_shared32, "_SHARED32", "")
- # !if(!eq(mode, "tile"), "_TILE", "_IM2COL");
-}
-
def CTAGroupFlags : Operand<i32> {
let PrintMethod = "printCTAGroup";
}
-multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, bit is_shared32, string mode> {
- defvar dims_dag = TMA_DIMS_UTIL<dim>.ins_dag;
- defvar dims_str = TMA_DIMS_UTIL<dim>.base_str;
- defvar asm_str_default = "$cg [$dst], [$tmap, {{" # dims_str # "}}], [$mbar]";
- defvar rc = !if(is_shared32, B32, B64);
-
- defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0);
- defvar im2col_dag = !if(!eq(mode, "im2col"),
- !dag(ins, !listsplat(B16, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)),
- (ins));
- defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", ");
- defvar im2col_asm_str = ", {{" # im2col_str # "}}";
-
- defvar asm_str = !if(!eq(mode, "im2col"),
- !strconcat(asm_str_default, im2col_asm_str), asm_str_default);
+def tma_cta_group_imm0 : TImmLeaf<i32, [{return Imm == 0;}]>;
+def tma_cta_group_imm_any : TImmLeaf<i32, [{return Imm >= 0;}]>;
- def "" : NVPTXInst<(outs),
- !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag, (ins CTAGroupFlags:$cg)),
- !strconcat(G2S_STRINGS<dim, mode, 0, 0>.inst_name, asm_str, ";")>,
- Requires<[hasPTX<80>, hasSM<90>]>;
- def _MC : NVPTXInst<(outs),
- !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag,
- (ins B16:$mc, CTAGroupFlags:$cg)),
- !strconcat(G2S_STRINGS<dim, mode, 1, 0>.inst_name, asm_str, ", $mc;")>,
- Requires<[hasPTX<80>, hasSM<90>]>;
- def _CH : NVPTXInst<(outs),
- !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag,
- (ins B64:$ch, CTAGroupFlags:$cg)),
- !strconcat(G2S_STRINGS<dim, mode, 0, 1>.inst_name, asm_str, ", $ch;")>,
- Requires<[hasPTX<80>, hasSM<90>]>;
- def _MC_CH : NVPTXInst<(outs),
- !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag,
- (ins B16:$mc, B64:$ch, CTAGroupFlags:$cg)),
- !strconcat(G2S_STRINGS<dim, mode, 1, 1>.inst_name, asm_str, ", $mc, $ch;")>,
- Requires<[hasPTX<80>, hasSM<90>]>;
-}
-
-foreach dim = [1, 2, 3, 4, 5] in {
- foreach shared32 = [true, false] in {
- foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
- defm G2S_STRINGS<dim, mode, 0, 0, shared32>.intr_name :
- CP_ASYNC_BULK_TENSOR_G2S_INTR<dim, shared32, mode>;
- }
- }
-}
-
-multiclass TMA_TENSOR_G2S_INTR<int dim, string mode, list<Predicate> pred = []> {
+multiclass TMA_TENSOR_G2S_INTR<int dim, string mode, list<Predicate> pred,
+ TImmLeaf cta_group_type = tma_cta_group_imm_any> {
defvar dims_dag = TMA_DIMS_UTIL<dim>.ins_dag;
defvar dims_str = TMA_DIMS_UTIL<dim>.base_str;
defvar asm_str_base = "$cg [$dst], [$tmap, {{" # dims_str # "}}], [$mbar]";
@@ -697,10 +637,10 @@ multiclass TMA_TENSOR_G2S_INTR<int dim, string mode, list<Predicate> pred = []>
!setdagop(dims_dag, intr),
!setdagop(im2col_dag, intr),
(intr B16:$mc, B64:$ch));
- defvar intr_dag_no_hints = !con(intr_dag_base, (intr 0, 0, timm:$cg));
- defvar intr_dag_with_mc = !con(intr_dag_base, (intr -1, 0, timm:$cg));
- defvar intr_dag_with_ch = !con(intr_dag_base, (intr 0, -1, timm:$cg));
- defvar intr_dag_with_mc_ch = !con(intr_dag_base, (intr -1, -1, timm:$cg));
+ defvar intr_dag_no_hints = !con(intr_dag_base, (intr 0, 0, cta_group_type:$cg));
+ defvar intr_dag_with_mc = !con(intr_dag_base, (intr -1, 0, cta_group_type:$cg));
+ defvar intr_dag_with_ch = !con(intr_dag_base, (intr 0, -1, cta_group_type:$cg));
+ defvar intr_dag_with_mc_ch = !con(intr_dag_base, (intr -1, -1, cta_group_type:$cg));
def "" : NVPTXInst<(outs), ins_dag,
inst_name # asm_str # ";",
@@ -719,14 +659,30 @@ multiclass TMA_TENSOR_G2S_INTR<int dim, string mode, list<Predicate> pred = []>
[intr_dag_with_mc_ch]>,
Requires<pred>;
}
+
+foreach dim = 1...5 in {
+ defm TMA_G2S_TILE_CG0_ # dim # "D"
+ : TMA_TENSOR_G2S_INTR<dim, "tile", [hasPTX<80>, hasSM<90>],
+ tma_cta_group_imm0>;
+ defm TMA_G2S_TILE_ # dim # "D"
+ : TMA_TENSOR_G2S_INTR<dim, "tile",
+ [callSubtarget<"hasTMABlackwellSupport">]>;
+}
foreach dim = 3...5 in {
+ defm TMA_G2S_IM2COL_CG0_ # dim # "D"
+ : TMA_TENSOR_G2S_INTR<dim, "im2col", [hasPTX<80>, hasSM<90>],
+ tma_cta_group_imm0>;
+ defm TMA_G2S_IM2COL_ # dim # "D"
+ : TMA_TENSOR_G2S_INTR<dim, "im2col",
+ [callSubtarget<"hasTMABlackwellSupport">]>;
foreach mode = ["im2col_w", "im2col_w_128"] in {
defm TMA_G2S_ # !toupper(mode) # "_" # dim # "D"
- : TMA_TENSOR_G2S_INTR<dim, mode, [hasTMACTAGroupSupport]>;
+ : TMA_TENSOR_G2S_INTR<dim, mode,
+ [callSubtarget<"hasTMABlackwellSupport">]>;
}
}
defm TMA_G2S_TILE_GATHER4_2D : TMA_TENSOR_G2S_INTR<5, "tile_gather4",
- [hasTMACTAGroupSupport]>;
+ [callSubtarget<"hasTMABlackwellSupport">]>;
multiclass TMA_TENSOR_G2S_CTA_INTR<int dim, string mode, list<Predicate> pred = []> {
defvar dims_dag = TMA_DIMS_UTIL<dim>.ins_dag;
@@ -784,7 +740,8 @@ foreach dim = 3...5 in {
: TMA_TENSOR_G2S_CTA_INTR<dim, "im2col_w", [hasPTX<86>, hasSM<100>]>;
defm TMA_G2S_CTA_IM2COL_W_128_ # dim # "D"
- : TMA_TENSOR_G2S_CTA_INTR<dim, "im2col_w_128", [hasTMACTAGroupSupport]>;
+ : TMA_TENSOR_G2S_CTA_INTR<dim, "im2col_w_128",
+ [callSubtarget<"hasTMABlackwellSupport">]>;
}
defm TMA_G2S_CTA_TILE_GATHER4_2D : TMA_TENSOR_G2S_CTA_INTR<5, "tile_gather4",
[hasPTX<86>, hasSM<100>]>;
@@ -835,7 +792,7 @@ foreach dim = 1...5 in {
}
}
defm TMA_S2G_TILE_SCATTER4_2D : TMA_TENSOR_S2G_INTR<5, "tile_scatter4",
- [hasTMACTAGroupSupport]>;
+ [callSubtarget<"hasTMABlackwellSupport">]>;
def TMAReductionFlags : Operand<i32> {
let PrintMethod = "printTmaReductionMode";
@@ -930,11 +887,11 @@ foreach dim = 3...5 in {
foreach mode = ["im2col_w", "im2col_w_128"] in {
defvar suffix = !toupper(mode) # "_" # dim # "D";
defm TMA_TENSOR_PF_ # suffix : TMA_TENSOR_PREFETCH_INTR<dim, mode,
- [hasTMACTAGroupSupport]>;
+ [callSubtarget<"hasTMABlackwellSupport">]>;
}
}
defm TMA_TENSOR_PF_TILE_GATHER4_2D : TMA_TENSOR_PREFETCH_INTR<5, "tile_gather4",
- [hasTMACTAGroupSupport]>;
+ [callSubtarget<"hasTMABlackwellSupport">]>;
// Prefetchu and Prefetch
@@ -1605,12 +1562,17 @@ def : Pat<(int_nvvm_saturate_d f64:$a), (CVT_f64_f64 $a, CvtSAT)>;
// Exp2 Log2
//
-def : Pat<(int_nvvm_ex2_approx_ftz_f f32:$a), (EX2_APPROX_f32 $a, FTZ)>;
-def : Pat<(int_nvvm_ex2_approx_f f32:$a), (EX2_APPROX_f32 $a, NoFTZ)>;
+def : Pat<(f32 (int_nvvm_ex2_approx_ftz f32:$a)), (EX2_APPROX_f32 $a, FTZ)>;
+def : Pat<(f32 (int_nvvm_ex2_approx f32:$a)), (EX2_APPROX_f32 $a, NoFTZ)>;
let Predicates = [hasPTX<70>, hasSM<75>] in {
- def : Pat<(int_nvvm_ex2_approx_f16 f16:$a), (EX2_APPROX_f16 $a)>;
- def : Pat<(int_nvvm_ex2_approx_f16x2 v2f16:$a), (EX2_APPROX_f16x2 $a)>;
+ def : Pat<(f16 (int_nvvm_ex2_approx f16:$a)), (EX2_APPROX_f16 $a)>;
+ def : Pat<(v2f16 (int_nvvm_ex2_approx v2f16:$a)), (EX2_APPROX_f16x2 $a)>;
+}
+
+let Predicates = [hasPTX<78>, hasSM<90>] in {
+ def : Pat<(bf16 (int_nvvm_ex2_approx_ftz bf16:$a)), (EX2_APPROX_bf16 $a)>;
+ def : Pat<(v2bf16 (int_nvvm_ex2_approx_ftz v2bf16:$a)), (EX2_APPROX_bf16x2 $a)>;
}
def LG2_APPROX_f32 :
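
For readers skimming the TableGen changes above: the two new TImmLeaf guards partition the G2S patterns by the cta_group immediate. tma_cta_group_imm0 only matches cg == 0 and keeps those variants legal on the sm_90 baseline, while tma_cta_group_imm_any accepts any non-negative value and is gated on hasTMABlackwellSupport. A minimal C++ sketch of how the two predicates split the selection space (the dispatch helper is illustrative, not part of the backend):

// Transliteration of the TImmLeaf predicate bodies defined above.
static bool tmaCtaGroupImm0(long Imm) { return Imm == 0; }
static bool tmaCtaGroupImmAny(long Imm) { return Imm >= 0; }

// Illustrative dispatch: cg == 0 can always use the baseline sm_90
// variants; any other cta_group value needs the Blackwell-gated ones.
static const char *pickG2SVariant(long CtaGroup, bool HasTMABlackwell) {
  if (tmaCtaGroupImm0(CtaGroup))
    return "TMA_G2S_*_CG0 (hasPTX<80>, hasSM<90>)";
  if (HasTMABlackwell && tmaCtaGroupImmAny(CtaGroup))
    return "TMA_G2S_* (hasTMABlackwellSupport)";
  return "no match";
}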
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 194dbdc..021b1f6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -166,18 +166,15 @@ public:
// f32x2 instructions in Blackwell family
bool hasF32x2Instructions() const;
- // TMA G2S copy with cta_group::1/2 support
- bool hasCpAsyncBulkTensorCTAGroupSupport() const {
- // TODO: Update/tidy-up after the family-conditional support arrives
- switch (FullSmVersion) {
- case 1003:
- case 1013:
- return PTXVersion >= 86;
- case 1033:
- return PTXVersion >= 88;
- default:
- return false;
- }
+  // Checks support for the following TMA features:
+ // - cta_group::1/2 support
+ // - im2col_w/w_128 mode support
+ // - tile_gather4 mode support
+ // - tile_scatter4 mode support
+ bool hasTMABlackwellSupport() const {
+ return hasPTXWithFamilySMs(90, {100, 110}) ||
+ hasPTXWithFamilySMs(88, {100, 101}) ||
+ hasPTXWithAccelSMs(86, {100, 101});
}
// Prior to CUDA 12.3 ptxas did not recognize that the trap instruction
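
A hedged model of the gating above, in case the helper names are unfamiliar: hasPTXWithFamilySMs and hasPTXWithAccelSMs pair a minimum PTX version with a list of SM architectures. The standalone sketch below collapses the family/accel distinction into one check and is not the in-tree implementation:

#include <initializer_list>

// Hypothetical model: the feature is available when the PTX version is
// new enough for one of the listed SM versions.
static bool ptxWithSMs(unsigned PTXVer, unsigned MinPTX, unsigned SM,
                       std::initializer_list<unsigned> SMs) {
  if (PTXVer < MinPTX)
    return false;
  for (unsigned S : SMs)
    if (S == SM)
      return true;
  return false;
}

static bool hasTMABlackwellSupportModel(unsigned PTXVer, unsigned SM) {
  return ptxWithSMs(PTXVer, 90, SM, {100, 110}) ||
         ptxWithSMs(PTXVer, 88, SM, {100, 101}) ||
         ptxWithSMs(PTXVer, 86, SM, {100, 101});
}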
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 729c077..64593e6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -318,7 +318,7 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
// answer. These include:
//
// - nvvm_cos_approx_{f,ftz_f}
- // - nvvm_ex2_approx_{d,f,ftz_f}
+ // - nvvm_ex2_approx(_ftz)
// - nvvm_lg2_approx_{d,f,ftz_f}
// - nvvm_sin_approx_{f,ftz_f}
// - nvvm_sqrt_approx_{f,ftz_f}
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index bcb3f50..780e124 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2702,7 +2702,7 @@ static bool isSpecialLLVMGlobalArrayToSkip(const GlobalVariable *GV) {
static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {
return StringSwitch<bool>(GV->getName())
- .Cases("llvm.global_ctors", "llvm.global_dtors", true)
+ .Cases({"llvm.global_ctors", "llvm.global_dtors"}, true)
.Default(false);
}
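
This is the first of many hunks in this patch that migrate StringSwitch from the variadic Cases("a", "b", V) spelling to the initializer-list overload Cases({"a", "b"}, V); the behavior is unchanged. A minimal sketch of the new spelling:

#include "llvm/ADT/StringSwitch.h"

// Same semantics as the old variadic form, with the matched strings now
// grouped in a braced list.
static bool isStaticInitArray(llvm::StringRef Name) {
  return llvm::StringSwitch<bool>(Name)
      .Cases({"llvm.global_ctors", "llvm.global_dtors"}, true)
      .Default(false);
}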
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index da3efdc..0c2e44e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -360,6 +360,10 @@ let Predicates = [HasVSX, IsISAFuture] in {
def LXVPRLL : XForm_XTp5_RAB5<31, 621, (outs vsrprc:$XTp),
(ins (memr $RA):$addr, g8rc:$RB),
"lxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>;
+ def LXVPB32X
+ : XForm_XTp5_RAB5<31, 877, (outs vsrprc:$XTp),
+ (ins (memr $RA):$addr, g8rc:$RB),
+ "lxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;
}
let mayStore = 1 in {
@@ -376,6 +380,10 @@ let Predicates = [HasVSX, IsISAFuture] in {
: XForm_XTp5_RAB5<31, 749, (outs),
(ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB),
"stxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>;
+ def STXVPB32X
+ : XForm_XTp5_RAB5<31, 1005, (outs),
+ (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB),
+ "stxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;
}
def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB),
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 3640d25..70df59d 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -1316,7 +1316,7 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
 // useless and may even break an originally well-formed addressing mode
 // when forming this pre-inc prep for it.
if (PointerElementType->isIntegerTy(64)) {
- const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L);
+ const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
if (!LARSCEV || LARSCEV->getLoop() != L)
return false;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 000d296..4ff489d 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -296,8 +296,9 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
CodeGenOptLevel OL, bool JIT)
- : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU,
- computeFSAdditions(FS, OL, TT), Options,
+ : CodeGenTargetMachineImpl(T,
+ TT.computeDataLayout(Options.MCOptions.ABIName),
+ TT, CPU, computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM),
getEffectivePPCCodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())),
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2fba090..b04e887 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -912,7 +912,7 @@ bool PPCTTIImpl::areInlineCompatible(const Function *Caller,
bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
const Function *Callee,
- const ArrayRef<Type *> &Types) const {
+ ArrayRef<Type *> Types) const {
// We need to ensure that argument promotion does not
// attempt to promote pointers to MMA types (__vector_pair
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 475472a..8d7f255 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -147,7 +147,7 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const override;
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
- const ArrayRef<Type *> &Types) const override;
+ ArrayRef<Type *> Types) const override;
bool supportsTailCallFor(const CallBase *CB) const override;
private:
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 8198173..282cf5d 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -92,6 +92,10 @@ private:
void emitFence(AtomicOrdering FenceOrdering, SyncScope::ID FenceSSID,
MachineIRBuilder &MIB) const;
bool selectUnmergeValues(MachineInstr &MI, MachineIRBuilder &MIB) const;
+ void addVectorLoadStoreOperands(MachineInstr &I,
+ SmallVectorImpl<SrcOp> &SrcOps,
+ unsigned &CurOp, bool IsMasked,
+ bool IsStrided) const;
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineIRBuilder &MIB) const;
@@ -716,6 +720,26 @@ static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {
return GenericOpc;
}
+void RISCVInstructionSelector::addVectorLoadStoreOperands(
+ MachineInstr &I, SmallVectorImpl<SrcOp> &SrcOps, unsigned &CurOp,
+ bool IsMasked, bool IsStrided) const {
+ // Base Pointer
+ auto PtrReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PtrReg);
+
+ // Stride
+ if (IsStrided) {
+ auto StrideReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(StrideReg);
+ }
+
+ // Mask
+ if (IsMasked) {
+ auto MaskReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(MaskReg);
+ }
+}
+
bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
MachineInstr &I, MachineIRBuilder &MIB) const {
// Find the intrinsic ID.
@@ -752,21 +776,7 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
SrcOps.push_back(Register(RISCV::NoRegister));
}
- // Base Pointer
- auto PtrReg = I.getOperand(CurOp++).getReg();
- SrcOps.push_back(PtrReg);
-
- // Stride
- if (IsStrided) {
- auto StrideReg = I.getOperand(CurOp++).getReg();
- SrcOps.push_back(StrideReg);
- }
-
- // Mask
- if (IsMasked) {
- auto MaskReg = I.getOperand(CurOp++).getReg();
- SrcOps.push_back(MaskReg);
- }
+ addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, IsStrided);
RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
const RISCV::VLEPseudo *P =
@@ -795,6 +805,48 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
I.eraseFromParent();
return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
}
+ case Intrinsic::riscv_vsm:
+ case Intrinsic::riscv_vse:
+ case Intrinsic::riscv_vse_mask:
+ case Intrinsic::riscv_vsse:
+ case Intrinsic::riscv_vsse_mask: {
+ bool IsMasked = IntrinID == Intrinsic::riscv_vse_mask ||
+ IntrinID == Intrinsic::riscv_vsse_mask;
+ bool IsStrided = IntrinID == Intrinsic::riscv_vsse ||
+ IntrinID == Intrinsic::riscv_vsse_mask;
+ LLT VT = MRI->getType(I.getOperand(1).getReg());
+ unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
+
+ // Sources
+ unsigned CurOp = 1;
+ SmallVector<SrcOp, 4> SrcOps; // Source registers.
+
+ // Store value
+ auto PassthruReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PassthruReg);
+
+ addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, IsStrided);
+
+ RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
+ const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
+ IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
+
+ auto PseudoMI = MIB.buildInstr(P->Pseudo, {}, SrcOps);
+
+ // Select VL
+ auto VLOpFn = renderVLOp(I.getOperand(CurOp++));
+ for (auto &RenderFn : *VLOpFn)
+ RenderFn(PseudoMI);
+
+ // SEW
+ PseudoMI.addImm(Log2SEW);
+
+ // Memref
+ PseudoMI.cloneMemRefs(I);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
+ }
}
}
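
A runnable sketch of the operand order the new store-intrinsic selection walks, mirroring addVectorLoadStoreOperands above: store data first, then base pointer, then optional stride and mask, then VL (operand names here are illustrative):

#include <cstdio>
#include <vector>

int main() {
  bool IsStrided = true, IsMasked = true; // e.g. riscv_vsse_mask
  std::vector<const char *> Ops = {"val", "ptr", "stride", "mask", "vl"};
  unsigned CurOp = 0;
  std::printf("store data: %s\n", Ops[CurOp++]);
  std::printf("base ptr:   %s\n", Ops[CurOp++]);
  if (IsStrided)
    std::printf("stride:     %s\n", Ops[CurOp++]);
  if (IsMasked)
    std::printf("mask:       %s\n", Ops[CurOp++]);
  std::printf("vl:         %s\n", Ops[CurOp]);
  return 0;
}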
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 4105618..526675a 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -127,6 +127,10 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCAND:
case RISCV::PseudoCCOR:
case RISCV::PseudoCCXOR:
+ case RISCV::PseudoCCMAX:
+ case RISCV::PseudoCCMAXU:
+ case RISCV::PseudoCCMIN:
+ case RISCV::PseudoCCMINU:
case RISCV::PseudoCCADDW:
case RISCV::PseudoCCSUBW:
case RISCV::PseudoCCSLL:
@@ -217,6 +221,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
.addImm(0);
} else {
unsigned NewOpc;
+ // clang-format off
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode!");
@@ -228,6 +233,10 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break;
case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break;
case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break;
+ case RISCV::PseudoCCMAX: NewOpc = RISCV::MAX; break;
+ case RISCV::PseudoCCMIN: NewOpc = RISCV::MIN; break;
+ case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break;
+ case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break;
case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break;
case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break;
@@ -250,6 +259,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCNDS_BFOS: NewOpc = RISCV::NDS_BFOS; break;
case RISCV::PseudoCCNDS_BFOZ: NewOpc = RISCV::NDS_BFOZ; break;
}
+ // clang-format on
if (NewOpc == RISCV::NDS_BFOZ || NewOpc == RISCV::NDS_BFOS) {
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index b4556f6..cfee6ab 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1851,6 +1851,11 @@ def TuneShortForwardBranchOpt
def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;
+def TuneShortForwardBranchIMinMax
+ : SubtargetFeature<"short-forward-branch-i-minmax", "HasShortForwardBranchIMinMax",
+ "true", "Enable short forward branch optimization for min,max instructions in Zbb",
+ [TuneShortForwardBranchOpt]>;
+
// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
def TuneNoSinkSplatOperands
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9a6afa1..b25a054 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3995,6 +3995,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
case RISCV::CTZW:
case RISCV::CPOPW:
case RISCV::SLLI_UW:
+ case RISCV::ABSW:
case RISCV::FMV_W_X:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_W_INX:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1c930ac..e0cf739 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -433,6 +433,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtP() ||
(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
setOperationAction(ISD::ABS, XLenVT, Legal);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
} else if (Subtarget.hasShortForwardBranchOpt()) {
// We can use PseudoCCSUB to implement ABS.
setOperationAction(ISD::ABS, XLenVT, Legal);
@@ -14816,8 +14818,16 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
+ if (Subtarget.hasStdExtP()) {
+ SDValue Src =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
+ return;
+ }
+
if (Subtarget.hasStdExtZbb()) {
- // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
+ // Emit a special node that will be expanded to NEGW+MAX at isel.
// This allows us to remember that the result is sign extended. Expanding
// to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
@@ -19784,7 +19794,9 @@ legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
// LLVM's legalization take care of the splitting.
// FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
- IndexVT.changeVectorElementType(XLenVT), Index);
+ EVT::getVectorVT(*DAG.getContext(), XLenVT,
+ IndexVT.getVectorElementCount()),
+ Index);
}
IndexType = ISD::UNSIGNED_SCALED;
return true;
@@ -20290,6 +20302,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
+ case RISCVISD::ABSW:
case RISCVISD::CLZW:
case RISCVISD::CTZW: {
// Only the lower 32 bits of the first operand are read
@@ -21862,6 +21875,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::REMUW:
case RISCVISD::ROLW:
case RISCVISD::RORW:
+ case RISCVISD::ABSW:
case RISCVISD::FCVT_W_RV64:
case RISCVISD::FCVT_WU_RV64:
case RISCVISD::STRICT_FCVT_W_RV64:
@@ -23932,7 +23946,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
.Case("{t0}", RISCV::X5)
.Case("{t1}", RISCV::X6)
.Case("{t2}", RISCV::X7)
- .Cases("{s0}", "{fp}", RISCV::X8)
+ .Cases({"{s0}", "{fp}"}, RISCV::X8)
.Case("{s1}", RISCV::X9)
.Case("{a0}", RISCV::X10)
.Case("{a1}", RISCV::X11)
@@ -23969,38 +23983,38 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// use the ABI names in register constraint lists.
if (Subtarget.hasStdExtF()) {
unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
- .Cases("{f0}", "{ft0}", RISCV::F0_F)
- .Cases("{f1}", "{ft1}", RISCV::F1_F)
- .Cases("{f2}", "{ft2}", RISCV::F2_F)
- .Cases("{f3}", "{ft3}", RISCV::F3_F)
- .Cases("{f4}", "{ft4}", RISCV::F4_F)
- .Cases("{f5}", "{ft5}", RISCV::F5_F)
- .Cases("{f6}", "{ft6}", RISCV::F6_F)
- .Cases("{f7}", "{ft7}", RISCV::F7_F)
- .Cases("{f8}", "{fs0}", RISCV::F8_F)
- .Cases("{f9}", "{fs1}", RISCV::F9_F)
- .Cases("{f10}", "{fa0}", RISCV::F10_F)
- .Cases("{f11}", "{fa1}", RISCV::F11_F)
- .Cases("{f12}", "{fa2}", RISCV::F12_F)
- .Cases("{f13}", "{fa3}", RISCV::F13_F)
- .Cases("{f14}", "{fa4}", RISCV::F14_F)
- .Cases("{f15}", "{fa5}", RISCV::F15_F)
- .Cases("{f16}", "{fa6}", RISCV::F16_F)
- .Cases("{f17}", "{fa7}", RISCV::F17_F)
- .Cases("{f18}", "{fs2}", RISCV::F18_F)
- .Cases("{f19}", "{fs3}", RISCV::F19_F)
- .Cases("{f20}", "{fs4}", RISCV::F20_F)
- .Cases("{f21}", "{fs5}", RISCV::F21_F)
- .Cases("{f22}", "{fs6}", RISCV::F22_F)
- .Cases("{f23}", "{fs7}", RISCV::F23_F)
- .Cases("{f24}", "{fs8}", RISCV::F24_F)
- .Cases("{f25}", "{fs9}", RISCV::F25_F)
- .Cases("{f26}", "{fs10}", RISCV::F26_F)
- .Cases("{f27}", "{fs11}", RISCV::F27_F)
- .Cases("{f28}", "{ft8}", RISCV::F28_F)
- .Cases("{f29}", "{ft9}", RISCV::F29_F)
- .Cases("{f30}", "{ft10}", RISCV::F30_F)
- .Cases("{f31}", "{ft11}", RISCV::F31_F)
+ .Cases({"{f0}", "{ft0}"}, RISCV::F0_F)
+ .Cases({"{f1}", "{ft1}"}, RISCV::F1_F)
+ .Cases({"{f2}", "{ft2}"}, RISCV::F2_F)
+ .Cases({"{f3}", "{ft3}"}, RISCV::F3_F)
+ .Cases({"{f4}", "{ft4}"}, RISCV::F4_F)
+ .Cases({"{f5}", "{ft5}"}, RISCV::F5_F)
+ .Cases({"{f6}", "{ft6}"}, RISCV::F6_F)
+ .Cases({"{f7}", "{ft7}"}, RISCV::F7_F)
+ .Cases({"{f8}", "{fs0}"}, RISCV::F8_F)
+ .Cases({"{f9}", "{fs1}"}, RISCV::F9_F)
+ .Cases({"{f10}", "{fa0}"}, RISCV::F10_F)
+ .Cases({"{f11}", "{fa1}"}, RISCV::F11_F)
+ .Cases({"{f12}", "{fa2}"}, RISCV::F12_F)
+ .Cases({"{f13}", "{fa3}"}, RISCV::F13_F)
+ .Cases({"{f14}", "{fa4}"}, RISCV::F14_F)
+ .Cases({"{f15}", "{fa5}"}, RISCV::F15_F)
+ .Cases({"{f16}", "{fa6}"}, RISCV::F16_F)
+ .Cases({"{f17}", "{fa7}"}, RISCV::F17_F)
+ .Cases({"{f18}", "{fs2}"}, RISCV::F18_F)
+ .Cases({"{f19}", "{fs3}"}, RISCV::F19_F)
+ .Cases({"{f20}", "{fs4}"}, RISCV::F20_F)
+ .Cases({"{f21}", "{fs5}"}, RISCV::F21_F)
+ .Cases({"{f22}", "{fs6}"}, RISCV::F22_F)
+ .Cases({"{f23}", "{fs7}"}, RISCV::F23_F)
+ .Cases({"{f24}", "{fs8}"}, RISCV::F24_F)
+ .Cases({"{f25}", "{fs9}"}, RISCV::F25_F)
+ .Cases({"{f26}", "{fs10}"}, RISCV::F26_F)
+ .Cases({"{f27}", "{fs11}"}, RISCV::F27_F)
+ .Cases({"{f28}", "{ft8}"}, RISCV::F28_F)
+ .Cases({"{f29}", "{ft9}"}, RISCV::F29_F)
+ .Cases({"{f30}", "{ft10}"}, RISCV::F30_F)
+ .Cases({"{f31}", "{ft11}"}, RISCV::F31_F)
.Default(RISCV::NoRegister);
if (FReg != RISCV::NoRegister) {
assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
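
Returning to the ABSW changes earlier in this file: the node reads only the low 32 bits of its operand and yields a sign-extended 32-bit result, which is why it can join both the hasAllNBitUsers and ComputeNumSignBitsForTargetNode lists. A hedged standalone model of those semantics:

#include <cstdint>

// Models RISCVISD::ABSW: the upper 32 source bits are ignored, and the
// result is the 32-bit absolute value sign-extended to 64 bits
// (INT32_MIN wraps to itself, matching a NEGW+MAX expansion).
static int64_t abswModel(int64_t Src) {
  int32_t Lo = static_cast<int32_t>(Src);  // upper 32 bits ignored
  uint32_t Mag = Lo < 0 ? 0u - static_cast<uint32_t>(Lo)
                        : static_cast<uint32_t>(Lo);
  return static_cast<int32_t>(Mag);        // sign-extended to i64
}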
diff --git a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
index a1c8e23..c58a5c0 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
@@ -48,7 +48,7 @@ class VXRMInfo {
} State = Uninitialized;
public:
- VXRMInfo() {}
+ VXRMInfo() = default;
static VXRMInfo getUnknown() {
VXRMInfo Info;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 912b82d..c9df787 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -869,7 +869,7 @@ std::optional<unsigned> getFoldedOpcode(MachineFunction &MF, MachineInstr &MI,
}
}
-// This is the version used during inline spilling
+// This is the version used during InlineSpiller::spillAroundUses
MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
@@ -1699,6 +1699,10 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
case RISCV::AND: return RISCV::PseudoCCAND;
case RISCV::OR: return RISCV::PseudoCCOR;
case RISCV::XOR: return RISCV::PseudoCCXOR;
+ case RISCV::MAX: return RISCV::PseudoCCMAX;
+ case RISCV::MAXU: return RISCV::PseudoCCMAXU;
+ case RISCV::MIN: return RISCV::PseudoCCMIN;
+ case RISCV::MINU: return RISCV::PseudoCCMINU;
case RISCV::ADDI: return RISCV::PseudoCCADDI;
case RISCV::SLLI: return RISCV::PseudoCCSLLI;
@@ -1735,7 +1739,8 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
/// return the defining instruction.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
const MachineRegisterInfo &MRI,
- const TargetInstrInfo *TII) {
+ const TargetInstrInfo *TII,
+ const RISCVSubtarget &STI) {
if (!Reg.isVirtual())
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
@@ -1743,6 +1748,12 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return nullptr;
+
+ if (!STI.hasShortForwardBranchIMinMax() &&
+ (MI->getOpcode() == RISCV::MAX || MI->getOpcode() == RISCV::MIN ||
+ MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
+ return nullptr;
+
// Check if MI can be predicated and folded into the CCMOV.
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
return nullptr;
@@ -1806,10 +1817,10 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
MachineInstr *DefMI =
- canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
+ canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this, STI);
bool Invert = !DefMI;
if (!DefMI)
- DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
+ DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, STI);
if (!DefMI)
return nullptr;
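
A hedged C++ illustration of the shape the new gating targets: canFoldAsPredicatedOp now refuses to predicate a MIN/MAX unless the subtarget opts in via short-forward-branch-i-minmax, since the fold turns a select over a single-use Zbb min/max like the following into a PseudoCCMAX:

// Source-level pattern only; the actual fold runs on MachineIR in
// optimizeSelect above.
static int selectOfMax(bool Cond, int X, int Y, int Z) {
  int M = X > Y ? X : Y; // lowers to Zbb `max`, single non-debug use
  return Cond ? M : Z;   // becomes PseudoCCMAX when the tune flag is set
}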
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 7c89686..9cb53fb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -768,7 +768,7 @@ def BGE : BranchCC_rri<0b101, "bge">;
def BLTU : BranchCC_rri<0b110, "bltu">;
def BGEU : BranchCC_rri<0b111, "bgeu">;
-let IsSignExtendingOpW = 1 in {
+let IsSignExtendingOpW = 1, canFoldAsLoad = 1 in {
def LB : Load_ri<0b000, "lb">, Sched<[WriteLDB, ReadMemBase]>;
def LH : Load_ri<0b001, "lh">, Sched<[WriteLDH, ReadMemBase]>;
def LW : Load_ri<0b010, "lw">, Sched<[WriteLDW, ReadMemBase]>;
@@ -889,8 +889,10 @@ def CSRRCI : CSR_ii<0b111, "csrrci">;
/// RV64I instructions
let Predicates = [IsRV64] in {
+let canFoldAsLoad = 1 in {
def LWU : Load_ri<0b110, "lwu">, Sched<[WriteLDW, ReadMemBase]>;
def LD : Load_ri<0b011, "ld">, Sched<[WriteLDD, ReadMemBase]>;
+}
def SD : Store_rri<0b011, "sd">, Sched<[WriteSTD, ReadStoreData, ReadMemBase]>;
let IsSignExtendingOpW = 1 in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index afac37d..4ffe3e6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -71,6 +71,7 @@ defvar DExtsRV64 = [DExt, ZdinxExt];
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtD] in {
+let canFoldAsLoad = 1 in
def FLD : FPLoad_r<0b011, "fld", FPR64, WriteFLD64>;
// Operands for stores are in the order srcreg, base, offset rather than
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 6571d99..b30f8ec 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -330,6 +330,7 @@ class PseudoFROUND<DAGOperand Ty, ValueType vt, ValueType intvt = XLenVT>
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtF] in {
+let canFoldAsLoad = 1 in
def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>;
// Operands for stores are in the order srcreg, base, offset rather than
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index cc085bb..4cbbba3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1461,5 +1461,10 @@ let Predicates = [HasStdExtP, IsRV32] in {
// Codegen patterns
//===----------------------------------------------------------------------===//
+def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>;
+
let Predicates = [HasStdExtP] in
def : PatGpr<abs, ABS>;
+
+let Predicates = [HasStdExtP, IsRV64] in
+def : PatGpr<riscv_absw, ABSW>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index 0114fbd..5a67a5a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -106,6 +106,10 @@ def PseudoCCSRA : SFBALU_rr;
def PseudoCCAND : SFBALU_rr;
def PseudoCCOR : SFBALU_rr;
def PseudoCCXOR : SFBALU_rr;
+def PseudoCCMAX : SFBALU_rr;
+def PseudoCCMIN : SFBALU_rr;
+def PseudoCCMAXU : SFBALU_rr;
+def PseudoCCMINU : SFBALU_rr;
def PseudoCCADDI : SFBALU_ri;
def PseudoCCANDI : SFBALU_ri;
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index d08115b..ea98cdb 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -172,6 +172,7 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
case RISCV::CTZW:
case RISCV::CPOPW:
case RISCV::SLLI_UW:
+ case RISCV::ABSW:
case RISCV::FMV_W_X:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_W_INX:
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index e9f43b9..84bb294 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -438,18 +438,19 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
Register VLENB = 0;
- unsigned PreHandledNum = 0;
+ unsigned VLENBShift = 0;
+ unsigned PrevHandledNum = 0;
unsigned I = 0;
while (I != NumRegs) {
auto [LMulHandled, RegClass, Opcode] =
getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill);
auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled);
bool IsLast = I + RegNumHandled == NumRegs;
- if (PreHandledNum) {
+ if (PrevHandledNum) {
Register Step;
// Optimize for constant VLEN.
if (auto VLEN = STI.getRealVLen()) {
- int64_t Offset = *VLEN / 8 * PreHandledNum;
+ int64_t Offset = *VLEN / 8 * PrevHandledNum;
Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
} else {
@@ -457,15 +458,21 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB);
}
- uint32_t ShiftAmount = Log2_32(PreHandledNum);
- if (ShiftAmount == 0)
- Step = VLENB;
- else {
- Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
- .addReg(VLENB, getKillRegState(IsLast))
- .addImm(ShiftAmount);
+ uint32_t ShiftAmount = Log2_32(PrevHandledNum);
+ // To avoid using an extra register, we shift the VLENB register and
+ // remember how much it has been shifted. We can then use relative
+ // shifts to adjust to the desired shift amount.
+ if (VLENBShift > ShiftAmount) {
+ BuildMI(MBB, II, DL, TII->get(RISCV::SRLI), VLENB)
+ .addReg(VLENB, RegState::Kill)
+ .addImm(VLENBShift - ShiftAmount);
+ } else if (VLENBShift < ShiftAmount) {
+ BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VLENB)
+ .addReg(VLENB, RegState::Kill)
+ .addImm(ShiftAmount - VLENBShift);
}
+ VLENBShift = ShiftAmount;
+ Step = VLENB;
}
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
@@ -489,7 +496,7 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
if (IsSpill)
MIB.addReg(Reg, RegState::Implicit);
- PreHandledNum = RegNumHandled;
+ PrevHandledNum = RegNumHandled;
RegEncoding += RegNumHandled;
I += RegNumHandled;
}
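
A runnable sketch of the relative-shift bookkeeping introduced above: instead of shifting a fresh copy of VLENB for every segment, the loop keeps one register plus its current log2 scale and emits a single SRLI or SLLI covering the delta (instruction text printed purely for illustration):

#include <cstdio>

int main() {
  unsigned VLENBShift = 0; // log2 scale currently applied to VLENB
  for (unsigned Want : {3u, 1u, 1u, 2u}) { // log2(PrevHandledNum) per step
    if (VLENBShift > Want)
      std::printf("srli step, step, %u\n", VLENBShift - Want);
    else if (VLENBShift < Want)
      std::printf("slli step, step, %u\n", Want - VLENBShift);
    // equal scales fall through: no instruction needed
    VLENBShift = Want;
  }
  return 0;
}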
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
index 0a318e0..ed6d355 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
@@ -15,4 +15,4 @@
using namespace llvm;
SPIRVTargetStreamer::SPIRVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
-SPIRVTargetStreamer::~SPIRVTargetStreamer() {}
+SPIRVTargetStreamer::~SPIRVTargetStreamer() = default;
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 9e11c3a..dd57b74 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -149,23 +149,23 @@ static FunctionType *getOriginalFunctionType(const Function &F) {
return isa<MDString>(N->getOperand(0)) &&
cast<MDString>(N->getOperand(0))->getString() == F.getName();
});
- // TODO: probably one function can have numerous type mutations,
- // so we should support this.
if (ThisFuncMDIt != NamedMD->op_end()) {
auto *ThisFuncMD = *ThisFuncMDIt;
- MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(1));
- assert(MD && "MDNode operand is expected");
- ConstantInt *Const = getConstInt(MD, 0);
- if (Const) {
- auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1));
- assert(CMeta && "ConstantAsMetadata operand is expected");
- assert(Const->getSExtValue() >= -1);
- // Currently -1 indicates return value, greater values mean
- // argument numbers.
- if (Const->getSExtValue() == -1)
- RetTy = CMeta->getType();
- else
- ArgTypes[Const->getSExtValue()] = CMeta->getType();
+ for (unsigned I = 1; I != ThisFuncMD->getNumOperands(); ++I) {
+ MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(I));
+ assert(MD && "MDNode operand is expected");
+ ConstantInt *Const = getConstInt(MD, 0);
+ if (Const) {
+ auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1));
+ assert(CMeta && "ConstantAsMetadata operand is expected");
+ assert(Const->getSExtValue() >= -1);
+ // Currently -1 indicates return value, greater values mean
+ // argument numbers.
+ if (Const->getSExtValue() == -1)
+ RetTy = CMeta->getType();
+ else
+ ArgTypes[Const->getSExtValue()] = CMeta->getType();
+ }
}
}
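
The rewritten loop above decodes every type-mutation operand rather than only the first, so one function may now carry several mutations. A hedged sketch of the index convention it applies, with the metadata shape reduced to (index, type) pairs: -1 selects the return type, non-negative indices select arguments:

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int, const char *>> Mutations = {
      {-1, "typed_ptr_ret"}, {0, "typed_ptr_arg0"}, {2, "typed_ptr_arg2"}};
  for (const auto &[Idx, Ty] : Mutations) {
    if (Idx == -1)
      std::printf("return type -> %s\n", Ty);
    else
      std::printf("arg %d type -> %s\n", Idx, Ty);
  }
  return 0;
}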
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 3fea21e..3f0424f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3151,6 +3151,14 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectInsertElt(ResVReg, ResType, I);
case Intrinsic::spv_gep:
return selectGEP(ResVReg, ResType, I);
+ case Intrinsic::spv_bitcast: {
+ Register OpReg = I.getOperand(2).getReg();
+ SPIRVType *OpType =
+ OpReg.isValid() ? GR.getSPIRVTypeForVReg(OpReg) : nullptr;
+ if (!GR.isBitcastCompatible(ResType, OpType))
+ report_fatal_error("incompatible result and operand types in a bitcast");
+ return selectOpWithSrcs(ResVReg, ResType, I, {OpReg}, SPIRV::OpBitcast);
+ }
case Intrinsic::spv_unref_global:
case Intrinsic::spv_init_global: {
MachineInstr *MI = MRI->getVRegDef(I.getOperand(1).getReg());
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
index 6e444c9..65dffc7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
@@ -73,16 +73,23 @@ class SPIRVLegalizePointerCast : public FunctionPass {
// Returns the loaded value.
Value *loadVectorFromVector(IRBuilder<> &B, FixedVectorType *SourceType,
FixedVectorType *TargetType, Value *Source) {
- assert(TargetType->getNumElements() <= SourceType->getNumElements());
LoadInst *NewLoad = B.CreateLoad(SourceType, Source);
buildAssignType(B, SourceType, NewLoad);
Value *AssignValue = NewLoad;
if (TargetType->getElementType() != SourceType->getElementType()) {
+ const DataLayout &DL = B.GetInsertBlock()->getModule()->getDataLayout();
+ [[maybe_unused]] TypeSize TargetTypeSize =
+ DL.getTypeSizeInBits(TargetType);
+ [[maybe_unused]] TypeSize SourceTypeSize =
+ DL.getTypeSizeInBits(SourceType);
+ assert(TargetTypeSize == SourceTypeSize);
AssignValue = B.CreateIntrinsic(Intrinsic::spv_bitcast,
{TargetType, SourceType}, {NewLoad});
buildAssignType(B, TargetType, AssignValue);
+ return AssignValue;
}
+ assert(TargetType->getNumElements() < SourceType->getNumElements());
SmallVector<int> Mask(/* Size= */ TargetType->getNumElements());
for (unsigned I = 0; I < TargetType->getNumElements(); ++I)
Mask[I] = I;
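
The restructuring above makes the two legalization paths explicit: mismatched element types take a full-vector spv_bitcast (now guarded by an equal-total-bit-size assert), and only same-element-type loads fall through to the truncating shuffle, which now asserts a strictly smaller target. A hedged standalone model:

#include <cassert>
#include <cstdio>

enum class Path { Bitcast, Shuffle };

static Path classify(unsigned SrcElts, unsigned SrcEltBits,
                     unsigned DstElts, unsigned DstEltBits) {
  if (SrcEltBits != DstEltBits) {
    assert(SrcElts * SrcEltBits == DstElts * DstEltBits);
    return Path::Bitcast; // total bit widths must match exactly
  }
  assert(DstElts < SrcElts);
  return Path::Shuffle;   // keep the leading DstElts elements
}

int main() {
  std::printf("%d\n", classify(4, 32, 8, 16) == Path::Bitcast); // 1
  std::printf("%d\n", classify(4, 32, 3, 32) == Path::Shuffle); // 1
  return 0;
}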
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index f7cdfcb..db036a5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -613,8 +613,7 @@ static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI,
<< FinalFlags << "\n";
MachineInstr *OrigMINonConst = const_cast<MachineInstr *>(OrigMI);
MachineOperand &OrigFlagsOp = OrigMINonConst->getOperand(2);
- OrigFlagsOp =
- MachineOperand::CreateImm(static_cast<unsigned>(FinalFlags));
+ OrigFlagsOp = MachineOperand::CreateImm(FinalFlags);
return; // Merge done, so we found a duplicate; don't add it to MAI.MS
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index 2d19f6de..44b6c66 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -81,7 +81,7 @@ private:
void initAvailableCapabilitiesForVulkan(const SPIRVSubtarget &ST);
public:
- RequirementHandler() {}
+ RequirementHandler() = default;
void clear() {
MinimalCaps.clear();
AllCaps.clear();
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index db6f2d6..d538009 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -192,31 +192,43 @@ static void buildOpBitcast(SPIRVGlobalRegistry *GR, MachineIRBuilder &MIB,
.addUse(OpReg);
}
-// We do instruction selections early instead of calling MIB.buildBitcast()
-// generating the general op code G_BITCAST. When MachineVerifier validates
-// G_BITCAST we see a check of a kind: if Source Type is equal to Destination
-// Type then report error "bitcast must change the type". This doesn't take into
-// account the notion of a typed pointer that is important for SPIR-V where a
-// user may and should use bitcast between pointers with different pointee types
-// (https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast).
-// It's important for correct lowering in SPIR-V, because interpretation of the
-// data type is not left to instructions that utilize the pointer, but encoded
-// by the pointer declaration, and the SPIRV target can and must handle the
-// declaration and use of pointers that specify the type of data they point to.
-// It's not feasible to improve validation of G_BITCAST using just information
-// provided by low level types of source and destination. Therefore we don't
-// produce G_BITCAST as the general op code with semantics different from
-// OpBitcast, but rather lower to OpBitcast immediately. As for now, the only
-// difference would be that CombinerHelper couldn't transform known patterns
-// around G_BUILD_VECTOR. See discussion
-// in https://github.com/llvm/llvm-project/pull/110270 for even more context.
-static void selectOpBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR,
- MachineIRBuilder MIB) {
+// We lower G_BITCAST to OpBitcast here to avoid a MachineVerifier error.
+// The verifier checks if the source and destination LLTs of a G_BITCAST are
+// different, but this check is too strict for SPIR-V's typed pointers, which
+// may have the same LLT but different SPIRVType (e.g. pointers to different
+// pointee types). By lowering to OpBitcast here, we bypass the verifier's
+// check. See discussion in https://github.com/llvm/llvm-project/pull/110270
+// for more context.
+//
+// We also handle the llvm.spv.bitcast intrinsic here. If the source and
+// destination SPIR-V types are the same, we lower it to a COPY to enable
+// further optimizations like copy propagation.
+static void lowerBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder MIB) {
SmallVector<MachineInstr *, 16> ToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
+ if (isSpvIntrinsic(MI, Intrinsic::spv_bitcast)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
+ SPIRVType *DstType = GR->getSPIRVTypeForVReg(DstReg);
+ assert(
+ DstType &&
+ "Expected destination SPIR-V type to have been assigned already.");
+ SPIRVType *SrcType = GR->getSPIRVTypeForVReg(SrcReg);
+ assert(SrcType &&
+ "Expected source SPIR-V type to have been assigned already.");
+ if (DstType == SrcType) {
+ MIB.setInsertPt(*MI.getParent(), MI);
+ MIB.buildCopy(DstReg, SrcReg);
+ ToErase.push_back(&MI);
+ continue;
+ }
+ }
+
if (MI.getOpcode() != TargetOpcode::G_BITCAST)
continue;
+
MIB.setInsertPt(*MI.getParent(), MI);
buildOpBitcast(GR, MIB, MI.getOperand(0).getReg(),
MI.getOperand(1).getReg());
@@ -237,16 +249,11 @@ static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR,
SmallVector<MachineInstr *, 10> ToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
- if (!isSpvIntrinsic(MI, Intrinsic::spv_bitcast) &&
- !isSpvIntrinsic(MI, Intrinsic::spv_ptrcast))
+ if (!isSpvIntrinsic(MI, Intrinsic::spv_ptrcast))
continue;
assert(MI.getOperand(2).isReg());
MIB.setInsertPt(*MI.getParent(), MI);
ToErase.push_back(&MI);
- if (isSpvIntrinsic(MI, Intrinsic::spv_bitcast)) {
- MIB.buildBitcast(MI.getOperand(0).getReg(), MI.getOperand(2).getReg());
- continue;
- }
Register Def = MI.getOperand(0).getReg();
Register Source = MI.getOperand(2).getReg();
Type *ElemTy = getMDOperandAsType(MI.getOperand(3).getMetadata(), 0);
@@ -1089,7 +1096,7 @@ bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) {
removeImplicitFallthroughs(MF, MIB);
insertSpirvDecorations(MF, GR, MIB);
insertInlineAsm(MF, GR, ST, MIB);
- selectOpBitcasts(MF, GR, MIB);
+ lowerBitcasts(MF, GR, MIB);
return true;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index 7dd0b95..5ba0356 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -69,7 +69,7 @@ static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
}
// Pin SPIRVTargetObjectFile's vtables to this file.
-SPIRVTargetObjectFile::~SPIRVTargetObjectFile() {}
+SPIRVTargetObjectFile::~SPIRVTargetObjectFile() = default;
SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h
index 9d0adbb..87ec256 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h
@@ -16,7 +16,7 @@ namespace llvm {
/// This implementation is used for SystemZ ELF targets.
class SystemZELFTargetObjectFile : public TargetLoweringObjectFileELF {
public:
- SystemZELFTargetObjectFile() {}
+ SystemZELFTargetObjectFile() = default;
/// Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
index 7845cdf..1bfc61f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
@@ -76,7 +76,7 @@ public:
BlockSet.insert(MBB);
}
ArrayRef<MachineBasicBlock *> getBlocks() const { return Blocks; }
- using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
+ using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator;
block_iterator block_begin() const { return getBlocks().begin(); }
block_iterator block_end() const { return getBlocks().end(); }
inline iterator_range<block_iterator> blocks() const {
@@ -96,7 +96,7 @@ public:
void addSubException(std::unique_ptr<WebAssemblyException> E) {
SubExceptions.push_back(std::move(E));
}
- using iterator = typename decltype(SubExceptions)::const_iterator;
+ using iterator = decltype(SubExceptions)::const_iterator;
iterator begin() const { return SubExceptions.begin(); }
iterator end() const { return SubExceptions.end(); }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 27f7e1a..5a1779c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -81,7 +81,7 @@ WebAssemblyFrameLowering::getLocalForStackObject(MachineFunction &MF,
// Abuse object size to record number of WebAssembly locals allocated to
// this object.
MFI.setObjectSize(FrameIndex, ValueVTs.size());
- return static_cast<unsigned>(Local);
+ return Local;
}
/// We need a base pointer in the case of having items on the stack that
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index ff4d6469..ee575e3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -207,8 +207,7 @@ template <> struct MappingTraits<WebAssemblyFunctionInfo> {
template <> struct CustomMappingTraits<BBNumberMap> {
static void inputOne(IO &YamlIO, StringRef Key,
BBNumberMap &SrcToUnwindDest) {
- YamlIO.mapRequired(Key.str().c_str(),
- SrcToUnwindDest[std::atoi(Key.str().c_str())]);
+ YamlIO.mapRequired(Key, SrcToUnwindDest[std::atoi(Key.str().c_str())]);
}
static void output(IO &YamlIO, BBNumberMap &SrcToUnwindDest) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h
index e92bf17..96b8a4e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h
@@ -35,7 +35,7 @@ public:
virtual MachineBasicBlock *getHeader() const = 0;
virtual bool contains(const MachineBasicBlock *MBB) const = 0;
virtual unsigned getNumBlocks() const = 0;
- using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
+ using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator;
virtual iterator_range<block_iterator> blocks() const = 0;
virtual bool isLoop() const = 0;
};
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 127ee67..bac3692 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1121,7 +1121,7 @@ private:
void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
};
- bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
+ bool Error(SMLoc L, const Twine &Msg, SMRange Range = {},
bool MatchingInlineAsm = false) {
MCAsmParser &Parser = getParser();
if (MatchingInlineAsm) {
@@ -2470,10 +2470,10 @@ bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
 // Report back its kind, or IOK_INVALID if it does not evaluate to a known one
unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
return StringSwitch<unsigned>(Name)
- .Cases("TYPE","type",IOK_TYPE)
- .Cases("SIZE","size",IOK_SIZE)
- .Cases("LENGTH","length",IOK_LENGTH)
- .Default(IOK_INVALID);
+ .Cases({"TYPE", "type"}, IOK_TYPE)
+ .Cases({"SIZE", "size"}, IOK_SIZE)
+ .Cases({"LENGTH", "length"}, IOK_LENGTH)
+ .Default(IOK_INVALID);
}
/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
@@ -2516,8 +2516,8 @@ unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
return StringSwitch<unsigned>(Name.lower())
.Case("type", MOK_TYPE)
- .Cases("size", "sizeof", MOK_SIZEOF)
- .Cases("length", "lengthof", MOK_LENGTHOF)
+ .Cases({"size", "sizeof"}, MOK_SIZEOF)
+ .Cases({"length", "lengthof"}, MOK_LENGTHOF)
.Default(MOK_INVALID);
}
@@ -2581,21 +2581,21 @@ bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size,
StringRef *SizeStr) {
Size = StringSwitch<unsigned>(getTok().getString())
- .Cases("BYTE", "byte", 8)
- .Cases("WORD", "word", 16)
- .Cases("DWORD", "dword", 32)
- .Cases("FLOAT", "float", 32)
- .Cases("LONG", "long", 32)
- .Cases("FWORD", "fword", 48)
- .Cases("DOUBLE", "double", 64)
- .Cases("QWORD", "qword", 64)
- .Cases("MMWORD","mmword", 64)
- .Cases("XWORD", "xword", 80)
- .Cases("TBYTE", "tbyte", 80)
- .Cases("XMMWORD", "xmmword", 128)
- .Cases("YMMWORD", "ymmword", 256)
- .Cases("ZMMWORD", "zmmword", 512)
- .Default(0);
+ .Cases({"BYTE", "byte"}, 8)
+ .Cases({"WORD", "word"}, 16)
+ .Cases({"DWORD", "dword"}, 32)
+ .Cases({"FLOAT", "float"}, 32)
+ .Cases({"LONG", "long"}, 32)
+ .Cases({"FWORD", "fword"}, 48)
+ .Cases({"DOUBLE", "double"}, 64)
+ .Cases({"QWORD", "qword"}, 64)
+ .Cases({"MMWORD", "mmword"}, 64)
+ .Cases({"XWORD", "xword"}, 80)
+ .Cases({"TBYTE", "tbyte"}, 80)
+ .Cases({"XMMWORD", "xmmword"}, 128)
+ .Cases({"YMMWORD", "ymmword"}, 256)
+ .Cases({"ZMMWORD", "zmmword"}, 512)
+ .Default(0);
if (Size) {
if (SizeStr)
*SizeStr = getTok().getString();
@@ -2886,22 +2886,22 @@ bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
// otherwise the EFLAGS Condition Code enumerator.
X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
return StringSwitch<X86::CondCode>(CC)
- .Case("o", X86::COND_O) // Overflow
- .Case("no", X86::COND_NO) // No Overflow
- .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
- .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
- .Cases("e", "z", X86::COND_E) // Equal/Zero
- .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
- .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
- .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
- .Case("s", X86::COND_S) // Sign
- .Case("ns", X86::COND_NS) // No Sign
- .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
- .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
- .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
- .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
- .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
- .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
+ .Case("o", X86::COND_O) // Overflow
+ .Case("no", X86::COND_NO) // No Overflow
+ .Cases({"b", "nae"}, X86::COND_B) // Below/Neither Above nor Equal
+ .Cases({"ae", "nb"}, X86::COND_AE) // Above or Equal/Not Below
+ .Cases({"e", "z"}, X86::COND_E) // Equal/Zero
+ .Cases({"ne", "nz"}, X86::COND_NE) // Not Equal/Not Zero
+ .Cases({"be", "na"}, X86::COND_BE) // Below or Equal/Not Above
+ .Cases({"a", "nbe"}, X86::COND_A) // Above/Neither Below nor Equal
+ .Case("s", X86::COND_S) // Sign
+ .Case("ns", X86::COND_NS) // No Sign
+ .Cases({"p", "pe"}, X86::COND_P) // Parity/Parity Even
+ .Cases({"np", "po"}, X86::COND_NP) // No Parity/Parity Odd
+ .Cases({"l", "nge"}, X86::COND_L) // Less/Neither Greater nor Equal
+ .Cases({"ge", "nl"}, X86::COND_GE) // Greater or Equal/Not Less
+ .Cases({"le", "ng"}, X86::COND_LE) // Less or Equal/Not Greater
+ .Cases({"g", "nle"}, X86::COND_G) // Greater/Neither Less nor Equal
.Default(X86::COND_INVALID);
}
@@ -4322,7 +4322,7 @@ bool X86AsmParser::matchAndEmitATTInstruction(
SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
- SMRange EmptyRange = std::nullopt;
+ SMRange EmptyRange;
// In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
// when matching the instruction.
if (ForcedDataPrefix == X86::Is32Bit)
@@ -4548,7 +4548,7 @@ bool X86AsmParser::matchAndEmitIntelInstruction(
SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
- SMRange EmptyRange = std::nullopt;
+ SMRange EmptyRange;
// Find one unsized memory operand, if present.
X86Operand *UnsizedMemOp = nullptr;
for (const auto &Op : Operands) {
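
Aside: the EmptyRange hunks above rely on SMRange's default constructor already producing an invalid (empty) range, which is what the removed "= std::nullopt" initializer used to spell. A minimal sketch, assuming llvm/Support/SMLoc.h:

    #include "llvm/Support/SMLoc.h"

    static void smRangeDemo() {
      llvm::SMRange EmptyRange;   // default-constructed: both ends invalid
      (void)EmptyRange.isValid(); // false, same meaning as the old nullopt
    }
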
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 89ac53e..a922725 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -620,37 +620,6 @@ struct X86Operand final : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::createReg(Reg));
}
- bool isTILEPair() const {
- return Kind == Register &&
- X86MCRegisterClasses[X86::TILERegClassID].contains(getReg());
- }
-
- void addTILEPairOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- MCRegister Reg = getReg();
- switch (Reg.id()) {
- default:
- llvm_unreachable("Invalid tile register!");
- case X86::TMM0:
- case X86::TMM1:
- Reg = X86::TMM0_TMM1;
- break;
- case X86::TMM2:
- case X86::TMM3:
- Reg = X86::TMM2_TMM3;
- break;
- case X86::TMM4:
- case X86::TMM5:
- Reg = X86::TMM4_TMM5;
- break;
- case X86::TMM6:
- case X86::TMM7:
- Reg = X86::TMM6_TMM7;
- break;
- }
- Inst.addOperand(MCOperand::createReg(Reg));
- }
-
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
if (getMemBaseReg())
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 4927b45..7d2b5eb 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -810,10 +810,6 @@ static int readModRM(struct InternalInstruction *insn) {
if (index > 7) \
*valid = 0; \
return prefix##_TMM0 + index; \
- case TYPE_TMM_PAIR: \
- if (index > 7) \
- *valid = 0; \
- return prefix##_TMM0_TMM1 + (index / 2); \
case TYPE_VK: \
index &= 0xf; \
if (index > 7) \
@@ -2323,7 +2319,6 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_YMM:
case TYPE_ZMM:
case TYPE_TMM:
- case TYPE_TMM_PAIR:
case TYPE_VK_PAIR:
case TYPE_VK:
case TYPE_DEBUGREG:
diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index dc9af2c..b0aa70b 100644
--- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -535,12 +535,6 @@ namespace X86Disassembler {
ENTRY(TMM6) \
ENTRY(TMM7)
-#define REGS_TMM_PAIRS \
- ENTRY(TMM0_TMM1) \
- ENTRY(TMM2_TMM3) \
- ENTRY(TMM4_TMM5) \
- ENTRY(TMM6_TMM7)
-
#define ALL_EA_BASES \
EA_BASES_16BIT \
EA_BASES_32BIT \
@@ -565,7 +559,6 @@ namespace X86Disassembler {
REGS_DEBUG \
REGS_CONTROL \
REGS_TMM \
- REGS_TMM_PAIRS \
ENTRY(RIP)
/// All possible values of the base field for effective-address
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index 1c5f166..759d95e 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -467,22 +467,3 @@ void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
}
llvm_unreachable("Unknown mask pair register name");
}
-
-void X86InstPrinterCommon::printTILEPair(const MCInst *MI, unsigned OpNo,
- raw_ostream &OS) {
- switch (MI->getOperand(OpNo).getReg()) {
- case X86::TMM0_TMM1:
- printRegName(OS, X86::TMM0);
- return;
- case X86::TMM2_TMM3:
- printRegName(OS, X86::TMM2);
- return;
- case X86::TMM4_TMM5:
- printRegName(OS, X86::TMM4);
- return;
- case X86::TMM6_TMM7:
- printRegName(OS, X86::TMM6);
- return;
- }
- llvm_unreachable("Unknown mask pair register name");
-}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
index 2c9467c..cb55f2f 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
@@ -40,7 +40,6 @@ protected:
const MCSubtargetInfo &STI);
void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printVKPair(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
- void printTILEPair(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a1fd366..9e291a6 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -274,9 +274,6 @@ def FeatureAMXFP8 : SubtargetFeature<"amx-fp8", "HasAMXFP8", "true",
def FeatureAMXMOVRS : SubtargetFeature<"amx-movrs", "HasAMXMOVRS", "true",
"Support AMX-MOVRS instructions",
[FeatureAMXTILE]>;
-def FeatureAMXTRANSPOSE : SubtargetFeature<"amx-transpose", "HasAMXTRANSPOSE", "true",
- "Support AMX amx-transpose instructions",
- [FeatureAMXTILE]>;
def FeatureAMXAVX512 : SubtargetFeature<"amx-avx512",
"HasAMXAVX512", "true",
"Support AMX-AVX512 instructions",
@@ -1177,8 +1174,7 @@ def ProcessorFeatures {
FeatureAMXMOVRS,
FeatureAMXAVX512,
FeatureAMXFP8,
- FeatureAMXTF32,
- FeatureAMXTRANSPOSE];
+ FeatureAMXTF32];
list<SubtargetFeature> DMRFeatures =
!listconcat(GNRDFeatures, DMRAdditionalFeatures);
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 4a9b824..e3c44c0 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -649,149 +649,6 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MI.setDesc(TII->get(Opc));
return true;
}
- // TILEPAIRLOAD is just for TILEPair spill, we don't have corresponding
- // AMX instruction to support it. So, split it to 2 load instructions:
- // "TILEPAIRLOAD TMM0:TMM1, Base, Scale, Index, Offset, Segment" -->
- // "TILELOAD TMM0, Base, Scale, Index, Offset, Segment" +
- // "TILELOAD TMM1, Base, Scale, Index, Offset + TMM_SIZE, Segment"
- case X86::PTILEPAIRLOAD: {
- int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
- Register TReg = MBBI->getOperand(0).getReg();
- bool DstIsDead = MBBI->getOperand(0).isDead();
- Register TReg0 = TRI->getSubReg(TReg, X86::sub_t0);
- Register TReg1 = TRI->getSubReg(TReg, X86::sub_t1);
- unsigned TmmSize = TRI->getRegSizeInBits(X86::TILERegClass) / 8;
-
- MachineInstrBuilder MIBLo =
- BuildMI(MBB, MBBI, DL, TII->get(X86::TILELOADD))
- .addReg(TReg0, RegState::Define | getDeadRegState(DstIsDead));
- MachineInstrBuilder MIBHi =
- BuildMI(MBB, MBBI, DL, TII->get(X86::TILELOADD))
- .addReg(TReg1, RegState::Define | getDeadRegState(DstIsDead));
-
- for (int i = 0; i < X86::AddrNumOperands; ++i) {
- MIBLo.add(MBBI->getOperand(1 + i));
- if (i == X86::AddrDisp)
- MIBHi.addImm(Disp + TmmSize);
- else
- MIBHi.add(MBBI->getOperand(1 + i));
- }
-
- // Make sure the first stride reg used in first tileload is alive.
- MachineOperand &Stride =
- MIBLo.getInstr()->getOperand(1 + X86::AddrIndexReg);
- Stride.setIsKill(false);
-
- // Split the memory operand, adjusting the offset and size for the halves.
- MachineMemOperand *OldMMO = MBBI->memoperands().front();
- MachineFunction *MF = MBB.getParent();
- MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, TmmSize);
- MachineMemOperand *MMOHi =
- MF->getMachineMemOperand(OldMMO, TmmSize, TmmSize);
-
- MIBLo.setMemRefs(MMOLo);
- MIBHi.setMemRefs(MMOHi);
-
- // Delete the pseudo.
- MBB.erase(MBBI);
- return true;
- }
- // Similar with TILEPAIRLOAD, TILEPAIRSTORE is just for TILEPair spill, no
- // corresponding AMX instruction to support it. So, split it too:
- // "TILEPAIRSTORE Base, Scale, Index, Offset, Segment, TMM0:TMM1" -->
- // "TILESTORE Base, Scale, Index, Offset, Segment, TMM0" +
- // "TILESTORE Base, Scale, Index, Offset + TMM_SIZE, Segment, TMM1"
- case X86::PTILEPAIRSTORE: {
- int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
- Register TReg = MBBI->getOperand(X86::AddrNumOperands).getReg();
- bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
- Register TReg0 = TRI->getSubReg(TReg, X86::sub_t0);
- Register TReg1 = TRI->getSubReg(TReg, X86::sub_t1);
- unsigned TmmSize = TRI->getRegSizeInBits(X86::TILERegClass) / 8;
-
- MachineInstrBuilder MIBLo =
- BuildMI(MBB, MBBI, DL, TII->get(X86::TILESTORED));
- MachineInstrBuilder MIBHi =
- BuildMI(MBB, MBBI, DL, TII->get(X86::TILESTORED));
-
- for (int i = 0; i < X86::AddrNumOperands; ++i) {
- MIBLo.add(MBBI->getOperand(i));
- if (i == X86::AddrDisp)
- MIBHi.addImm(Disp + TmmSize);
- else
- MIBHi.add(MBBI->getOperand(i));
- }
- MIBLo.addReg(TReg0, getKillRegState(SrcIsKill));
- MIBHi.addReg(TReg1, getKillRegState(SrcIsKill));
-
- // Make sure the first stride reg used in first tilestore is alive.
- MachineOperand &Stride = MIBLo.getInstr()->getOperand(X86::AddrIndexReg);
- Stride.setIsKill(false);
-
- // Split the memory operand, adjusting the offset and size for the halves.
- MachineMemOperand *OldMMO = MBBI->memoperands().front();
- MachineFunction *MF = MBB.getParent();
- MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, TmmSize);
- MachineMemOperand *MMOHi =
- MF->getMachineMemOperand(OldMMO, TmmSize, TmmSize);
-
- MIBLo.setMemRefs(MMOLo);
- MIBHi.setMemRefs(MMOHi);
-
- // Delete the pseudo.
- MBB.erase(MBBI);
- return true;
- }
- case X86::PT2RPNTLVWZ0V:
- case X86::PT2RPNTLVWZ0T1V:
- case X86::PT2RPNTLVWZ1V:
- case X86::PT2RPNTLVWZ1T1V:
- case X86::PT2RPNTLVWZ0RSV:
- case X86::PT2RPNTLVWZ0RST1V:
- case X86::PT2RPNTLVWZ1RSV:
- case X86::PT2RPNTLVWZ1RST1V: {
- for (unsigned i = 3; i > 0; --i)
- MI.removeOperand(i);
- unsigned Opc;
- switch (Opcode) {
- case X86::PT2RPNTLVWZ0V:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
- break;
- case X86::PT2RPNTLVWZ0T1V:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
- break;
- case X86::PT2RPNTLVWZ1V:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
- break;
- case X86::PT2RPNTLVWZ1T1V:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
- break;
- case X86::PT2RPNTLVWZ0RSV:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
- break;
- case X86::PT2RPNTLVWZ0RST1V:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
- break;
- case X86::PT2RPNTLVWZ1RSV:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
- break;
- case X86::PT2RPNTLVWZ1RST1V:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
- break;
- default:
- llvm_unreachable("Impossible Opcode!");
- }
- MI.setDesc(TII->get(Opc));
- return true;
- }
- case X86::PTTRANSPOSEDV:
- case X86::PTCONJTFP16V: {
- for (int i = 2; i > 0; --i)
- MI.removeOperand(i);
- MI.setDesc(TII->get(Opcode == X86::PTTRANSPOSEDV ? X86::TTRANSPOSED
- : X86::TCONJTFP16));
- return true;
- }
case X86::PTCMMIMFP16PSV:
case X86::PTCMMRLFP16PSV:
case X86::PTDPBSSDV:
@@ -800,13 +657,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::PTDPBUUDV:
case X86::PTDPBF16PSV:
case X86::PTDPFP16PSV:
- case X86::PTTDPBF16PSV:
- case X86::PTTDPFP16PSV:
- case X86::PTTCMMIMFP16PSV:
- case X86::PTTCMMRLFP16PSV:
- case X86::PTCONJTCMMIMFP16PSV:
case X86::PTMMULTF32PSV:
- case X86::PTTMMULTF32PSV:
case X86::PTDPBF8PSV:
case X86::PTDPBHF8PSV:
case X86::PTDPHBF8PSV:
@@ -816,6 +667,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MI.removeOperand(i);
unsigned Opc;
switch (Opcode) {
+ // clang-format off
case X86::PTCMMIMFP16PSV: Opc = X86::TCMMIMFP16PS; break;
case X86::PTCMMRLFP16PSV: Opc = X86::TCMMRLFP16PS; break;
case X86::PTDPBSSDV: Opc = X86::TDPBSSD; break;
@@ -824,40 +676,12 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::PTDPBUUDV: Opc = X86::TDPBUUD; break;
case X86::PTDPBF16PSV: Opc = X86::TDPBF16PS; break;
case X86::PTDPFP16PSV: Opc = X86::TDPFP16PS; break;
- case X86::PTTDPBF16PSV:
- Opc = X86::TTDPBF16PS;
- break;
- case X86::PTTDPFP16PSV:
- Opc = X86::TTDPFP16PS;
- break;
- case X86::PTTCMMIMFP16PSV:
- Opc = X86::TTCMMIMFP16PS;
- break;
- case X86::PTTCMMRLFP16PSV:
- Opc = X86::TTCMMRLFP16PS;
- break;
- case X86::PTCONJTCMMIMFP16PSV:
- Opc = X86::TCONJTCMMIMFP16PS;
- break;
- case X86::PTMMULTF32PSV:
- Opc = X86::TMMULTF32PS;
- break;
- case X86::PTTMMULTF32PSV:
- Opc = X86::TTMMULTF32PS;
- break;
- case X86::PTDPBF8PSV:
- Opc = X86::TDPBF8PS;
- break;
- case X86::PTDPBHF8PSV:
- Opc = X86::TDPBHF8PS;
- break;
- case X86::PTDPHBF8PSV:
- Opc = X86::TDPHBF8PS;
- break;
- case X86::PTDPHF8PSV:
- Opc = X86::TDPHF8PS;
- break;
-
+ case X86::PTMMULTF32PSV: Opc = X86::TMMULTF32PS; break;
+ case X86::PTDPBF8PSV: Opc = X86::TDPBF8PS; break;
+ case X86::PTDPBHF8PSV: Opc = X86::TDPBHF8PS; break;
+ case X86::PTDPHBF8PSV: Opc = X86::TDPHBF8PS; break;
+ case X86::PTDPHF8PSV: Opc = X86::TDPHF8PS; break;
+ // clang-format on
default:
llvm_unreachable("Unexpected Opcode");
}
diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
index 787b71d..06f729a 100644
--- a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
@@ -267,24 +267,16 @@ void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
<< printReg(TileReg, TRI) << '\n');
}
-static unsigned getTileDefNum(MachineRegisterInfo *MRI, Register Reg) {
- if (Reg.isVirtual()) {
- unsigned RegClassID = MRI->getRegClass(Reg)->getID();
- if (RegClassID == X86::TILERegClassID)
- return 1;
- if (RegClassID == X86::TILEPAIRRegClassID)
- return 2;
- } else {
- if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
- return 1;
- if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7)
- return 2;
+static bool isTileRegister(MachineRegisterInfo *MRI, Register Reg) {
+ if (Reg.isVirtual() &&
+ (MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)) {
+ return true;
}
- return 0;
-}
-static bool isTileRegister(MachineRegisterInfo *MRI, Register VirtReg) {
- return getTileDefNum(MRI, VirtReg) > 0;
+ if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
+ return true;
+
+ return false;
}
static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
@@ -296,7 +288,7 @@ static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
if (!MO.isReg())
return false;
- return getTileDefNum(MRI, MO.getReg()) > 0;
+ return isTileRegister(MRI, MO.getReg());
}
static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
@@ -636,19 +628,7 @@ bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
else if (dominates(MBB, LastShapeMI, ColMI))
LastShapeMI = ColMI;
}
- unsigned TileDefNum = getTileDefNum(MRI, MI.getOperand(0).getReg());
- if (TileDefNum > 1) {
- for (unsigned I = 1; I < TileDefNum; I++) {
- MachineOperand *ColxMO = &MI.getOperand(2 + I);
- MachineInstr *ColxMI = MRI->getVRegDef(ColxMO->getReg());
- if (ColxMI->getParent() == &MBB) {
- if (!LastShapeMI)
- LastShapeMI = ColxMI;
- else if (dominates(MBB, LastShapeMI, ColxMI))
- LastShapeMI = ColxMI;
- }
- }
- }
+
// If there is a user live out of the tilecfg, spill it and reload it
// before the user.
Register TileReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp
index 11d331b..d86ae36 100644
--- a/llvm/lib/Target/X86/X86FastTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp
@@ -77,14 +77,14 @@ INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
"Fast Tile Register Configure", false, false)
-static unsigned getNumDefTiles(MachineRegisterInfo *MRI, MachineInstr &MI) {
+static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
// There is no phi instruction after register allocation.
assert(MI.isPHI() == false);
// The instruction must have 3 operands: tile def, row, col.
// It should be an AMX pseudo instruction that has a shape operand.
if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||
!MI.isPseudo())
- return 0;
+ return false;
MachineOperand &MO = MI.getOperand(0);
if (MO.isReg()) {
@@ -93,24 +93,18 @@ static unsigned getNumDefTiles(MachineRegisterInfo *MRI, MachineInstr &MI) {
// register is not rewritten yet.
if (Reg.isVirtual()) {
if (MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
- return 1;
- if (MRI->getRegClass(Reg)->getID() == X86::TILEPAIRRegClassID)
- return 2;
+ return true;
}
if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
- return 1;
- if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7)
- return 2;
+ return true;
}
- return 0;
+ return false;
}
static unsigned getTMMIndex(Register Reg) {
if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
return Reg - X86::TMM0;
- if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7)
- return (Reg - X86::TMM0_TMM1) * 2;
llvm_unreachable("Invalid Tmm Reg!");
}
@@ -120,17 +114,14 @@ bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
bool Change = false;
SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
for (MachineInstr &MI : reverse(MBB)) {
- unsigned DefNum = getNumDefTiles(MRI, MI);
- if (DefNum == 0 && MI.getOpcode() != X86::PLDTILECFGV)
+ if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
continue;
// AMX instructions that define tile register.
if (MI.getOpcode() != X86::PLDTILECFGV) {
MachineOperand &Row = MI.getOperand(1);
unsigned TMMIdx = getTMMIndex(MI.getOperand(0).getReg());
- for (unsigned I = 0; I < DefNum; I++) {
- MachineOperand &Col = MI.getOperand(2 + I);
- ShapeInfos.push_back({TMMIdx + I, ShapeT(&Row, &Col)});
- }
+ MachineOperand &Col = MI.getOperand(2);
+ ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});
} else { // PLDTILECFGV
// Rewrite the shape information to memory. Stack slot should have
// been initialized to zero in pre config.
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4393f6e..d4418c8 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -337,23 +337,8 @@ namespace {
// lowering but before ISEL.
bool isAMXSDNode(SDNode *N) const {
// Check if N is an AMX SDNode:
- // 1. check specific opcode since these carry MVT::Untyped instead of
- // x86amx_type;
- // 2. check result type;
- // 3. check operand type;
- switch (N->getOpcode()) {
- default:
- break;
- case X86::PT2RPNTLVWZ0V:
- case X86::PT2RPNTLVWZ0T1V:
- case X86::PT2RPNTLVWZ1V:
- case X86::PT2RPNTLVWZ1T1V:
- case X86::PT2RPNTLVWZ0RSV:
- case X86::PT2RPNTLVWZ0RST1V:
- case X86::PT2RPNTLVWZ1RSV:
- case X86::PT2RPNTLVWZ1RST1V:
- return true;
- }
+ // 1. check result type;
+ // 2. check operand type;
for (unsigned Idx = 0, E = N->getNumValues(); Idx != E; ++Idx) {
if (N->getValueType(Idx) == MVT::x86amx)
return true;
@@ -5398,65 +5383,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, CNode);
return;
}
- case Intrinsic::x86_t2rpntlvwz0rs:
- case Intrinsic::x86_t2rpntlvwz0rst1:
- case Intrinsic::x86_t2rpntlvwz1rs:
- case Intrinsic::x86_t2rpntlvwz1rst1:
- if (!Subtarget->hasAMXMOVRS())
- break;
- [[fallthrough]];
- case Intrinsic::x86_t2rpntlvwz0:
- case Intrinsic::x86_t2rpntlvwz0t1:
- case Intrinsic::x86_t2rpntlvwz1:
- case Intrinsic::x86_t2rpntlvwz1t1: {
- if (!Subtarget->hasAMXTRANSPOSE())
- break;
- auto *MFI =
- CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
- MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
- unsigned Opc;
- switch (IntNo) {
- default:
- llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_t2rpntlvwz0:
- Opc = X86::PT2RPNTLVWZ0;
- break;
- case Intrinsic::x86_t2rpntlvwz0t1:
- Opc = X86::PT2RPNTLVWZ0T1;
- break;
- case Intrinsic::x86_t2rpntlvwz1:
- Opc = X86::PT2RPNTLVWZ1;
- break;
- case Intrinsic::x86_t2rpntlvwz1t1:
- Opc = X86::PT2RPNTLVWZ1T1;
- break;
- case Intrinsic::x86_t2rpntlvwz0rs:
- Opc = X86::PT2RPNTLVWZ0RS;
- break;
- case Intrinsic::x86_t2rpntlvwz0rst1:
- Opc = X86::PT2RPNTLVWZ0RST1;
- break;
- case Intrinsic::x86_t2rpntlvwz1rs:
- Opc = X86::PT2RPNTLVWZ1RS;
- break;
- case Intrinsic::x86_t2rpntlvwz1rst1:
- Opc = X86::PT2RPNTLVWZ1RST1;
- break;
- }
- // FIXME: Match displacement and scale.
- unsigned TIndex = Node->getConstantOperandVal(2);
- SDValue TReg = getI8Imm(TIndex, dl);
- SDValue Base = Node->getOperand(3);
- SDValue Scale = getI8Imm(1, dl);
- SDValue Index = Node->getOperand(4);
- SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
- SDValue Segment = CurDAG->getRegister(0, MVT::i16);
- SDValue Chain = Node->getOperand(0);
- SDValue Ops[] = {TReg, Base, Scale, Index, Disp, Segment, Chain};
- MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
- ReplaceNode(Node, CNode);
- return;
- }
}
break;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 624cff2..2970cf4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22861,6 +22861,13 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
if (!OpVT.isScalarInteger() || OpSize < 128)
return SDValue();
+ // Don't do this if we're not supposed to use the FPU.
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (Subtarget.useSoftFloat() || NoImplicitFloatOps)
+ return SDValue();
+
// Ignore a comparison with zero because that gets special treatment in
// EmitTest(). But make an exception for the special case of a pair of
// logically-combined vector-sized operands compared to zero. This pattern may
@@ -22883,13 +22890,9 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
// Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
// Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
// Otherwise use PCMPEQ (plus AND) and mask testing.
- bool NoImplicitFloatOps =
- DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
- if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
- ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX()) ||
- (OpSize == 512 && Subtarget.useAVX512Regs()))) {
+ if ((OpSize == 128 && Subtarget.hasSSE2()) ||
+ (OpSize == 256 && Subtarget.hasAVX()) ||
+ (OpSize == 512 && Subtarget.useAVX512Regs())) {
bool HasPT = Subtarget.hasSSE41();
// PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened
@@ -27946,67 +27949,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
}
- case Intrinsic::x86_t2rpntlvwz0rs_internal:
- case Intrinsic::x86_t2rpntlvwz0rst1_internal:
- case Intrinsic::x86_t2rpntlvwz1rs_internal:
- case Intrinsic::x86_t2rpntlvwz1rst1_internal:
- case Intrinsic::x86_t2rpntlvwz0_internal:
- case Intrinsic::x86_t2rpntlvwz0t1_internal:
- case Intrinsic::x86_t2rpntlvwz1_internal:
- case Intrinsic::x86_t2rpntlvwz1t1_internal: {
- auto *X86MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
- X86MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
- unsigned IntNo = Op.getConstantOperandVal(1);
- unsigned Opc = 0;
- switch (IntNo) {
- default:
- llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_t2rpntlvwz0_internal:
- Opc = X86::PT2RPNTLVWZ0V;
- break;
- case Intrinsic::x86_t2rpntlvwz0t1_internal:
- Opc = X86::PT2RPNTLVWZ0T1V;
- break;
- case Intrinsic::x86_t2rpntlvwz1_internal:
- Opc = X86::PT2RPNTLVWZ1V;
- break;
- case Intrinsic::x86_t2rpntlvwz1t1_internal:
- Opc = X86::PT2RPNTLVWZ1T1V;
- break;
- case Intrinsic::x86_t2rpntlvwz0rs_internal:
- Opc = X86::PT2RPNTLVWZ0RSV;
- break;
- case Intrinsic::x86_t2rpntlvwz0rst1_internal:
- Opc = X86::PT2RPNTLVWZ0RST1V;
- break;
- case Intrinsic::x86_t2rpntlvwz1rs_internal:
- Opc = X86::PT2RPNTLVWZ1RSV;
- break;
- case Intrinsic::x86_t2rpntlvwz1rst1_internal:
- Opc = X86::PT2RPNTLVWZ1RST1V;
- break;
- }
-
- SDLoc DL(Op);
- SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other);
-
- SDValue Ops[] = {Op.getOperand(2), // Row
- Op.getOperand(3), // Col0
- Op.getOperand(4), // Col1
- Op.getOperand(5), // Base
- DAG.getTargetConstant(1, DL, MVT::i8), // Scale
- Op.getOperand(6), // Index
- DAG.getTargetConstant(0, DL, MVT::i32), // Disp
- DAG.getRegister(0, MVT::i16), // Segment
- Op.getOperand(0)}; // Chain
-
- MachineSDNode *Res = DAG.getMachineNode(Opc, DL, VTs, Ops);
- SDValue Res0 = DAG.getTargetExtractSubreg(X86::sub_t0, DL, MVT::x86amx,
- SDValue(Res, 0));
- SDValue Res1 = DAG.getTargetExtractSubreg(X86::sub_t1, DL, MVT::x86amx,
- SDValue(Res, 0));
- return DAG.getMergeValues({Res0, Res1, SDValue(Res, 1)}, DL);
- }
case Intrinsic::x86_atomic_bts_rm:
case Intrinsic::x86_atomic_btc_rm:
case Intrinsic::x86_atomic_btr_rm: {
@@ -37745,10 +37687,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
assert (Imm < 8 && "Illegal tmm index");
return X86::TMM0 + Imm;
};
- auto TMMImmToTMMPair = [](unsigned Imm) {
- assert(Imm < 8 && "Illegal tmm pair index.");
- return X86::TMM0_TMM1 + Imm / 2;
- };
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instr type to insert");
@@ -38129,53 +38067,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTDPBHF8PS:
case X86::PTDPHBF8PS:
case X86::PTDPHF8PS:
- case X86::PTTDPBF16PS:
- case X86::PTTDPFP16PS:
- case X86::PTTCMMIMFP16PS:
- case X86::PTTCMMRLFP16PS:
- case X86::PTCONJTCMMIMFP16PS:
- case X86::PTMMULTF32PS:
- case X86::PTTMMULTF32PS: {
+ case X86::PTMMULTF32PS: {
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
+ // clang-format off
case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
case X86::PTDPBUUD: Opc = X86::TDPBUUD; break;
case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break;
case X86::PTDPFP16PS: Opc = X86::TDPFP16PS; break;
- case X86::PTCMMIMFP16PS:
- Opc = X86::TCMMIMFP16PS;
- break;
- case X86::PTCMMRLFP16PS:
- Opc = X86::TCMMRLFP16PS;
- break;
+ case X86::PTCMMIMFP16PS: Opc = X86::TCMMIMFP16PS; break;
+ case X86::PTCMMRLFP16PS: Opc = X86::TCMMRLFP16PS; break;
case X86::PTDPBF8PS: Opc = X86::TDPBF8PS; break;
case X86::PTDPBHF8PS: Opc = X86::TDPBHF8PS; break;
case X86::PTDPHBF8PS: Opc = X86::TDPHBF8PS; break;
case X86::PTDPHF8PS: Opc = X86::TDPHF8PS; break;
- case X86::PTTDPBF16PS:
- Opc = X86::TTDPBF16PS;
- break;
- case X86::PTTDPFP16PS:
- Opc = X86::TTDPFP16PS;
- break;
- case X86::PTTCMMIMFP16PS:
- Opc = X86::TTCMMIMFP16PS;
- break;
- case X86::PTTCMMRLFP16PS:
- Opc = X86::TTCMMRLFP16PS;
- break;
- case X86::PTCONJTCMMIMFP16PS:
- Opc = X86::TCONJTCMMIMFP16PS;
- break;
- case X86::PTMMULTF32PS:
- Opc = X86::TMMULTF32PS;
- break;
- case X86::PTTMMULTF32PS:
- Opc = X86::TTMMULTF32PS;
- break;
+ case X86::PTMMULTF32PS: Opc = X86::TMMULTF32PS; break;
+ // clang-format on
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
@@ -38246,70 +38156,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
- case X86::PT2RPNTLVWZ0:
- case X86::PT2RPNTLVWZ0T1:
- case X86::PT2RPNTLVWZ1:
- case X86::PT2RPNTLVWZ1T1:
- case X86::PT2RPNTLVWZ0RS:
- case X86::PT2RPNTLVWZ0RST1:
- case X86::PT2RPNTLVWZ1RS:
- case X86::PT2RPNTLVWZ1RST1: {
- const DebugLoc &DL = MI.getDebugLoc();
- unsigned Opc;
-#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected instruction!");
- case X86::PT2RPNTLVWZ0:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
- break;
- case X86::PT2RPNTLVWZ0T1:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
- break;
- case X86::PT2RPNTLVWZ1:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
- break;
- case X86::PT2RPNTLVWZ1T1:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
- break;
- case X86::PT2RPNTLVWZ0RS:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
- break;
- case X86::PT2RPNTLVWZ0RST1:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
- break;
- case X86::PT2RPNTLVWZ1RS:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
- break;
- case X86::PT2RPNTLVWZ1RST1:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
- break;
- }
-#undef GET_EGPR_IF_ENABLED
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
- MIB.addReg(TMMImmToTMMPair(MI.getOperand(0).getImm()), RegState::Define);
-
- MIB.add(MI.getOperand(1)); // base
- MIB.add(MI.getOperand(2)); // scale
- MIB.add(MI.getOperand(3)); // index
- MIB.add(MI.getOperand(4)); // displacement
- MIB.add(MI.getOperand(5)); // segment
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
- }
- case X86::PTTRANSPOSED:
- case X86::PTCONJTFP16: {
- const DebugLoc &DL = MI.getDebugLoc();
- unsigned Opc = MI.getOpcode() == X86::PTTRANSPOSED ? X86::TTRANSPOSED
- : X86::TCONJTFP16;
-
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
- MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define);
- MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
-
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
- }
case X86::PTCVTROWPS2BF16Hrri:
case X86::PTCVTROWPS2BF16Lrri:
case X86::PTCVTROWPS2PHHrri:
@@ -48778,10 +48624,9 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
SDValue BC0 = peekThroughBitcasts(Op0);
if (BC0.getOpcode() == X86ISD::PCMPEQ &&
ISD::isBuildVectorAllZeros(BC0.getOperand(1).getNode())) {
- SDLoc DL(EFLAGS);
CC = (CC == X86::COND_B ? X86::COND_E : X86::COND_NE);
- SDValue X = DAG.getBitcast(OpVT, BC0.getOperand(0));
- return DAG.getNode(EFLAGS.getOpcode(), DL, VT, X, X);
+ SDValue X = DAG.getBitcast(OpVT, DAG.getFreeze(BC0.getOperand(0)));
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, X, X);
}
}
}
@@ -48837,7 +48682,7 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
MVT FloatSVT = MVT::getFloatingPointVT(EltBits);
MVT FloatVT =
MVT::getVectorVT(FloatSVT, OpVT.getSizeInBits() / EltBits);
- Res = DAG.getBitcast(FloatVT, Res);
+ Res = DAG.getBitcast(FloatVT, DAG.getFreeze(Res));
return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Res, Res);
} else if (EltBits == 16) {
MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
@@ -48856,8 +48701,30 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
}
// TESTZ(X,-1) == TESTZ(X,X)
- if (ISD::isBuildVectorAllOnes(Op1.getNode()))
+ if (ISD::isBuildVectorAllOnes(Op1.getNode())) {
+ Op0 = DAG.getFreeze(Op0);
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
+ }
+
+ // Attempt to convert PTESTZ(X,SIGNMASK) -> VTESTPD/PSZ(X,X) on AVX targets.
+ if (EFLAGS.getOpcode() == X86ISD::PTEST && Subtarget.hasAVX()) {
+ KnownBits KnownOp1 = DAG.computeKnownBits(Op1);
+ assert(KnownOp1.getBitWidth() == 64 &&
+ "Illegal PTEST vector element width");
+ if (KnownOp1.isConstant()) {
+ const APInt &Mask = KnownOp1.getConstant();
+ if (Mask.isSignMask()) {
+ MVT FpVT = MVT::getVectorVT(MVT::f64, OpVT.getSizeInBits() / 64);
+ Op0 = DAG.getBitcast(FpVT, DAG.getFreeze(Op0));
+ return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Op0, Op0);
+ }
+ if (Mask.isSplat(32) && Mask.trunc(32).isSignMask()) {
+ MVT FpVT = MVT::getVectorVT(MVT::f32, OpVT.getSizeInBits() / 32);
+ Op0 = DAG.getBitcast(FpVT, DAG.getFreeze(Op0));
+ return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Op0, Op0);
+ }
+ }
+ }
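
Aside: a sketch of the APInt queries the new PTESTZ fold is built on — isSignMask() recognizes the per-f64-lane mask directly, while the isSplat(32)/trunc(32) pair recognizes a 64-bit lane made of two copies of the f32 sign mask. Standalone illustration only:

    #include "llvm/ADT/APInt.h"

    // True when M is the 64-bit sign mask (1 << 63), i.e. an f64 lane mask.
    static bool isF64LaneSignMask(const llvm::APInt &M) {
      return M.isSignMask();
    }

    // True when M is two copies of the 32-bit sign mask, i.e. an f32 lane mask.
    static bool isF32LaneSignMask(const llvm::APInt &M) {
      return M.isSplat(32) && M.trunc(32).isSignMask();
    }
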
// TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y)
// TODO: Add COND_NE handling?
@@ -53480,6 +53347,80 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Look for an RMW operation that only touches one bit of a larger-than-legal
+// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 subvalue.
+static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ using namespace SDPatternMatch;
+
+ // Only handle a normal store whose chain is a matching normal load.
+ auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
+ if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld ||
+ !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
+ Ld->getBasePtr() != St->getBasePtr() ||
+ Ld->getOffset() != St->getOffset())
+ return SDValue();
+
+ SDValue LoadVal(Ld, 0);
+ SDValue StoredVal = St->getValue();
+ EVT VT = StoredVal.getValueType();
+
+ // Only narrow larger-than-legal scalar integers.
+ if (!VT.isScalarInteger() ||
+ VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32))
+ return SDValue();
+
+ // BTR: X & ~(1 << ShAmt)
+ // BTS: X | (1 << ShAmt)
+ // BTC: X ^ (1 << ShAmt)
+ SDValue ShAmt;
+ if (!StoredVal.hasOneUse() ||
+ !(sd_match(StoredVal, m_And(m_Specific(LoadVal),
+ m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
+ sd_match(StoredVal,
+ m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
+ sd_match(StoredVal,
+ m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt))))))
+ return SDValue();
+
+ // Ensure the shift amount is in bounds.
+ KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
+ if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
+ return SDValue();
+
+ // Split the shift into an alignment shift that moves the active i32 block to
+ // the bottom bits for truncation and a modulo shift that can act on the i32.
+ EVT AmtVT = ShAmt.getValueType();
+ SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+ DAG.getSignedConstant(-32LL, DL, AmtVT));
+ SDValue ModuloAmt =
+ DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
+
+ // Compute the byte offset for the i32 block that is changed by the RMW.
+ // combineTruncate will adjust the load for us in a similar way.
+ EVT PtrVT = St->getBasePtr().getValueType();
+ SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT);
+ SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs,
+ DAG.getShiftAmountConstant(3, PtrVT, DL));
+ SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL,
+ SDNodeFlags::NoUnsignedWrap);
+
+ // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store.
+ SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
+ X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+
+ SDValue Mask =
+ DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
+ DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
+ if (StoredVal.getOpcode() == ISD::AND)
+ Mask = DAG.getNOT(DL, Mask, MVT::i32);
+
+ SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
+ return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
+ Align(), St->getMemOperand()->getFlags());
+}
+
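
Aside: the index algebra behind narrowBitOpRMW, reduced to plain C++ (little-endian, as on x86). Setting bit ShAmt of a wide in-memory integer only touches the i32 word at byte (ShAmt & ~31) / 8, at bit position ShAmt & 31. A sketch, not DAG code:

    #include <cstdint>
    #include <cstring>

    static void setBitWide(unsigned char *P, unsigned ShAmt) {
      unsigned AlignAmt = ShAmt & ~31u;     // bit index of the touched i32 block
      unsigned ModuloAmt = ShAmt & 31u;     // bit position inside that block
      uint32_t W;
      std::memcpy(&W, P + AlignAmt / 8, 4); // narrow load of the affected word
      W |= 1u << ModuloAmt;                 // the BTS case; BTR/BTC are analogous
      std::memcpy(P + AlignAmt / 8, &W, 4); // narrow store back
    }
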
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -53706,6 +53647,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
}
+ if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
+ return R;
+
// Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
// store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)
if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
@@ -54493,6 +54437,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
+ using namespace SDPatternMatch;
if (!VT.isVector() || !Subtarget.hasSSSE3())
return SDValue();
@@ -54502,42 +54447,19 @@ static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
return SDValue();
SDValue SSatVal = detectSSatPattern(In, VT);
- if (!SSatVal || SSatVal.getOpcode() != ISD::ADD)
+ if (!SSatVal)
return SDValue();
- // Ok this is a signed saturation of an ADD. See if this ADD is adding pairs
- // of multiplies from even/odd elements.
- SDValue N0 = SSatVal.getOperand(0);
- SDValue N1 = SSatVal.getOperand(1);
-
- if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
- return SDValue();
-
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
-
+ // See if this is a signed saturation of an ADD that sums pairs of multiplies
+ // of even/odd elements whose operands are zero_extend/sign_extend.
+ //
// TODO: Handle constant vectors and use knownbits/computenumsignbits?
- // Canonicalize zero_extend to LHS.
- if (N01.getOpcode() == ISD::ZERO_EXTEND)
- std::swap(N00, N01);
- if (N11.getOpcode() == ISD::ZERO_EXTEND)
- std::swap(N10, N11);
-
- // Ensure we have a zero_extend and a sign_extend.
- if (N00.getOpcode() != ISD::ZERO_EXTEND ||
- N01.getOpcode() != ISD::SIGN_EXTEND ||
- N10.getOpcode() != ISD::ZERO_EXTEND ||
- N11.getOpcode() != ISD::SIGN_EXTEND)
+ SDValue N00, N01, N10, N11;
+ if (!sd_match(SSatVal,
+ m_Add(m_Mul(m_ZExt(m_Value(N00)), m_SExt(m_Value(N01))),
+ m_Mul(m_ZExt(m_Value(N10)), m_SExt(m_Value(N11))))))
return SDValue();
- // Peek through the extends.
- N00 = N00.getOperand(0);
- N01 = N01.getOperand(0);
- N10 = N10.getOperand(0);
- N11 = N11.getOperand(0);
-
// Ensure the extend is from vXi8.
if (N00.getValueType().getVectorElementType() != MVT::i8 ||
N01.getValueType().getVectorElementType() != MVT::i8 ||
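
Aside: the shape of the sd_match rewrite above, pulled out as a standalone sketch — one declarative pattern replaces the manual opcode checks and zero_extend canonicalization this hunk deletes. matchMulAddOfExts is a hypothetical name:

    #include "llvm/CodeGen/SDPatternMatch.h"

    static bool matchMulAddOfExts(llvm::SDValue V, llvm::SDValue &A,
                                  llvm::SDValue &B, llvm::SDValue &C,
                                  llvm::SDValue &D) {
      using namespace llvm::SDPatternMatch;
      // add (mul (zext A), (sext B)), (mul (zext C), (sext D))
      return sd_match(V, m_Add(m_Mul(m_ZExt(m_Value(A)), m_SExt(m_Value(B))),
                               m_Mul(m_ZExt(m_Value(C)), m_SExt(m_Value(D)))));
    }
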
@@ -54660,8 +54582,9 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
// truncation, see if we can convert the shift into a pointer offset instead.
// Limit this to normal (non-ext) scalar integer loads.
if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL &&
- Src.hasOneUse() && Src.getOperand(0).hasOneUse() &&
- ISD::isNormalLoad(Src.getOperand(0).getNode())) {
+ Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
+ (Src.getOperand(0).hasOneUse() ||
+ !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) {
auto *Ld = cast<LoadSDNode>(Src.getOperand(0));
if (Ld->isSimple() && VT.isByteSized() &&
isPowerOf2_64(VT.getSizeInBits())) {
@@ -54669,9 +54592,11 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
// Check the shift amount is byte aligned.
// Check the truncation doesn't use any shifted in (zero) top bits.
+ // Check the shift amount doesn't depend on the original load.
if (KnownAmt.countMinTrailingZeros() >= 3 &&
KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() -
- VT.getSizeInBits())) {
+ VT.getSizeInBits()) &&
+ !Ld->isPredecessorOf(ShAmt.getNode())) {
EVT PtrVT = Ld->getBasePtr().getValueType();
SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT);
SDValue PtrByteOfs =
@@ -54682,8 +54607,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue NewLoad =
DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),
Align(), Ld->getMemOperand()->getFlags());
- DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1),
- NewLoad.getValue(1));
+ DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return NewLoad;
}
}
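
Aside: the memory identity this truncate fold exploits, in scalar form (little-endian): truncating a wide load that was shifted right by a byte-aligned, in-bounds amount is just a narrower load at a byte offset. A sketch for a hypothetical 8-bit result:

    #include <cstdint>
    #include <cstring>

    static uint8_t truncOfShiftedLoad(const unsigned char *P, unsigned ShAmt) {
      // Preconditions mirrored from the combine: ShAmt % 8 == 0 and the
      // truncated bits lie fully inside the original load.
      uint8_t V;
      std::memcpy(&V, P + ShAmt / 8, 1); // load(p + amt/8) == trunc(srl(load(p)))
      return V;
    }
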
@@ -56459,6 +56383,7 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ using namespace SDPatternMatch;
const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
const SDValue LHS = N->getOperand(0);
const SDValue RHS = N->getOperand(1);
@@ -56517,6 +56442,37 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
+ // If we're performing a bit test on a larger-than-legal type, attempt
+ // to (aligned) shift down the value to the bottom 32 bits and then
+ // perform the bit test on the i32 value.
+ // ICMP_ZERO(AND(X,SHL(1,IDX)))
+ // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31))))
+ if (isNullConstant(RHS) &&
+ OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) {
+ SDValue X, ShAmt;
+ if (sd_match(LHS, m_OneUse(m_And(m_Value(X),
+ m_Shl(m_One(), m_Value(ShAmt)))))) {
+ // Only attempt this if the shift amount is known to be in bounds.
+ KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
+ if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) {
+ EVT AmtVT = ShAmt.getValueType();
+ SDValue AlignAmt =
+ DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+ DAG.getSignedConstant(-32LL, DL, AmtVT));
+ SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+ DAG.getConstant(31, DL, AmtVT));
+ SDValue Mask = DAG.getNode(
+ ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
+ DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
+ X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt);
+ X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+ X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask);
+ return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32),
+ CC);
+ }
+ }
+ }
+
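
Aside: the compare-side twin of narrowBitOpRMW's split-shift trick. In scalar terms (the __int128 type is a Clang/GCC extension, used here purely for illustration):

    #include <cstdint>

    static bool testBitWide(unsigned __int128 X, unsigned Idx) {
      uint32_t Slice = (uint32_t)(X >> (Idx & ~31u)); // TRUNC(SRL(X, IDX & -32))
      return (Slice & (1u << (Idx & 31u))) != 0;      // AND(SHL(1, IDX & 31))
    }
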
// cmpeq(trunc(x),C) --> cmpeq(x,C)
// cmpne(trunc(x),C) --> cmpne(x,C)
// iff x upper bits are zero.
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index 69a5115..522782a 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -338,188 +338,6 @@ let Predicates = [HasAMXFP8, In64BitMode] in {
}
}
-let Predicates = [HasAMXTILE, In64BitMode], isPseudo = true, SchedRW = [WriteSystem] in {
- let mayStore = 1 in
- def PTILEPAIRSTORE : PseudoI<(outs), (ins opaquemem:$src1, TILEPair:$src2), []>;
- let mayLoad = 1 in
- def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst), (ins opaquemem:$src), []>;
-}
-
-multiclass T2RPNTLVW_Base<bits<8> op1, bits<8> op2, string rs, string suffix> {
- def Z0#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
- "t2rpntlvwz0" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PS;
- def Z0#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
- "t2rpntlvwz0" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PS;
- def Z1#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
- "t2rpntlvwz1" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PD;
- def Z1#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
- "t2rpntlvwz1" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PD;
-}
-
-let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
- defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX;
-
-let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
- defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8;
-
-let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
- defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX;
-
-let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
- defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">, T_MAP5, EVEX, NoCD8;
-
-let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
- let SchedRW = [WriteSystem] in {
- def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
- "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
- let isPseudo = true in {
- def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
- (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
- []>;
- def PT2RPNTLVWZ0T1V : PseudoI<(outs TILEPair:$dst),
- (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
- []>;
- def PT2RPNTLVWZ1V : PseudoI<(outs TILEPair:$dst),
- (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
- []>;
- def PT2RPNTLVWZ1T1V : PseudoI<(outs TILEPair:$dst),
- (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
- []>;
- }
-
- def PTTRANSPOSEDV : PseudoI<(outs TILE:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src),
- [(set TILE: $dst,
- (int_x86_ttransposed_internal GR16:$src1, GR16:$src2,
- TILE:$src))]>;
-
- let usesCustomInserter = 1 in {
- def PT2RPNTLVWZ0 : PseudoI<(outs), (ins u8imm:$dst,
- sibmem:$src1), []>;
- def PT2RPNTLVWZ0T1 : PseudoI<(outs), (ins u8imm:$dst,
- sibmem:$src1), []>;
- def PT2RPNTLVWZ1 : PseudoI<(outs), (ins u8imm:$dst,
- sibmem:$src1), []>;
- def PT2RPNTLVWZ1T1 : PseudoI<(outs), (ins u8imm:$dst,
- sibmem:$src1), []>;
- def PTTRANSPOSED : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
- [(int_x86_ttransposed timm:$dst, timm:$src)]>;
- }
- }
-} // HasAMXTILE, HasAMXTRANSPOSE
-
-let Predicates = [HasAMXBF16, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
- let Constraints = "$src1 = $dst" in
- def TTDPBF16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
- (ins TILE:$src1, TILE:$src2, TILE:$src3),
- "ttdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>, VEX, VVVV, T8,XS;
- let Constraints = "$src4 = $dst" in
- def PTTDPBF16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
- GR16:$src2, GR16:$src3, TILE:$src4,
- TILE:$src5, TILE:$src6),
- [(set TILE: $dst,
- (int_x86_ttdpbf16ps_internal GR16:$src1, GR16:$src2,
- GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
- let usesCustomInserter = 1 in
- def PTTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
- [(int_x86_ttdpbf16ps timm:$src1, timm:$src2, timm:$src3)]>;
-}
-
-let Predicates = [HasAMXFP16, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
- let Constraints = "$src1 = $dst" in
- def TTDPFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
- (ins TILE:$src1, TILE:$src2, TILE:$src3),
- "ttdpfp16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>, VEX, VVVV, T8,XD;
- let Constraints = "$src4 = $dst" in
- def PTTDPFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
- GR16:$src2, GR16:$src3, TILE:$src4,
- TILE:$src5, TILE:$src6),
- [(set TILE: $dst,
- (int_x86_ttdpfp16ps_internal GR16:$src1, GR16:$src2,
- GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
- let usesCustomInserter = 1 in
- def PTTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
- [(int_x86_ttdpfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
-}
-
-let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
- let Constraints = "$src1 = $dst" in {
- def TTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
- (ins TILE:$src1, TILE:$src2, TILE:$src3),
- "ttcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
- []>, VEX, VVVV, T8,XD;
- def TTCMMRLFP16PS: I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
- (ins TILE:$src1, TILE:$src2, TILE:$src3),
- "ttcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
- []>, VEX, VVVV, T8,XS;
- def TCONJTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
- (ins TILE:$src1, TILE:$src2, TILE:$src3),
- "tconjtcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
- []>, VEX, VVVV, WIG, T8,PS;
- }
- def TCONJTFP16 : I<0x6b, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
- "tconjtfp16\t{$src, $dst|$dst, $src}", []>, VEX, T8,PD;
-
- let Constraints = "$src4 = $dst" in {
- def PTTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
- GR16:$src2, GR16:$src3, TILE:$src4,
- TILE:$src5, TILE:$src6),
- [(set TILE: $dst,
- (int_x86_ttcmmimfp16ps_internal GR16:$src1, GR16:$src2,
- GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
- def PTTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
- GR16:$src2, GR16:$src3, TILE:$src4,
- TILE:$src5, TILE:$src6),
- [(set TILE: $dst,
- (int_x86_ttcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
- GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
- def PTCONJTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
- GR16:$src2, GR16:$src3, TILE:$src4,
- TILE:$src5, TILE:$src6),
- [(set TILE: $dst,
- (int_x86_tconjtcmmimfp16ps_internal GR16:$src1, GR16:$src2,
- GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
- }
- def PTCONJTFP16V : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, TILE:$src3),
- [(set TILE: $dst, (int_x86_tconjtfp16_internal GR16:$src1, GR16:$src2, TILE:$src3))]>;
-
- let usesCustomInserter = 1 in {
- def PTTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
- [(int_x86_ttcmmimfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
- def PTTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
- [(int_x86_ttcmmrlfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
- def PTCONJTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
- [(int_x86_tconjtcmmimfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
- def PTCONJTFP16 : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
- [(int_x86_tconjtfp16 timm:$dst, timm:$src)]>;
- }
-}
-
-let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
- let isPseudo = true in {
- def PT2RPNTLVWZ0RSV : PseudoI<(outs TILEPair:$dst),
- (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
- []>;
- def PT2RPNTLVWZ0RST1V : PseudoI<(outs TILEPair:$dst),
- (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
- []>;
- def PT2RPNTLVWZ1RSV : PseudoI<(outs TILEPair:$dst),
- (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
- []>;
- def PT2RPNTLVWZ1RST1V : PseudoI<(outs TILEPair:$dst),
- (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
- []>;
- }
- let usesCustomInserter = 1 in {
- def PT2RPNTLVWZ0RS : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
- def PT2RPNTLVWZ0RST1 : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
- def PT2RPNTLVWZ1RS : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
- def PT2RPNTLVWZ1RST1 : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
- }
-} // HasAMXMOVRS, HasAMXTRANSPOSE
-
multiclass TILELOADDRS_Base<string suffix> {
def suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
"tileloaddrs\t{$src1, $dst|$dst, $src1}", []>, T8, XD;
@@ -721,29 +539,3 @@ let Predicates = [HasAMXTF32, In64BitMode] in {
}
} // SchedRW = [WriteSystem]
} // HasAMXTF32
-
-let Predicates = [HasAMXTF32, HasAMXTRANSPOSE, In64BitMode] in {
- let SchedRW = [WriteSystem] in {
- let Constraints = "$src1 = $dst" in {
- def TTMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
- (ins TILE:$src1, TILE:$src2, TILE:$src3),
- "ttmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>, VEX, VVVV, T8, PS;
- }
- let Constraints = "$src4 = $dst" in {
- def PTTMMULTF32PSV : PseudoI<(outs TILE:$dst),
- (ins GR16:$src1, GR16:$src2, GR16:$src3,
- TILE:$src4, TILE:$src5, TILE:$src6),
- [(set TILE:$dst,
- (int_x86_ttmmultf32ps_internal GR16:$src1,
- GR16:$src2, GR16:$src3, TILE:$src4,
- TILE:$src5, TILE:$src6))]>;
- }
- let usesCustomInserter = 1 in {
- def PTTMMULTF32PS : PseudoI<(outs),
- (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
- [(int_x86_ttmmultf32ps timm:$src1, timm:$src2,
- timm:$src3)]>;
- }
- } // SchedRW = [WriteSystem]
-} // HasAMXTF32, HasAMXTRANSPOSE
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 5c23f91..6b2a7a4 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4544,11 +4544,6 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
: GET_EGPR_IF_ENABLED(X86::TILESTORED);
#undef GET_EGPR_IF_ENABLED
- case 2048:
- assert(X86::TILEPAIRRegClass.hasSubClassEq(RC) &&
- "Unknown 2048-byte regclass");
- assert(STI.hasAMXTILE() && "Using 2048-bit register requires AMX-TILE");
- return Load ? X86::PTILEPAIRLOAD : X86::PTILEPAIRSTORE;
}
}
@@ -4743,8 +4738,6 @@ static bool isAMXOpcode(unsigned Opc) {
case X86::TILESTORED:
case X86::TILELOADD_EVEX:
case X86::TILESTORED_EVEX:
- case X86::PTILEPAIRLOAD:
- case X86::PTILEPAIRSTORE:
return true;
}
}
@@ -4757,8 +4750,7 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
default:
llvm_unreachable("Unexpected special opcode!");
case X86::TILESTORED:
- case X86::TILESTORED_EVEX:
- case X86::PTILEPAIRSTORE: {
+ case X86::TILESTORED_EVEX: {
// tilestored %tmm, (%sp, %idx)
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
@@ -4772,8 +4764,7 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
break;
}
case X86::TILELOADD:
- case X86::TILELOADD_EVEX:
- case X86::PTILEPAIRLOAD: {
+ case X86::TILELOADD_EVEX: {
// tileloadd (%sp, %idx), %tmm
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
diff --git a/llvm/lib/Target/X86/X86InstrOperands.td b/llvm/lib/Target/X86/X86InstrOperands.td
index 5207eca..6ba07f7 100644
--- a/llvm/lib/Target/X86/X86InstrOperands.td
+++ b/llvm/lib/Target/X86/X86InstrOperands.td
@@ -536,10 +536,3 @@ def VK8Pair : RegisterOperand<VK8PAIR, "printVKPair"> {
def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> {
let ParserMatchClass = VK16PairAsmOperand;
}
-
-let RenderMethod = "addTILEPairOperands" in
- def TILEPairAsmOperand : AsmOperandClass { let Name = "TILEPair"; }
-
-def TILEPair : RegisterOperand<TILEPAIR, "printTILEPair"> {
- let ParserMatchClass = TILEPairAsmOperand;
-}
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index c20bb05..98104a6f 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -183,7 +183,6 @@ def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
def HasAMXCOMPLEX : Predicate<"Subtarget->hasAMXCOMPLEX()">;
def HasAMXFP8 : Predicate<"Subtarget->hasAMXFP8()">;
def HasAMXMOVRS : Predicate<"Subtarget->hasAMXMOVRS()">;
-def HasAMXTRANSPOSE : Predicate<"Subtarget->hasAMXTRANSPOSE()">;
def HasAMXAVX512 : Predicate<"Subtarget->hasAMXAVX512()">;
def HasAMXTF32 : Predicate<"Subtarget->hasAMXTF32()">;
def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index 090060e..3b96e70 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -115,9 +115,9 @@ struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {
static constexpr MachineInstr *const ArgNodeSentinel = nullptr;
using GraphT = ImmutableGraph<MachineInstr *, int>;
- using Node = typename GraphT::Node;
- using Edge = typename GraphT::Edge;
- using size_type = typename GraphT::size_type;
+ using Node = GraphT::Node;
+ using Edge = GraphT::Edge;
+ using size_type = GraphT::size_type;
MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,
std::unique_ptr<Edge[]> Edges, size_type NodesSize,
size_type EdgesSize, int NumFences = 0, int NumGadgets = 0)
@@ -191,10 +191,10 @@ template <>
struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
using GraphType = MachineGadgetGraph;
using Traits = llvm::GraphTraits<GraphType *>;
- using NodeRef = typename Traits::NodeRef;
- using EdgeRef = typename Traits::EdgeRef;
- using ChildIteratorType = typename Traits::ChildIteratorType;
- using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType;
+ using NodeRef = Traits::NodeRef;
+ using EdgeRef = Traits::EdgeRef;
+ using ChildIteratorType = Traits::ChildIteratorType;
+ using ChildEdgeIteratorType = Traits::ChildEdgeIteratorType;
DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
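
Aside: the typename removals in this file assume C++20 (P0634R3), which makes typename implicit in contexts that can only name a type, such as the right-hand side of an alias-declaration. A minimal sketch:

    template <typename G> struct Wrapper {
      using Node = G::Node; // OK since C++20; C++17 required 'typename G::Node'
    };
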
@@ -227,9 +227,6 @@ struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
} // end namespace llvm
-constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel;
-constexpr int MachineGadgetGraph::GadgetEdgeSentinel;
-
char X86LoadValueInjectionLoadHardeningPass::ID = 0;
void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
@@ -335,7 +332,7 @@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
L.computePhiInfo();
GraphBuilder Builder;
- using GraphIter = typename GraphBuilder::BuilderNodeRef;
+ using GraphIter = GraphBuilder::BuilderNodeRef;
DenseMap<MachineInstr *, GraphIter> NodeMap;
int FenceCount = 0, GadgetCount = 0;
auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
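
Note: the typename removals in this hunk, and the matching ones later in this diff, rely on C++20 (P0634R3): in contexts where only a type can appear, such as alias-declarations, a dependent qualified name no longer needs the typename keyword. A minimal sketch, assuming a C++20 compiler:

    template <typename GraphT> struct Uses {
      using Node = GraphT::Node;             // C++20: 'typename' not required
      // using Node = typename GraphT::Node; // pre-C++20 spelling, still valid
    };
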
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 8ffd454..2fc5d38 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -74,22 +74,6 @@ static bool isAMXCast(Instruction *II) {
match(II, m_Intrinsic<Intrinsic::x86_cast_tile_to_vector>(m_Value()));
}
-// Some instructions may return more than one tiles.
-// e.g: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal
-static unsigned getNumDefTiles(IntrinsicInst *II) {
- Type *Ty = II->getType();
- if (Ty->isX86_AMXTy())
- return 1;
-
- unsigned Num = 0;
- for (unsigned i = 0; i < Ty->getNumContainedTypes(); i++) {
- Type *STy = Ty->getContainedType(i);
- if (STy->isX86_AMXTy())
- Num++;
- }
- return Num;
-}
-
static bool isAMXIntrinsic(Value *I) {
auto *II = dyn_cast<IntrinsicInst>(I);
if (!II)
@@ -98,7 +82,7 @@ static bool isAMXIntrinsic(Value *I) {
return false;
// Check whether the return type or a parameter is x86_amx. If either is,
// the intrinsic must be an x86 AMX intrinsic.
- if (getNumDefTiles(II) > 0)
+ if (II->getType()->isX86_AMXTy())
return true;
for (Value *V : II->args()) {
if (V->getType()->isX86_AMXTy())
@@ -137,27 +121,7 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) {
llvm_unreachable("No terminator in the entry block!");
}
-class ShapeCalculator {
-private:
- const TargetMachine *TM = nullptr;
-
- // In AMX intrinsics we let Shape = {Row, Col}, but the
- // RealCol = Col / ElementSize. We may use the RealCol
- // as a new Row for other new created AMX intrinsics.
- std::map<Value *, Value *> Col2Row, Row2Col;
-
-public:
- ShapeCalculator(const TargetMachine *TargetM) : TM(TargetM) {}
- std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo);
- std::pair<Value *, Value *> getShape(PHINode *Phi);
- Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity);
- Value *getColFromRow(Instruction *II, Value *V, unsigned Granularity);
-};
-
-Value *ShapeCalculator::getRowFromCol(Instruction *II, Value *V,
- unsigned Granularity) {
- if (auto It = Col2Row.find(V); It != Col2Row.end())
- return It->second;
+static Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity) {
IRBuilder<> Builder(II);
Value *RealRow = nullptr;
if (isa<ConstantInt>(V))
@@ -186,47 +150,16 @@ Value *ShapeCalculator::getRowFromCol(Instruction *II, Value *V,
getFirstNonAllocaInTheEntryBlock(*II->getFunction()));
RealRow = NewBuilder.CreateUDiv(V, NewBuilder.getInt16(Granularity));
}
- Col2Row[V] = RealRow;
return RealRow;
}
-Value *ShapeCalculator::getColFromRow(Instruction *II, Value *V,
- unsigned Granularity) {
- if (auto It = Row2Col.find(V); It != Row2Col.end())
- return It->second;
- IRBuilder<> Builder(II);
- Value *RealCol = nullptr;
- if (isa<ConstantInt>(V))
- RealCol =
- Builder.getInt16((cast<ConstantInt>(V)->getSExtValue()) * Granularity);
- else if (isa<Instruction>(V)) {
- Builder.SetInsertPoint(cast<Instruction>(V));
- RealCol = Builder.CreateNUWMul(V, Builder.getInt16(Granularity));
- cast<Instruction>(RealCol)->moveAfter(cast<Instruction>(V));
- } else {
- // When it is not a const value and it is a function argument, we create
- // Row at the entry bb.
- IRBuilder<> NewBuilder(
- getFirstNonAllocaInTheEntryBlock(*II->getFunction()));
- RealCol = NewBuilder.CreateNUWMul(V, NewBuilder.getInt16(Granularity));
- }
- Row2Col[V] = RealCol;
- return RealCol;
-}
-
// TODO: Refine the row and col-in-bytes of tile to row and col of matrix.
-std::pair<Value *, Value *> ShapeCalculator::getShape(IntrinsicInst *II,
- unsigned OpNo) {
- (void)TM;
+std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo) {
IRBuilder<> Builder(II);
Value *Row = nullptr, *Col = nullptr;
switch (II->getIntrinsicID()) {
default:
llvm_unreachable("Expect amx intrinsics");
- case Intrinsic::x86_t2rpntlvwz0_internal:
- case Intrinsic::x86_t2rpntlvwz0t1_internal:
- case Intrinsic::x86_t2rpntlvwz1_internal:
- case Intrinsic::x86_t2rpntlvwz1t1_internal:
case Intrinsic::x86_tileloadd64_internal:
case Intrinsic::x86_tileloaddt164_internal:
case Intrinsic::x86_tilestored64_internal:
@@ -271,13 +204,6 @@ std::pair<Value *, Value *> ShapeCalculator::getShape(IntrinsicInst *II,
}
break;
}
- case Intrinsic::x86_ttransposed_internal:
- case Intrinsic::x86_tconjtfp16_internal: {
- assert((OpNo == 2) && "Illegal Operand Number.");
- Row = getRowFromCol(II, II->getArgOperand(1), 4);
- Col = getColFromRow(II, II->getArgOperand(0), 4);
- break;
- }
case Intrinsic::x86_tcvtrowd2ps_internal:
case Intrinsic::x86_tcvtrowps2bf16h_internal:
case Intrinsic::x86_tcvtrowps2bf16l_internal:
@@ -289,34 +215,12 @@ std::pair<Value *, Value *> ShapeCalculator::getShape(IntrinsicInst *II,
Col = II->getArgOperand(1);
break;
}
- case Intrinsic::x86_ttdpbf16ps_internal:
- case Intrinsic::x86_ttdpfp16ps_internal:
- case Intrinsic::x86_ttcmmimfp16ps_internal:
- case Intrinsic::x86_ttcmmrlfp16ps_internal:
- case Intrinsic::x86_tconjtcmmimfp16ps_internal:
- case Intrinsic::x86_ttmmultf32ps_internal: {
- switch (OpNo) {
- case 3:
- Row = II->getArgOperand(0);
- Col = II->getArgOperand(1);
- break;
- case 4:
- Row = getRowFromCol(II, II->getArgOperand(2), 4);
- Col = getColFromRow(II, II->getArgOperand(0), 4);
- break;
- case 5:
- Row = getRowFromCol(II, II->getArgOperand(2), 4);
- Col = II->getArgOperand(1);
- break;
- }
- break;
- }
}
return std::make_pair(Row, Col);
}
-std::pair<Value *, Value *> ShapeCalculator::getShape(PHINode *Phi) {
+static std::pair<Value *, Value *> getShape(PHINode *Phi) {
Use &U = *(Phi->use_begin());
unsigned OpNo = U.getOperandNo();
User *V = U.getUser();
@@ -349,15 +253,14 @@ std::pair<Value *, Value *> ShapeCalculator::getShape(PHINode *Phi) {
namespace {
class X86LowerAMXType {
Function &Func;
- ShapeCalculator *SC;
// In AMX intrinsics we let Shape = {Row, Col}, but the
// RealCol = Col / ElementSize. We may use the RealCol
// as a new Row for other newly created AMX intrinsics.
- std::map<Value *, Value *> Col2Row, Row2Col;
+ std::map<Value *, Value *> Col2Row;
public:
- X86LowerAMXType(Function &F, ShapeCalculator *ShapeC) : Func(F), SC(ShapeC) {}
+ X86LowerAMXType(Function &F) : Func(F) {}
bool visit();
void combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast);
void combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST);
@@ -374,7 +277,7 @@ void X86LowerAMXType::combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast) {
Use &U = *(Bitcast->use_begin());
unsigned OpNo = U.getOperandNo();
auto *II = cast<IntrinsicInst>(U.getUser());
- std::tie(Row, Col) = SC->getShape(II, OpNo);
+ std::tie(Row, Col) = getShape(II, OpNo);
IRBuilder<> Builder(Bitcast);
// Use the maximum column as stride.
Value *Stride = Builder.getInt64(64);
@@ -454,7 +357,7 @@ bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
Builder.CreateStore(Src, AllocaAddr);
// TODO: we can pick a constant operand for the shape.
Value *Row = nullptr, *Col = nullptr;
- std::tie(Row, Col) = SC->getShape(II, OpNo);
+ std::tie(Row, Col) = getShape(II, OpNo);
std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
Value *NewInst =
Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, Args);
@@ -594,18 +497,11 @@ static Value *getAllocaPos(BasicBlock *BB) {
static Instruction *createTileStore(Instruction *TileDef, Value *Ptr) {
assert(TileDef->getType()->isX86_AMXTy() && "Not define tile!");
- auto *II = dyn_cast<IntrinsicInst>(TileDef);
- unsigned Idx = 0;
- // Extract tile from multiple tiles' def.
- if (auto *Extr = dyn_cast<ExtractValueInst>(TileDef)) {
- assert(Extr->hasIndices() && "Tile extract miss index!");
- Idx = Extr->getIndices()[0];
- II = cast<IntrinsicInst>(Extr->getOperand(0));
- }
+ auto *II = cast<IntrinsicInst>(TileDef);
assert(II && "Not tile intrinsic!");
- Value *Row = II->getOperand(Idx);
- Value *Col = II->getOperand(Idx + 1);
+ Value *Row = II->getOperand(0);
+ Value *Col = II->getOperand(1);
BasicBlock *BB = TileDef->getParent();
BasicBlock::iterator Iter = TileDef->getIterator();
@@ -624,20 +520,14 @@ static void replaceWithTileLoad(Use &U, Value *Ptr, bool IsPHI = false) {
// Get tile shape.
IntrinsicInst *II = nullptr;
- unsigned Idx = 0;
if (IsPHI) {
Value *PhiOp = cast<PHINode>(V)->getIncomingValue(0);
II = cast<IntrinsicInst>(PhiOp);
- } else if (auto *Extr = dyn_cast<ExtractValueInst>(V)) {
- // Extract tile from multiple tiles' def.
- assert(Extr->hasIndices() && "Tile extract miss index!");
- Idx = Extr->getIndices()[0];
- II = cast<IntrinsicInst>(Extr->getOperand(0));
} else {
II = cast<IntrinsicInst>(V);
}
- Value *Row = II->getOperand(Idx);
- Value *Col = II->getOperand(Idx + 1);
+ Value *Row = II->getOperand(0);
+ Value *Col = II->getOperand(1);
Instruction *UserI = cast<Instruction>(U.getUser());
IRBuilder<> Builder(UserI);
@@ -848,12 +738,10 @@ namespace {
class X86LowerAMXCast {
Function &Func;
- ShapeCalculator *SC;
std::unique_ptr<DominatorTree> DT;
public:
- X86LowerAMXCast(Function &F, ShapeCalculator *ShapeC)
- : Func(F), SC(ShapeC), DT(nullptr) {}
+ X86LowerAMXCast(Function &F) : Func(F), DT(nullptr) {}
bool combineCastStore(IntrinsicInst *Cast, StoreInst *ST);
bool combineLoadCast(IntrinsicInst *Cast, LoadInst *LD);
bool combineTilezero(IntrinsicInst *Cast);
@@ -932,7 +820,7 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi(
if (!isa<UndefValue>(IncValue) && !IncConst->isZeroValue())
return false;
Value *Row = nullptr, *Col = nullptr;
- std::tie(Row, Col) = SC->getShape(OldPN);
+ std::tie(Row, Col) = getShape(OldPN);
// TODO: If it is not constant, the Row and Col must dominate tilezero
// that we are going to create.
if (!Row || !Col || !isa<Constant>(Row) || !isa<Constant>(Col))
@@ -1063,19 +951,6 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi(
return true;
}
-static Value *getShapeFromAMXIntrinsic(Value *Inst, unsigned ShapeIdx,
- bool IsRow) {
- if (!isAMXIntrinsic(Inst))
- return nullptr;
-
- auto *II = cast<IntrinsicInst>(Inst);
- if (IsRow)
- return II->getOperand(0);
-
- assert(ShapeIdx < 2 && "Currently 2 shapes in 1 instruction at most!");
- return II->getOperand(ShapeIdx + 1);
-}
-
// %43 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %42)
// store <256 x i32> %43, <256 x i32>* %p, align 64
// -->
@@ -1090,38 +965,13 @@ bool X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) {
if (!Tile->hasOneUse())
return false;
- // We don't fetch shape from tilestore, we only get shape from tiledef,
- // so we can set the max tile shape to tilestore for special cases.
+ auto *II = cast<IntrinsicInst>(Tile);
+ // Tile is output from AMX intrinsic. The first operand of the
+ // intrinsic is row, the second operand of the intrinsic is column.
+ Value *Row = II->getOperand(0);
+ Value *Col = II->getOperand(1);
+
IRBuilder<> Builder(ST);
- Value *Row = nullptr;
- Value *Col = nullptr;
-
- if (isAMXIntrinsic(Tile)) {
- auto *II = cast<IntrinsicInst>(Tile);
- // Tile is output from AMX intrinsic. The first operand of the
- // intrinsic is row, the second operand of the intrinsic is column.
- Row = II->getOperand(0);
- Col = II->getOperand(1);
- } else {
- // Now we supported multi-tiles value in structure, so we may get tile
- // from extracting multi-tiles structure.
- // For example:
- // %6 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %1,
- // i16 %2, i16 %3, i8* %4, i64 %5)
- // %7 = extractvalue { x86_amx, x86_amx } %6, 0
- // %8 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %7)
- // store <256 x i32> %8, <256 x i32>* %0, align 1024
- //
- // TODO: Currently we only handle extractvalue case, enhance me for other
- // cases if possible.
- auto *II = cast<ExtractValueInst>(Tile);
- assert(II && "We meet unhandle source in fetching tile value!");
- unsigned ShapeIdx = II->getIndices()[0];
- Value *Tiles = II->getOperand(0);
- Row = getShapeFromAMXIntrinsic(Tiles, ShapeIdx, true);
- Col = getShapeFromAMXIntrinsic(Tiles, ShapeIdx, false);
- }
- assert(Row && Col && "Shape got failed!");
// Stride should be equal to col(measured by bytes)
Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
@@ -1146,7 +996,7 @@ bool X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) {
// shape information through def-use chain.
if (!isAMXIntrinsic(II))
return false;
- std::tie(Row, Col) = SC->getShape(II, OpNo);
+ std::tie(Row, Col) = getShape(II, OpNo);
IRBuilder<> Builder(LD);
// Stride should be equal to col(measured by bytes)
Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
@@ -1189,7 +1039,7 @@ bool X86LowerAMXCast::combineTilezero(IntrinsicInst *Cast) {
if (!isAMXIntrinsic(II))
return false;
- std::tie(Row, Col) = SC->getShape(II, OpNo);
+ std::tie(Row, Col) = getShape(II, OpNo);
IRBuilder<> Builder(Cast);
Value *NewInst =
@@ -1384,7 +1234,7 @@ bool X86LowerAMXCast::transformAMXCast(IntrinsicInst *AMXCast) {
Builder.CreateStore(Src, AllocaAddr);
// TODO: we can pick a constant operand for the shape.
Value *Row = nullptr, *Col = nullptr;
- std::tie(Row, Col) = SC->getShape(II, OpNo);
+ std::tie(Row, Col) = getShape(II, OpNo);
std::array<Value *, 4> Args = {
Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty())};
Value *NewInst =
@@ -1445,14 +1295,13 @@ bool lowerAmxType(Function &F, const TargetMachine *TM,
return false;
bool C = false;
- ShapeCalculator SC(TM);
- X86LowerAMXCast LAC(F, &SC);
+ X86LowerAMXCast LAC(F);
C |= LAC.combineAMXcast(TLI);
// There might be remaining AMXcast after combineAMXcast and they should be
// handled elegantly.
C |= LAC.transformAllAMXCast();
- X86LowerAMXType LAT(F, &SC);
+ X86LowerAMXType LAT(F);
C |= LAT.visit();
// Prepare for fast register allocation at O0.
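
Note: with the multi-tile cases gone, every AMX def carries its shape directly in operands 0 and 1 (row, then column in bytes), and a combined load uses the column count, sign-extended to i64, as its stride. A hedged sketch of the rewrite pattern that combineLoadCast above applies (the helper name is hypothetical):

    static Value *emitTileLoad(IRBuilder<> &B, Value *Row, Value *Col,
                               Value *Ptr) {
      // Stride equals the column count measured in bytes, widened to i64.
      Value *Stride = B.CreateSExt(Col, B.getInt64Ty());
      std::array<Value *, 4> Args = {Row, Col, Ptr, Stride};
      return B.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, Args);
    }
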
diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
index 2a1c499..8a1d00d 100644
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -141,15 +141,10 @@ class X86PreTileConfig : public MachineFunctionPass {
if (!MO.isReg() || !MO.getReg().isVirtual())
return false;
- unsigned Shapes = 0;
- if (MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
- Shapes = 1;
- if (MRI->getRegClass(MO.getReg())->getID() == X86::TILEPAIRRegClassID)
- Shapes = 2;
- if (!Shapes)
+ if (MRI->getRegClass(MO.getReg())->getID() != X86::TILERegClassID)
return false;
- collectShapeInfo(MI, Shapes);
+ collectShapeInfo(MI);
return true;
}
@@ -165,7 +160,7 @@ class X86PreTileConfig : public MachineFunctionPass {
}
/// Collect the shape def information for later use.
- void collectShapeInfo(MachineInstr &MI, unsigned Shapes);
+ void collectShapeInfo(MachineInstr &MI);
/// Try to hoist shapes defined below AMX instructions.
bool hoistShapesInBB(MachineBasicBlock *MBB, SmallVectorImpl<MIRef> &Shapes) {
@@ -231,7 +226,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig",
"Tile Register Pre-configure", false, false)
-void X86PreTileConfig::collectShapeInfo(MachineInstr &MI, unsigned Shapes) {
+void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) {
auto RecordShape = [&](MachineInstr *MI, MachineBasicBlock *MBB) {
MIRef MIR(MI, MBB);
auto &Refs = ShapeBBs[MBB];
@@ -240,10 +235,8 @@ void X86PreTileConfig::collectShapeInfo(MachineInstr &MI, unsigned Shapes) {
Refs.insert(I, MIR);
};
- // All shapes have same row in multi-tile operand.
- SmallVector<Register, 8> WorkList;
- for (unsigned I = 1; I < Shapes + 2; ++I)
- WorkList.push_back(MI.getOperand(I).getReg());
+ SmallVector<Register, 8> WorkList(
+ {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
while (!WorkList.empty()) {
Register R = WorkList.pop_back_val();
MachineInstr *DefMI = MRI->getVRegDef(R);
@@ -252,13 +245,6 @@ void X86PreTileConfig::collectShapeInfo(MachineInstr &MI, unsigned Shapes) {
if (DefMI->isMoveImmediate() || !DefVisited.insert(DefMI).second)
continue;
- // This happens when column = 0 in multi-tile operand.
- if (DefMI->getOpcode() == X86::COPY) {
- MachineInstr *MI = MRI->getVRegDef(DefMI->getOperand(1).getReg());
- if (MI && MI->isMoveImmediate())
- continue;
- }
-
if (DefMI->isPHI()) {
for (unsigned I = 1; I < DefMI->getNumOperands(); I += 2)
if (isLoopBackEdge(DefMBB, DefMI->getOperand(I + 1).getMBB()))
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 76979e3..72f3813 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -597,10 +597,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*AI);
}
- // Reserve low half pair registers in case they are used by RA aggressively.
- Reserved.set(X86::TMM0_TMM1);
- Reserved.set(X86::TMM2_TMM3);
-
assert(checkAllSuperRegsMarked(Reserved,
{X86::SIL, X86::DIL, X86::BPL, X86::SPL,
X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
@@ -621,7 +617,7 @@ unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
// and try to return the minimum number of registers supported by the target.
static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
(X86::K6_K7 + 1 == X86::TMMCFG) &&
- (X86::TMM6_TMM7 + 1 == X86::R16) &&
+ (X86::TMM7 + 1 == X86::R16) &&
(X86::R31WH + 1 == X86::NUM_TARGET_REGS),
"Register number may be incorrect");
@@ -694,8 +690,7 @@ bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
}
bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
- return RC->getID() == X86::TILERegClassID ||
- RC->getID() == X86::TILEPAIRRegClassID;
+ return RC->getID() == X86::TILERegClassID;
}
void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
@@ -1062,17 +1057,9 @@ static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
case X86::PTDPFP16PSV:
case X86::PTCMMIMFP16PSV:
case X86::PTCMMRLFP16PSV:
- case X86::PTTRANSPOSEDV:
- case X86::PTTDPBF16PSV:
- case X86::PTTDPFP16PSV:
- case X86::PTTCMMIMFP16PSV:
- case X86::PTTCMMRLFP16PSV:
- case X86::PTCONJTCMMIMFP16PSV:
- case X86::PTCONJTFP16V:
case X86::PTILELOADDRSV:
case X86::PTILELOADDRST1V:
case X86::PTMMULTF32PSV:
- case X86::PTTMMULTF32PSV:
case X86::PTDPBF8PSV:
case X86::PTDPBHF8PSV:
case X86::PTDPHBF8PSV:
@@ -1083,56 +1070,7 @@ static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
VRM->assignVirt2Shape(VirtReg, Shape);
return Shape;
}
- case X86::PT2RPNTLVWZ0V:
- case X86::PT2RPNTLVWZ0T1V:
- case X86::PT2RPNTLVWZ1V:
- case X86::PT2RPNTLVWZ1T1V:
- case X86::PT2RPNTLVWZ0RSV:
- case X86::PT2RPNTLVWZ0RST1V:
- case X86::PT2RPNTLVWZ1RSV:
- case X86::PT2RPNTLVWZ1RST1V: {
- MachineOperand &MO1 = MI->getOperand(1);
- MachineOperand &MO2 = MI->getOperand(2);
- MachineOperand &MO3 = MI->getOperand(3);
- ShapeT Shape({&MO1, &MO2, &MO1, &MO3}, MRI);
- VRM->assignVirt2Shape(VirtReg, Shape);
- return Shape;
- }
- }
-}
-
-static bool canHintShape(ShapeT &PhysShape, ShapeT &VirtShape) {
- unsigned PhysShapeNum = PhysShape.getShapeNum();
- unsigned VirtShapeNum = VirtShape.getShapeNum();
-
- if (PhysShapeNum < VirtShapeNum)
- return false;
-
- if (PhysShapeNum == VirtShapeNum) {
- if (PhysShapeNum == 1)
- return PhysShape == VirtShape;
-
- for (unsigned I = 0; I < PhysShapeNum; I++) {
- ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
- ShapeT VShape(VirtShape.getRow(I), VirtShape.getCol(I));
- if (VShape != PShape)
- return false;
- }
- return true;
- }
-
- // Hint subreg of mult-tile reg to single tile reg.
- if (VirtShapeNum == 1) {
- for (unsigned I = 0; I < PhysShapeNum; I++) {
- ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
- if (VirtShape == PShape)
- return true;
- }
}
-
- // Note: Currently we have no requirement for case of
- // (VirtShapeNum > 1 and PhysShapeNum > VirtShapeNum)
- return false;
}
bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
@@ -1153,7 +1091,7 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
if (!VRM)
return BaseImplRetVal;
- if (ID != X86::TILERegClassID && ID != X86::TILEPAIRRegClassID) {
+ if (ID != X86::TILERegClassID) {
if (DisableRegAllocNDDHints || !ST.hasNDD() ||
!TRI.isGeneralPurposeRegisterClass(&RC))
return BaseImplRetVal;
@@ -1204,7 +1142,7 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
return;
}
ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
- if (canHintShape(PhysShape, VirtShape))
+ if (PhysShape == VirtShape)
Hints.push_back(PhysReg);
};
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 99b7910..692e42a 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -30,8 +30,6 @@ let Namespace = "X86" in {
def sub_ymm : SubRegIndex<256>;
def sub_mask_0 : SubRegIndex<-1>;
def sub_mask_1 : SubRegIndex<-1, -1>;
- def sub_t0 : SubRegIndex<8192>;
- def sub_t1 : SubRegIndex<8192, 8192>;
}
//===----------------------------------------------------------------------===//
@@ -432,10 +430,6 @@ def TMM4: X86Reg<"tmm4", 4>;
def TMM5: X86Reg<"tmm5", 5>;
def TMM6: X86Reg<"tmm6", 6>;
def TMM7: X86Reg<"tmm7", 7>;
-// TMM register pairs
-def TPAIRS : RegisterTuples<[sub_t0, sub_t1],
- [(add TMM0, TMM2, TMM4, TMM6),
- (add TMM1, TMM3, TMM5, TMM7)]>;
}
// Floating point stack registers. These don't map one-to-one to the FP
@@ -862,9 +856,6 @@ def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
let CopyCost = -1 in // Don't allow copying of tile registers
def TILE : RegisterClass<"X86", [x86amx], 8192,
(sequence "TMM%u", 0, 7)> {let Size = 8192;}
-// Need check alignment 3rd operand size=1024*2*8
-let isAllocatable = 1 in
-def TILEPAIR : RegisterClass<"X86", [untyped], 512, (add TPAIRS)> {let Size = 16384;}
//===----------------------------------------------------------------------===//
// Register categories.
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 3d8d0a23..0b1430e 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6562,7 +6562,7 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
const Function *Callee,
- const ArrayRef<Type *> &Types) const {
+ ArrayRef<Type *> Types) const {
if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
return false;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 133b366..de5e1c2 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -296,7 +296,7 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const override;
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
- const ArrayRef<Type *> &Type) const override;
+ ArrayRef<Type *> Type) const override;
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
return ST->getMaxInlineSizeThreshold();
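
Note: ArrayRef is a non-owning pointer-plus-length view, so LLVM convention is to pass it by value; the const reference dropped here only added an indirection. An illustrative signature (the function name is hypothetical):

    // Copies the cheap view itself, not the underlying storage.
    bool typesCompatible(llvm::ArrayRef<llvm::Type *> Types);
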
diff --git a/llvm/lib/Target/X86/X86TileConfig.cpp b/llvm/lib/Target/X86/X86TileConfig.cpp
index 17a44dd..09ef8fb 100644
--- a/llvm/lib/Target/X86/X86TileConfig.cpp
+++ b/llvm/lib/Target/X86/X86TileConfig.cpp
@@ -74,63 +74,6 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_END(X86TileConfig, DEBUG_TYPE, "Tile Register Configure", false,
false)
-unsigned getAMXRegNum(MachineRegisterInfo *MRI, Register Reg) {
- if (Reg.isVirtual()) {
- unsigned RegClassID = MRI->getRegClass(Reg)->getID();
- if (RegClassID == X86::TILERegClassID)
- return 1;
- if (RegClassID == X86::TILEPAIRRegClassID)
- return 2;
- } else {
- if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
- return 1;
- if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7)
- return 2;
- }
- return 0;
-}
-
-static void collectVirtRegShapes(MachineRegisterInfo *MRI, VirtRegMap &VRM,
- Register VirtReg,
- SmallVector<ShapeT, 8> &Phys2Shapes) {
- unsigned Num = getAMXRegNum(MRI, VirtReg);
- MCRegister PhysReg = VRM.getPhys(VirtReg);
- if (!PhysReg)
- return;
-
- if (Num == 1) {
- unsigned Index = PhysReg - X86::TMM0;
- if (!Phys2Shapes[Index].isValid()) {
- ShapeT Shape = VRM.getShape(VirtReg);
- Phys2Shapes[Index] = std::move(Shape);
- return;
- }
- }
- // Split tile pair shape info to 2 single tile shape info. e.g:
- // Put TMM0_TMM1's Shape to TMM0's shape + TMM1's Shape in Phys2Shapes.
- if (Num == 2) {
- unsigned Index0 = (PhysReg - X86::TMM0_TMM1) * 2;
- unsigned Index1 = (PhysReg - X86::TMM0_TMM1) * 2 + 1;
-
- ShapeT Shape = VRM.getShape(VirtReg);
- assert(Shape.getShapeNum() == 2 && "Unexpected shape number!");
-
- if (!Phys2Shapes[Index0].isValid()) {
- ShapeT Shape0(Shape.getRow(0), Shape.getCol(0), MRI);
- Phys2Shapes[Index0] = std::move(Shape0);
- }
-
- if (!Phys2Shapes[Index1].isValid()) {
- ShapeT Shape1(Shape.getRow(1), Shape.getCol(1), MRI);
- Phys2Shapes[Index1] = std::move(Shape1);
- }
- }
-}
-
-static bool isAMXRegClass(MachineRegisterInfo *MRI, Register Reg) {
- return getAMXRegNum(MRI, Reg) > 0;
-}
-
bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
// Early exit in the common case of non-AMX code.
@@ -138,7 +81,7 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
return false;
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+ const X86RegisterInfo *TRI = ST.getRegisterInfo();
const TargetInstrInfo *TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
@@ -176,24 +119,29 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
assert(ConstMI && "Cannot find an insertion point");
unsigned AMXRegNum = TRI->getRegClass(X86::TILERegClassID)->getNumRegs();
- SmallVector<ShapeT, 8> Phys2Shapes(AMXRegNum, ShapeT());
+ SmallVector<Register, 8> Phys2Virt(AMXRegNum, 0);
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
Register VirtReg = Register::index2VirtReg(I);
if (MRI.reg_nodbg_empty(VirtReg))
continue;
- if (!isAMXRegClass(&MRI, VirtReg))
+ if (!TRI->isTileRegisterClass(MRI.getRegClass(VirtReg)))
+ continue;
+ MCRegister PhysReg = VRM.getPhys(VirtReg);
+ if (!PhysReg)
continue;
- collectVirtRegShapes(&MRI, VRM, VirtReg, Phys2Shapes);
+ unsigned Index = PhysReg - X86::TMM0;
+ if (!Phys2Virt[Index])
+ Phys2Virt[Index] = VirtReg;
}
// Fill in the shape of each tile physical register.
for (unsigned I = 0; I < AMXRegNum; ++I) {
- ShapeT Shape = Phys2Shapes[I];
- if (!Shape.isValid())
+ if (!Phys2Virt[I])
continue;
DebugLoc DL;
bool IsRow = true;
MachineInstr *NewMI = nullptr;
+ ShapeT Shape = VRM.getShape(Phys2Virt[I]);
for (auto &R : {Shape.getRow()->getReg(), Shape.getCol()->getReg()}) {
// Here is the data format for the tile config.
// 0 palette
@@ -222,14 +170,7 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
"Cannot initialize with different shapes");
continue;
}
- if (DefMI.getOperand(1).isImm()) {
- Imm = DefMI.getOperand(1).getImm();
- } else {
- assert(DefMI.getOpcode() == X86::MOV32r0 &&
- "The opcode is assumed to be MOV32r0 if the operand is not "
- "immediate.");
- Imm = 0;
- }
+ Imm = DefMI.getOperand(1).getImm();
NewMI = addFrameReference(
BuildMI(MF.front(), ++ConstMI->getIterator(), DL,
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 0849fc7..c164762 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -2192,7 +2192,6 @@ StringMap<bool> sys::getHostCPUFeatures() {
bool HasLeaf1E = MaxLevel >= 0x1e &&
!getX86CpuIDAndInfoEx(0x1e, 0x1, &EAX, &EBX, &ECX, &EDX);
Features["amx-fp8"] = HasLeaf1E && ((EAX >> 4) & 1) && HasAMXSave;
- Features["amx-transpose"] = HasLeaf1E && ((EAX >> 5) & 1) && HasAMXSave;
Features["amx-tf32"] = HasLeaf1E && ((EAX >> 6) & 1) && HasAMXSave;
Features["amx-avx512"] = HasLeaf1E && ((EAX >> 7) & 1) && HasAMXSave;
Features["amx-movrs"] = HasLeaf1E && ((EAX >> 8) & 1) && HasAMXSave;
diff --git a/llvm/lib/TargetParser/PPCTargetParser.cpp b/llvm/lib/TargetParser/PPCTargetParser.cpp
index d510445..f74d670 100644
--- a/llvm/lib/TargetParser/PPCTargetParser.cpp
+++ b/llvm/lib/TargetParser/PPCTargetParser.cpp
@@ -48,9 +48,9 @@ StringRef normalizeCPUName(StringRef CPUName) {
// accepting it. Clang has always ignored it and passed the
// generic CPU ID to the back end.
return StringSwitch<StringRef>(CPUName)
- .Cases("common", "405", "generic")
- .Cases("ppc440", "440fp", "440")
- .Cases("630", "power3", "pwr3")
+ .Cases({"common", "405"}, "generic")
+ .Cases({"ppc440", "440fp"}, "440")
+ .Cases({"630", "power3"}, "pwr3")
.Case("G3", "g3")
.Case("G4", "g4")
.Case("G4+", "g4+")
@@ -69,7 +69,7 @@ StringRef normalizeCPUName(StringRef CPUName) {
.Case("power9", "pwr9")
.Case("power10", "pwr10")
.Case("power11", "pwr11")
- .Cases("powerpc", "powerpc32", "ppc")
+ .Cases({"powerpc", "powerpc32"}, "ppc")
.Case("powerpc64", "ppc64")
.Case("powerpc64le", "ppc64le")
.Default(CPUName);
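
Note: the .Cases overload adopted here takes its keys as a braced initializer list instead of a variadic argument pack. A minimal usage sketch (the function name is hypothetical):

    #include "llvm/ADT/StringSwitch.h"

    llvm::StringRef normalize(llvm::StringRef Name) {
      return llvm::StringSwitch<llvm::StringRef>(Name)
          .Cases({"common", "405"}, "generic") // several keys, one result
          .Case("G3", "g3")
          .Default(Name);
    }
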
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index d765d9c..d735923 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -208,7 +208,7 @@ static std::string computeMipsDataLayout(const Triple &TT, StringRef ABIName) {
return Ret;
}
-static std::string computePowerDataLayout(const Triple &T) {
+static std::string computePowerDataLayout(const Triple &T, StringRef ABIName) {
bool is64Bit = T.isPPC64();
std::string Ret;
@@ -228,7 +228,8 @@ static std::string computePowerDataLayout(const Triple &T) {
// If the target ABI uses function descriptors, then the alignment of function
// pointers depends on the alignment used to emit the descriptor. Otherwise,
// function pointers are aligned to 32 bits because the instructions must be.
- if ((T.getArch() == Triple::ppc64 && !T.isPPC64ELFv2ABI())) {
+ if ((T.getArch() == Triple::ppc64 &&
+ (!T.isPPC64ELFv2ABI() && ABIName != "elfv2"))) {
Ret += "-Fi64";
} else if (T.isOSAIX()) {
Ret += is64Bit ? "-Fi64" : "-Fi32";
@@ -573,7 +574,7 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {
case Triple::ppcle:
case Triple::ppc64:
case Triple::ppc64le:
- return computePowerDataLayout(*this);
+ return computePowerDataLayout(*this, ABIName);
case Triple::r600:
case Triple::amdgcn:
return computeAMDDataLayout(*this);
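
Note: since the layout string can now depend on the requested ABI, asking for elfv2 on big-endian ppc64 suppresses the -Fi64 marker that function-descriptor ABIs require. A hedged sketch (the default ABI for this triple is an assumption):

    llvm::Triple T("powerpc64-unknown-linux-gnu"); // presumably ELFv1 by default
    std::string V1 = T.computeDataLayout("elfv1"); // expected to contain "-Fi64"
    std::string V2 = T.computeDataLayout("elfv2"); // expected to omit "-Fi64"
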
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index b13c795..37e8ad9 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -143,7 +143,7 @@ constexpr FeatureBitset FeaturesDiamondRapids =
FeatureAVXVNNIINT8 | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 |
FeatureSM4 | FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 |
FeaturePPX | FeatureNDD | FeatureNF | FeatureMOVRS | FeatureAMX_MOVRS |
- FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 | FeatureAMX_TRANSPOSE;
+ FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32;
// Intel Atom processors.
// Bonnell has feature parity with Core2 and adds MOVBE.
@@ -615,7 +615,6 @@ constexpr FeatureBitset ImpliedFeaturesAMX_FP16 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_COMPLEX = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_FP8 = FeatureAMX_TILE;
-constexpr FeatureBitset ImpliedFeaturesAMX_TRANSPOSE = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_MOVRS = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_AVX512 =
FeatureAMX_TILE | FeatureAVX10_2;
diff --git a/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp b/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp
index cda07e8..f55bc9c 100644
--- a/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp
+++ b/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp
@@ -32,7 +32,7 @@ using namespace llvm::MachO;
using namespace llvm::MachO::DylibReader;
using TripleVec = std::vector<Triple>;
-static typename TripleVec::iterator emplace(TripleVec &Container, Triple &&T) {
+static TripleVec::iterator emplace(TripleVec &Container, Triple &&T) {
auto I = partition_point(Container, [=](const Triple &CT) {
return std::forward_as_tuple(CT.getArch(), CT.getOS(),
CT.getEnvironment()) <
diff --git a/llvm/lib/TextAPI/RecordVisitor.cpp b/llvm/lib/TextAPI/RecordVisitor.cpp
index d333b33..24971a7 100644
--- a/llvm/lib/TextAPI/RecordVisitor.cpp
+++ b/llvm/lib/TextAPI/RecordVisitor.cpp
@@ -15,7 +15,7 @@
using namespace llvm;
using namespace llvm::MachO;
-RecordVisitor::~RecordVisitor() {}
+RecordVisitor::~RecordVisitor() = default;
void RecordVisitor::visitObjCInterface(const ObjCInterfaceRecord &) {}
void RecordVisitor::visitObjCCategory(const ObjCCategoryRecord &) {}
diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index e05fe28..1e549f1 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -77,7 +77,7 @@ public:
: OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),
Builder(OrigF.getContext()), TTI(TTI) {}
- virtual ~BaseCloner() {}
+ virtual ~BaseCloner() = default;
/// Create a clone for a continuation lowering.
static Function *createClone(Function &OrigF, const Twine &Suffix,
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 5048561..5ed47ae 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -3619,7 +3619,7 @@ struct AAIntraFnReachabilityFunction final
return true;
RQITy StackRQI(A, From, To, ExclusionSet, false);
- typename RQITy::Reachable Result;
+ RQITy::Reachable Result;
if (!NonConstThis->checkQueryCache(A, StackRQI, Result))
return NonConstThis->isReachableImpl(A, StackRQI,
/*IsTemporaryRQI=*/true);
@@ -10701,7 +10701,7 @@ struct AAInterFnReachabilityFunction
auto *NonConstThis = const_cast<AAInterFnReachabilityFunction *>(this);
RQITy StackRQI(A, From, To, ExclusionSet, false);
- typename RQITy::Reachable Result;
+ RQITy::Reachable Result;
if (!NonConstThis->checkQueryCache(A, StackRQI, Result))
return NonConstThis->isReachableImpl(A, StackRQI,
/*IsTemporaryRQI=*/true);
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 894d83f..d35ae47 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -1034,11 +1034,11 @@ private:
} // namespace
template <>
-struct llvm::DenseMapInfo<typename CallsiteContextGraph<
+struct llvm::DenseMapInfo<CallsiteContextGraph<
ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
: public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
template <>
-struct llvm::DenseMapInfo<typename CallsiteContextGraph<
+struct llvm::DenseMapInfo<CallsiteContextGraph<
IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
: public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
template <>
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index d7eb745..2a87a0f 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -208,7 +208,7 @@ namespace KernelInfo {
// };
#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX) \
- constexpr const unsigned MEMBER##Idx = IDX;
+ constexpr unsigned MEMBER##Idx = IDX;
KERNEL_ENVIRONMENT_IDX(Configuration, 0)
KERNEL_ENVIRONMENT_IDX(Ident, 1)
@@ -216,7 +216,7 @@ KERNEL_ENVIRONMENT_IDX(Ident, 1)
#undef KERNEL_ENVIRONMENT_IDX
#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX) \
- constexpr const unsigned MEMBER##Idx = IDX;
+ constexpr unsigned MEMBER##Idx = IDX;
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1)
@@ -258,7 +258,7 @@ KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)
GlobalVariable *
getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) {
- constexpr const int InitKernelEnvironmentArgNo = 0;
+ constexpr int InitKernelEnvironmentArgNo = 0;
return cast<GlobalVariable>(
KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)
->stripPointerCasts());
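
Note: on a variable declaration, constexpr already implies top-level const, so the extra const removed from these macros was redundant. For example:

    #include <type_traits>

    constexpr unsigned Idx = 0; // Idx's declared type becomes 'const unsigned'
    static_assert(std::is_const_v<decltype(Idx)>);
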
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3ddf182..cbaff29 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3997,6 +3997,27 @@ static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I,
return nullptr;
}
+/// Fold select(X >s 0, 0, -X) | smax(X, 0) --> abs(X)
+/// select(X <s 0, -X, 0) | smax(X, 0) --> abs(X)
+static Value *FoldOrOfSelectSmaxToAbs(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *X;
+ Value *Sel;
+ if (match(&I,
+ m_c_Or(m_Value(Sel), m_OneUse(m_SMax(m_Value(X), m_ZeroInt()))))) {
+ auto NegX = m_Neg(m_Specific(X));
+ if (match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(X),
+ m_ZeroInt()),
+ m_ZeroInt(), NegX)) ||
+ match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(X),
+ m_ZeroInt()),
+ NegX, m_ZeroInt())))
+ return Builder.CreateBinaryIntrinsic(Intrinsic::abs, X,
+ Builder.getFalse());
+ }
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -4545,6 +4566,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyAddWithRemainder(I))
return replaceInstUsesWith(I, V);
+ if (Value *Res = FoldOrOfSelectSmaxToAbs(I, Builder))
+ return replaceInstUsesWith(I, Res);
+
return nullptr;
}
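
Note: the identity behind the new fold is easy to verify outside IR: for positive X the select contributes 0 and smax contributes X; for non-positive X the select contributes -X and smax contributes 0, so the OR yields |X| either way. A standalone spot check in plain C++ (not InstCombine code):

    #include <algorithm>
    #include <cassert>
    #include <cstdlib>

    int main() {
      for (int X = -1000; X <= 1000; ++X) {
        int Sel = X > 0 ? 0 : -X; // select(X >s 0, 0, -X)
        int Max = std::max(X, 0); // smax(X, 0)
        assert((Sel | Max) == std::abs(X));
      }
    }
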
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index f5130da..9572f9d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3599,6 +3599,21 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
m_Not(m_Specific(SelCond->getTrueValue())));
if (MayNeedFreeze)
C = Builder.CreateFreeze(C);
+ if (!ProfcheckDisableMetadataFixes) {
+ Value *C2 = nullptr, *A2 = nullptr, *B2 = nullptr;
+ if (match(CondVal, m_LogicalAnd(m_Specific(C), m_Value(A2))) &&
+ SelCond) {
+ return SelectInst::Create(C, A, B, "", nullptr, SelCond);
+ } else if (match(FalseVal,
+ m_LogicalAnd(m_Not(m_Value(C2)), m_Value(B2))) &&
+ SelFVal) {
+ SelectInst *NewSI = SelectInst::Create(C, A, B, "", nullptr, SelFVal);
+ NewSI->swapProfMetadata();
+ return NewSI;
+ } else {
+ return createSelectInstWithUnknownProfile(C, A, B);
+ }
+ }
return SelectInst::Create(C, A, B);
}
@@ -3615,6 +3630,20 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
m_Not(m_Specific(SelFVal->getTrueValue())));
if (MayNeedFreeze)
C = Builder.CreateFreeze(C);
+ if (!ProfcheckDisableMetadataFixes) {
+ Value *C2 = nullptr, *A2 = nullptr, *B2 = nullptr;
+ if (match(CondVal, m_LogicalAnd(m_Not(m_Value(C2)), m_Value(A2))) &&
+ SelCond) {
+ SelectInst *NewSI = SelectInst::Create(C, B, A, "", nullptr, SelCond);
+ NewSI->swapProfMetadata();
+ return NewSI;
+ } else if (match(FalseVal, m_LogicalAnd(m_Specific(C), m_Value(B2))) &&
+ SelFVal) {
+ return SelectInst::Create(C, B, A, "", nullptr, SelFVal);
+ } else {
+ return createSelectInstWithUnknownProfile(C, B, A);
+ }
+ }
return SelectInst::Create(C, B, A);
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 7795cce..b5548d4 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -69,14 +69,6 @@ namespace llvm {
// Command line option to enable vtable value profiling. Defined in
// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
extern cl::opt<bool> EnableVTableValueProfiling;
-// TODO: Remove -debug-info-correlate in next LLVM release, in favor of
-// -profile-correlate=debug-info.
-cl::opt<bool> DebugInfoCorrelate(
- "debug-info-correlate",
- cl::desc("Use debug info to correlate profiles. (Deprecated, use "
- "-profile-correlate=debug-info)"),
- cl::init(false));
-
LLVM_ABI cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
"profile-correlate",
cl::desc("Use debug info or binary file to correlate profiles."),
@@ -1047,7 +1039,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
// in lightweight mode. We need to move the value profile pointer to the
// Counter struct to get this working.
assert(
- !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE &&
+ ProfileCorrelate == InstrProfCorrelator::NONE &&
"Value profiling is not yet supported with lightweight instrumentation");
GlobalVariable *Name = Ind->getName();
auto It = ProfileDataMap.find(Name);
@@ -1504,7 +1496,7 @@ static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
}
void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
- assert(!DebugInfoCorrelate &&
+ assert(ProfileCorrelate != InstrProfCorrelator::DEBUG_INFO &&
"Value profiling is not supported with lightweight instrumentation");
if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
return;
@@ -1584,8 +1576,7 @@ GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
// Use internal rather than private linkage so the counter variable shows up
// in the symbol table when using debug info for correlation.
- if ((DebugInfoCorrelate ||
- ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) &&
+ if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO &&
TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
Linkage = GlobalValue::InternalLinkage;
@@ -1691,8 +1682,7 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts);
PD.RegionCounters = CounterPtr;
- if (DebugInfoCorrelate ||
- ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
+ if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
LLVMContext &Ctx = M.getContext();
Function *Fn = Inc->getParent()->getParent();
if (auto *SP = Fn->getSubprogram()) {
@@ -1737,7 +1727,7 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
// When debug information is correlated to profile data, a data variable
// is not needed.
- if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
+ if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
return;
GlobalVariable *NamePtr = Inc->getName();
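
Note: with the deprecated option gone, debug-info correlation is requested solely through the remaining flag. A hedged usage sketch (the exact tool invocation is an assumption):

    // opt -passes=instrprof -profile-correlate=debug-info in.ll -o out.bc
    // replaces the removed '-debug-info-correlate' spelling.
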
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index 2f256df..b72d41a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -127,15 +127,19 @@ static uint64_t computeStackId(const memprof::Frame &Frame) {
return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}
+static AllocationType getAllocType(const AllocationInfo *AllocInfo) {
+ return getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
+ AllocInfo->Info.getAllocCount(),
+ AllocInfo->Info.getTotalLifetime());
+}
+
static AllocationType addCallStack(CallStackTrie &AllocTrie,
const AllocationInfo *AllocInfo,
uint64_t FullStackId) {
SmallVector<uint64_t> StackIds;
for (const auto &StackFrame : AllocInfo->CallStack)
StackIds.push_back(computeStackId(StackFrame));
- auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
- AllocInfo->Info.getAllocCount(),
- AllocInfo->Info.getTotalLifetime());
+ auto AllocType = getAllocType(AllocInfo);
std::vector<ContextTotalSize> ContextSizeInfo;
if (recordContextSizeInfoForAnalysis()) {
auto TotalSize = AllocInfo->Info.getTotalSize();
@@ -405,22 +409,39 @@ handleAllocSite(Instruction &I, CallBase *CI,
const std::set<const AllocationInfo *> &AllocInfoSet,
std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
&FullStackIdToAllocMatchInfo) {
+ // TODO: Remove this once the profile creation logic deduplicates contexts
+ // that are the same other than the IsInlineFrame bool. Until then, keep the
+ // largest.
+ DenseMap<uint64_t, const AllocationInfo *> UniqueFullContextIdAllocInfo;
+ for (auto *AllocInfo : AllocInfoSet) {
+ auto FullStackId = computeFullStackId(AllocInfo->CallStack);
+ auto [It, Inserted] =
+ UniqueFullContextIdAllocInfo.insert({FullStackId, AllocInfo});
+ // If inserted entry, done.
+ if (Inserted)
+ continue;
+ // Keep the larger one, or the noncold one if they are the same size.
+ auto CurSize = It->second->Info.getTotalSize();
+ auto NewSize = AllocInfo->Info.getTotalSize();
+ if ((CurSize > NewSize) ||
+ (CurSize == NewSize &&
+ getAllocType(AllocInfo) != AllocationType::NotCold))
+ continue;
+ It->second = AllocInfo;
+ }
// We may match this instruction's location list to multiple MIB
// contexts. Add them to a Trie specialized for trimming the contexts to
// the minimal needed to disambiguate contexts with unique behavior.
CallStackTrie AllocTrie(&ORE, MaxColdSize);
uint64_t TotalSize = 0;
uint64_t TotalColdSize = 0;
- for (auto *AllocInfo : AllocInfoSet) {
+ for (auto &[FullStackId, AllocInfo] : UniqueFullContextIdAllocInfo) {
// Check the full inlined call stack against this one.
// If we found and thus matched all frames on the call, include
// this MIB.
if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
InlinedCallStack)) {
NumOfMemProfMatchedAllocContexts++;
- uint64_t FullStackId = 0;
- if (ClPrintMemProfMatchInfo || recordContextSizeInfoForAnalysis())
- FullStackId = computeFullStackId(AllocInfo->CallStack);
auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
TotalSize += AllocInfo->Info.getTotalSize();
if (AllocType == AllocationType::Cold)
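
Note: a sketch of the deduplication rule introduced above, with the types simplified and names hypothetical: allocation contexts that share a full stack id collapse to one entry, keeping the larger total size and, on a size tie, preferring the not-cold context:

    #include <cstdint>

    struct Ctx { uint64_t TotalSize; bool IsCold; };
    static bool shouldReplace(const Ctx &Cur, const Ctx &New) {
      if (New.TotalSize != Cur.TotalSize)
        return New.TotalSize > Cur.TotalSize; // keep the larger context
      return !New.IsCold;                     // tie: a not-cold context wins
    }
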
diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
index 80e77e09..a2fad02 100644
--- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -161,7 +161,7 @@ template <char NsanTypeId>
class ShadowTypeConfigImpl : public ShadowTypeConfig {
public:
char getNsanTypeId() const override { return NsanTypeId; }
- static constexpr const char kNsanTypeId = NsanTypeId;
+ static constexpr char kNsanTypeId = NsanTypeId;
};
// `double` (`d`) shadow type.
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 71736cf..af53fa0 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -456,7 +456,7 @@ createIRLevelProfileFlagVar(Module &M,
ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
if (PGOInstrumentLoopEntries)
ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
- if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
+ if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
if (PGOFunctionEntryCoverage)
ProfileVersion |=
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 78d4a57e..87eba5f 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -58,6 +58,18 @@ static cl::opt<bool>
cl::desc("Writes always set the type"), cl::Hidden,
cl::init(false));
+static cl::opt<bool> ClOutlineInstrumentation(
+ "tysan-outline-instrumentation",
+ cl::desc("Uses function calls for all TySan instrumentation, reducing "
+ "ELF size"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClVerifyOutlinedInstrumentation(
+ "tysan-verify-outlined-instrumentation",
+ cl::desc("Check types twice with both inlined instrumentation and "
+ "function calls. This verifies that they behave the same."),
+ cl::Hidden, cl::init(false));
+
STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
namespace {
@@ -105,12 +117,16 @@ private:
Regex AnonNameRegex;
Type *IntptrTy;
uint64_t PtrShift;
- IntegerType *OrdTy;
+ IntegerType *OrdTy, *U64Ty;
/// Callbacks to run-time library are computed in initializeCallbacks.
FunctionCallee TysanCheck;
FunctionCallee TysanCtorFunction;
+ FunctionCallee TysanIntrumentMemInst;
+ FunctionCallee TysanInstrumentWithShadowUpdate;
+ FunctionCallee TysanSetShadowType;
+
/// Callback to set types for globals.
Function *TysanGlobalsSetTypeFunction;
};
@@ -130,6 +146,8 @@ TypeSanitizer::TypeSanitizer(Module &M)
void TypeSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(M.getContext());
OrdTy = IRB.getInt32Ty();
+ U64Ty = IRB.getInt64Ty();
+ Type *BoolType = IRB.getInt1Ty();
AttributeList Attr;
Attr = Attr.addFnAttribute(M.getContext(), Attribute::NoUnwind);
@@ -144,6 +162,30 @@ void TypeSanitizer::initializeCallbacks(Module &M) {
TysanCtorFunction =
M.getOrInsertFunction(kTysanModuleCtorName, Attr, IRB.getVoidTy());
+
+ TysanIntrumentMemInst = M.getOrInsertFunction(
+ "__tysan_instrument_mem_inst", Attr, IRB.getVoidTy(),
+ IRB.getPtrTy(), // Pointer of data to be written to
+ IRB.getPtrTy(), // Pointer of data to write
+ U64Ty, // Size of the data in bytes
+ BoolType // Do we need to call memmove
+ );
+
+ TysanInstrumentWithShadowUpdate = M.getOrInsertFunction(
+ "__tysan_instrument_with_shadow_update", Attr, IRB.getVoidTy(),
+ IRB.getPtrTy(), // Pointer to data to be read
+ IRB.getPtrTy(), // Pointer to type descriptor
+ BoolType, // Do we need to type check this
+ U64Ty, // Size of data we access in bytes
+ OrdTy // Flags
+ );
+
+ TysanSetShadowType = M.getOrInsertFunction(
+ "__tysan_set_shadow_type", Attr, IRB.getVoidTy(),
+ IRB.getPtrTy(), // Pointer of data to be written to
+ IRB.getPtrTy(), // Pointer to the new type descriptor
+ U64Ty // Size of data we access in bytes
+ );
}
void TypeSanitizer::instrumentGlobals(Module &M) {
@@ -587,6 +629,29 @@ bool TypeSanitizer::instrumentWithShadowUpdate(
Value *TD = IRB.CreateBitCast(TDGV, IRB.getPtrTy());
+ if (ClOutlineInstrumentation) {
+ if (!ForceSetType && (!ClWritesAlwaysSetType || IsRead)) {
+ // We need to check the type here. If the type is unknown, then the read
+ // sets the type. If the type is known, then it is checked. If the type
+ // doesn't match, then we call the runtime type check (which may yet
+ // determine that the mismatch is okay).
+
+ Constant *Flags =
+ ConstantInt::get(OrdTy, (int)IsRead | (((int)IsWrite) << 1));
+
+ IRB.CreateCall(TysanInstrumentWithShadowUpdate,
+ {Ptr, TD,
+ SanitizeFunction ? IRB.getTrue() : IRB.getFalse(),
+ IRB.getInt64(AccessSize), Flags});
+ } else if (ForceSetType || IsWrite) {
+ // In the mode where writes always set the type, for a write (which does
+ // not also read), we just set the type.
+ IRB.CreateCall(TysanSetShadowType, {Ptr, TD, IRB.getInt64(AccessSize)});
+ }
+
+ return true;
+ }
+
Value *ShadowDataInt = convertToShadowDataInt(IRB, Ptr, IntptrTy, PtrShift,
ShadowBase, AppMemMask);
Type *Int8PtrPtrTy = PointerType::get(IRB.getContext(), 0);
@@ -838,37 +903,47 @@ bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase,
}
}
- if (!ShadowBase)
- ShadowBase = getShadowBase(*F);
- if (!AppMemMask)
- AppMemMask = getAppMemMask(*F);
+ if (ClOutlineInstrumentation) {
+ if (!Src)
+ Src = ConstantPointerNull::get(IRB.getPtrTy());
- Value *ShadowDataInt = IRB.CreateAdd(
- IRB.CreateShl(
- IRB.CreateAnd(IRB.CreatePtrToInt(Dest, IntptrTy), AppMemMask),
- PtrShift),
- ShadowBase);
- Value *ShadowData = IRB.CreateIntToPtr(ShadowDataInt, IRB.getPtrTy());
-
- if (!Src) {
- IRB.CreateMemSet(ShadowData, IRB.getInt8(0), IRB.CreateShl(Size, PtrShift),
- Align(1ull << PtrShift));
+ IRB.CreateCall(
+ TysanIntrumentMemInst,
+ {Dest, Src, Size, NeedsMemMove ? IRB.getTrue() : IRB.getFalse()});
return true;
- }
-
- Value *SrcShadowDataInt = IRB.CreateAdd(
- IRB.CreateShl(
- IRB.CreateAnd(IRB.CreatePtrToInt(Src, IntptrTy), AppMemMask),
- PtrShift),
- ShadowBase);
- Value *SrcShadowData = IRB.CreateIntToPtr(SrcShadowDataInt, IRB.getPtrTy());
-
- if (NeedsMemMove) {
- IRB.CreateMemMove(ShadowData, Align(1ull << PtrShift), SrcShadowData,
- Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
} else {
- IRB.CreateMemCpy(ShadowData, Align(1ull << PtrShift), SrcShadowData,
- Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
+ if (!ShadowBase)
+ ShadowBase = getShadowBase(*F);
+ if (!AppMemMask)
+ AppMemMask = getAppMemMask(*F);
+
+ Value *ShadowDataInt = IRB.CreateAdd(
+ IRB.CreateShl(
+ IRB.CreateAnd(IRB.CreatePtrToInt(Dest, IntptrTy), AppMemMask),
+ PtrShift),
+ ShadowBase);
+ Value *ShadowData = IRB.CreateIntToPtr(ShadowDataInt, IRB.getPtrTy());
+
+ if (!Src) {
+ IRB.CreateMemSet(ShadowData, IRB.getInt8(0),
+ IRB.CreateShl(Size, PtrShift), Align(1ull << PtrShift));
+ return true;
+ }
+
+ Value *SrcShadowDataInt = IRB.CreateAdd(
+ IRB.CreateShl(
+ IRB.CreateAnd(IRB.CreatePtrToInt(Src, IntptrTy), AppMemMask),
+ PtrShift),
+ ShadowBase);
+ Value *SrcShadowData = IRB.CreateIntToPtr(SrcShadowDataInt, IRB.getPtrTy());
+
+ if (NeedsMemMove) {
+ IRB.CreateMemMove(ShadowData, Align(1ull << PtrShift), SrcShadowData,
+ Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
+ } else {
+ IRB.CreateMemCpy(ShadowData, Align(1ull << PtrShift), SrcShadowData,
+ Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
+ }
}
return true;
@@ -890,6 +965,16 @@ PreservedAnalyses TypeSanitizerPass::run(Module &M,
for (Function &F : M) {
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
TySan.sanitizeFunction(F, TLI);
+ if (ClVerifyOutlinedInstrumentation && ClOutlineInstrumentation) {
+ // Outlined instrumentation is a new option, and so this exists to
+ // verify there is no difference in behaviour between the options.
+ // If the outlined instrumentation triggers a verification failure
+ // when the original inlined instrumentation does not, or vice versa,
+ // then there is a discrepancy which should be investigated.
+ ClOutlineInstrumentation = false;
+ TySan.sanitizeFunction(F, TLI);
+ ClOutlineInstrumentation = true;
+ }
}
return PreservedAnalyses::none();
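
Note: the three getOrInsertFunction declarations above fix the runtime interface the outlined mode calls into. Inferred from the IR types, the compiler-rt side would plausibly look like this sketch (the real __tysan_* definitions may differ):

    #include <cstdint>

    extern "C" {
    void __tysan_instrument_mem_inst(void *Dest, void *Src, uint64_t Size,
                                     bool NeedsMemMove);
    void __tysan_instrument_with_shadow_update(void *Ptr, void *TypeDesc,
                                               bool SanitizeFunction,
                                               uint64_t AccessSize,
                                               int32_t Flags);
    void __tysan_set_shadow_type(void *Ptr, void *TypeDesc,
                                 uint64_t AccessSize);
    }
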
diff --git a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
index 89980d5..a577f51 100644
--- a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
+++ b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
@@ -122,7 +122,8 @@ DropUnnecessaryAssumesPass::run(Function &F, FunctionAnalysisManager &FAM) {
Value *Cond = Assume->getArgOperand(0);
// Don't drop type tests, which have special semantics.
- if (match(Cond, m_Intrinsic<Intrinsic::type_test>()))
+ if (match(Cond, m_Intrinsic<Intrinsic::type_test>()) ||
+ match(Cond, m_Intrinsic<Intrinsic::public_type_test>()))
continue;
SmallVector<Value *> Affected;
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index a06f832..d564e32 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -514,7 +514,7 @@ public:
class GVNSink {
public:
- GVNSink() {}
+ GVNSink() = default;
bool run(Function &F) {
LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7ebcc21..4ba4ba3 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -162,8 +162,6 @@ class IndVarSimplify {
const SCEV *ExitCount,
PHINode *IndVar, SCEVExpander &Rewriter);
- bool sinkUnusedInvariants(Loop *L);
-
public:
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -1079,85 +1077,6 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
return true;
}
-//===----------------------------------------------------------------------===//
-// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
-//===----------------------------------------------------------------------===//
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
- BasicBlock *ExitBlock = L->getExitBlock();
- if (!ExitBlock) return false;
-
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) return false;
-
- bool MadeAnyChanges = false;
- for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
-
- // Skip BB Terminator.
- if (Preheader->getTerminator() == &I)
- continue;
-
- // New instructions were inserted at the end of the preheader.
- if (isa<PHINode>(I))
- break;
-
- // Don't move instructions which might have side effects, since the side
- // effects need to complete before instructions inside the loop. Also don't
- // move instructions which might read memory, since the loop may modify
- // memory. Note that it's okay if the instruction might have undefined
- // behavior: LoopSimplify guarantees that the preheader dominates the exit
- // block.
- if (I.mayHaveSideEffects() || I.mayReadFromMemory())
- continue;
-
- // Skip debug or pseudo instructions.
- if (I.isDebugOrPseudoInst())
- continue;
-
- // Skip eh pad instructions.
- if (I.isEHPad())
- continue;
-
- // Don't sink alloca: we never want to sink static alloca's out of the
- // entry block, and correctly sinking dynamic alloca's requires
- // checks for stacksave/stackrestore intrinsics.
- // FIXME: Refactor this check somehow?
- if (isa<AllocaInst>(&I))
- continue;
-
- // Determine if there is a use in or before the loop (direct or
- // otherwise).
- bool UsedInLoop = false;
- for (Use &U : I.uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
- BasicBlock *UseBB = User->getParent();
- if (PHINode *P = dyn_cast<PHINode>(User)) {
- unsigned i =
- PHINode::getIncomingValueNumForOperand(U.getOperandNo());
- UseBB = P->getIncomingBlock(i);
- }
- if (UseBB == Preheader || L->contains(UseBB)) {
- UsedInLoop = true;
- break;
- }
- }
-
- // If there is, the def must remain in the preheader.
- if (UsedInLoop)
- continue;
-
- // Otherwise, sink it to the exit block.
- I.moveBefore(ExitBlock->getFirstInsertionPt());
- SE->forgetValue(&I);
- MadeAnyChanges = true;
- }
-
- return MadeAnyChanges;
-}
-
static void replaceExitCond(BranchInst *BI, Value *NewCond,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
auto *OldCond = BI->getCondition();
@@ -2065,10 +1984,6 @@ bool IndVarSimplify::run(Loop *L) {
// The Rewriter may not be used from this point on.
- // Loop-invariant instructions in the preheader that aren't used in the
- // loop may be sunk below the loop to reduce register pressure.
- Changed |= sinkUnusedInvariants(L);
-
// rewriteFirstIterationLoopExitValues does not rely on the computation of
// trip count and therefore can further simplify exit values in addition to
// rewriteLoopExitValues.
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index b2c526b..d13b990 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -211,9 +211,15 @@ static Instruction *cloneInstructionInExitBlock(
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU);
-static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
- ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
+static void moveInstructionBefore(
+ Instruction &I, BasicBlock::iterator Dest, ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
+ MemorySSA::InsertionPlace Point = MemorySSA::BeforeTerminator);
+
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+ Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
+ SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE);
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
function_ref<void(Instruction *)> Fn);
@@ -471,6 +477,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
: sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
MSSAU, &SafetyInfo, Flags, ORE);
+
+  // Sink preheader defs that are unused in-loop into the unique exit to
+  // reduce register pressure.
+ Changed |= sinkUnusedInvariantsFromPreheaderToExit(L, AA, &SafetyInfo, MSSAU,
+ SE, DT, Flags, ORE);
+
Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
@@ -1456,19 +1468,80 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater &MSSAU,
- ScalarEvolution *SE) {
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
+ MemorySSA::InsertionPlace Point) {
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest->getParent());
I.moveBefore(*Dest->getParent(), Dest);
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
MSSAU.getMemorySSA()->getMemoryAccess(&I)))
- MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
- MemorySSA::BeforeTerminator);
+ MSSAU.moveToPlace(OldMemAcc, Dest->getParent(), Point);
if (SE)
SE->forgetBlockAndLoopDispositions(&I);
}
+// If there's a single exit block, sink any loop-invariant values that were
+// defined in the preheader but not used inside the loop into the exit block
+// to reduce register pressure in the loop.
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+ Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
+ SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock)
+ return false;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ bool MadeAnyChanges = false;
+
+ for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
+
+ // Skip terminator.
+ if (Preheader->getTerminator() == &I)
+ continue;
+
+ // New instructions were inserted at the end of the preheader.
+ if (isa<PHINode>(I))
+ break;
+
+ // Don't move instructions which might have side effects, since the side
+ // effects need to complete before instructions inside the loop. Note that
+ // it's okay if the instruction might have undefined behavior: LoopSimplify
+ // guarantees that the preheader dominates the exit block.
+ if (I.mayHaveSideEffects())
+ continue;
+
+ if (!canSinkOrHoistInst(I, AA, DT, L, MSSAU, true, SinkFlags, nullptr))
+ continue;
+
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoopOrPreheader = false;
+ for (Use &U : I.uses()) {
+ auto *UserI = cast<Instruction>(U.getUser());
+ BasicBlock *UseBB = UserI->getParent();
+ if (auto *PN = dyn_cast<PHINode>(UserI)) {
+ UseBB = PN->getIncomingBlock(U);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoopOrPreheader = true;
+ break;
+ }
+ }
+ if (UsedInLoopOrPreheader)
+ continue;
+
+ moveInstructionBefore(I, ExitBlock->getFirstInsertionPt(), *SafetyInfo,
+ MSSAU, SE, MemorySSA::Beginning);
+ MadeAnyChanges = true;
+ }
+
+ return MadeAnyChanges;
+}
+
static Instruction *sinkThroughTriviallyReplaceablePHI(
PHINode *TPN, Instruction *I, LoopInfo *LI,
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1a279b6..001215a 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1318,6 +1318,11 @@ public:
/// the loop, in which case some special-case heuristics may be used.
bool AllFixupsOutsideLoop = true;
+ /// This records whether all of the fixups using this LSRUse are unconditional
+ /// within the loop, meaning they will be executed on every path to the loop
+ /// latch. This includes fixups before early exits.
+ bool AllFixupsUnconditional = true;
+
/// RigidFormula is set to true to guarantee that this use will be associated
/// with a single formula--the one that initially matched. Some SCEV
/// expressions cannot be expanded. This allows LSR to consider the registers
@@ -1421,16 +1426,22 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
const SCEV *Start;
- const SCEVConstant *Step;
- if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
+ const APInt *Step;
+ if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_scev_APInt(Step)))) {
// If the step size matches the base offset, we could use pre-indexed
// addressing.
- if (((AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&
- Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
- ((AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&
- SE->isLoopInvariant(Start, L)))
+ bool CanPreIndex = (AMK & TTI::AMK_PreIndexed) &&
+ F.BaseOffset.isFixed() &&
+ *Step == F.BaseOffset.getFixedValue();
+ bool CanPostIndex = (AMK & TTI::AMK_PostIndexed) &&
+ !isa<SCEVConstant>(Start) &&
+ SE->isLoopInvariant(Start, L);
+ // We can only pre or post index when the load/store is unconditional.
+ if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
LoopCost = 0;
+ }
}
+
// If the loop counts down to zero and we'll be using a hardware loop then
// the addrec will be combined into the hardware loop instruction.
if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
@@ -1783,6 +1794,9 @@ void LSRUse::print(raw_ostream &OS) const {
if (AllFixupsOutsideLoop)
OS << ", all-fixups-outside-loop";
+ if (AllFixupsUnconditional)
+ OS << ", all-fixups-unconditional";
+
if (WidestFixupType)
OS << ", widest fixup type: " << *WidestFixupType;
}
@@ -2213,6 +2227,7 @@ class LSRInstance {
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+ bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;
void CollectLoopInvariantFixupsAndFormulae();
@@ -3607,6 +3622,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = TmpPostIncLoops;
LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
// Create SCEV as Formula for calculating baseline cost
if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
@@ -3680,6 +3696,14 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
return true;
}
+/// Test whether this fixup will be executed each time the corresponding IV
+/// increment instruction is executed.
+bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
+ // If the fixup block dominates the IV increment block then there is no path
+ // through the loop to the increment that doesn't pass through the fixup.
+ return DT.dominates(LF.UserInst->getParent(), IVIncInsertPos->getParent());
+}
+
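For intuition, an illustrative C++ loop (assumed example, not from the patch)
where the memory access is conditional while the IV increment is not, so the
new AllFixupsUnconditional check keeps LSR from assigning the addressing mode
zero cost:

  // The load does not dominate the latch: on iterations where Cond[I] is
  // false the pointer IV must still advance, so the increment cannot be
  // folded into a post-indexed load.
  int sumSome(const bool *Cond, const int *A, int N) {
    int Sum = 0;
    for (int I = 0; I < N; ++I)
      if (Cond[I])
        Sum += A[I];
    return Sum;
  }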
/// Check for other uses of loop-invariant values which we're tracking. These
/// other uses will pin these values in registers, making them less profitable
/// for elimination.
@@ -3803,6 +3827,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LF.OperandValToReplace = U;
LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -4940,6 +4965,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+ LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
// Transfer the fixups of LU to LUThatHas.
for (LSRFixup &Fixup : LU.Fixups) {
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 3487e81..7e70ba2 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -245,11 +245,14 @@ raw_ostream &operator<<(raw_ostream &OS, ShapeInfo SI) {
} // namespace
-static bool isUniformShape(Value *V) {
+static bool isShapePreserving(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
return true;
+ if (isa<SelectInst>(I))
+ return true;
+
if (I->isBinaryOp())
return true;
@@ -300,6 +303,16 @@ static bool isUniformShape(Value *V) {
}
}
+/// Return an iterator over the operands of \p I that should share shape
+/// information with \p I.
+static iterator_range<Use *> getShapedOperandsForInst(Instruction *I) {
+ assert(isShapePreserving(I) &&
+ "Can't retrieve shaped operands for an instruction that does not "
+ "preserve shape information");
+ auto Ops = I->operands();
+ return isa<SelectInst>(I) ? drop_begin(Ops) : Ops;
+}
+
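As a sketch of what the helper returns (shapes assumed for illustration), for
a select on matrix values:

  // Result(4x4) = select(Cond /*i1, carries no shape*/, A(4x4), B(4x4))
  // getShapedOperandsForInst returns {A, B}; drop_begin skips Cond.

Shape information therefore flows between the result and the true/false
operands only, never the condition.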
/// Return the ShapeInfo for the result of \p I, if it can be determined.
static std::optional<ShapeInfo>
computeShapeInfoForInst(Instruction *I,
@@ -329,9 +342,8 @@ computeShapeInfoForInst(Instruction *I,
return OpShape->second;
}
- if (isUniformShape(I) || isa<SelectInst>(I)) {
- auto Ops = I->operands();
- auto ShapedOps = isa<SelectInst>(I) ? drop_begin(Ops) : Ops;
+ if (isShapePreserving(I)) {
+ auto ShapedOps = getShapedOperandsForInst(I);
// Find the first operand that has a known shape and use that.
for (auto &Op : ShapedOps) {
auto OpShape = ShapeMap.find(Op.get());
@@ -710,10 +722,9 @@ public:
case Intrinsic::matrix_column_major_store:
return true;
default:
- return isUniformShape(II);
+ break;
}
- return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V) ||
- isa<SelectInst>(V);
+ return isShapePreserving(V) || isa<StoreInst>(V) || isa<LoadInst>(V);
}
/// Propagate the shape information of instructions to their users.
@@ -800,9 +811,8 @@ public:
} else if (isa<StoreInst>(V)) {
// Nothing to do. We forward-propagated to this so we would just
// backward propagate to an instruction with an already known shape.
- } else if (isUniformShape(V) || isa<SelectInst>(V)) {
- auto Ops = cast<Instruction>(V)->operands();
- auto ShapedOps = isa<SelectInst>(V) ? drop_begin(Ops) : Ops;
+ } else if (isShapePreserving(V)) {
+ auto ShapedOps = getShapedOperandsForInst(cast<Instruction>(V));
// Propagate to all operands.
ShapeInfo Shape = ShapeMap[V];
for (Use &U : ShapedOps) {
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index e043d07..08be5df 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1534,8 +1534,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
bool SrcNotDom = false;
auto CaptureTrackingWithModRef =
- [&](Instruction *AI,
- function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+ [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
+ bool &AddressCaptured) -> bool {
SmallVector<Instruction *, 8> Worklist;
Worklist.push_back(AI);
unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
@@ -1559,8 +1559,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
if (!Visited.insert(&U).second)
continue;
UseCaptureInfo CI = DetermineUseCaptureKind(U, AI);
- if (capturesAnything(CI.UseCC))
+ if (capturesAnyProvenance(CI.UseCC))
return false;
+ AddressCaptured |= capturesAddress(CI.UseCC);
if (UI->mayReadOrWriteMemory()) {
if (UI->isLifetimeStartOrEnd()) {
@@ -1627,7 +1628,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+ bool DestAddressCaptured = false;
+ if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
+ DestAddressCaptured))
return false;
// Bailout if Dest may have any ModRef before Store.
if (!ReachabilityWorklist.empty() &&
@@ -1653,7 +1656,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+ bool SrcAddressCaptured = false;
+ if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
+ SrcAddressCaptured))
+ return false;
+
+ // If both the source and destination address are captured, the fact that they
+ // are no longer two separate allocations may be observed.
+ if (DestAddressCaptured && SrcAddressCaptured)
return false;
// We can do the transformation. First, move the SrcAlloca to the start of the
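A hedged C++ illustration of the new bailout (example assumed, not from the
patch): once both stack slots have their addresses captured, collapsing them
into a single allocation is observable:

  #include <cstring>

  // If the stack-move optimization replaced 'B' with 'A', the comparison
  // below would flip from false to true, so the transform must bail out
  // when both addresses escape.
  bool addressesObserved() {
    char A[16] = {0};
    char B[16];
    std::memcpy(B, A, sizeof A);
    return static_cast<const void *>(A) == static_cast<const void *>(B);
  }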
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 5af6c96..bb6c879 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -81,6 +81,7 @@ STATISTIC(
STATISTIC(NumInvariantConditionsInjected,
"Number of invariant conditions injected and unswitched");
+namespace llvm {
static cl::opt<bool> EnableNonTrivialUnswitch(
"enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
cl::desc("Forcibly enables non-trivial loop unswitching rather than "
@@ -131,11 +132,17 @@ static cl::opt<bool> InjectInvariantConditions(
static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
"simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
- cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
- "unswitch on them to eliminate branches that are "
- "not-taken 1/<this option> times or less."),
+ cl::Hidden,
+ cl::desc("Only try to inject loop invariant conditions and "
+ "unswitch on them to eliminate branches that are "
+ "not-taken 1/<this option> times or less."),
cl::init(16));
+static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
+ cl::Hidden, cl::init(true));
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
namespace {
struct CompareDesc {
@@ -268,13 +275,42 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L,
llvm_unreachable("Basic blocks should never be empty!");
}
-/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
+/// Copy a set of loop invariant values \p Invariants and insert them at the
/// end of \p BB and conditionally branch on the copied condition. We only
/// branch on a single value.
+/// We attempt to estimate the profile of the resulting conditional branch from
+/// \p ComputeProfFrom, which is the original conditional branch we're
+/// unswitching.
+/// When \p Direction is true, the \p Invariants form a disjunction, and the
+/// branch conditioned on it exits the loop on the "true" case. When \p
+/// Direction is false, the \p Invariants form a conjunction and the branch
+/// exits on the "false" case.
static void buildPartialUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
- const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
+ const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
+ const BranchInst &ComputeProfFrom) {
+
+ SmallVector<uint32_t> BranchWeights;
+ bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(ComputeProfFrom, BranchWeights);
+ // If Direction is true, that means we had a disjunction and that the "true"
+ // case exits. The probability of the disjunction of the subset of terms is at
+ // most as high as the original one. So, if the probability is higher than the
+ // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
+ // but if it's lower, we will use the original probability.
+ // Conversely, if Direction is false, that means we had a conjunction, and the
+ // probability of exiting is captured in the second branch weight. That
+ // probability is a disjunction (of the negation of the original terms). The
+ // same reasoning applies as above.
+ // Issue #165649: should we expect BFI to conserve, and use that to calculate
+ // the branch weights?
+ if (HasBranchWeights &&
+ static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
+ static_cast<double>(sum_of(BranchWeights)) >
+ 0.5)
+ HasBranchWeights = false;
+
IRBuilder<> IRB(&BB);
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
@@ -287,8 +323,14 @@ static void buildPartialUnswitchConditionalBranch(
Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
: IRB.CreateAnd(FrozenInvariants);
- IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
- Direction ? &NormalSucc : &UnswitchedSucc);
+ auto *BR = IRB.CreateCondBr(
+ Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+ Direction ? &NormalSucc : &UnswitchedSucc,
+ HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
+ : nullptr);
+ if (!HasBranchWeights)
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *BR, *BR->getParent()->getParent(), DEBUG_TYPE);
}
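A worked example of the clamp above, with assumed weights: for Direction ==
true and original weights (3, 1), the exit probability is 3 / (3 + 1) = 0.75 >
0.5, so the weights are dropped and the new branch is marked profile-unknown.
For weights (1, 7), the exit probability is 1 / 8 = 0.125 <= 0.5; since the
subset disjunction can only be less likely, reusing the original !prof
metadata is a safe overestimate.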
/// Copy a set of loop invariant values, and conditionally branch on them.
@@ -658,7 +700,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
" condition!");
buildPartialUnswitchConditionalBranch(
*OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
- FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
+ FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
}
// Update the dominator tree with the added edge.
@@ -2477,7 +2519,7 @@ static void unswitchNontrivialInvariants(
else {
buildPartialUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
- FreezeLoopUnswitchCond, BI, &AC, DT);
+ FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
}
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 0f3978f..5f6f66a 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -143,8 +143,8 @@ struct SubGraphTraits {
class WrappedSuccIterator
: public iterator_adaptor_base<
WrappedSuccIterator, BaseSuccIterator,
- typename std::iterator_traits<BaseSuccIterator>::iterator_category,
- NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
+ std::iterator_traits<BaseSuccIterator>::iterator_category, NodeRef,
+ std::ptrdiff_t, NodeRef *, NodeRef> {
SmallDenseSet<RegionNode *> *Nodes;
public:
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 9829d4d..11db0ec 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -674,6 +674,79 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName);
}
+/// Helper function to update the cycle or loop information after inserting a
+/// new block between a callbr instruction and one of its target blocks. Adds
+/// the new block to the innermost cycle or loop that the callbr instruction and
+/// the original target block share.
+/// \p LCI cycle or loop information to update
+/// \p CallBrBlock block containing the callbr instruction
+/// \p CallBrTarget new target block of the callbr instruction
+/// \p Succ original target block of the callbr instruction
+template <typename TI, typename T>
+static bool updateCycleLoopInfo(TI *LCI, BasicBlock *CallBrBlock,
+ BasicBlock *CallBrTarget, BasicBlock *Succ) {
+ static_assert(std::is_same_v<TI, CycleInfo> || std::is_same_v<TI, LoopInfo>,
+ "type must be CycleInfo or LoopInfo");
+ if (!LCI)
+ return false;
+
+ T *LC;
+ if constexpr (std::is_same_v<TI, CycleInfo>)
+ LC = LCI->getSmallestCommonCycle(CallBrBlock, Succ);
+ else
+ LC = LCI->getSmallestCommonLoop(CallBrBlock, Succ);
+ if (!LC)
+ return false;
+
+ if constexpr (std::is_same_v<TI, CycleInfo>)
+ LCI->addBlockToCycle(CallBrTarget, LC);
+ else
+ LC->addBasicBlockToLoop(CallBrTarget, *LCI);
+
+ return true;
+}
+
+BasicBlock *llvm::SplitCallBrEdge(BasicBlock *CallBrBlock, BasicBlock *Succ,
+ unsigned SuccIdx, DomTreeUpdater *DTU,
+ CycleInfo *CI, LoopInfo *LI,
+ bool *UpdatedLI) {
+ CallBrInst *CallBr = dyn_cast<CallBrInst>(CallBrBlock->getTerminator());
+ assert(CallBr && "expected callbr terminator");
+ assert(SuccIdx < CallBr->getNumSuccessors() &&
+ Succ == CallBr->getSuccessor(SuccIdx) && "invalid successor index");
+
+ // Create a new block between callbr and the specified successor.
+ // splitBlockBefore cannot be re-used here since it cannot split if the split
+ // point is a PHI node (because BasicBlock::splitBasicBlockBefore cannot
+ // handle that). But we don't need to rewire every part of a potential PHI
+ // node. We only care about the edge between CallBrBlock and the original
+ // successor.
+ BasicBlock *CallBrTarget =
+ BasicBlock::Create(CallBrBlock->getContext(),
+ CallBrBlock->getName() + ".target." + Succ->getName(),
+ CallBrBlock->getParent());
+ // Rewire control flow from the new target block to the original successor.
+ Succ->replacePhiUsesWith(CallBrBlock, CallBrTarget);
+ // Rewire control flow from callbr to the new target block.
+ CallBr->setSuccessor(SuccIdx, CallBrTarget);
+ // Jump from the new target block to the original successor.
+ BranchInst::Create(Succ, CallBrTarget);
+
+ bool Updated =
+ updateCycleLoopInfo<LoopInfo, Loop>(LI, CallBrBlock, CallBrTarget, Succ);
+ if (UpdatedLI)
+ *UpdatedLI = Updated;
+ updateCycleLoopInfo<CycleInfo, Cycle>(CI, CallBrBlock, CallBrTarget, Succ);
+ if (DTU) {
+ DTU->applyUpdates({{DominatorTree::Insert, CallBrBlock, CallBrTarget}});
+ if (DTU->getDomTree().dominates(CallBrBlock, Succ))
+ DTU->applyUpdates({{DominatorTree::Delete, CallBrBlock, Succ},
+ {DominatorTree::Insert, CallBrTarget, Succ}});
+ }
+
+ return CallBrTarget;
+}
+
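A hedged usage sketch of the new utility (surrounding variables assumed):

  // Split the edge from a callbr terminator to successor #1, keeping the
  // dominator tree, cycle info, and loop info up to date.
  bool UpdatedLI = false;
  BasicBlock *Tgt =
      SplitCallBrEdge(CallBrBB, Succ, /*SuccIdx=*/1, &DTU, &CI, &LI, &UpdatedLI);
  // CallBrBB's edge now targets Tgt, Tgt unconditionally branches to Succ,
  // and PHIs in Succ see Tgt as the incoming block.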
void llvm::setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
if (auto *II = dyn_cast<InvokeInst>(TI))
II->setUnwindDest(Succ);
diff --git a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
index 0046a00..287a177 100644
--- a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Utils/ControlFlowUtils.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
@@ -281,7 +282,9 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
for (auto [BB, Succ0, Succ1] : Branches) {
#ifndef NDEBUG
- assert(Incoming.insert(BB).second && "Duplicate entry for incoming block.");
+ assert(
+ (Incoming.insert(BB).second || isa<CallBrInst>(BB->getTerminator())) &&
+ "Duplicate entry for incoming block.");
#endif
if (Succ0)
Outgoing.insert(Succ0);
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 45e1d12..804af22 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -79,6 +79,53 @@
// Limitation: The pass cannot handle switch statements and indirect
// branches. Both must be lowered to plain branches first.
//
+// CallBr support: CallBr is handled as a more general branch instruction which
+// can have multiple successors. The pass redirects the edges to intermediate
+// target blocks that unconditionally branch to the original callbr target
+// blocks. This allows the control flow hub to know which of the original
+// target blocks to jump to.
+// Example input CFG:
+// Entry (callbr)
+// / \
+// v v
+// H ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// becomes:
+// Entry (callbr)
+// / \
+// v v
+// target.H target.B
+// | |
+// v v
+// H ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// Note: the output CFG is converted to a natural loop with a new header N:
+//
+// Entry (callbr)
+// / \
+// v v
+// target.H target.B
+// \ /
+// \ /
+// v v
+// N <---.
+// / \ \
+// / \ |
+// v v /
+// H --> B --'
+// |
+// v
+// Exit
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -231,6 +278,7 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
return false;
LLVM_DEBUG(dbgs() << "Processing cycle:\n" << CI.print(&C) << "\n";);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
ControlFlowHub CHub;
SetVector<BasicBlock *> Predecessors;
@@ -242,18 +290,32 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- // Exactly one of the two successors is the header.
- BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
- BasicBlock *Succ1 = Succ0 ? nullptr : Header;
- if (!Succ0)
- assert(Branch->getSuccessor(1) == Header);
- assert(Succ0 || Succ1);
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator())) {
+ // Exactly one of the two successors is the header.
+ BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
+ BasicBlock *Succ1 = Succ0 ? nullptr : Header;
+ assert(Succ0 || Branch->getSuccessor(1) == Header);
+ assert(Succ0 || Succ1);
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << printBasicBlock(P)
+ << " -> " << printBasicBlock(Succ0)
+ << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
+ << '\n');
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator())) {
+ for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = CallBr->getSuccessor(I);
+ if (Succ != Header)
+ continue;
+ BasicBlock *NewSucc = SplitCallBrEdge(P, Succ, I, &DTU, &CI, LI);
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added internal branch: "
+ << printBasicBlock(NewSucc) << " -> "
+ << printBasicBlock(Succ) << '\n');
+ }
+ } else {
+ llvm_unreachable("unsupported block terminator");
+ }
}
// Redirect external incoming edges. This includes the edges on the header.
@@ -266,17 +328,32 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+    if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator())) {
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added external branch: " << printBasicBlock(P)
+ << " -> " << printBasicBlock(Succ0)
+ << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
+ << '\n');
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator())) {
+ for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = CallBr->getSuccessor(I);
+ if (!C.contains(Succ))
+ continue;
+ BasicBlock *NewSucc = SplitCallBrEdge(P, Succ, I, &DTU, &CI, LI);
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added external branch: "
+ << printBasicBlock(NewSucc) << " -> "
+ << printBasicBlock(Succ) << '\n');
+ }
+ } else {
+ llvm_unreachable("unsupported block terminator");
+ }
}
// Redirect all the backedges through a "hub" consisting of a series
@@ -292,7 +369,6 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
SetVector<BasicBlock *> Entries;
Entries.insert(C.entry_rbegin(), C.entry_rend());
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
CHub.finalize(&DTU, GuardBlocks, "irr");
#if defined(EXPENSIVE_CHECKS)
assert(DT.verify(DominatorTree::VerificationLevel::Full));
@@ -325,8 +401,6 @@ static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT,
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
-
bool Changed = false;
for (Cycle *TopCycle : CI.toplevel_cycles()) {
for (Cycle *C : depth_first(TopCycle)) {
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 4fe736a..94dfd3a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -499,9 +499,9 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
- unsigned EstimatedLoopInvocationWeight = 0;
std::optional<unsigned> OriginalTripCount =
- llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight);
+ llvm::getLoopEstimatedTripCount(L);
+ BranchProbability OriginalLoopProb = llvm::getLoopProbability(L);
// Effectively "DCE" unrolled iterations that are beyond the max tripcount
// and will never be executed.
@@ -592,11 +592,11 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
: isEpilogProfitable(L);
if (ULO.Runtime &&
- !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
- EpilogProfitability, ULO.UnrollRemainder,
- ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
- PreserveLCSSA, ULO.SCEVExpansionBudget,
- ULO.RuntimeUnrollMultiExit, RemainderLoop)) {
+ !UnrollRuntimeLoopRemainder(
+ L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability,
+ ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
+ PreserveLCSSA, ULO.SCEVExpansionBudget, ULO.RuntimeUnrollMultiExit,
+ RemainderLoop, OriginalTripCount, OriginalLoopProb)) {
if (ULO.Force)
ULO.Runtime = false;
else {
@@ -1130,11 +1130,46 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
LI->erase(L);
// We shouldn't try to use `L` anymore.
L = nullptr;
- } else if (OriginalTripCount) {
- // Update the trip count. Note that the remainder has already logic
- // computing it in `UnrollRuntimeLoopRemainder`.
- setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count,
- EstimatedLoopInvocationWeight);
+ } else {
+ // Update metadata for the loop's branch weights and estimated trip count:
+ // - If ULO.Runtime, UnrollRuntimeLoopRemainder sets the guard branch
+ // weights, latch branch weights, and estimated trip count of the
+ // remainder loop it creates. It also sets the branch weights for the
+ // unrolled loop guard it creates. The branch weights for the unrolled
+ // loop latch are adjusted below. FIXME: Handle prologue loops.
+ // - Otherwise, if unrolled loop iteration latches become unconditional,
+ // branch weights are adjusted above. FIXME: Actually handle such
+ // unconditional latches.
+ // - Otherwise, the original loop's branch weights are correct for the
+ // unrolled loop, so do not adjust them.
+ // - In all cases, the unrolled loop's estimated trip count is set below.
+ //
+ // As an example of the last case, consider what happens if the unroll count
+ // is 4 for a loop with an estimated trip count of 10 when we do not create
+ // a remainder loop and all iterations' latches remain conditional. Each
+ // unrolled iteration's latch still has the same probability of exiting the
+ // loop as it did when in the original loop, and thus it should still have
+ // the same branch weights. Each unrolled iteration's non-zero probability
+ // of exiting already appropriately reduces the probability of reaching the
+ // remaining iterations just as it did in the original loop. Trying to also
+ // adjust the branch weights of the final unrolled iteration's latch (i.e.,
+ // the backedge for the unrolled loop as a whole) to reflect its new trip
+ // count of 3 will erroneously further reduce its block frequencies.
+ // However, in case an analysis later needs to estimate the trip count of
+ // the unrolled loop as a whole without considering the branch weights for
+ // each unrolled iteration's latch within it, we store the new trip count as
+ // separate metadata.
+ if (!OriginalLoopProb.isUnknown() && ULO.Runtime && EpilogProfitability) {
+ // Where p is always the probability of executing at least 1 more
+ // iteration, the probability for at least n more iterations is p^n.
+ setLoopProbability(L, OriginalLoopProb.pow(ULO.Count));
+ }
+ if (OriginalTripCount) {
+ unsigned NewTripCount = *OriginalTripCount / ULO.Count;
+ if (!ULO.Runtime && *OriginalTripCount % ULO.Count)
+ ++NewTripCount;
+ setLoopEstimatedTripCount(L, NewTripCount);
+ }
}
// LoopInfo should not be valid, confirm that.
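A worked example of the update above (numbers assumed): if the original latch
takes the backedge with probability p = 0.9 and ULO.Count = 4, the unrolled
latch stands for four original iterations, so its backedge probability becomes
0.9^4 ~= 0.656. An original estimated trip count of 10 becomes 10 / 4 = 2 in
the runtime-unroll case, and rounds up to 3 when no remainder loop absorbs the
leftover iterations.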
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 6312831..1e8f6cc 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -40,6 +40,7 @@
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <cmath>
using namespace llvm;
@@ -195,6 +196,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
}
}
+/// Assume, due to our position in the remainder loop or its guard, anywhere
+/// from 0 to \p N more iterations can possibly execute. Among such cases in
+/// the original loop (with loop probability \p OriginalLoopProb), what is the
+/// probability of executing at least one more iteration?
+static BranchProbability
+probOfNextInRemainder(BranchProbability OriginalLoopProb, unsigned N) {
+ // Each of these variables holds the original loop's probability that the
+ // number of iterations it will execute is some m in the specified range.
+ BranchProbability ProbOne = OriginalLoopProb; // 1 <= m
+ BranchProbability ProbTooMany = ProbOne.pow(N + 1); // N + 1 <= m
+ BranchProbability ProbNotTooMany = ProbTooMany.getCompl(); // 0 <= m <= N
+ BranchProbability ProbOneNotTooMany = ProbOne - ProbTooMany; // 1 <= m <= N
+ return ProbOneNotTooMany / ProbNotTooMany;
+}
+
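The returned ratio is the conditional probability

  P(at least 1 more | at most N more) = (p - p^(N+1)) / (1 - p^(N+1)),

where p is the original loop's continuation probability. For example, with
p = 0.5 and N = 3: (0.5 - 0.0625) / (1 - 0.0625) = 0.4375 / 0.9375 ~= 0.467.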
/// Connect the unrolling epilog code to the original loop.
/// The unrolling epilog code contains code to execute the
/// 'extra' iterations if the run-time trip count modulo the
@@ -221,7 +237,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
- unsigned Count, AssumptionCache &AC) {
+ unsigned Count, AssumptionCache &AC,
+ BranchProbability OriginalLoopProb) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -332,12 +349,19 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
PreserveLCSSA);
// Add the branch to the exit block (around the epilog loop)
MDNode *BranchWeights = nullptr;
- if (hasBranchWeightMD(*Latch->getTerminator())) {
+ if (OriginalLoopProb.isUnknown() &&
+ hasBranchWeightMD(*Latch->getTerminator())) {
// Assume equal distribution in interval [0, Count).
MDBuilder MDB(B.getContext());
BranchWeights = MDB.createBranchWeights(1, Count - 1);
}
- B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
+ BranchInst *RemainderLoopGuard =
+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
+ if (!OriginalLoopProb.isUnknown()) {
+ setBranchProbability(RemainderLoopGuard,
+ probOfNextInRemainder(OriginalLoopProb, Count - 1),
+ /*ForFirstTarget=*/true);
+ }
InsertPt->eraseFromParent();
if (DT) {
auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
@@ -357,14 +381,15 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
/// The cloned blocks should be inserted between InsertTop and InsertBot.
/// InsertTop should be new preheader, InsertBot new loop exit.
/// Returns the new cloned loop that is created.
-static Loop *
-CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
- const bool UnrollRemainder,
- BasicBlock *InsertTop,
- BasicBlock *InsertBot, BasicBlock *Preheader,
+static Loop *CloneLoopBlocks(Loop *L, Value *NewIter,
+ const bool UseEpilogRemainder,
+ const bool UnrollRemainder, BasicBlock *InsertTop,
+ BasicBlock *InsertBot, BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- DominatorTree *DT, LoopInfo *LI, unsigned Count) {
+ DominatorTree *DT, LoopInfo *LI, unsigned Count,
+ std::optional<unsigned> OriginalTripCount,
+ BranchProbability OriginalLoopProb) {
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
@@ -419,7 +444,8 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
MDNode *BranchWeights = nullptr;
- if (hasBranchWeightMD(*LatchBR)) {
+ if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) &&
+ hasBranchWeightMD(*LatchBR)) {
uint32_t ExitWeight;
uint32_t BackEdgeWeight;
if (Count >= 3) {
@@ -437,7 +463,29 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
MDBuilder MDB(Builder.getContext());
BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
}
- Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
+ BranchInst *RemainderLoopLatch =
+ Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
+ if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) {
+ // Compute the total frequency of the original loop body from the
+ // remainder iterations. Once we've reached them, the first of them
+ // always executes, so its frequency and probability are 1.
+ double FreqRemIters = 1;
+ if (Count > 2) {
+ BranchProbability ProbReaching = BranchProbability::getOne();
+ for (unsigned N = Count - 2; N >= 1; --N) {
+ ProbReaching *= probOfNextInRemainder(OriginalLoopProb, N);
+ FreqRemIters += double(ProbReaching.getNumerator()) /
+ ProbReaching.getDenominator();
+ }
+ }
+ // Solve for the loop probability that would produce that frequency.
+ // Sum(i=0..inf)(Prob^i) = 1/(1-Prob) = FreqRemIters.
+ double ProbDouble = 1 - 1 / FreqRemIters;
+ BranchProbability Prob = BranchProbability::getBranchProbability(
+ std::round(ProbDouble * BranchProbability::getDenominator()),
+ BranchProbability::getDenominator());
+ setBranchProbability(RemainderLoopLatch, Prob, /*ForFirstTarget=*/true);
+ }
NewIdx->addIncoming(Zero, InsertTop);
NewIdx->addIncoming(IdxNext, NewBB);
LatchBR->eraseFromParent();
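The solve step above inverts the geometric series: a latch probability q gives
an expected body frequency of sum over i >= 0 of q^i = 1 / (1 - q), so
matching the computed FreqRemIters yields q = 1 - 1 / FreqRemIters. For
example (numbers assumed), if the remainder iterations contribute a total
frequency of 1.6, the remainder latch probability is set to 1 - 1/1.6 = 0.375.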
@@ -460,25 +508,13 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
Loop *NewLoop = NewLoops[L];
assert(NewLoop && "L should have been cloned");
- MDNode *LoopID = NewLoop->getLoopID();
- // Only add loop metadata if the loop is not going to be completely
- // unrolled.
- if (UnrollRemainder)
- return NewLoop;
-
- std::optional<MDNode *> NewLoopID = makeFollowupLoopID(
- LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
- if (NewLoopID) {
- NewLoop->setLoopID(*NewLoopID);
-
- // Do not setLoopAlreadyUnrolled if loop attributes have been defined
- // explicitly.
- return NewLoop;
- }
+ if (OriginalTripCount && UseEpilogRemainder)
+ setLoopEstimatedTripCount(NewLoop, *OriginalTripCount % Count);
// Add unroll disable metadata to disable future unrolling for this loop.
- NewLoop->setLoopAlreadyUnrolled();
+ if (!UnrollRemainder)
+ NewLoop->setLoopAlreadyUnrolled();
return NewLoop;
}
@@ -603,7 +639,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
const TargetTransformInfo *TTI, bool PreserveLCSSA,
unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit,
- Loop **ResultLoop) {
+ Loop **ResultLoop, std::optional<unsigned> OriginalTripCount,
+ BranchProbability OriginalLoopProb) {
LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
LLVM_DEBUG(L->dump());
LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -823,12 +860,23 @@ bool llvm::UnrollRuntimeLoopRemainder(
BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
// Branch to either remainder (extra iterations) loop or unrolling loop.
MDNode *BranchWeights = nullptr;
- if (hasBranchWeightMD(*Latch->getTerminator())) {
+ if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) &&
+ hasBranchWeightMD(*Latch->getTerminator())) {
// Assume loop is nearly always entered.
MDBuilder MDB(B.getContext());
BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);
}
- B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
+ BranchInst *UnrollingLoopGuard =
+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
+ if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) {
+ // The original loop's first iteration always happens. Compute the
+ // probability of the original loop executing Count-1 iterations after that
+ // to complete the first iteration of the unrolled loop.
+ BranchProbability ProbOne = OriginalLoopProb;
+ BranchProbability ProbRest = ProbOne.pow(Count - 1);
+ setBranchProbability(UnrollingLoopGuard, ProbRest,
+ /*ForFirstTarget=*/false);
+ }
PreHeaderBR->eraseFromParent();
if (DT) {
if (UseEpilogRemainder)
@@ -855,9 +903,10 @@ bool llvm::UnrollRuntimeLoopRemainder(
// iterations. This function adds the appropriate CFG connections.
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
- Loop *remainderLoop = CloneLoopBlocks(
- L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
- NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);
+ Loop *remainderLoop =
+ CloneLoopBlocks(L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop,
+ InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT,
+ LI, Count, OriginalTripCount, OriginalLoopProb);
// Insert the cloned blocks into the function.
F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
@@ -956,7 +1005,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Connect the epilog code to the original loop and update the
// PHI functions.
ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
- NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC);
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC,
+ OriginalLoopProb);
// Update counter in loop for unrolling.
// Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index b6ba822..6e60b94 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -962,13 +962,54 @@ bool llvm::setLoopEstimatedTripCount(
if (LatchBranch->getSuccessor(0) != L->getHeader())
std::swap(BackedgeTakenWeight, LatchExitWeight);
- MDBuilder MDB(LatchBranch->getContext());
-
// Set/Update profile metadata.
- LatchBranch->setMetadata(
- LLVMContext::MD_prof,
- MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
+ setBranchWeights(*LatchBranch, {BackedgeTakenWeight, LatchExitWeight},
+ /*IsExpected=*/false);
+
+ return true;
+}
+
+BranchProbability llvm::getLoopProbability(Loop *L) {
+ BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
+ if (!LatchBranch)
+ return BranchProbability::getUnknown();
+ bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader();
+ return getBranchProbability(LatchBranch, FirstTargetIsLoop);
+}
+bool llvm::setLoopProbability(Loop *L, BranchProbability P) {
+ BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
+ if (!LatchBranch)
+ return false;
+ bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader();
+ return setBranchProbability(LatchBranch, P, FirstTargetIsLoop);
+}
+
+BranchProbability llvm::getBranchProbability(BranchInst *B,
+ bool ForFirstTarget) {
+ if (B->getNumSuccessors() != 2)
+ return BranchProbability::getUnknown();
+ uint64_t Weight0, Weight1;
+ if (!extractBranchWeights(*B, Weight0, Weight1))
+ return BranchProbability::getUnknown();
+ uint64_t Denominator = Weight0 + Weight1;
+ if (Denominator == 0)
+ return BranchProbability::getUnknown();
+ if (!ForFirstTarget)
+ std::swap(Weight0, Weight1);
+ return BranchProbability::getBranchProbability(Weight0, Denominator);
+}
+
+bool llvm::setBranchProbability(BranchInst *B, BranchProbability P,
+ bool ForFirstTarget) {
+ if (B->getNumSuccessors() != 2)
+ return false;
+ BranchProbability Prob0 = P;
+ BranchProbability Prob1 = P.getCompl();
+ if (!ForFirstTarget)
+ std::swap(Prob0, Prob1);
+ setBranchWeights(*B, {Prob0.getNumerator(), Prob1.getNumerator()},
+ /*IsExpected=*/false);
return true;
}
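A hedged round-trip sketch of the new helpers (setup assumed): a two-way
branch with weights (3, 1) reads back as probability 3/4 for the first target,
and writing a probability stores its numerator and its complement's numerator
as the new weights.

  // Flip the branch's bias toward the second target.
  if (BranchProbability P = getBranchProbability(Br, /*ForFirstTarget=*/true);
      !P.isUnknown())
    setBranchProbability(Br, P.getCompl(), /*ForFirstTarget=*/true);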
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b03fb62..cbc604e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -80,6 +80,7 @@
#include <algorithm>
#include <cassert>
#include <climits>
+#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iterator>
@@ -5955,7 +5956,7 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
}
// Update weight for the newly-created conditional branch.
- if (hasBranchWeightMD(*SI)) {
+ if (hasBranchWeightMD(*SI) && NewBI->isConditional()) {
SmallVector<uint64_t, 8> Weights;
getBranchWeights(SI, Weights);
if (Weights.size() == 1 + SI->getNumCases()) {
@@ -5977,14 +5978,14 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
}
// Prune obsolete incoming values off the successors' PHI nodes.
- for (auto BBI = Dest->begin(); isa<PHINode>(BBI); ++BBI) {
+ for (auto &PHI : make_early_inc_range(Dest->phis())) {
unsigned PreviousEdges = Cases->size();
if (Dest == SI->getDefaultDest())
++PreviousEdges;
for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
- cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ PHI.removeIncomingValue(SI->getParent());
}
- for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
unsigned PreviousEdges = OtherCases->size();
if (OtherDest == SI->getDefaultDest())
++PreviousEdges;
@@ -5993,7 +5994,7 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
if (NewBI->isUnconditional())
++E;
for (unsigned I = 0; I != E; ++I)
- cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ PHI.removeIncomingValue(SI->getParent());
}
// Clean up the default block - it may have phis or other instructions before
@@ -7632,7 +7633,33 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
auto *DefaultCaseBB = SI->getDefaultDest();
BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
auto It = OrigBB->getTerminator()->getIterator();
+ SmallVector<uint32_t> Weights;
+ auto HasWeights =
+ !ProfcheckDisableMetadataFixes && extractBranchWeights(*SI, Weights);
auto *BI = BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
+ if (HasWeights && any_of(Weights, [](const auto &V) { return V != 0; })) {
+    // IsPow2 covers only a subset of the cases in which we'd go to the
+    // default label; the rest are the powers of 2 that don't appear in the
+    // case statement. We don't know the distribution of the incoming values,
+    // so the safest is to split the original probability to `default` 50-50.
+ uint64_t OrigDenominator = sum_of(map_range(
+ Weights, [](const auto &V) { return static_cast<uint64_t>(V); }));
+ SmallVector<uint64_t> NewWeights(2);
+ NewWeights[1] = Weights[0] / 2;
+ NewWeights[0] = OrigDenominator - NewWeights[1];
+ setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
+
+ // For the original switch, we reduce the weight of the default by the
+ // amount by which the previous branch contributes to getting to default,
+ // and then make sure the remaining weights have the same relative ratio
+      // wrt each other.
+ uint64_t CasesDenominator = OrigDenominator - Weights[0];
+ Weights[0] /= 2;
+ for (auto &W : drop_begin(Weights))
+ W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
+
+ setBranchWeights(*SI, Weights, /*IsExpected=*/false);
+ }
// BI is handling the default case for SI, and so should share its DebugLoc.
BI->setDebugLoc(SI->getDebugLoc());
It->eraseFromParent();
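A worked example of the redistribution above (weights assumed): with original
switch weights {default = 8, case1 = 4, case2 = 4}, OrigDenominator = 16. The
guard branch gets NewWeights = {16 - 8/2, 8/2} = {12, 4}, sending a quarter of
the flow straight to default. The switch's default weight halves to 4, and
each case is rescaled by NewWeights[0] / CasesDenominator = 12 / 8, giving
weights {4, 6, 6} and preserving the cases' relative ratio.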
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 9f338db..94c5c170 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -12,7 +12,11 @@
//
// Limitation: This assumes that all terminators in the CFG are direct branches
// (the "br" instruction). The presence of any other control flow
-// such as indirectbr, switch or callbr will cause an assert.
+// such as indirectbr or switch will cause an assert.
+// The callbr terminator is supported by creating intermediate
+// target blocks that unconditionally branch to the original target
+// blocks. These intermediate target blocks can then be redirected
+// through the ControlFlowHub as usual.
//
//===----------------------------------------------------------------------===//
@@ -150,25 +154,55 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;
// Redirect exiting edges through a control flow hub.
ControlFlowHub CHub;
- for (auto *BB : ExitingBlocks) {
- auto *Branch = cast<BranchInst>(BB->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = L->contains(Succ0) ? nullptr : Succ0;
-
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = L->contains(Succ1) ? nullptr : Succ1;
- CHub.addBranch(BB, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
- << (Succ0 ? Succ0->getName() : "<none>") << ", "
- << (Succ1 ? Succ1->getName() : "<none>") << "}\n");
+
+ for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
+ BasicBlock *BB = ExitingBlocks[I];
+ if (BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator())) {
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = L->contains(Succ0) ? nullptr : Succ0;
+
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = L->contains(Succ1) ? nullptr : Succ1;
+ CHub.addBranch(BB, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added extiting branch: " << printBasicBlock(BB)
+ << " -> " << printBasicBlock(Succ0)
+ << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
+ << '\n');
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(BB->getTerminator())) {
+ for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) {
+ BasicBlock *Succ = CallBr->getSuccessor(J);
+ if (L->contains(Succ))
+ continue;
+ bool UpdatedLI = false;
+ BasicBlock *NewSucc =
+ SplitCallBrEdge(BB, Succ, J, &DTU, nullptr, &LI, &UpdatedLI);
+ // Even if CallBr and Succ do not have a common parent loop, we need to
+ // add the new target block to the parent loop of the current loop.
+ if (!UpdatedLI)
+ CallBrTargetBlocksToFix.push_back(NewSucc);
+ // ExitingBlocks is later used to restore SSA, so we need to make sure
+ // that the blocks used for phi nodes in the guard blocks match the
+ // predecessors of the guard blocks, which, in the case of callbr, are
+ // the new intermediate target blocks instead of the callbr blocks
+ // themselves.
+ ExitingBlocks[I] = NewSucc;
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added exiting branch: "
+ << printBasicBlock(NewSucc) << " -> "
+ << printBasicBlock(Succ) << '\n');
+ }
+ } else {
+ llvm_unreachable("unsupported block terminator");
+ }
}
SmallVector<BasicBlock *, 8> GuardBlocks;
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
BasicBlock *LoopExitBlock;
bool ChangedCFG;
std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(
@@ -187,10 +221,19 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
// The guard blocks were created outside the loop, so they need to become
// members of the parent loop.
- if (auto ParentLoop = L->getParentLoop()) {
+ // Same goes for the callbr target blocks. Although we try to add them to the
+ // smallest common parent loop of the callbr block and the corresponding
+ // original target block, there might not have been such a loop, in which case
+ // the newly created callbr target blocks are not part of any loop. For nested
+ // loops, this might result in them branching into a loop with multiple entry
+ // points.
+ if (auto *ParentLoop = L->getParentLoop()) {
for (auto *G : GuardBlocks) {
ParentLoop->addBasicBlockToLoop(G, LI);
}
+ for (auto *C : CallBrTargetBlocksToFix) {
+ ParentLoop->addBasicBlockToLoop(C, LI);
+ }
ParentLoop->verifyLoop();
}
@@ -218,8 +261,6 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
-
return runImpl(LI, DT);
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 3fed003..04b0562 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -167,7 +167,7 @@ public:
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
return tryInsertInstruction(
- new VPInstruction(Opcode, Operands, Flags, DL, Name));
+ new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
}
VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
@@ -184,7 +184,7 @@ public:
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
return tryInsertInstruction(
- new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
+ new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
}
VPInstruction *createNot(VPValue *Operand,
@@ -205,7 +205,7 @@ public:
return tryInsertInstruction(new VPInstruction(
Instruction::BinaryOps::Or, {LHS, RHS},
- VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
+ VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
}
VPInstruction *createLogicalAnd(VPValue *LHS, VPValue *RHS,
@@ -221,7 +221,7 @@ public:
std::optional<FastMathFlags> FMFs = std::nullopt) {
auto *Select =
FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
- *FMFs, DL, Name)
+ *FMFs, {}, DL, Name)
: new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
DL, Name);
return tryInsertInstruction(Select);
@@ -235,7 +235,7 @@ public:
assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
return tryInsertInstruction(
- new VPInstruction(Instruction::ICmp, {A, B}, Pred, DL, Name));
+ new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
}
/// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
@@ -246,7 +246,7 @@ public:
assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
return tryInsertInstruction(
- new VPInstruction(Instruction::FCmp, {A, B}, Pred, DL, Name));
+ new VPInstruction(Instruction::FCmp, {A, B}, Pred, {}, DL, Name));
}
VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
@@ -254,7 +254,7 @@ public:
const Twine &Name = "") {
return tryInsertInstruction(
new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
- GEPNoWrapFlags::none(), DL, Name));
+ GEPNoWrapFlags::none(), {}, DL, Name));
}
VPInstruction *createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset,
@@ -262,7 +262,7 @@ public:
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
return tryInsertInstruction(new VPInstruction(
- VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, DL, Name));
+ VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
}
VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset,
@@ -270,7 +270,7 @@ public:
const Twine &Name = "") {
return tryInsertInstruction(
new VPInstruction(VPInstruction::WidePtrAdd, {Ptr, Offset},
- GEPNoWrapFlags::none(), DL, Name));
+ GEPNoWrapFlags::none(), {}, DL, Name));
}
VPPhi *createScalarPhi(ArrayRef<VPValue *> IncomingValues, DebugLoc DL,
@@ -280,8 +280,7 @@ public:
VPValue *createElementCount(Type *Ty, ElementCount EC) {
VPlan &Plan = *getInsertBlock()->getPlan();
- VPValue *RuntimeEC =
- Plan.getOrAddLiveIn(ConstantInt::get(Ty, EC.getKnownMinValue()));
+ VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
if (EC.isScalable()) {
VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
RuntimeEC = EC.getKnownMinValue() == 1
@@ -304,9 +303,11 @@ public:
}
VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
- Type *ResultTy, DebugLoc DL) {
+ Type *ResultTy, DebugLoc DL,
+ const VPIRFlags &Flags = {},
+ const VPIRMetadata &Metadata = {}) {
return tryInsertInstruction(
- new VPInstructionWithType(Opcode, Op, ResultTy, {}, DL));
+ new VPInstructionWithType(Opcode, Op, ResultTy, DL, Flags, Metadata));
}
VPValue *createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f7968ab..e5c3f17 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3908,7 +3908,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
continue;
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
precomputeCosts(*Plan, VF, CostCtx);
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4166,7 +4166,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
// Add on other costs that are modelled in VPlan, but not in the legacy
// cost model.
VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
assert(VectorRegion && "Expected to have a vector region!");
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -5750,13 +5750,18 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
getMemoryInstructionCost(I, ElementCount::getFixed(1))));
UpdateMemOpUserCost(cast<LoadInst>(I));
} else if (const auto *Group = getInterleavedAccessGroup(I)) {
- // Scalarize an interleave group of address loads.
- for (unsigned I = 0; I < Group->getFactor(); ++I) {
- if (Instruction *Member = Group->getMember(I)) {
- setWideningDecision(
- Member, VF, CM_Scalarize,
- (VF.getKnownMinValue() *
- getMemoryInstructionCost(Member, ElementCount::getFixed(1))));
+ // Scalarize all members of this interleaved group when any member
+ // is used as an address. The address-used load skips the scalarization
+ // overhead, while the other members include it.
+ for (unsigned Idx = 0; Idx < Group->getFactor(); ++Idx) {
+ if (Instruction *Member = Group->getMember(Idx)) {
+ InstructionCost Cost =
+ AddrDefs.contains(Member)
+ ? (VF.getKnownMinValue() *
+ getMemoryInstructionCost(Member,
+ ElementCount::getFixed(1)))
+ : getMemInstScalarizationCost(Member, VF);
+ setWideningDecision(Member, VF, CM_Scalarize, Cost);
UpdateMemOpUserCost(cast<LoadInst>(Member));
}
}
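
To make the cost split concrete, take hypothetical numbers: VF = 4, a factor-2 group {L0, L1} where only L0 feeds an address, a scalar memory-op cost of 1, and a scalarization overhead of 2. The decisions above then assign

  Cost(L0) = VF * scalar mem-op cost             = 4 * 1     = 4
  Cost(L1) = getMemInstScalarizationCost(L1, VF),  e.g.  4 * 1 + 2 = 6

so the address-used member is costed without the overhead while its group mates keep it.
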
@@ -6871,7 +6876,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
- VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE());
+ VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE(),
+ OrigLoop);
InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
// Now compute and add the VPlan-based cost.
@@ -7105,12 +7111,13 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
// case, don't trigger the assertion, as the extra simplifications may cause a
// different VF to be picked by the VPlan-based cost model.
VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
// with early exits and plans with additional VPlan simplifications. The
// legacy cost model doesn't properly model costs for such loops.
assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
+ !Legal->getLAI()->getSymbolicStrides().empty() ||
planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
CostCtx, OrigLoop,
BestFactor.Width) ||
@@ -7745,8 +7752,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
if (CM.isPredicatedInst(I)) {
SmallVector<VPValue *> Ops(Operands);
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
- VPValue *One =
- Plan.getOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false));
+ VPValue *One = Plan.getConstantInt(I->getType(), 1u);
auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc());
Ops[1] = SafeRHS;
return new VPWidenRecipe(*I, Ops);
@@ -7799,11 +7805,10 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
}
case Instruction::ExtractValue: {
SmallVector<VPValue *> NewOps(Operands);
- Type *I32Ty = IntegerType::getInt32Ty(I->getContext());
auto *EVI = cast<ExtractValueInst>(I);
assert(EVI->getNumIndices() == 1 && "Expected one extractvalue index");
unsigned Idx = EVI->getIndices()[0];
- NewOps.push_back(Plan.getOrAddLiveIn(ConstantInt::get(I32Ty, Idx, false)));
+ NewOps.push_back(Plan.getConstantInt(32, Idx));
return new VPWidenRecipe(*I, NewOps);
}
};
@@ -8172,8 +8177,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
"Expected an ADD or SUB operation for predicated partial "
"reductions (because the neutral element in the mask is zero)!");
Cond = getBlockInMask(Builder.getInsertBlock());
- VPValue *Zero =
- Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
+ VPValue *Zero = Plan.getConstantInt(Reduction->getType(), 0);
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
}
return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
@@ -8335,11 +8339,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
continue;
-
- // FIXME: VPlan0, which models a copy of the original scalar loop, should
- // not use VPWidenPHIRecipe to model the phis.
- assert((isa<VPWidenPHIRecipe>(&R) || isa<VPInstruction>(&R)) &&
- UnderlyingValue && "unsupported recipe");
+ assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe");
// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
@@ -8440,7 +8440,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// and mulacc-reduction are implemented.
if (!CM.foldTailWithEVL()) {
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
CostCtx, Range);
}
@@ -8640,7 +8640,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
} else if (PhiR->isInLoop() && Kind == RecurKind::AddChainWithSubs &&
CurrentLinkI->getOpcode() == Instruction::Sub) {
Type *PhiTy = PhiR->getUnderlyingValue()->getType();
- auto *Zero = Plan->getOrAddLiveIn(ConstantInt::get(PhiTy, 0));
+ auto *Zero = Plan->getConstantInt(PhiTy, 0);
VPWidenRecipe *Sub = new VPWidenRecipe(
Instruction::Sub, {Zero, CurrentLink->getOperand(1)}, {},
VPIRMetadata(), CurrentLinkI->getDebugLoc());
@@ -8854,8 +8854,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
ToDelete.push_back(Select);
// Convert the reduction phi to operate on bools.
- PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
- OrigLoop->getHeader()->getContext())));
+ PhiR->setOperand(0, Plan->getFalse());
continue;
}
@@ -8877,9 +8876,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
unsigned ScaleFactor =
RecipeBuilder.getScalingForReduction(RdxDesc.getLoopExitInstr())
.value_or(1);
- Type *I32Ty = IntegerType::getInt32Ty(PhiTy->getContext());
- auto *ScaleFactorVPV =
- Plan->getOrAddLiveIn(ConstantInt::get(I32Ty, ScaleFactor));
+ auto *ScaleFactorVPV = Plan->getConstantInt(32, ScaleFactor);
VPValue *StartV = PHBuilder.createNaryOp(
VPInstruction::ReductionStartVector,
{PhiR->getStartValue(), Iden, ScaleFactorVPV},
@@ -9910,7 +9907,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool ForceVectorization =
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
- CM.CostKind, *CM.PSE.getSE());
+ CM.CostKind, *CM.PSE.getSE(), L);
if (!ForceVectorization &&
!isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
LVP.getPlanFor(VF.Width), SEL,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1b55a3b..bf3f52c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20975,6 +20975,27 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
if (isa<PHINode>(S.getMainOp()) ||
isVectorLikeInstWithConstOps(S.getMainOp()))
return nullptr;
+ // If the parent node is non-schedulable, the current node is copyable, and
+ // any of the parent's instructions have several users, some of them in
+ // bin-op nodes, cancel scheduling: it may cause wrong def-use deps in the
+ // analysis, leading to a crash.
+ // Non-scheduled nodes may not have an associated ScheduleData model, which
+ // may lead to skipped dep analysis.
+ if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
+ EI.UserTE->doesNotNeedToSchedule() &&
+ EI.UserTE->getOpcode() != Instruction::PHI &&
+ any_of(EI.UserTE->Scalars, [](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || I->hasOneUser())
+ return false;
+ for (User *U : I->users()) {
+ auto *UI = cast<Instruction>(U);
+ if (isa<BinaryOperator>(UI))
+ return true;
+ }
+ return false;
+ }))
+ return std::nullopt;
bool HasCopyables = S.areInstructionsWithCopyableElements();
if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
@@ -22134,6 +22155,27 @@ bool BoUpSLP::collectValuesToDemote(
{VectorizableTree[E.CombinedEntriesWithIndices.front().first].get(),
VectorizableTree[E.CombinedEntriesWithIndices.back().first].get()});
+ if (E.isAltShuffle()) {
+ // Combining these opcodes may lead to incorrect analysis; skip for now.
+ auto IsDangerousOpcode = [](unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Shl:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ return true;
+ default:
+ break;
+ }
+ return false;
+ };
+ if (IsDangerousOpcode(E.getAltOpcode()))
+ return FinalAnalysis();
+ }
+
switch (E.getOpcode()) {
// We can always demote truncations and extensions. Since truncations can
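
The shift and division opcodes are excluded because they are not narrowing-safe: bits the wide operation still sees are gone after demotion. A worked example for lshr under a hypothetical i32 -> i16 demotion:

  x = 0x00012345
  i32: (x lshr 8)        = 0x00000123   ; low 16 bits are 0x0123
  i16: (trunc(x) lshr 8) = 0x2345 lshr 8 = 0x0023   ; differs

The same applies to division and remainder, whose results depend on the full-width operands.
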
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
index 9c869dd..d354933 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
@@ -92,7 +92,7 @@ void MemDGNode::print(raw_ostream &OS, bool PrintDeps) const {
DGNode::print(OS, false);
if (PrintDeps) {
// Print memory preds.
- static constexpr const unsigned Indent = 4;
+ static constexpr unsigned Indent = 4;
for (auto *Pred : MemPreds)
OS.indent(Indent) << "<-" << *Pred->getInstruction() << "\n";
}
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index 86dbd21..5534da9 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -25,14 +25,14 @@ static cl::opt<bool>
"emit new instructions (*very* expensive)."));
#endif // NDEBUG
-static constexpr const unsigned long StopAtDisabled =
+static constexpr unsigned long StopAtDisabled =
std::numeric_limits<unsigned long>::max();
static cl::opt<unsigned long>
StopAt("sbvec-stop-at", cl::init(StopAtDisabled), cl::Hidden,
cl::desc("Vectorize if the invocation count is < than this. 0 "
"disables vectorization."));
-static constexpr const unsigned long StopBundleDisabled =
+static constexpr unsigned long StopBundleDisabled =
std::numeric_limits<unsigned long>::max();
static cl::opt<unsigned long>
StopBundle("sbvec-stop-bndl", cl::init(StopBundleDisabled), cl::Hidden,
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
index ed2f80b..2de6921 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
@@ -43,7 +43,7 @@ cl::opt<std::string> AllowFiles(
"sbvec-allow-files", cl::init(".*"), cl::Hidden,
cl::desc("Run the vectorizer only on file paths that match any in the "
"list of comma-separated regex's."));
-static constexpr const char AllowFilesDelim = ',';
+static constexpr char AllowFilesDelim = ',';
SandboxVectorizerPass::SandboxVectorizerPass() : FPM("fpm") {
if (UserDefinedPassPipeline == DefaultPipelineMagicStr) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 1f10058..cfe1f1e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -939,7 +939,7 @@ class VPIRMetadata {
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
public:
- VPIRMetadata() {}
+ VPIRMetadata() = default;
/// Adds metadata that can be preserved from the original instruction
/// \p I.
@@ -950,12 +950,9 @@ public:
VPIRMetadata(Instruction &I, LoopVersioning *LVer);
/// Copy constructor for cloning.
- VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}
+ VPIRMetadata(const VPIRMetadata &Other) = default;
- VPIRMetadata &operator=(const VPIRMetadata &Other) {
- Metadata = Other.Metadata;
- return *this;
- }
+ VPIRMetadata &operator=(const VPIRMetadata &Other) = default;
/// Add all metadata to \p I.
void applyMetadata(Instruction &I) const;
@@ -1107,14 +1104,14 @@ public:
VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {}
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
- const VPIRFlags &Flags, DebugLoc DL = DebugLoc::getUnknown(),
- const Twine &Name = "");
+ const VPIRFlags &Flags, const VPIRMetadata &MD = {},
+ DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
VPInstruction *clone() override {
- SmallVector<VPValue *, 2> Operands(operands());
- auto *New = new VPInstruction(Opcode, Operands, *this, getDebugLoc(), Name);
+ auto *New = new VPInstruction(Opcode, operands(), *this, *this,
+ getDebugLoc(), Name);
if (getUnderlyingValue())
New->setUnderlyingValue(getUnderlyingInstr());
return New;
@@ -1196,7 +1193,14 @@ public:
VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL,
const Twine &Name = "")
- : VPInstruction(Opcode, Operands, Flags, DL, Name), ResultTy(ResultTy) {}
+ : VPInstruction(Opcode, Operands, Flags, {}, DL, Name),
+ ResultTy(ResultTy) {}
+
+ VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
+ Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags,
+ const VPIRMetadata &Metadata, const Twine &Name = "")
+ : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
+ ResultTy(ResultTy) {}
static inline bool classof(const VPRecipeBase *R) {
// VPInstructionWithType are VPInstructions with specific opcodes requiring
@@ -1221,10 +1225,9 @@ public:
}
VPInstruction *clone() override {
- SmallVector<VPValue *, 2> Operands(operands());
auto *New =
- new VPInstructionWithType(getOpcode(), Operands, getResultType(), *this,
- getDebugLoc(), getName());
+ new VPInstructionWithType(getOpcode(), operands(), getResultType(),
+ *this, getDebugLoc(), getName());
New->setUnderlyingValue(getUnderlyingValue());
return New;
}
@@ -3206,6 +3209,9 @@ protected:
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
Alignment(Alignment), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+ assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
+ "Reversed access without VPVectorEndPointerRecipe address?");
}
public:
@@ -3977,7 +3983,7 @@ class VPIRBasicBlock : public VPBasicBlock {
IRBB(IRBB) {}
public:
- ~VPIRBasicBlock() override {}
+ ~VPIRBasicBlock() override = default;
static inline bool classof(const VPBlockBase *V) {
return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
@@ -4029,7 +4035,7 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
IsReplicator(IsReplicator) {}
public:
- ~VPRegionBlock() override {}
+ ~VPRegionBlock() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPBlockBase *V) {
@@ -4109,6 +4115,12 @@ public:
const VPCanonicalIVPHIRecipe *getCanonicalIV() const {
return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
}
+
+ /// Return the type of the canonical IV for loop regions.
+ Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
+ const Type *getCanonicalIVType() const {
+ return getCanonicalIV()->getScalarType();
+ }
};
inline VPRegionBlock *VPRecipeBase::getRegion() {
@@ -4387,15 +4399,25 @@ public:
}
/// Return a VPValue wrapping i1 true.
- VPValue *getTrue() {
- LLVMContext &Ctx = getContext();
- return getOrAddLiveIn(ConstantInt::getTrue(Ctx));
- }
+ VPValue *getTrue() { return getConstantInt(1, 1); }
/// Return a VPValue wrapping i1 false.
- VPValue *getFalse() {
- LLVMContext &Ctx = getContext();
- return getOrAddLiveIn(ConstantInt::getFalse(Ctx));
+ VPValue *getFalse() { return getConstantInt(1, 0); }
+
+ /// Return a VPValue wrapping a ConstantInt with the given type and value.
+ VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
+ return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
+ }
+
+ /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
+ VPValue *getConstantInt(unsigned BitWidth, uint64_t Val,
+ bool IsSigned = false) {
+ return getConstantInt(APInt(BitWidth, Val, IsSigned));
+ }
+
+ /// Return a VPValue wrapping a ConstantInt with the given APInt value.
+ VPValue *getConstantInt(const APInt &Val) {
+ return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
}
/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
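
A short usage sketch of the new helpers, assuming a VPlan &Plan is in scope and TripCountTy stands for any integer Type * at hand; the i1 shorthands now funnel through the same path:

  VPValue *True = Plan.getTrue();              // i1 1, i.e. getConstantInt(1, 1)
  VPValue *C42  = Plan.getConstantInt(64, 42);                         // i64 42
  VPValue *CNeg = Plan.getConstantInt(APInt(32, -1, /*isSigned=*/true)); // i32 -1
  VPValue *CTy  = Plan.getConstantInt(TripCountTy, 0);          // via a Type *
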
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 65688a3..1a66d20 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -612,8 +612,7 @@ void VPlanTransforms::addMiddleCheck(VPlan &Plan,
if (!RequiresScalarEpilogueCheck)
Cmp = Plan.getFalse();
else if (TailFolded)
- Cmp = Plan.getOrAddLiveIn(
- ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext())));
+ Cmp = Plan.getTrue();
else
Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
&Plan.getVectorTripCount(), LatchDL, "cmp.n");
@@ -712,8 +711,8 @@ void VPlanTransforms::addMinimumIterationCheck(
// additional overflow check is required before entering the vector loop.
// Get the maximum unsigned value for the type.
- VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(ConstantInt::get(
- TripCountTy, cast<IntegerType>(TripCountTy)->getMask()));
+ VPValue *MaxUIntTripCount =
+ Plan.getConstantInt(cast<IntegerType>(TripCountTy)->getMask());
VPValue *DistanceToMax = Builder.createNaryOp(
Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
DebugLoc::getUnknown());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 2aaabd9..965426f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -350,13 +350,14 @@ struct VPCostContext {
SmallPtrSet<Instruction *, 8> SkipCostComputation;
TargetTransformInfo::TargetCostKind CostKind;
ScalarEvolution &SE;
+ const Loop *L;
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
const VPlan &Plan, LoopVectorizationCostModel &CM,
TargetTransformInfo::TargetCostKind CostKind,
- ScalarEvolution &SE)
+ ScalarEvolution &SE, const Loop *L)
: TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
- CostKind(CostKind), SE(SE) {}
+ CostKind(CostKind), SE(SE), L(L) {}
/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b5b98c6..b57c448 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -313,7 +313,8 @@ private:
// Check for recipes that do not have opcodes.
if constexpr (std::is_same_v<RecipeTy, VPScalarIVStepsRecipe> ||
std::is_same_v<RecipeTy, VPCanonicalIVPHIRecipe> ||
- std::is_same_v<RecipeTy, VPDerivedIVRecipe>)
+ std::is_same_v<RecipeTy, VPDerivedIVRecipe> ||
+ std::is_same_v<RecipeTy, VPVectorEndPointerRecipe>)
return DefR;
else
return DefR && DefR->getOpcode() == Opcode;
@@ -686,6 +687,64 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});
}
+template <typename Addr_t, typename Mask_t> struct Load_match {
+ Addr_t Addr;
+ Mask_t Mask;
+
+ Load_match(Addr_t Addr, Mask_t Mask) : Addr(Addr), Mask(Mask) {}
+
+ template <typename OpTy> bool match(const OpTy *V) const {
+ auto *Load = dyn_cast<VPWidenLoadRecipe>(V);
+ if (!Load || !Addr.match(Load->getAddr()) || !Load->isMasked() ||
+ !Mask.match(Load->getMask()))
+ return false;
+ return true;
+ }
+};
+
+/// Match a (possibly reversed) masked load.
+template <typename Addr_t, typename Mask_t>
+inline Load_match<Addr_t, Mask_t> m_MaskedLoad(const Addr_t &Addr,
+ const Mask_t &Mask) {
+ return Load_match<Addr_t, Mask_t>(Addr, Mask);
+}
+
+template <typename Addr_t, typename Val_t, typename Mask_t> struct Store_match {
+ Addr_t Addr;
+ Val_t Val;
+ Mask_t Mask;
+
+ Store_match(Addr_t Addr, Val_t Val, Mask_t Mask)
+ : Addr(Addr), Val(Val), Mask(Mask) {}
+
+ template <typename OpTy> bool match(const OpTy *V) const {
+ auto *Store = dyn_cast<VPWidenStoreRecipe>(V);
+ if (!Store || !Addr.match(Store->getAddr()) ||
+ !Val.match(Store->getStoredValue()) || !Store->isMasked() ||
+ !Mask.match(Store->getMask()))
+ return false;
+ return true;
+ }
+};
+
+/// Match a (possibly reversed) masked store.
+template <typename Addr_t, typename Val_t, typename Mask_t>
+inline Store_match<Addr_t, Val_t, Mask_t>
+m_MaskedStore(const Addr_t &Addr, const Val_t &Val, const Mask_t &Mask) {
+ return Store_match<Addr_t, Val_t, Mask_t>(Addr, Val, Mask);
+}
+
+template <typename Op0_t, typename Op1_t>
+using VectorEndPointerRecipe_match =
+ Recipe_match<std::tuple<Op0_t, Op1_t>, 0,
+ /*Commutative*/ false, VPVectorEndPointerRecipe>;
+
+template <typename Op0_t, typename Op1_t>
+VectorEndPointerRecipe_match<Op0_t, Op1_t> m_VecEndPtr(const Op0_t &Op0,
+ const Op1_t &Op1) {
+ return VectorEndPointerRecipe_match<Op0_t, Op1_t>(Op0, Op1);
+}
+
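
These matchers are exercised later in this patch in optimizeMaskToEVL; the rough shape, with a recipe R, the Plan, and the bound VPValue pointers assumed in scope, is:

  VPValue *Addr, *Mask, *Ptr;
  // Bind the address and mask of a masked widened load.
  if (match(&R, m_MaskedLoad(m_VPValue(Addr), m_VPValue(Mask))))
    ; // ...
  // Recognize an end pointer computed from Ptr and the plan's VF.
  if (match(Addr, m_VecEndPtr(m_VPValue(Ptr), m_Specific(&Plan.getVF()))))
    ; // ...
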
/// Match a call argument at a given argument index.
template <typename Opnd_t> struct Argument_match {
/// Call argument index to match.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9a63c80..1ee405a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -162,8 +162,12 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPPredInstPHISC:
case VPVectorEndPointerSC:
return false;
- case VPInstructionSC:
- return mayWriteToMemory();
+ case VPInstructionSC: {
+ auto *VPI = cast<VPInstruction>(this);
+ return mayWriteToMemory() ||
+ VPI->getOpcode() == VPInstruction::BranchOnCount ||
+ VPI->getOpcode() == VPInstruction::BranchOnCond;
+ }
case VPWidenCallSC: {
Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
@@ -490,10 +494,10 @@ template class VPUnrollPartAccessor<3>;
}
VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
- const VPIRFlags &Flags, DebugLoc DL,
- const Twine &Name)
+ const VPIRFlags &Flags, const VPIRMetadata &MD,
+ DebugLoc DL, const Twine &Name)
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, Flags, DL),
- VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {
+ VPIRMetadata(MD), Opcode(Opcode), Name(Name.str()) {
assert(flagsValidForOpcode(getOpcode()) &&
"Set flags not supported for the provided opcode");
assert((getNumOperandsForOpcode(Opcode) == -1u ||
@@ -1241,6 +1245,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case Instruction::Select:
case Instruction::PHI:
case VPInstruction::AnyOf:
+ case VPInstruction::BranchOnCond:
+ case VPInstruction::BranchOnCount:
case VPInstruction::Broadcast:
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
@@ -2372,9 +2378,8 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
return false;
auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
- auto *CanIV = getRegion()->getCanonicalIV();
return StartC && StartC->isZero() && StepC && StepC->isOne() &&
- getScalarType() == CanIV->getScalarType();
+ getScalarType() == getRegion()->getCanonicalIVType();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3167,26 +3172,30 @@ bool VPReplicateRecipe::shouldPack() const {
});
}
-/// Returns true if \p Ptr is a pointer computation for which the legacy cost
-/// model computes a SCEV expression when computing the address cost.
-static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
+/// Returns a SCEV expression for \p Ptr if it is a pointer computation for
+/// which the legacy cost model computes a SCEV expression when computing the
+/// address cost, and nullptr otherwise. Computing SCEVs for VPValues is
+/// incomplete and may return SCEVCouldNotCompute in cases where the legacy
+/// cost model can compute SCEVs; in those cases we fall back to the legacy
+/// cost model.
+static const SCEV *getAddressAccessSCEV(const VPValue *Ptr, ScalarEvolution &SE,
+ const Loop *L) {
auto *PtrR = Ptr->getDefiningRecipe();
if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
Instruction::GetElementPtr) ||
isa<VPWidenGEPRecipe>(PtrR) ||
match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue()))))
- return false;
+ return nullptr;
// We are looking for a GEP where all indices are either loop invariant or
// inductions.
for (VPValue *Opd : drop_begin(PtrR->operands())) {
if (!Opd->isDefinedOutsideLoopRegions() &&
!isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
- return false;
+ return nullptr;
}
- return true;
+ return vputils::getSCEVExprForVPValue(Ptr, SE, L);
}
/// Returns true if \p V is used as part of the address of another load or
@@ -3354,9 +3363,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
bool IsLoad = UI->getOpcode() == Instruction::Load;
const VPValue *PtrOp = getOperand(!IsLoad);
- // TODO: Handle cases where we need to pass a SCEV to
- // getAddressComputationCost.
- if (shouldUseAddressAccessSCEV(PtrOp))
+ const SCEV *PtrSCEV = getAddressAccessSCEV(PtrOp, Ctx.SE, Ctx.L);
+ if (isa_and_nonnull<SCEVCouldNotCompute>(PtrSCEV))
break;
Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
@@ -3374,7 +3382,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
InstructionCost ScalarCost =
ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
- nullptr, Ctx.CostKind);
+ PtrSCEV, Ctx.CostKind);
if (isSingleScalar())
return ScalarCost;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.h b/llvm/lib/Transforms/Vectorize/VPlanSLP.h
index 77ff36c..44972c68 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanSLP.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.h
@@ -89,8 +89,7 @@ class VPlanSlp {
/// Width of the widest combined bundle in bits.
unsigned WidestBundleBits = 0;
- using MultiNodeOpTy =
- typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
+ using MultiNodeOpTy = std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
// Input operand bundles for the current multi node. Each multi node operand
// bundle contains values not matching the multi node's opcode. They will
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 4d98014..9d9bb14 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -151,7 +151,27 @@ static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
static bool sinkScalarOperands(VPlan &Plan) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
+ bool ScalarVFOnly = Plan.hasScalarVFOnly();
bool Changed = false;
+
+ auto IsValidSinkCandidate = [ScalarVFOnly](VPBasicBlock *SinkTo,
+ VPSingleDefRecipe *Candidate) {
+ // We only know how to duplicate VPReplicateRecipes and
+ // VPScalarIVStepsRecipes for now.
+ if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Candidate))
+ return false;
+
+ if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() ||
+ Candidate->mayReadOrWriteMemory())
+ return false;
+
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(Candidate))
+ if (!ScalarVFOnly && RepR->isSingleScalar())
+ return false;
+
+ return true;
+ };
+
// First, collect the operands of all recipes in replicate blocks as seeds for
// sinking.
SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
@@ -159,51 +179,37 @@ static bool sinkScalarOperands(VPlan &Plan) {
VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
continue;
- VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
- if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
+ VPBasicBlock *VPBB = cast<VPBasicBlock>(EntryVPBB->getSuccessors().front());
+ if (VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
continue;
for (auto &Recipe : *VPBB) {
- for (VPValue *Op : Recipe.operands())
+ for (VPValue *Op : Recipe.operands()) {
if (auto *Def =
dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
- WorkList.insert({VPBB, Def});
+ if (IsValidSinkCandidate(VPBB, Def))
+ WorkList.insert({VPBB, Def});
+ }
}
}
- bool ScalarVFOnly = Plan.hasScalarVFOnly();
// Try to sink each replicate or scalar IV steps recipe in the worklist.
for (unsigned I = 0; I != WorkList.size(); ++I) {
VPBasicBlock *SinkTo;
VPSingleDefRecipe *SinkCandidate;
std::tie(SinkTo, SinkCandidate) = WorkList[I];
- if (SinkCandidate->getParent() == SinkTo ||
- SinkCandidate->mayHaveSideEffects() ||
- SinkCandidate->mayReadOrWriteMemory())
- continue;
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
- if (!ScalarVFOnly && RepR->isSingleScalar())
- continue;
- } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
- continue;
- bool NeedsDuplicating = false;
// All recipe users of the sink candidate must be in the same block SinkTo
- // or all users outside of SinkTo must be uniform-after-vectorization (
- // i.e., only first lane is used) . In the latter case, we need to duplicate
- // SinkCandidate.
- auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
- SinkCandidate](VPUser *U) {
- auto *UI = cast<VPRecipeBase>(U);
- if (UI->getParent() == SinkTo)
- return true;
- NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
- // We only know how to duplicate VPReplicateRecipes and
- // VPScalarIVStepsRecipes for now.
- return NeedsDuplicating &&
- isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(SinkCandidate);
- };
- if (!all_of(SinkCandidate->users(), CanSinkWithUser))
+ // or all users outside of SinkTo must have only their first lane used. In
+ // the latter case, we need to duplicate SinkCandidate.
+ auto UsersOutsideSinkTo =
+ make_filter_range(SinkCandidate->users(), [SinkTo](VPUser *U) {
+ return cast<VPRecipeBase>(U)->getParent() != SinkTo;
+ });
+ if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
+ return !U->onlyFirstLaneUsed(SinkCandidate);
+ }))
continue;
+ bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
if (NeedsDuplicating) {
if (ScalarVFOnly)
@@ -230,7 +236,8 @@ static bool sinkScalarOperands(VPlan &Plan) {
for (VPValue *Op : SinkCandidate->operands())
if (auto *Def =
dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
- WorkList.insert({SinkTo, Def});
+ if (IsValidSinkCandidate(SinkTo, Def))
+ WorkList.insert({SinkTo, Def});
Changed = true;
}
return Changed;
@@ -699,8 +706,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
continue;
const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
- VPValue *StartV =
- Plan.getOrAddLiveIn(ConstantInt::get(ID.getStep()->getType(), 0));
+ VPValue *StartV = Plan.getConstantInt(ID.getStep()->getType(), 0);
VPValue *StepV = PtrIV->getOperand(1);
VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
@@ -820,7 +826,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
// Calculate the final index.
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
auto *CanonicalIV = LoopRegion->getCanonicalIV();
- Type *CanonicalIVType = CanonicalIV->getScalarType();
+ Type *CanonicalIVType = LoopRegion->getCanonicalIVType();
VPBuilder B(cast<VPBasicBlock>(PredVPBB));
DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc();
@@ -836,7 +842,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
// changed it means the exit is using the incremented value, so we need to
// add the step.
if (Incoming != WideIV) {
- VPValue *One = Plan.getOrAddLiveIn(ConstantInt::get(CanonicalIVType, 1));
+ VPValue *One = Plan.getConstantInt(CanonicalIVType, 1);
EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
}
@@ -882,7 +888,7 @@ static VPValue *optimizeLatchExitInductionUser(
return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
if (ScalarTy->isPointerTy()) {
Type *StepTy = TypeInfo.inferScalarType(Step);
- auto *Zero = Plan.getOrAddLiveIn(ConstantInt::get(StepTy, 0));
+ auto *Zero = Plan.getConstantInt(StepTy, 0);
return B.createPtrAdd(EndValue,
B.createNaryOp(Instruction::Sub, {Zero, Step}),
DebugLoc::getUnknown(), "ind.escape");
@@ -1057,13 +1063,9 @@ static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
return nullptr;
}
-/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
- VPlan *Plan = R.getParent()->getPlan();
-
- auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
- if (!Def)
- return;
+/// Try to simplify VPSingleDefRecipe \p Def.
+static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
+ VPlan *Plan = Def->getParent()->getPlan();
// Simplification of live-in IR values for SingleDef recipes using
// InstSimplifyFolder.
@@ -1073,7 +1075,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(V);
// Fold PredPHI LiveIn -> LiveIn.
- if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
+ if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(Def)) {
VPValue *Op = PredPHI->getOperand(0);
if (Op->isLiveIn())
PredPHI->replaceAllUsesWith(Op);
@@ -1092,12 +1094,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
- unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
+ unsigned ExtOpcode = match(Def->getOperand(0), m_SExt(m_VPValue()))
? Instruction::SExt
: Instruction::ZExt;
auto *Ext = Builder.createWidenCast(Instruction::CastOps(ExtOpcode), A,
TruncTy);
- if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
+ if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
// UnderlyingExt has distinct return type, used to retain legacy cost.
Ext->setUnderlyingValue(UnderlyingExt);
}
@@ -1160,7 +1162,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
// x && !x -> 0
- if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
+ if (match(Def, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
return Def->replaceAllUsesWith(Plan->getFalse());
if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
@@ -1188,8 +1190,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(A);
if (match(Def, m_c_Mul(m_VPValue(A), m_ZeroInt())))
- return Def->replaceAllUsesWith(R.getOperand(0) == A ? R.getOperand(1)
- : R.getOperand(0));
+ return Def->replaceAllUsesWith(
+ Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
if (match(Def, m_Not(m_VPValue(A)))) {
if (match(A, m_Not(m_VPValue(A))))
@@ -1218,8 +1220,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}
// If Cmp doesn't have a debug location, use the one from the negation,
// to preserve the location.
- if (!Cmp->getDebugLoc() && R.getDebugLoc())
- Cmp->setDebugLoc(R.getDebugLoc());
+ if (!Cmp->getDebugLoc() && Def->getDebugLoc())
+ Cmp->setDebugLoc(Def->getDebugLoc());
}
}
}
@@ -1245,7 +1247,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (match(Def, m_Intrinsic<Intrinsic::vp_merge>(m_True(), m_VPValue(A),
m_VPValue(X), m_VPValue())) &&
match(A, m_c_BinaryOr(m_Specific(X), m_VPValue(Y))) &&
- TypeInfo.inferScalarType(R.getVPSingleValue())->isIntegerTy(1)) {
+ TypeInfo.inferScalarType(Def)->isIntegerTy(1)) {
Def->setOperand(1, Def->getOperand(0));
Def->setOperand(0, Y);
return;
@@ -1253,35 +1255,41 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {
if (Phi->getOperand(0) == Phi->getOperand(1))
- Def->replaceAllUsesWith(Phi->getOperand(0));
+ Phi->replaceAllUsesWith(Phi->getOperand(0));
return;
}
// Look through ExtractLastElement (BuildVector ....).
- if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
- m_ExtractLastLanePerPart(m_BuildVector())))) {
- auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ if (match(Def, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
+ m_ExtractLastLanePerPart(m_BuildVector())))) {
+ auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 1));
return;
}
// Look through ExtractPenultimateElement (BuildVector ....).
- if (match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
- m_BuildVector()))) {
- auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ if (match(Def, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
+ m_BuildVector()))) {
+ auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 2));
return;
}
uint64_t Idx;
- if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
- auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ if (match(Def, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
+ auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
return;
}
+ if (match(Def, m_BuildVector()) && all_equal(Def->operands())) {
+ Def->replaceAllUsesWith(
+ Builder.createNaryOp(VPInstruction::Broadcast, Def->getOperand(0)));
+ return;
+ }
+
if (auto *Phi = dyn_cast<VPPhi>(Def)) {
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
@@ -1298,7 +1306,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
isa<VPPhi>(X)) {
auto *Phi = cast<VPPhi>(X);
if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
- Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) {
+ Phi->getNumUsers() == 1 && (*Phi->user_begin() == Def)) {
Phi->setOperand(0, Y);
Def->replaceAllUsesWith(Phi);
return;
@@ -1306,7 +1314,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}
// VPVectorPointer for part 0 can be replaced by their start pointer.
- if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
+ if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(Def)) {
if (VecPtr->isFirstPart()) {
VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
return;
@@ -1361,9 +1369,9 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
Plan.getEntry());
VPTypeAnalysis TypeInfo(Plan);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
- for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- simplifyRecipe(R, TypeInfo);
- }
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB))
+ if (auto *Def = dyn_cast<VPSingleDefRecipe>(&R))
+ simplifyRecipe(Def, TypeInfo);
}
}
@@ -1419,6 +1427,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
true /*IsSingleScalar*/);
Clone->insertBefore(RepOrWidenR);
RepOrWidenR->replaceAllUsesWith(Clone);
+ if (isDeadRecipe(*RepOrWidenR))
+ RepOrWidenR->eraseFromParent();
}
}
}
@@ -1572,9 +1582,9 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
continue;
// Update IV operands and comparison bound to use new narrower type.
- auto *NewStart = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 0));
+ auto *NewStart = Plan.getConstantInt(NewIVTy, 0);
WideIV->setStartValue(NewStart);
- auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
+ auto *NewStep = Plan.getConstantInt(NewIVTy, 1);
WideIV->setStepValue(NewStep);
auto *NewBTC = new VPWidenCastRecipe(
@@ -1693,8 +1703,7 @@ static bool tryToReplaceALMWithWideALM(VPlan &Plan, ElementCount VF,
// When using wide lane masks, the return type of the get.active.lane.mask
// intrinsic is VF x UF (last operand).
- VPValue *ALMMultiplier =
- Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::getInt64Ty(Ctx), UF));
+ VPValue *ALMMultiplier = Plan.getConstantInt(64, UF);
EntryALM->setOperand(2, ALMMultiplier);
LoopALM->setOperand(2, ALMMultiplier);
@@ -2400,8 +2409,8 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
"index.part.next");
// Create the active lane mask instruction in the VPlan preheader.
- VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
- ConstantInt::get(TopRegion->getCanonicalIV()->getScalarType(), 1));
+ VPValue *ALMMultiplier =
+ Plan.getConstantInt(TopRegion->getCanonicalIVType(), 1);
auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
{EntryIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.entry");
@@ -2501,7 +2510,7 @@ void VPlanTransforms::addActiveLaneMask(
} else {
VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
- ConstantInt::get(LoopRegion->getCanonicalIV()->getScalarType(), 1));
+ ConstantInt::get(LoopRegion->getCanonicalIVType(), 1));
LaneMask =
B.createNaryOp(VPInstruction::ActiveLaneMask,
{WideCanonicalIV, Plan.getTripCount(), ALMMultiplier},
@@ -2515,90 +2524,102 @@ void VPlanTransforms::addActiveLaneMask(
HeaderMask->eraseFromParent();
}
+template <typename Op0_t, typename Op1_t> struct RemoveMask_match {
+ Op0_t In;
+ Op1_t &Out;
+
+ RemoveMask_match(const Op0_t &In, Op1_t &Out) : In(In), Out(Out) {}
+
+ template <typename OpTy> bool match(OpTy *V) const {
+ if (m_Specific(In).match(V)) {
+ Out = nullptr;
+ return true;
+ }
+ if (m_LogicalAnd(m_Specific(In), m_VPValue(Out)).match(V))
+ return true;
+ return false;
+ }
+};
+
+/// Match the mask \p In directly, binding \p Out to nullptr, or a logical-and
+/// of \p In and another mask, binding \p Out to the remaining part.
+template <typename Op0_t, typename Op1_t>
+static inline RemoveMask_match<Op0_t, Op1_t> m_RemoveMask(const Op0_t &In,
+ Op1_t &Out) {
+ return RemoveMask_match<Op0_t, Op1_t>(In, Out);
+}
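
The helper has exactly two success modes; with HeaderMask and a recipe operand V assumed in scope:

  VPValue *Rem = nullptr;
  // V == HeaderMask                 -> match, Rem stays nullptr
  // V == logical-and(HeaderMask, M) -> match, Rem == M
  // anything else                   -> no match
  if (match(V, m_RemoveMask(HeaderMask, Rem)))
    ; // Rem is the residual mask for the EVL recipe, or nullptr for none
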
+
/// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
/// EVL-based recipe without the header mask. Returns nullptr if no EVL-based
/// recipe could be created.
/// \p HeaderMask Header Mask.
/// \p CurRecipe Recipe to be transformed.
/// \p TypeInfo VPlan-based type analysis.
-/// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
/// \p EVL The explicit vector length parameter of vector-predication
/// intrinsics.
static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
VPRecipeBase &CurRecipe,
- VPTypeAnalysis &TypeInfo,
- VPValue &AllOneMask, VPValue &EVL) {
- // FIXME: Don't transform recipes to EVL recipes if they're not masked by the
- // header mask.
- auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
- assert(OrigMask && "Unmasked recipe when folding tail");
- // HeaderMask will be handled using EVL.
- VPValue *Mask;
- if (match(OrigMask, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask))))
- return Mask;
- return HeaderMask == OrigMask ? nullptr : OrigMask;
- };
+ VPTypeAnalysis &TypeInfo, VPValue &EVL) {
+ VPlan *Plan = CurRecipe.getParent()->getPlan();
+ VPValue *Addr, *Mask, *EndPtr;
/// Adjust any end pointers so that they point to the end of EVL lanes not VF.
- auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * {
- auto *EndPtr = dyn_cast<VPVectorEndPointerRecipe>(Addr);
- if (!EndPtr)
- return Addr;
- assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() &&
- "VPVectorEndPointerRecipe with non-VF VF operand?");
- assert(
- all_of(EndPtr->users(),
- [](VPUser *U) {
- return cast<VPWidenMemoryRecipe>(U)->isReverse();
- }) &&
- "VPVectorEndPointRecipe not used by reversed widened memory recipe?");
- VPVectorEndPointerRecipe *EVLAddr = EndPtr->clone();
- EVLAddr->insertBefore(&CurRecipe);
- EVLAddr->setOperand(1, &EVL);
- return EVLAddr;
+ auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
+ auto *EVLEndPtr = cast<VPVectorEndPointerRecipe>(EndPtr)->clone();
+ EVLEndPtr->insertBefore(&CurRecipe);
+ EVLEndPtr->setOperand(1, &EVL);
+ return EVLEndPtr;
};
- return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe)
- .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
- VPValue *NewMask = GetNewMask(L->getMask());
- VPValue *NewAddr = GetNewAddr(L->getAddr());
- return new VPWidenLoadEVLRecipe(*L, NewAddr, EVL, NewMask);
- })
- .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
- VPValue *NewMask = GetNewMask(S->getMask());
- VPValue *NewAddr = GetNewAddr(S->getAddr());
- return new VPWidenStoreEVLRecipe(*S, NewAddr, EVL, NewMask);
- })
- .Case<VPInterleaveRecipe>([&](VPInterleaveRecipe *IR) {
- VPValue *NewMask = GetNewMask(IR->getMask());
- return new VPInterleaveEVLRecipe(*IR, EVL, NewMask);
- })
- .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
- VPValue *NewMask = GetNewMask(Red->getCondOp());
- return new VPReductionEVLRecipe(*Red, EVL, NewMask);
- })
- .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
- VPValue *LHS, *RHS;
- // Transform select with a header mask condition
- // select(header_mask, LHS, RHS)
- // into vector predication merge.
- // vp.merge(all-true, LHS, RHS, EVL)
- if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS),
- m_VPValue(RHS))))
- return nullptr;
- // Use all true as the condition because this transformation is
- // limited to selects whose condition is a header mask.
- return new VPWidenIntrinsicRecipe(
- Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
- TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
- })
- .Default([&](VPRecipeBase *R) { return nullptr; });
+ if (match(&CurRecipe,
+ m_MaskedLoad(m_VPValue(Addr), m_RemoveMask(HeaderMask, Mask))) &&
+ !cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
+ return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
+ EVL, Mask);
+
+ if (match(&CurRecipe,
+ m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
+ match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+ cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
+ return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
+ AdjustEndPtr(EndPtr), EVL, Mask);
+
+ if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
+ m_RemoveMask(HeaderMask, Mask))) &&
+ !cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+ return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
+ EVL, Mask);
+
+ if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
+ m_RemoveMask(HeaderMask, Mask))) &&
+ match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+ cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+ return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
+ AdjustEndPtr(EndPtr), EVL, Mask);
+
+ if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
+ if (Rdx->isConditional() &&
+ match(Rdx->getCondOp(), m_RemoveMask(HeaderMask, Mask)))
+ return new VPReductionEVLRecipe(*Rdx, EVL, Mask);
+
+ if (auto *Interleave = dyn_cast<VPInterleaveRecipe>(&CurRecipe))
+ if (Interleave->getMask() &&
+ match(Interleave->getMask(), m_RemoveMask(HeaderMask, Mask)))
+ return new VPInterleaveEVLRecipe(*Interleave, EVL, Mask);
+
+ VPValue *LHS, *RHS;
+ if (match(&CurRecipe,
+ m_Select(m_Specific(HeaderMask), m_VPValue(LHS), m_VPValue(RHS))))
+ return new VPWidenIntrinsicRecipe(
+ Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL},
+ TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc());
+
+ return nullptr;
}
/// Replace recipes with their EVL variants.
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPTypeAnalysis TypeInfo(Plan);
- VPValue *AllOneMask = Plan.getTrue();
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
@@ -2658,7 +2679,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
Intrinsic::experimental_vp_splice,
- {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
+ {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
VPSplice->insertBefore(&R);
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
@@ -2692,7 +2713,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
for (VPUser *U : collectUsersRecursively(EVLMask)) {
auto *CurRecipe = cast<VPRecipeBase>(U);
VPRecipeBase *EVLRecipe =
- optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
+ optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
if (!EVLRecipe)
continue;
@@ -2773,7 +2794,7 @@ void VPlanTransforms::addExplicitVectorLength(
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();
- auto *CanIVTy = CanonicalIVPHI->getScalarType();
+ auto *CanIVTy = LoopRegion->getCanonicalIVType();
VPValue *StartV = CanonicalIVPHI->getStartValue();
// Create the ExplicitVectorLengthPhi recipe in the main loop.
@@ -2788,8 +2809,7 @@ void VPlanTransforms::addExplicitVectorLength(
if (MaxSafeElements) {
// Support for MaxSafeDist for correct loop emission.
- VPValue *AVLSafe =
- Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, *MaxSafeElements));
+ VPValue *AVLSafe = Plan.getConstantInt(CanIVTy, *MaxSafeElements);
VPValue *Cmp = Builder.createICmp(ICmpInst::ICMP_ULT, AVL, AVLSafe);
AVL = Builder.createSelect(Cmp, AVL, AVLSafe, DebugLoc::getUnknown(),
"safe_avl");
@@ -2902,9 +2922,8 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
Type *AVLTy = VPTypeAnalysis(Plan).inferScalarType(AVLNext);
VPBuilder Builder(LatchExitingBr);
- VPValue *Cmp =
- Builder.createICmp(CmpInst::ICMP_EQ, AVLNext,
- Plan.getOrAddLiveIn(ConstantInt::getNullValue(AVLTy)));
+ VPValue *Cmp = Builder.createICmp(CmpInst::ICMP_EQ, AVLNext,
+ Plan.getConstantInt(AVLTy, 0));
Builder.createNaryOp(VPInstruction::BranchOnCond, Cmp);
LatchExitingBr->eraseFromParent();
}
@@ -2928,8 +2947,7 @@ void VPlanTransforms::replaceSymbolicStrides(
// Only handle constant strides for now.
continue;
- auto *CI =
- Plan.getOrAddLiveIn(ConstantInt::get(Stride->getType(), *StrideConst));
+ auto *CI = Plan.getConstantInt(*StrideConst);
if (VPValue *StrideVPV = Plan.getLiveIn(StrideV))
StrideVPV->replaceUsesWithIf(CI, CanUseVersionedStride);
@@ -2944,7 +2962,7 @@ void VPlanTransforms::replaceSymbolicStrides(
unsigned BW = U->getType()->getScalarSizeInBits();
APInt C =
isa<SExtInst>(U) ? StrideConst->sext(BW) : StrideConst->zext(BW);
- VPValue *CI = Plan.getOrAddLiveIn(ConstantInt::get(U->getType(), C));
+ VPValue *CI = Plan.getConstantInt(C);
StrideVPV->replaceUsesWithIf(CI, CanUseVersionedStride);
}
RewriteMap[StrideV] = PSE.getSCEV(StrideV);
@@ -3123,8 +3141,7 @@ void VPlanTransforms::createInterleaveGroups(
DL.getTypeAllocSize(getLoadStoreType(IRInsertPos)) *
IG->getIndex(IRInsertPos),
/*IsSigned=*/true);
- VPValue *OffsetVPV =
- Plan.getOrAddLiveIn(ConstantInt::get(Plan.getContext(), -Offset));
+ VPValue *OffsetVPV = Plan.getConstantInt(-Offset);
VPBuilder B(InsertPos);
Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
}
@@ -3865,8 +3882,7 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
VPBuilder Builder(VectorPH, VectorPH->begin());
auto *TCTy = VPTypeAnalysis(Plan).inferScalarType(Plan.getTripCount());
auto *TCMO = Builder.createNaryOp(
- Instruction::Sub,
- {Plan.getTripCount(), Plan.getOrAddLiveIn(ConstantInt::get(TCTy, 1))},
+ Instruction::Sub, {Plan.getTripCount(), Plan.getConstantInt(TCTy, 1)},
DebugLoc::getCompilerGenerated(), "trip.count.minus.1");
BTC->replaceAllUsesWith(TCMO);
}
@@ -3991,9 +4007,8 @@ void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
if (TailByMasking) {
TC = Builder.createNaryOp(
Instruction::Add,
- {TC, Builder.createNaryOp(
- Instruction::Sub,
- {Step, Plan.getOrAddLiveIn(ConstantInt::get(TCTy, 1))})},
+ {TC, Builder.createNaryOp(Instruction::Sub,
+ {Step, Plan.getConstantInt(TCTy, 1)})},
DebugLoc::getCompilerGenerated(), "n.rnd.up");
}
@@ -4015,8 +4030,8 @@ void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
if (RequiresScalarEpilogue) {
assert(!TailByMasking &&
"requiring scalar epilogue is not supported with fail folding");
- VPValue *IsZero = Builder.createICmp(
- CmpInst::ICMP_EQ, R, Plan.getOrAddLiveIn(ConstantInt::get(TCTy, 0)));
+ VPValue *IsZero =
+ Builder.createICmp(CmpInst::ICMP_EQ, R, Plan.getConstantInt(TCTy, 0));
R = Builder.createSelect(IsZero, Step, R);
}
@@ -4054,7 +4069,7 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
}
VF.replaceAllUsesWith(RuntimeVF);
- VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
+ VPValue *UF = Plan.getConstantInt(TCTy, Plan.getUF());
VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
VFxUF.replaceAllUsesWith(MulByUF);
}
@@ -4174,7 +4189,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VFMinVal = VF.getKnownMinValue();
SmallVector<VPInterleaveRecipe *> StoreGroups;
for (auto &R : *VectorLoop->getEntryBasicBlock()) {
- if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount()))
+ if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe>(&R) &&
@@ -4334,17 +4349,17 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
VPBuilder PHBuilder(Plan.getVectorPreheader());
VPValue *UF = Plan.getOrAddLiveIn(
- ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
+ ConstantInt::get(VectorLoop->getCanonicalIVType(), 1 * Plan.getUF()));
if (VF.isScalable()) {
VPValue *VScale = PHBuilder.createElementCount(
- CanIV->getScalarType(), ElementCount::getScalable(1));
+ VectorLoop->getCanonicalIVType(), ElementCount::getScalable(1));
VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF});
Inc->setOperand(1, VScaleUF);
Plan.getVF().replaceAllUsesWith(VScale);
} else {
Inc->setOperand(1, UF);
Plan.getVF().replaceAllUsesWith(
- Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+ Plan.getConstantInt(CanIV->getScalarType(), 1));
}
removeDeadRecipes(Plan);
}
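
The recurring edit through VPlanTransforms.cpp above replaces the two-step Plan.getOrAddLiveIn(ConstantInt::get(...)) pattern with a Plan.getConstantInt(...) convenience call. A minimal sketch of the overloads the call sites imply, assuming they simply wrap getOrAddLiveIn (the actual declarations live in VPlan.h and may differ in detail):

    // Sketch only -- overloads inferred from the call sites above, assuming
    // they wrap the old two-step pattern; the real declarations are in VPlan.h.
    VPValue *VPlan::getConstantInt(Type *Ty, uint64_t Val) {
      return getOrAddLiveIn(ConstantInt::get(Ty, Val));
    }
    VPValue *VPlan::getConstantInt(const APInt &Val) {
      // Covers call sites like getConstantInt(*StrideConst) and
      // getConstantInt(-Offset), where the APInt carries the bit width.
      return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
    }

Since getOrAddLiveIn de-duplicates live-ins, such a wrapper would keep the old behavior: requesting the same constant twice yields the same VPValue.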
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index cfd1a74..d6a0028 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -68,10 +68,9 @@ class UnrollState {
void unrollWidenInductionByUF(VPWidenInductionRecipe *IV,
VPBasicBlock::iterator InsertPtForPhi);
- VPValue *getConstantVPV(unsigned Part) {
- Type *CanIVIntTy =
- Plan.getVectorLoopRegion()->getCanonicalIV()->getScalarType();
- return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
+ VPValue *getConstantInt(unsigned Part) {
+ Type *CanIVIntTy = Plan.getVectorLoopRegion()->getCanonicalIVType();
+ return Plan.getConstantInt(CanIVIntTy, Part);
}
public:
@@ -138,7 +137,7 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
remapOperands(&PartIR, Part);
if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
- ScalarIVSteps->addOperand(getConstantVPV(Part));
+ ScalarIVSteps->addOperand(getConstantInt(Part));
}
addRecipeForPart(&Part0R, &PartIR, Part);
@@ -250,7 +249,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
for (unsigned Part = 1; Part != UF; ++Part)
VPV2Parts[VPI][Part - 1] = StartV;
}
- Copy->addOperand(getConstantVPV(Part));
+ Copy->addOperand(getConstantInt(Part));
} else {
assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
"unexpected header phi recipe not needing unrolled part");
@@ -319,7 +318,7 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
match(Copy,
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
- Copy->addOperand(getConstantVPV(Part));
+ Copy->addOperand(getConstantInt(Part));
if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
Copy->setOperand(0, R.getOperand(0));
@@ -475,8 +474,7 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
if (LaneDefs != Def2LaneDefs.end())
return LaneDefs->second[Lane.getKnownLane()];
- VPValue *Idx =
- Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
+ VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
}
@@ -510,8 +508,7 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
continue;
}
- VPValue *Idx =
- Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
+ VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
NewOps.push_back(Ext);
}
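
VPlanUnroll.cpp picks up the same helper and the new region-level accessor for the canonical IV's type. Presumably getCanonicalIVType() is just shorthand for reaching through the phi, along these lines (an assumption, not the actual definition):

    // Assumed shape of the accessor used above; shorthand for the old
    // getCanonicalIV()->getScalarType() chain.
    Type *VPRegionBlock::getCanonicalIVType() {
      return getCanonicalIV()->getScalarType();
    }

The rename from getConstantVPV to getConstantInt also brings the unroller's private helper in line with the VPlan-level API it now delegates to.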
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 4db92e7..c6380d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -32,22 +32,17 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
}
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
- VPValue *Expanded = nullptr;
if (auto *E = dyn_cast<SCEVConstant>(Expr))
- Expanded = Plan.getOrAddLiveIn(E->getValue());
- else {
- auto *U = dyn_cast<SCEVUnknown>(Expr);
- // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
- // value. Otherwise the value may be defined in a loop and using it directly
- // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
- // form.
- if (U && !isa<Instruction>(U->getValue())) {
- Expanded = Plan.getOrAddLiveIn(U->getValue());
- } else {
- Expanded = new VPExpandSCEVRecipe(Expr);
- Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
- }
- }
+ return Plan.getOrAddLiveIn(E->getValue());
+ // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
+ // value. Otherwise the value may be defined in a loop and using it directly
+ // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
+ // form.
+ auto *U = dyn_cast<SCEVUnknown>(Expr);
+ if (U && !isa<Instruction>(U->getValue()))
+ return Plan.getOrAddLiveIn(U->getValue());
+ auto *Expanded = new VPExpandSCEVRecipe(Expr);
+ Plan.getEntry()->appendRecipe(Expanded);
return Expanded;
}
@@ -75,7 +70,8 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
B == Plan.getBackedgeTakenCount();
}
-const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
+const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
+ ScalarEvolution &SE, const Loop *L) {
if (V->isLiveIn()) {
if (Value *LiveIn = V->getLiveInIRValue())
return SE.getSCEV(LiveIn);
@@ -86,6 +82,53 @@ const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())
.Case<VPExpandSCEVRecipe>(
[](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
+ .Case<VPCanonicalIVPHIRecipe>([&SE, L](const VPCanonicalIVPHIRecipe *R) {
+ if (!L)
+ return SE.getCouldNotCompute();
+ const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
+ SCEV::FlagAnyWrap);
+ })
+ .Case<VPDerivedIVRecipe>([&SE, L](const VPDerivedIVRecipe *R) {
+ const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), SE, L);
+ const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), SE, L);
+ if (any_of(ArrayRef({Start, IV, Scale}), IsaPred<SCEVCouldNotCompute>))
+ return SE.getCouldNotCompute();
+
+ return SE.getAddExpr(SE.getTruncateOrSignExtend(Start, IV->getType()),
+ SE.getMulExpr(IV, SE.getTruncateOrSignExtend(
+ Scale, IV->getType())));
+ })
+ .Case<VPScalarIVStepsRecipe>([&SE, L](const VPScalarIVStepsRecipe *R) {
+ const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), SE, L);
+ if (isa<SCEVCouldNotCompute>(IV) || isa<SCEVCouldNotCompute>(Step) ||
+ !Step->isOne())
+ return SE.getCouldNotCompute();
+ return SE.getMulExpr(SE.getTruncateOrSignExtend(IV, Step->getType()),
+ Step);
+ })
+ .Case<VPReplicateRecipe>([&SE, L](const VPReplicateRecipe *R) {
+ if (R->getOpcode() != Instruction::GetElementPtr)
+ return SE.getCouldNotCompute();
+
+ const SCEV *Base = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ if (isa<SCEVCouldNotCompute>(Base))
+ return SE.getCouldNotCompute();
+
+ SmallVector<const SCEV *> IndexExprs;
+ for (VPValue *Index : drop_begin(R->operands())) {
+ const SCEV *IndexExpr = getSCEVExprForVPValue(Index, SE, L);
+ if (isa<SCEVCouldNotCompute>(IndexExpr))
+ return SE.getCouldNotCompute();
+ IndexExprs.push_back(IndexExpr);
+ }
+
+ Type *SrcElementTy = cast<GetElementPtrInst>(R->getUnderlyingInstr())
+ ->getSourceElementType();
+ return SE.getGEPExpr(Base, IndexExprs, SrcElementTy);
+ })
.Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
}
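
The new TypeSwitch cases teach getSCEVExprForVPValue to recover expressions for values defined inside the plan, provided a loop is passed in: the canonical IV becomes an affine recurrence, and a derived IV composes on top of it. A small illustration with assumed inputs (CanIVTy and L stand in for the IV's type and the original loop):

    // Illustration only, mirroring the cases above with assumed inputs.
    // Canonical IV with start 0 in loop L: {0,+,1}<L>.
    const SCEV *Start = SE.getZero(CanIVTy);
    const SCEV *IV =
        SE.getAddRecExpr(Start, SE.getOne(CanIVTy), L, SCEV::FlagAnyWrap);
    // Derived IV with Start = 4 and Scale = 2 on top of it; SCEV folds
    // 4 + 2 * {0,+,1}<L> down to {4,+,2}<L>.
    const SCEV *Four = SE.getConstant(CanIVTy, 4);
    const SCEV *Two = SE.getConstant(CanIVTy, 2);
    const SCEV *Derived = SE.getAddExpr(Four, SE.getMulExpr(Two, IV));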
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 37cd413..c21a0e7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -37,7 +37,8 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr);
/// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no
/// SCEV expression could be constructed.
-const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
+const SCEV *getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE,
+ const Loop *L = nullptr);
/// Returns true if \p VPV is a single scalar, either because it produces the
/// same value for all lanes or only has its first lane used.
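
With the defaulted loop parameter, the header change is source-compatible: existing callers keep the old behavior, while callers that know the original loop can opt into the new recurrence cases. A usage sketch (variable names assumed):

    // Usage sketch; V, SE and OrigLoop are assumed to be in scope.
    const SCEV *Flat = vputils::getSCEVExprForVPValue(V, SE);  // L == nullptr
    const SCEV *Rec = vputils::getSCEVExprForVPValue(V, SE, OrigLoop);

Without a loop, the VPCanonicalIVPHIRecipe case above bails out with SCEVCouldNotCompute, so passing the loop is what unlocks the AddRec results.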