Diffstat (limited to 'llvm/lib')
 llvm/lib/Analysis/LoopAccessAnalysis.cpp | 44
 llvm/lib/Analysis/ValueTracking.cpp | 128
 llvm/lib/Analysis/VectorUtils.cpp | 11
 llvm/lib/BinaryFormat/CMakeLists.txt | 1
 llvm/lib/BinaryFormat/Minidump.cpp | 14
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 36
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2
 llvm/lib/CAS/ActionCaches.cpp | 166
 llvm/lib/CAS/BuiltinCAS.cpp | 14
 llvm/lib/CAS/BuiltinCAS.h | 25
 llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp | 38
 llvm/lib/CAS/CMakeLists.txt | 3
 llvm/lib/CAS/InMemoryCAS.cpp | 8
 llvm/lib/CAS/ObjectStore.cpp | 93
 llvm/lib/CAS/OnDiskCAS.cpp | 211
 llvm/lib/CAS/OnDiskGraphDB.cpp | 34
 llvm/lib/CAS/OnDiskKeyValueDB.cpp | 21
 llvm/lib/CAS/UnifiedOnDiskCache.cpp | 613
 llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 5
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 16
 llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 4
 llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 3
 llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 109
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 5
 llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h | 2
 llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h | 2
 llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 4
 llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp | 2
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2
 llvm/lib/IR/Instructions.cpp | 10
 llvm/lib/IR/RuntimeLibcalls.cpp | 79
 llvm/lib/IR/Value.cpp | 8
 llvm/lib/LTO/LTO.cpp | 40
 llvm/lib/MC/GOFFObjectWriter.cpp | 2
 llvm/lib/MC/MCDXContainerWriter.cpp | 2
 llvm/lib/MC/MCGOFFStreamer.cpp | 2
 llvm/lib/MC/SPIRVObjectWriter.cpp | 7
 llvm/lib/ObjCopy/COFF/COFFWriter.h | 2
 llvm/lib/ObjCopy/ELF/ELFObject.h | 10
 llvm/lib/ObjCopy/MachO/MachOReader.h | 2
 llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h | 2
 llvm/lib/Object/MachOObjectFile.cpp | 26
 llvm/lib/ObjectYAML/GOFFYAML.cpp | 2
 llvm/lib/Passes/PassBuilderPipelines.cpp | 2
 llvm/lib/Passes/StandardInstrumentations.cpp | 4
 llvm/lib/SandboxIR/Context.cpp | 2
 llvm/lib/Support/BalancedPartitioning.cpp | 2
 llvm/lib/Support/BranchProbability.cpp | 2
 llvm/lib/Support/CommandLine.cpp | 8
 llvm/lib/Support/DAGDeltaAlgorithm.cpp | 16
 llvm/lib/Support/DynamicLibrary.cpp | 2
 llvm/lib/Support/Mustache.cpp | 186
 llvm/lib/Support/Timer.cpp | 2
 llvm/lib/Support/UnicodeNameToCodepoint.cpp | 8
 llvm/lib/TableGen/Record.cpp | 4
 llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp | 14
 llvm/lib/Target/AArch64/AArch64CallingConvention.td | 6
 llvm/lib/Target/AArch64/AArch64FastISel.cpp | 5
 llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 8
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 43
 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1
 llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 19
 llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp | 211
 llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h | 10
 llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 14
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 177
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 14
 llvm/lib/Target/AMDGPU/AMDGPU.h | 6
 llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 2
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 2
 llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h | 2
 llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 4
 llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp | 90
 llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 2
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 2
 llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp | 2
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 4
 llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 9
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 4
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6
 llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 2
 llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 10
 llvm/lib/Target/ARM/ARMFastISel.cpp | 2
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 78
 llvm/lib/Target/ARM/ARMISelLowering.h | 1
 llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 9
 llvm/lib/Target/BPF/BPFAsmPrinter.cpp | 34
 llvm/lib/Target/BPF/BPFAsmPrinter.h | 1
 llvm/lib/Target/DirectX/DXContainerGlobals.cpp | 7
 llvm/lib/Target/DirectX/DXIL.td | 9
 llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp | 8
 llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 2
 llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4
 llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 4
 llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 4
 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp | 2
 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 2
 llvm/lib/Target/Mips/MipsFastISel.cpp | 2
 llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h | 2
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 22
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 2
 llvm/lib/Target/PowerPC/PPCInstrFuture.td | 8
 llvm/lib/Target/PowerPC/PPCInstrMMA.td | 4
 llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 125
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 1
 llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 2
 llvm/lib/Target/RISCV/RISCVFeatures.td | 5
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 16
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 127
 llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp | 2
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4
 llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 1
 llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td | 6
 llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 6
 llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 1
 llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp | 2
 llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp | 5
 llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 30
 llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 6
 llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 4
 llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 65
 llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 2
 llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp | 11
 llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td | 19
 llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 5
 llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h | 2
 llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp | 2
 llvm/lib/Target/X86/X86.h | 26
 llvm/lib/Target/X86/X86CompressEVEX.cpp | 2
 llvm/lib/Target/X86/X86ISelLowering.cpp | 256
 llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp | 3
 llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp | 48
 llvm/lib/Target/X86/X86PartialReduction.cpp | 72
 llvm/lib/Target/X86/X86PassRegistry.def | 4
 llvm/lib/Target/X86/X86TargetMachine.cpp | 6
 llvm/lib/Target/X86/X86VZeroUpper.cpp | 2
 llvm/lib/TextAPI/RecordVisitor.cpp | 2
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 61
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 6
 llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 24
 llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 29
 llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp | 2
 llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp | 3
 llvm/lib/Transforms/Scalar/LoopFuse.cpp | 16
 llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 16
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 3
 llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 19
 llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 19
 llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1
 llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp | 48
 llvm/lib/Transforms/Utils/Local.cpp | 6
 llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 21
 llvm/lib/Transforms/Utils/LoopUtils.cpp | 5
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 149
 llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 6
 llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 5
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 21
 llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp | 2
 llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp | 4
 llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp | 2
 llvm/lib/Transforms/Vectorize/VPlan.h | 13
 llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 61
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 284
 llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 25
 llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 7
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 27
 168 files changed, 3485 insertions(+), 1197 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index e27a9b1..5d88e5f 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -806,11 +806,11 @@ public:
typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
- MemoryDepChecker::DepCandidates &DA,
+ DominatorTree &DT, MemoryDepChecker::DepCandidates &DA,
PredicatedScalarEvolution &PSE,
SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
- : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
- LoopAliasScopes(LoopAliasScopes) {
+ : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA),
+ PSE(PSE), LoopAliasScopes(LoopAliasScopes) {
// We're analyzing dependences across loop iterations.
BAA.enableCrossIterationMode();
}
@@ -934,6 +934,9 @@ private:
/// The LoopInfo of the loop being checked.
const LoopInfo *LI;
+ /// The dominator tree of the function.
+ DominatorTree &DT;
+
/// Sets of potentially dependent accesses - members of one set share an
/// underlying pointer. The set "CheckDeps" identfies which sets really need a
/// dependence check.
@@ -1015,6 +1018,7 @@ getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
/// informating from the IR pointer value to determine no-wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR,
Value *Ptr, Type *AccessTy, const Loop *L, bool Assume,
+ const DominatorTree &DT,
std::optional<int64_t> Stride = std::nullopt) {
// FIXME: This should probably only return true for NUW.
if (AR->getNoWrapFlags(SCEV::NoWrapMask))
@@ -1029,8 +1033,18 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR,
// case, the GEP would be poison and any memory access dependent on it would
// be immediate UB when executed.
if (auto *GEP = dyn_cast_if_present<GetElementPtrInst>(Ptr);
- GEP && GEP->hasNoUnsignedSignedWrap())
- return true;
+ GEP && GEP->hasNoUnsignedSignedWrap()) {
+ // For the above reasoning to apply, the pointer must be dereferenced in
+ // every iteration.
+ if (L->getHeader() == L->getLoopLatch() ||
+ any_of(GEP->users(), [L, &DT, GEP](User *U) {
+ if (getLoadStorePointerOperand(U) != GEP)
+ return false;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ return !LoopAccessInfo::blockNeedsPredication(UserBB, L, &DT);
+ }))
+ return true;
+ }
if (!Stride)
Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
@@ -1293,7 +1307,7 @@ bool AccessAnalysis::createCheckForAccess(
}
if (!isNoWrap(PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
- TheLoop, Assume))
+ TheLoop, Assume, DT))
return false;
}
@@ -1606,7 +1620,7 @@ void AccessAnalysis::processMemAccesses() {
/// Check whether the access through \p Ptr has a constant stride.
std::optional<int64_t>
llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
- const Loop *Lp,
+ const Loop *Lp, const DominatorTree &DT,
const DenseMap<Value *, const SCEV *> &StridesMap,
bool Assume, bool ShouldCheckWrap) {
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
@@ -1630,7 +1644,7 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
if (!ShouldCheckWrap || !Stride)
return Stride;
- if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, Stride))
+ if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, DT, Stride))
return Stride;
LLVM_DEBUG(
@@ -2047,10 +2061,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
BPtr->getType()->getPointerAddressSpace())
return MemoryDepChecker::Dependence::Unknown;
- std::optional<int64_t> StrideAPtr =
- getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
- std::optional<int64_t> StrideBPtr =
- getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);
+ std::optional<int64_t> StrideAPtr = getPtrStride(
+ PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, true, true);
+ std::optional<int64_t> StrideBPtr = getPtrStride(
+ PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, true, true);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -2627,7 +2641,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
}
MemoryDepChecker::DepCandidates DepCands;
- AccessAnalysis Accesses(TheLoop, AA, LI, DepCands, *PSE, LoopAliasScopes);
+ AccessAnalysis Accesses(TheLoop, AA, LI, *DT, DepCands, *PSE,
+ LoopAliasScopes);
// Holds the analyzed pointers. We don't want to call getUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
@@ -2691,7 +2706,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
bool IsReadOnlyPtr = false;
Type *AccessTy = getLoadStoreType(LD);
if (Seen.insert({Ptr, AccessTy}).second ||
- !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, SymbolicStrides)) {
+ !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides, false,
+ true)) {
++NumReads;
IsReadOnlyPtr = true;
}
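
The isNoWrap() change above only trusts a GEP's nusw flag when the loop body is straight-line (header == latch) or when at least one load/store that uses the GEP as its pointer operand executes on every iteration. A minimal sketch of the case the new guard exists for (hypothetical C source, not part of the patch):

    // If the access is predicated, the GEP may compute poison on iterations
    // where it is never dereferenced, so "the access would be immediate UB if
    // the GEP wrapped" no longer covers the whole iteration space:
    for (int I = 0; I < N; ++I)
      if (Cond[I])     // UserBB needs predication within the loop
        Sum += A[I];   // A + I is dereferenced only when Cond[I] is true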
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 0a72076..789a983 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7419,84 +7419,20 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
if (cast<ConstantInt>(II->getArgOperand(1))->isNullValue())
return false;
break;
- case Intrinsic::ctpop:
- case Intrinsic::bswap:
- case Intrinsic::bitreverse:
- case Intrinsic::fshl:
- case Intrinsic::fshr:
- case Intrinsic::smax:
- case Intrinsic::smin:
- case Intrinsic::scmp:
- case Intrinsic::umax:
- case Intrinsic::umin:
- case Intrinsic::ucmp:
- case Intrinsic::ptrmask:
- case Intrinsic::fptoui_sat:
- case Intrinsic::fptosi_sat:
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::usub_with_overflow:
- case Intrinsic::umul_with_overflow:
- case Intrinsic::sadd_sat:
- case Intrinsic::uadd_sat:
- case Intrinsic::ssub_sat:
- case Intrinsic::usub_sat:
- return false;
case Intrinsic::sshl_sat:
case Intrinsic::ushl_sat:
- return includesPoison(Kind) &&
- !shiftAmountKnownInRange(II->getArgOperand(1));
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- case Intrinsic::sqrt:
- case Intrinsic::powi:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::pow:
- case Intrinsic::log:
- case Intrinsic::log10:
- case Intrinsic::log2:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::exp10:
- case Intrinsic::fabs:
- case Intrinsic::copysign:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::round:
- case Intrinsic::roundeven:
- case Intrinsic::fptrunc_round:
- case Intrinsic::canonicalize:
- case Intrinsic::arithmetic_fence:
- case Intrinsic::minnum:
- case Intrinsic::maxnum:
- case Intrinsic::minimum:
- case Intrinsic::maximum:
- case Intrinsic::minimumnum:
- case Intrinsic::maximumnum:
- case Intrinsic::is_fpclass:
- case Intrinsic::ldexp:
- case Intrinsic::frexp:
- return false;
- case Intrinsic::lround:
- case Intrinsic::llround:
- case Intrinsic::lrint:
- case Intrinsic::llrint:
- // If the value doesn't fit an unspecified value is returned (but this
- // is not poison).
- return false;
+ if (!includesPoison(Kind) ||
+ shiftAmountKnownInRange(II->getArgOperand(1)))
+ return false;
+ break;
}
}
[[fallthrough]];
case Instruction::CallBr:
case Instruction::Invoke: {
const auto *CB = cast<CallBase>(Op);
- return !CB->hasRetAttr(Attribute::NoUndef);
+ return !CB->hasRetAttr(Attribute::NoUndef) &&
+ !CB->hasFnAttr(Attribute::NoCreateUndefOrPoison);
}
case Instruction::InsertElement:
case Instruction::ExtractElement: {
@@ -10405,3 +10341,55 @@ const Value *llvm::stripNullTest(const Value *V) {
Value *llvm::stripNullTest(Value *V) {
return const_cast<Value *>(stripNullTest(const_cast<const Value *>(V)));
}
+
+bool llvm::collectPossibleValues(const Value *V,
+ SmallPtrSetImpl<const Constant *> &Constants,
+ unsigned MaxCount, bool AllowUndefOrPoison) {
+ SmallPtrSet<const Instruction *, 8> Visited;
+ SmallVector<const Instruction *, 8> Worklist;
+ auto Push = [&](const Value *V) -> bool {
+ if (auto *C = dyn_cast<Constant>(V)) {
+ if (!AllowUndefOrPoison && !isGuaranteedNotToBeUndefOrPoison(C))
+ return false;
+ // Check existence first to avoid unnecessary allocations.
+ if (Constants.contains(C))
+ return true;
+ if (Constants.size() == MaxCount)
+ return false;
+ Constants.insert(C);
+ return true;
+ }
+
+ if (auto *Inst = dyn_cast<Instruction>(V)) {
+ if (Visited.insert(Inst).second)
+ Worklist.push_back(Inst);
+ return true;
+ }
+ return false;
+ };
+ if (!Push(V))
+ return false;
+ while (!Worklist.empty()) {
+ const Instruction *CurInst = Worklist.pop_back_val();
+ switch (CurInst->getOpcode()) {
+ case Instruction::Select:
+ if (!Push(CurInst->getOperand(1)))
+ return false;
+ if (!Push(CurInst->getOperand(2)))
+ return false;
+ break;
+ case Instruction::PHI:
+ for (Value *IncomingValue : cast<PHINode>(CurInst)->incoming_values()) {
+ // Fast path for recurrence PHI.
+ if (IncomingValue == CurInst)
+ continue;
+ if (!Push(IncomingValue))
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
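
collectPossibleValues() above walks select and phi chains to enumerate the closed set of constants a value may take, stopping at MaxCount. A hedged usage sketch (the caller-side handler is illustrative):

    // Collect up to 4 constants V can evaluate to; returns false if V can be
    // something other than a select/phi over constants, if the budget is
    // exceeded, or if a leaf constant may be undef/poison.
    SmallPtrSet<const Constant *, 4> PossibleValues;
    if (collectPossibleValues(V, PossibleValues, /*MaxCount=*/4,
                              /*AllowUndefOrPoison=*/false))
      for (const Constant *C : PossibleValues)
        handleCandidate(C); // hypothetical consumer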
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 091d948..977ed59 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -1387,9 +1387,9 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
// wrap around the address space we would do a memory access at nullptr
// even without the transformation. The wrapping checks are therefore
// deferred until after we've formed the interleaved groups.
- int64_t Stride =
- getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
- /*Assume=*/true, /*ShouldCheckWrap=*/false).value_or(0);
+ int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides,
+ /*Assume=*/true, /*ShouldCheckWrap=*/false)
+ .value_or(0);
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
@@ -1643,8 +1643,9 @@ void InterleavedAccessInfo::analyzeInterleaving(
assert(Member && "Group member does not exist");
Value *MemberPtr = getLoadStorePointerOperand(Member);
Type *AccessTy = getLoadStoreType(Member);
- if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
- /*Assume=*/false, /*ShouldCheckWrap=*/true).value_or(0))
+ if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
+ /*Assume=*/false, /*ShouldCheckWrap=*/true)
+ .value_or(0))
return false;
LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
<< FirstOrLast
diff --git a/llvm/lib/BinaryFormat/CMakeLists.txt b/llvm/lib/BinaryFormat/CMakeLists.txt
index 4b2debb..0c8af1e 100644
--- a/llvm/lib/BinaryFormat/CMakeLists.txt
+++ b/llvm/lib/BinaryFormat/CMakeLists.txt
@@ -6,7 +6,6 @@ add_llvm_component_library(LLVMBinaryFormat
ELF.cpp
MachO.cpp
Magic.cpp
- Minidump.cpp
MsgPackDocument.cpp
MsgPackDocumentYAML.cpp
MsgPackReader.cpp
diff --git a/llvm/lib/BinaryFormat/Minidump.cpp b/llvm/lib/BinaryFormat/Minidump.cpp
deleted file mode 100644
index b618fb1..0000000
--- a/llvm/lib/BinaryFormat/Minidump.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===-- Minidump.cpp - Minidump constants and structures ---------*- C++-*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/BinaryFormat/Minidump.h"
-
-using namespace llvm::minidump;
-
-constexpr uint32_t Header::MagicSignature;
-constexpr uint16_t Header::MagicVersion;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 466dcb0..8930d64 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2257,6 +2257,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::Captures;
case bitc::ATTR_KIND_DEAD_ON_RETURN:
return Attribute::DeadOnReturn;
+ case bitc::ATTR_KIND_NO_CREATE_UNDEF_OR_POISON:
+ return Attribute::NoCreateUndefOrPoison;
}
}
@@ -8566,16 +8568,13 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
}
static Expected<std::pair<bool, bool>>
-getEnableSplitLTOUnitAndUnifiedFlag(BitstreamCursor &Stream,
- unsigned ID,
- BitcodeLTOInfo &LTOInfo) {
+getEnableSplitLTOUnitAndUnifiedFlag(BitstreamCursor &Stream, unsigned ID) {
if (Error Err = Stream.EnterSubBlock(ID))
return std::move(Err);
- SmallVector<uint64_t, 64> Record;
+ SmallVector<uint64_t, 64> Record;
while (true) {
BitstreamEntry Entry;
- std::pair<bool, bool> Result = {false,false};
if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
return std::move(E);
@@ -8584,8 +8583,8 @@ getEnableSplitLTOUnitAndUnifiedFlag(BitstreamCursor &Stream,
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock: {
- // If no flags record found, set both flags to false.
- return Result;
+ // If no flags record found, return both flags as false.
+ return std::make_pair(false, false);
}
case BitstreamEntry::Record:
// The interesting case.
@@ -8607,9 +8606,7 @@ getEnableSplitLTOUnitAndUnifiedFlag(BitstreamCursor &Stream,
bool EnableSplitLTOUnit = Flags & 0x8;
bool UnifiedLTO = Flags & 0x200;
- Result = {EnableSplitLTOUnit, UnifiedLTO};
-
- return Result;
+ return std::make_pair(EnableSplitLTOUnit, UnifiedLTO);
}
}
}
@@ -8638,26 +8635,15 @@ Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
/*EnableSplitLTOUnit=*/false, /*UnifiedLTO=*/false};
case BitstreamEntry::SubBlock:
- if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) {
- BitcodeLTOInfo LTOInfo;
+ if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID ||
+ Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) {
Expected<std::pair<bool, bool>> Flags =
- getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID, LTOInfo);
+ getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID);
if (!Flags)
return Flags.takeError();
- std::tie(LTOInfo.EnableSplitLTOUnit, LTOInfo.UnifiedLTO) = Flags.get();
- LTOInfo.IsThinLTO = true;
- LTOInfo.HasSummary = true;
- return LTOInfo;
- }
-
- if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) {
BitcodeLTOInfo LTOInfo;
- Expected<std::pair<bool, bool>> Flags =
- getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID, LTOInfo);
- if (!Flags)
- return Flags.takeError();
std::tie(LTOInfo.EnableSplitLTOUnit, LTOInfo.UnifiedLTO) = Flags.get();
- LTOInfo.IsThinLTO = false;
+ LTOInfo.IsThinLTO = (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID);
LTOInfo.HasSummary = true;
return LTOInfo;
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index f17656c..76494c7 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -956,6 +956,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_CAPTURES;
case Attribute::DeadOnReturn:
return bitc::ATTR_KIND_DEAD_ON_RETURN;
+ case Attribute::NoCreateUndefOrPoison:
+ return bitc::ATTR_KIND_NO_CREATE_UNDEF_OR_POISON;
case Attribute::EndAttrKinds:
llvm_unreachable("Can not encode end-attribute kinds marker.");
case Attribute::None:
diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp
index 571c5b3..003c850 100644
--- a/llvm/lib/CAS/ActionCaches.cpp
+++ b/llvm/lib/CAS/ActionCaches.cpp
@@ -13,7 +13,11 @@
#include "BuiltinCAS.h"
#include "llvm/ADT/TrieRawHashMap.h"
#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/BLAKE3.h"
+#include "llvm/Support/Errc.h"
#define DEBUG_TYPE "cas-action-caches"
@@ -47,12 +51,54 @@ public:
Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
bool CanBeDistributed) const final;
+ Error validate() const final {
+ return createStringError("InMemoryActionCache doesn't support validate()");
+ }
+
private:
using DataT = CacheEntry<sizeof(HashType)>;
using InMemoryCacheT = ThreadSafeTrieRawHashMap<DataT, sizeof(HashType)>;
InMemoryCacheT Cache;
};
+
+/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB.
+class OnDiskActionCache final : public ActionCache {
+public:
+ Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
+ bool CanBeDistributed) final;
+ Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
+ bool CanBeDistributed) const final;
+
+ static Expected<std::unique_ptr<OnDiskActionCache>> create(StringRef Path);
+
+ Error validate() const final;
+
+private:
+ static StringRef getHashName() { return "BLAKE3"; }
+
+ OnDiskActionCache(std::unique_ptr<ondisk::OnDiskKeyValueDB> DB);
+
+ std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
+ using DataT = CacheEntry<sizeof(HashType)>;
+};
+
+/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide
+/// access to its ActionCache.
+class UnifiedOnDiskActionCache final : public ActionCache {
+public:
+ Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
+ bool CanBeDistributed) final;
+ Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
+ bool CanBeDistributed) const final;
+
+ UnifiedOnDiskActionCache(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+ Error validate() const final;
+
+private:
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
+};
} // end namespace
static Error createResultCachePoisonedError(ArrayRef<uint8_t> KeyHash,
@@ -99,3 +145,123 @@ std::unique_ptr<ActionCache> createInMemoryActionCache() {
}
} // namespace llvm::cas
+
+OnDiskActionCache::OnDiskActionCache(
+ std::unique_ptr<ondisk::OnDiskKeyValueDB> DB)
+ : ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
+ DB(std::move(DB)) {}
+
+Expected<std::unique_ptr<OnDiskActionCache>>
+OnDiskActionCache::create(StringRef AbsPath) {
+ std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
+ if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(),
+ sizeof(HashType), getHashName(),
+ sizeof(DataT))
+ .moveInto(DB))
+ return std::move(E);
+ return std::unique_ptr<OnDiskActionCache>(
+ new OnDiskActionCache(std::move(DB)));
+}
+
+Expected<std::optional<CASID>>
+OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
+ bool /*CanBeDistributed*/) const {
+ std::optional<ArrayRef<char>> Val;
+ if (Error E = DB->get(Key).moveInto(Val))
+ return std::move(E);
+ if (!Val)
+ return std::nullopt;
+ return CASID::create(&getContext(), toStringRef(*Val));
+}
+
+Error OnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result,
+ bool /*CanBeDistributed*/) {
+ auto ResultHash = Result.getHash();
+ ArrayRef Expected((const char *)ResultHash.data(), ResultHash.size());
+ ArrayRef<char> Observed;
+ if (Error E = DB->put(Key, Expected).moveInto(Observed))
+ return E;
+
+ if (Expected == Observed)
+ return Error::success();
+
+ return createResultCachePoisonedError(
+ Key, getContext(), Result,
+ ArrayRef((const uint8_t *)Observed.data(), Observed.size()));
+}
+
+Error OnDiskActionCache::validate() const {
+ // FIXME: without the matching CAS there is nothing we can check about the
+ // cached values. The hash size is already validated by the DB validator.
+ return DB->validate(nullptr);
+}
+
+UnifiedOnDiskActionCache::UnifiedOnDiskActionCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
+ : ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
+ UniDB(std::move(UniDB)) {}
+
+Expected<std::optional<CASID>>
+UnifiedOnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
+ bool /*CanBeDistributed*/) const {
+ std::optional<ArrayRef<char>> Val;
+ if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val))
+ return std::move(E);
+ if (!Val)
+ return std::nullopt;
+ auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val);
+ return CASID::create(&getContext(),
+ toStringRef(UniDB->getGraphDB().getDigest(ID)));
+}
+
+Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
+ const CASID &Result,
+ bool /*CanBeDistributed*/) {
+ auto Expected = UniDB->getGraphDB().getReference(Result.getHash());
+ if (LLVM_UNLIKELY(!Expected))
+ return Expected.takeError();
+
+ auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected);
+ std::optional<ArrayRef<char>> Observed;
+ if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed))
+ return E;
+
+ auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed);
+ if (*Expected == ObservedID)
+ return Error::success();
+
+ return createResultCachePoisonedError(
+ Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID));
+}
+
+Error UnifiedOnDiskActionCache::validate() const {
+ auto ValidateRef = [](FileOffset Offset, ArrayRef<char> Value) -> Error {
+ auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value);
+ auto formatError = [&](Twine Msg) {
+ return createStringError(
+ llvm::errc::illegal_byte_sequence,
+ "bad record at 0x" +
+ utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
+ Msg.str());
+ };
+ if (ID.getOpaqueData() == 0)
+ return formatError("zero is not a valid ref");
+ return Error::success();
+ };
+ return UniDB->getKeyValueDB().validate(ValidateRef);
+}
+
+Expected<std::unique_ptr<ActionCache>>
+cas::createOnDiskActionCache(StringRef Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+ return OnDiskActionCache::create(Path);
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
+
+std::unique_ptr<ActionCache>
+cas::builtin::createActionCacheFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
+ return std::make_unique<UnifiedOnDiskActionCache>(std::move(UniDB));
+}
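
A note on the poisoning semantics implemented by the two putImpl() overrides above: the first key/result association wins, re-putting the same pair is idempotent, and rebinding a key to a different result is reported as corruption. A sketch under those rules (direct putImpl() calls shown for illustration only):

    // AC is an OnDiskActionCache; Key is an ArrayRef<uint8_t> action key.
    cantFail(AC.putImpl(Key, ResultA, /*CanBeDistributed=*/false));
    cantFail(AC.putImpl(Key, ResultA, /*CanBeDistributed=*/false)); // no-op
    Error E = AC.putImpl(Key, ResultB, /*CanBeDistributed=*/false);
    // E carries createResultCachePoisonedError(...) describing the clash.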
diff --git a/llvm/lib/CAS/BuiltinCAS.cpp b/llvm/lib/CAS/BuiltinCAS.cpp
index 73646ad..e9bc6d8 100644
--- a/llvm/lib/CAS/BuiltinCAS.cpp
+++ b/llvm/lib/CAS/BuiltinCAS.cpp
@@ -9,6 +9,7 @@
#include "BuiltinCAS.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CAS/BuiltinObjectHasher.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
#include "llvm/Support/Process.h"
using namespace llvm;
@@ -68,7 +69,7 @@ Expected<ObjectRef> BuiltinCAS::store(ArrayRef<ObjectRef> Refs,
Refs, Data);
}
-Error BuiltinCAS::validate(const CASID &ID) {
+Error BuiltinCAS::validateObject(const CASID &ID) {
auto Ref = getReference(ID);
if (!Ref)
return createUnknownObjectError(ID);
@@ -92,3 +93,14 @@ Error BuiltinCAS::validate(const CASID &ID) {
return Error::success();
}
+
+Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>>
+cas::builtin::createBuiltinUnifiedOnDiskCache(StringRef Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+ return ondisk::UnifiedOnDiskCache::open(Path, /*SizeLimit=*/std::nullopt,
+ BuiltinCASContext::getHashName(),
+ sizeof(HashType));
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
diff --git a/llvm/lib/CAS/BuiltinCAS.h b/llvm/lib/CAS/BuiltinCAS.h
index 3b5374d..4d2de66 100644
--- a/llvm/lib/CAS/BuiltinCAS.h
+++ b/llvm/lib/CAS/BuiltinCAS.h
@@ -1,4 +1,4 @@
-//===- BuiltinCAS.h ---------------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,6 +15,9 @@
namespace llvm::cas {
class ActionCache;
+namespace ondisk {
+class UnifiedOnDiskCache;
+} // namespace ondisk
namespace builtin {
/// Common base class for builtin CAS implementations using the same CASContext.
@@ -65,9 +68,27 @@ public:
"corrupt storage");
}
- Error validate(const CASID &ID) final;
+ Error validateObject(const CASID &ID) final;
};
+/// Create a \p UnifiedOnDiskCache instance that uses \p BLAKE3 hashing.
+Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>>
+createBuiltinUnifiedOnDiskCache(StringRef Path);
+
+/// \param UniDB A \p UnifiedOnDiskCache instance from \p
+/// createBuiltinUnifiedOnDiskCache.
+std::unique_ptr<ObjectStore> createObjectStoreFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+/// \param UniDB A \p UnifiedOnDiskCache instance from \p
+/// createBuiltinUnifiedOnDiskCache.
+std::unique_ptr<ActionCache> createActionCacheFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+// FIXME: Proxy not portable. Maybe also error-prone?
+constexpr StringLiteral DefaultDirProxy = "/^llvm::cas::builtin::default";
+constexpr StringLiteral DefaultDir = "llvm.cas.builtin.default";
+
} // end namespace builtin
} // end namespace llvm::cas
diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
new file mode 100644
index 0000000..f3f6fa0
--- /dev/null
+++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "BuiltinCAS.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
+cas::createOnDiskUnifiedCASDatabases(StringRef Path) {
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
+ if (Error E = builtin::createBuiltinUnifiedOnDiskCache(Path).moveInto(UniDB))
+ return std::move(E);
+ auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB);
+ auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB));
+ return std::make_pair(std::move(CAS), std::move(AC));
+}
+
+Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
+ StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
+ std::optional<StringRef> LLVMCasBinary) {
+#if LLVM_ENABLE_ONDISK_CAS
+ return ondisk::UnifiedOnDiskCache::validateIfNeeded(
+ Path, builtin::BuiltinCASContext::getHashName(),
+ sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation,
+ LLVMCasBinary);
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
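
createOnDiskUnifiedCASDatabases() is the convenience entry point that returns an ObjectStore and an ActionCache backed by the same UnifiedOnDiskCache. A usage sketch (error handling deferred to the caller):

    // Both databases share one on-disk directory and its lock; the call
    // fails when LLVM_ENABLE_ONDISK_CAS is configured off.
    auto DBs = cas::createOnDiskUnifiedCASDatabases(Path);
    if (!DBs)
      return DBs.takeError();
    std::unique_ptr<ObjectStore> CAS = std::move(DBs->first);
    std::unique_ptr<ActionCache> Cache = std::move(DBs->second);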
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index a2f8c49..aad77dc 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -2,15 +2,18 @@ add_llvm_component_library(LLVMCAS
ActionCache.cpp
ActionCaches.cpp
BuiltinCAS.cpp
+ BuiltinUnifiedCASDatabases.cpp
DatabaseFile.cpp
InMemoryCAS.cpp
MappedFileRegionArena.cpp
ObjectStore.cpp
+ OnDiskCAS.cpp
OnDiskCommon.cpp
OnDiskDataAllocator.cpp
OnDiskGraphDB.cpp
OnDiskKeyValueDB.cpp
OnDiskTrieRawHashMap.cpp
+ UnifiedOnDiskCache.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS
diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp
index c63ee70d..2d4eedd 100644
--- a/llvm/lib/CAS/InMemoryCAS.cpp
+++ b/llvm/lib/CAS/InMemoryCAS.cpp
@@ -233,6 +233,12 @@ public:
return cast<InMemoryObject>(asInMemoryObject(Node)).getData();
}
+ void print(raw_ostream &OS) const final;
+
+ Error validate(bool CheckHash) const final {
+ return createStringError("InMemoryCAS doesn't support validate()");
+ }
+
InMemoryCAS() = default;
private:
@@ -271,6 +277,8 @@ ArrayRef<const InMemoryObject *> InMemoryObject::getRefs() const {
return cast<InMemoryInlineObject>(this)->getRefsImpl();
}
+void InMemoryCAS::print(raw_ostream &OS) const {}
+
Expected<ObjectRef>
InMemoryCAS::storeFromNullTerminatedRegion(ArrayRef<uint8_t> ComputedHash,
sys::fs::mapped_file_region Map) {
diff --git a/llvm/lib/CAS/ObjectStore.cpp b/llvm/lib/CAS/ObjectStore.cpp
index e0be50b..3110577 100644
--- a/llvm/lib/CAS/ObjectStore.cpp
+++ b/llvm/lib/CAS/ObjectStore.cpp
@@ -1,4 +1,4 @@
-//===- ObjectStore.cpp ------------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -12,7 +12,7 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <optional>
+#include <deque>
using namespace llvm;
using namespace llvm::cas;
@@ -21,6 +21,7 @@ void CASContext::anchor() {}
void ObjectStore::anchor() {}
LLVM_DUMP_METHOD void CASID::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void ObjectRef::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(dbgs()); }
@@ -141,7 +142,7 @@ Error ObjectStore::validateTree(ObjectRef Root) {
auto [I, Inserted] = ValidatedRefs.insert(Ref);
if (!Inserted)
continue; // already validated.
- if (Error E = validate(getID(Ref)))
+ if (Error E = validateObject(getID(Ref)))
return E;
Expected<ObjectHandle> Obj = load(Ref);
if (!Obj)
@@ -155,6 +156,92 @@ Error ObjectStore::validateTree(ObjectRef Root) {
return Error::success();
}
+Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream,
+ ObjectRef Other) {
+ // Copy the full CAS tree from upstream with depth-first ordering to ensure
+ // all the child nodes are available in the downstream CAS before inserting
+ // the current object. This uses a similar algorithm to
+ // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS
+ // schema, so it can be used to import from any other ObjectStore regardless
+ // of the CAS schema.
+
+ // There is no work to do if importing from self.
+ if (this == &Upstream)
+ return Other;
+
+ /// Keeps track of the visitation state for the current node and all of its
+ /// parents. An UpstreamCursor holds information only from the upstream CAS.
+ struct UpstreamCursor {
+ ObjectRef Ref;
+ ObjectHandle Node;
+ size_t RefsCount;
+ std::deque<ObjectRef> Refs;
+ };
+ SmallVector<UpstreamCursor, 16> CursorStack;
+ /// PrimaryRefStack holds ObjectRefs in the current CAS, for nodes that were
+ /// either just stored here or already existed in the current CAS.
+ SmallVector<ObjectRef, 128> PrimaryRefStack;
+ /// A map from upstream ObjectRef to current ObjectRef.
+ llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects;
+
+ auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) {
+ unsigned NumRefs = Upstream.getNumRefs(Node);
+ std::deque<ObjectRef> Refs;
+ for (unsigned I = 0; I < NumRefs; ++I)
+ Refs.push_back(Upstream.readRef(Node, I));
+
+ CursorStack.push_back({Ref, Node, NumRefs, std::move(Refs)});
+ };
+
+ auto UpstreamHandle = Upstream.load(Other);
+ if (!UpstreamHandle)
+ return UpstreamHandle.takeError();
+ enqueueNode(Other, *UpstreamHandle);
+
+ while (!CursorStack.empty()) {
+ UpstreamCursor &Cur = CursorStack.back();
+ if (Cur.Refs.empty()) {
+ // Copy the node data into the primary store.
+ // The bottom of \p PrimaryRefStack contains the ObjectRef for the
+ // current node.
+ assert(PrimaryRefStack.size() >= Cur.RefsCount);
+ auto Refs = ArrayRef(PrimaryRefStack)
+ .slice(PrimaryRefStack.size() - Cur.RefsCount);
+ auto NewNode = store(Refs, Upstream.getData(Cur.Node));
+ if (!NewNode)
+ return NewNode.takeError();
+
+ // Remove the current node and its IDs from the stack.
+ PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.RefsCount);
+ CursorStack.pop_back();
+
+ PrimaryRefStack.push_back(*NewNode);
+ CreatedObjects.try_emplace(Cur.Ref, *NewNode);
+ continue;
+ }
+
+ // Check if the node exists already.
+ auto CurrentID = Cur.Refs.front();
+ Cur.Refs.pop_front();
+ auto Ref = CreatedObjects.find(CurrentID);
+ if (Ref != CreatedObjects.end()) {
+ // If exists already, just need to enqueue the primary node.
+ PrimaryRefStack.push_back(Ref->second);
+ continue;
+ }
+
+ // Load child.
+ auto PrimaryID = Upstream.load(CurrentID);
+ if (LLVM_UNLIKELY(!PrimaryID))
+ return PrimaryID.takeError();
+
+ enqueueNode(CurrentID, *PrimaryID);
+ }
+
+ assert(PrimaryRefStack.size() == 1);
+ return PrimaryRefStack.front();
+}
+
std::unique_ptr<MemoryBuffer>
ObjectProxy::getMemoryBuffer(StringRef Name,
bool RequiresNullTerminator) const {
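
importObject() gives ObjectStore a schema-agnostic way to copy a whole object graph from another store, bottom-up, so children always land before their parents. A short usage sketch (store and ref names are illustrative):

    // Copies Root and everything it references from Upstream into Local,
    // reusing any nodes Local already contains.
    Expected<ObjectRef> Imported = Local->importObject(*Upstream, Root);
    if (!Imported)
      return Imported.takeError();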
diff --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp
new file mode 100644
index 0000000..7d29f44
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskCAS.cpp
@@ -0,0 +1,211 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BuiltinCAS.h"
+#include "llvm/CAS/BuiltinCASContext.h"
+#include "llvm/CAS/BuiltinObjectHasher.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::builtin;
+
+namespace {
+
+class OnDiskCAS : public BuiltinCAS {
+public:
+ Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash,
+ ArrayRef<ObjectRef> Refs,
+ ArrayRef<char> Data) final;
+
+ Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) final;
+
+ CASID getID(ObjectRef Ref) const final;
+
+ std::optional<ObjectRef> getReference(const CASID &ID) const final;
+
+ Expected<bool> isMaterialized(ObjectRef Ref) const final;
+
+ ArrayRef<char> getDataConst(ObjectHandle Node) const final;
+
+ void print(raw_ostream &OS) const final;
+ Error validate(bool CheckHash) const final;
+
+ static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path);
+
+ OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
+ : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {}
+
+private:
+ ObjectHandle convertHandle(ondisk::ObjectHandle Node) const {
+ return makeObjectHandle(Node.getOpaqueData());
+ }
+
+ ondisk::ObjectHandle convertHandle(ObjectHandle Node) const {
+ return ondisk::ObjectHandle(Node.getInternalRef(*this));
+ }
+
+ ObjectRef convertRef(ondisk::ObjectID Ref) const {
+ return makeObjectRef(Ref.getOpaqueData());
+ }
+
+ ondisk::ObjectID convertRef(ObjectRef Ref) const {
+ return ondisk::ObjectID::fromOpaqueData(Ref.getInternalRef(*this));
+ }
+
+ size_t getNumRefs(ObjectHandle Node) const final {
+ auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+ return std::distance(RefsRange.begin(), RefsRange.end());
+ }
+
+ ObjectRef readRef(ObjectHandle Node, size_t I) const final {
+ auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+ return convertRef(RefsRange.begin()[I]);
+ }
+
+ Error forEachRef(ObjectHandle Node,
+ function_ref<Error(ObjectRef)> Callback) const final;
+
+ Error setSizeLimit(std::optional<uint64_t> SizeLimit) final;
+ Expected<std::optional<uint64_t>> getStorageSize() const final;
+ Error pruneStorageData() final;
+
+ OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB)
+ : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {}
+
+ std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB;
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UnifiedDB;
+ ondisk::OnDiskGraphDB *DB;
+};
+
+} // end anonymous namespace
+
+void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); }
+Error OnDiskCAS::validate(bool CheckHash) const {
+ auto Hasher = [](ArrayRef<ArrayRef<uint8_t>> Refs, ArrayRef<char> Data,
+ SmallVectorImpl<uint8_t> &Result) {
+ auto Hash = BuiltinObjectHasher<llvm::cas::builtin::HasherT>::hashObject(
+ Refs, Data);
+ Result.assign(Hash.begin(), Hash.end());
+ };
+
+ if (auto E = DB->validate(CheckHash, Hasher))
+ return E;
+
+ return Error::success();
+}
+
+CASID OnDiskCAS::getID(ObjectRef Ref) const {
+ ArrayRef<uint8_t> Hash = DB->getDigest(convertRef(Ref));
+ return CASID::create(&getContext(), toStringRef(Hash));
+}
+
+std::optional<ObjectRef> OnDiskCAS::getReference(const CASID &ID) const {
+ std::optional<ondisk::ObjectID> ObjID =
+ DB->getExistingReference(ID.getHash());
+ if (!ObjID)
+ return std::nullopt;
+ return convertRef(*ObjID);
+}
+
+Expected<bool> OnDiskCAS::isMaterialized(ObjectRef ExternalRef) const {
+ return DB->isMaterialized(convertRef(ExternalRef));
+}
+
+ArrayRef<char> OnDiskCAS::getDataConst(ObjectHandle Node) const {
+ return DB->getObjectData(convertHandle(Node));
+}
+
+Expected<std::optional<ObjectHandle>>
+OnDiskCAS::loadIfExists(ObjectRef ExternalRef) {
+ Expected<std::optional<ondisk::ObjectHandle>> ObjHnd =
+ DB->load(convertRef(ExternalRef));
+ if (!ObjHnd)
+ return ObjHnd.takeError();
+ if (!*ObjHnd)
+ return std::nullopt;
+ return convertHandle(**ObjHnd);
+}
+
+Expected<ObjectRef> OnDiskCAS::storeImpl(ArrayRef<uint8_t> ComputedHash,
+ ArrayRef<ObjectRef> Refs,
+ ArrayRef<char> Data) {
+ SmallVector<ondisk::ObjectID, 64> IDs;
+ IDs.reserve(Refs.size());
+ for (ObjectRef Ref : Refs) {
+ IDs.push_back(convertRef(Ref));
+ }
+
+ auto StoredID = DB->getReference(ComputedHash);
+ if (LLVM_UNLIKELY(!StoredID))
+ return StoredID.takeError();
+ if (Error E = DB->store(*StoredID, IDs, Data))
+ return std::move(E);
+ return convertRef(*StoredID);
+}
+
+Error OnDiskCAS::forEachRef(ObjectHandle Node,
+ function_ref<Error(ObjectRef)> Callback) const {
+ auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+ for (ondisk::ObjectID Ref : RefsRange) {
+ if (Error E = Callback(convertRef(Ref)))
+ return E;
+ }
+ return Error::success();
+}
+
+Error OnDiskCAS::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+ UnifiedDB->setSizeLimit(SizeLimit);
+ return Error::success();
+}
+
+Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const {
+ return UnifiedDB->getStorageSize();
+}
+
+Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); }
+
+Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) {
+ Expected<std::unique_ptr<ondisk::OnDiskGraphDB>> DB =
+ ondisk::OnDiskGraphDB::open(AbsPath, BuiltinCASContext::getHashName(),
+ sizeof(HashType));
+ if (!DB)
+ return DB.takeError();
+ return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(*DB)));
+}
+
+bool cas::isOnDiskCASEnabled() {
+#if LLVM_ENABLE_ONDISK_CAS
+ return true;
+#else
+ return false;
+#endif
+}
+
+Expected<std::unique_ptr<ObjectStore>> cas::createOnDiskCAS(const Twine &Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+ // FIXME: An absolute path isn't really good enough. Should open a directory
+ // and use openat() for files underneath.
+ SmallString<256> AbsPath;
+ Path.toVector(AbsPath);
+ sys::fs::make_absolute(AbsPath);
+
+ return OnDiskCAS::open(AbsPath);
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled");
+#endif /* LLVM_ENABLE_ONDISK_CAS */
+}
+
+std::unique_ptr<ObjectStore>
+cas::builtin::createObjectStoreFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
+ return std::make_unique<OnDiskCAS>(std::move(UniDB));
+}
diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp
index 64cbe9d..245b6fb 100644
--- a/llvm/lib/CAS/OnDiskGraphDB.cpp
+++ b/llvm/lib/CAS/OnDiskGraphDB.cpp
@@ -893,6 +893,10 @@ int64_t DataRecordHandle::getDataRelOffset() const {
}
Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
+ if (UpstreamDB) {
+ if (auto E = UpstreamDB->validate(Deep, Hasher))
+ return E;
+ }
return Index.validate([&](FileOffset Offset,
OnDiskTrieRawHashMap::ConstValueProxy Record)
-> Error {
@@ -1202,11 +1206,8 @@ OnDiskGraphDB::load(ObjectID ExternalRef) {
return I.takeError();
TrieRecord::Data Object = I->Ref.load();
- if (Object.SK == TrieRecord::StorageKind::Unknown) {
- if (!UpstreamDB)
- return std::nullopt;
+ if (Object.SK == TrieRecord::StorageKind::Unknown)
return faultInFromUpstream(ExternalRef);
- }
if (Object.SK == TrieRecord::StorageKind::DataPool)
return ObjectHandle::fromFileOffset(Object.Offset);
@@ -1286,8 +1287,10 @@ OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
TrieRecord::Data Object = I->Ref.load();
if (Object.SK != TrieRecord::StorageKind::Unknown)
return ObjectPresence::InPrimaryDB;
+
if (!CheckUpstream || !UpstreamDB)
return ObjectPresence::Missing;
+
std::optional<ObjectID> UpstreamID =
UpstreamDB->getExistingReference(getDigest(*I));
return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
@@ -1549,9 +1552,10 @@ unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const {
return std::max(IndexPercent, DataPercent);
}
-Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
- StringRef AbsPath, StringRef HashName, unsigned HashByteSize,
- std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) {
+Expected<std::unique_ptr<OnDiskGraphDB>>
+OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName,
+ unsigned HashByteSize, OnDiskGraphDB *UpstreamDB,
+ FaultInPolicy Policy) {
if (std::error_code EC = sys::fs::create_directories(AbsPath))
return createFileError(AbsPath, EC);
@@ -1604,18 +1608,15 @@ Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
"unexpected user header in '" + DataPoolPath +
"'");
- return std::unique_ptr<OnDiskGraphDB>(
- new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool),
- std::move(UpstreamDB), Policy));
+ return std::unique_ptr<OnDiskGraphDB>(new OnDiskGraphDB(
+ AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));
}
OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
OnDiskDataAllocator DataPool,
- std::unique_ptr<OnDiskGraphDB> UpstreamDB,
- FaultInPolicy Policy)
+ OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy)
: Index(std::move(Index)), DataPool(std::move(DataPool)),
- RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)),
- FIPolicy(Policy) {
+ RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {
/// Lifetime for "big" objects not in DataPool.
///
/// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
@@ -1638,7 +1639,6 @@ Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
// against the process dying during importing and leaving the database with an
// incomplete tree. Note that if the upstream has missing nodes then the tree
// will be copied with missing nodes as well, it won't be considered an error.
-
struct UpstreamCursor {
ObjectHandle Node;
size_t RefsCount;
@@ -1720,7 +1720,6 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
// Copy the node data into the primary store.
// FIXME: Use hard-link or cloning if the file-system supports it and data is
// stored into a separate file.
-
auto Data = UpstreamDB->getObjectData(UpstreamNode);
auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
SmallVector<ObjectID, 64> Refs;
@@ -1737,7 +1736,8 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
Expected<std::optional<ObjectHandle>>
OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
- assert(UpstreamDB);
+ if (!UpstreamDB)
+ return std::nullopt;
auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
if (LLVM_UNLIKELY(!UpstreamID))
diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
index 2186071..15656cb 100644
--- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp
+++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
@@ -20,6 +20,7 @@
#include "llvm/CAS/OnDiskKeyValueDB.h"
#include "OnDiskCommon.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Errc.h"
@@ -53,15 +54,21 @@ Expected<std::optional<ArrayRef<char>>>
OnDiskKeyValueDB::get(ArrayRef<uint8_t> Key) {
// Check the result cache.
OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key);
- if (!ActionP)
+ if (ActionP) {
+ assert(isAddrAligned(Align(8), ActionP->Data.data()));
+ return ActionP->Data;
+ }
+ if (!UnifiedCache || !UnifiedCache->UpstreamKVDB)
return std::nullopt;
- assert(isAddrAligned(Align(8), ActionP->Data.data()));
- return ActionP->Data;
+
+ // Try to fault in from upstream.
+ return UnifiedCache->faultInFromUpstreamKV(Key);
}
Expected<std::unique_ptr<OnDiskKeyValueDB>>
OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
- StringRef ValueName, size_t ValueSize) {
+ StringRef ValueName, size_t ValueSize,
+ UnifiedOnDiskCache *Cache) {
if (std::error_code EC = sys::fs::create_directories(Path))
return createFileError(Path, EC);
@@ -87,10 +94,14 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
return std::move(E);
return std::unique_ptr<OnDiskKeyValueDB>(
- new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache)));
+ new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache), Cache));
}
Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const {
+ if (UnifiedCache && UnifiedCache->UpstreamKVDB) {
+ if (auto E = UnifiedCache->UpstreamKVDB->validate(CheckValue))
+ return E;
+ }
return Cache.validate(
[&](FileOffset Offset,
OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error {
diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
new file mode 100644
index 0000000..ae9d818
--- /dev/null
+++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
@@ -0,0 +1,613 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one
+/// directory while also restricting storage growth with a scheme of chaining
+/// the two most recent directories (primary & upstream), where the primary
+/// "faults-in" data from the upstream one. When the primary (most recent)
+/// directory exceeds its intended limit a new empty directory becomes the
+/// primary one.
+///
+/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open
+/// receives) there are directories named like this:
+///
+/// 'v<version>.<x>'
+/// 'v<version>.<x+1>'
+/// 'v<version>.<x+2>'
+/// ...
+///
+/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and
+/// the part after the dot is an increasing integer. The primary directory is
+/// the one with the highest integer and the upstream one is the directory
+/// before it. For example, if the sub-directories contained are:
+///
+/// 'v1.5', 'v1.6', 'v1.7', 'v1.8'
+///
+/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are
+/// unused directories that can be safely deleted at any time and by any
+/// process.
+///
+/// Contained within the top-level directory is a file named "lock" which is
+/// used for processes to take shared or exclusive locks for the contents of the
+/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock
+/// for the top-level directory; when it closes, if the primary sub-directory
+/// exceeded its limit, it attempts to get an exclusive lock in order to create
+/// a new empty primary directory; if it can't get the exclusive lock it gives
+/// up and lets the next \p UnifiedOnDiskCache instance that closes attempt it
+/// again.
+///
+/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a
+/// directory, by any process, the storage size in that directory will keep
+/// growing unrestricted. But the major benefit is that garbage-collection can
+/// be triggered on a directory concurrently, at any time and by any process,
+/// without affecting any active readers/writers in the same process or other
+/// processes.
+///
+/// The \c UnifiedOnDiskCache also provides validation and recovery on top of
+/// the underlying on-disk storage. The low-level storage is designed to remain
+/// coherent across regular process crashes, but may be invalid after power loss
+/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows
+/// validating the contents once per boot and can recover by marking invalid
+/// data for garbage collection.
+///
+/// The data recovery described above requires exclusive access to the CAS, and
+/// it is an error to attempt recovery if the CAS is open in any process/thread.
+/// In order to maximize backwards compatibility with tools that do not perform
+/// validation before opening the CAS, we do not attempt to get exclusive access
+/// until recovery is actually performed, meaning as long as the data is valid
+/// it will not conflict with concurrent use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "BuiltinCAS.h"
+#include "OnDiskCommon.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include <optional>
+
+#if __has_include(<sys/sysctl.h>)
+#include <sys/sysctl.h>
+#endif
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+/// FIXME: When the version of \p DBDirPrefix is bumped up we need to figure out
+/// how to handle the leftover sub-directories of the previous version, within
+/// the \p UnifiedOnDiskCache::collectGarbage function.
+static constexpr StringLiteral DBDirPrefix = "v1.";
+
+static constexpr StringLiteral ValidationFilename = "v1.validation";
+static constexpr StringLiteral CorruptPrefix = "corrupt.";
+
+ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef<char> Value) {
+ // little endian encoded.
+ assert(Value.size() == sizeof(uint64_t));
+ return ObjectID::fromOpaqueData(support::endian::read64le(Value.data()));
+}
+
+UnifiedOnDiskCache::ValueBytes
+UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) {
+ // little endian encoded.
+ UnifiedOnDiskCache::ValueBytes ValBytes;
+ static_assert(ValBytes.size() == sizeof(ID.getOpaqueData()));
+ support::endian::write64le(ValBytes.data(), ID.getOpaqueData());
+ return ValBytes;
+}
+
+Expected<std::optional<ArrayRef<char>>>
+UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef<uint8_t> Key) {
+ assert(UpstreamGraphDB);
+ assert(UpstreamKVDB);
+
+ std::optional<ArrayRef<char>> UpstreamValue;
+ if (Error E = UpstreamKVDB->get(Key).moveInto(UpstreamValue))
+ return std::move(E);
+ if (!UpstreamValue)
+ return std::nullopt;
+
+ // The value is the \p ObjectID in the context of the upstream
+ // \p OnDiskGraphDB instance. Translate it to the context of the primary
+ // \p OnDiskGraphDB instance.
+ ObjectID UpstreamID = getObjectIDFromValue(*UpstreamValue);
+ auto PrimaryID =
+ PrimaryGraphDB->getReference(UpstreamGraphDB->getDigest(UpstreamID));
+ if (LLVM_UNLIKELY(!PrimaryID))
+ return PrimaryID.takeError();
+ return PrimaryKVDB->put(Key, getValueFromObjectID(*PrimaryID));
+}
+
+/// \returns all the 'v<version>.<x>' names of sub-directories, sorted in
+/// ascending order of the integer after the dot. Corrupt directories, if
+/// included, will come first.
+static Expected<SmallVector<std::string, 4>>
+getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) {
+ struct DBDir {
+ uint64_t Order;
+ std::string Name;
+ };
+ SmallVector<DBDir> FoundDBDirs;
+
+ std::error_code EC;
+ for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE;
+ DirI.increment(EC)) {
+ if (DirI->type() != sys::fs::file_type::directory_file)
+ continue;
+ StringRef SubDir = sys::path::filename(DirI->path());
+ if (IncludeCorrupt && SubDir.starts_with(CorruptPrefix)) {
+ FoundDBDirs.push_back({0, std::string(SubDir)});
+ continue;
+ }
+ if (!SubDir.starts_with(DBDirPrefix))
+ continue;
+ uint64_t Order;
+ if (SubDir.substr(DBDirPrefix.size()).getAsInteger(10, Order))
+ return createStringError(inconvertibleErrorCode(),
+ "unexpected directory " + DirI->path());
+ FoundDBDirs.push_back({Order, std::string(SubDir)});
+ }
+ if (EC)
+ return createFileError(Path, EC);
+
+  // Note: this must be a strict weak ordering ('<', not '<='); equal keys
+  // occur because corrupt directories all share Order == 0.
+  llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool {
+    return LHS.Order < RHS.Order;
+  });
+
+ SmallVector<std::string, 4> DBDirs;
+ for (DBDir &Dir : FoundDBDirs)
+ DBDirs.push_back(std::move(Dir.Name));
+ return DBDirs;
+}
+
+static Expected<SmallVector<std::string, 4>> getAllGarbageDirs(StringRef Path) {
+ auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true);
+ if (!DBDirs)
+ return DBDirs.takeError();
+
+ // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure
+ // out how to handle the leftover sub-directories of the previous version.
+
+ for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) {
+ StringRef Back(DBDirs->back());
+ if (Back.starts_with(CorruptPrefix))
+ break;
+ DBDirs->pop_back();
+ }
+ return *DBDirs;
+}
+
+/// Given a sub-directory named 'v<version>.<x>', writes the next name,
+/// 'v<version>.<x+1>', to \p OS.
+static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) {
+ assert(DBDir.starts_with(DBDirPrefix));
+ uint64_t Count;
+ bool Failed = DBDir.substr(DBDirPrefix.size()).getAsInteger(10, Count);
+ assert(!Failed);
+ (void)Failed;
+ OS << DBDirPrefix << Count + 1;
+}
+
+static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath,
+ bool CheckHash) {
+ SmallVector<StringRef> Args{LLVMCasBinary, "-cas", RootPath, "-validate"};
+ if (CheckHash)
+ Args.push_back("-check-hash");
+
+ llvm::SmallString<128> StdErrPath;
+ int StdErrFD = -1;
+ if (std::error_code EC = sys::fs::createTemporaryFile(
+ "llvm-cas-validate-stderr", "txt", StdErrFD, StdErrPath,
+ llvm::sys::fs::OF_Text))
+ return createStringError(EC, "failed to create temporary file");
+ FileRemover OutputRemover(StdErrPath.c_str());
+
+ std::optional<llvm::StringRef> Redirects[] = {
+ {""}, // stdin = /dev/null
+ {""}, // stdout = /dev/null
+ StdErrPath.str(),
+ };
+
+ std::string ErrMsg;
+ int Result =
+ sys::ExecuteAndWait(LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects,
+ /*SecondsToWait=*/120, /*MemoryLimit=*/0, &ErrMsg);
+
+ if (Result == -1)
+ return createStringError("failed to exec " + join(Args, " ") + ": " +
+ ErrMsg);
+ if (Result != 0) {
+ llvm::SmallString<64> Err("cas contents invalid");
+ if (!ErrMsg.empty()) {
+ Err += ": ";
+ Err += ErrMsg;
+ }
+ auto StdErrBuf = MemoryBuffer::getFile(StdErrPath.c_str());
+ if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) {
+ Err += ": ";
+ Err += (*StdErrBuf)->getBuffer();
+ }
+ return createStringError(Err);
+ }
+ return Error::success();
+}
+
+static Error validateInProcess(StringRef RootPath, StringRef HashName,
+ unsigned HashByteSize, bool CheckHash) {
+ std::shared_ptr<UnifiedOnDiskCache> UniDB;
+ if (Error E = UnifiedOnDiskCache::open(RootPath, std::nullopt, HashName,
+ HashByteSize)
+ .moveInto(UniDB))
+ return E;
+ auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB);
+ if (Error E = CAS->validate(CheckHash))
+ return E;
+ auto Cache = builtin::createActionCacheFromUnifiedOnDiskCache(UniDB);
+ if (Error E = Cache->validate())
+ return E;
+ return Error::success();
+}
+
+static Expected<uint64_t> getBootTime() {
+#if __has_include(<sys/sysctl.h>) && defined(KERN_BOOTTIME)
+ struct timeval TV;
+ size_t TVLen = sizeof(TV);
+ int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME};
+ if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0)
+ return createStringError(llvm::errnoAsErrorCode(),
+ "failed to get boottime");
+ if (TVLen != sizeof(TV))
+ return createStringError("sysctl kern.boottime unexpected format");
+ return TV.tv_sec;
+#elif defined(__linux__)
+ // Use the mtime for /proc, which is recreated during system boot.
+ // We could also read /proc/stat and search for 'btime'.
+ sys::fs::file_status Status;
+ if (std::error_code EC = sys::fs::status("/proc", Status))
+ return createFileError("/proc", EC);
+ return Status.getLastModificationTime().time_since_epoch().count();
+#else
+ llvm::report_fatal_error("getBootTime unimplemented");
+#endif
+}
+
+Expected<ValidationResult> UnifiedOnDiskCache::validateIfNeeded(
+ StringRef RootPath, StringRef HashName, unsigned HashByteSize,
+ bool CheckHash, bool AllowRecovery, bool ForceValidation,
+ std::optional<StringRef> LLVMCasBinaryPath) {
+ if (std::error_code EC = sys::fs::create_directories(RootPath))
+ return createFileError(RootPath, EC);
+
+ SmallString<256> PathBuf(RootPath);
+ sys::path::append(PathBuf, ValidationFilename);
+ int FD = -1;
+ if (std::error_code EC = sys::fs::openFileForReadWrite(
+ PathBuf, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+ return createFileError(PathBuf, EC);
+ assert(FD != -1);
+
+ sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
+ auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(File); });
+
+ if (std::error_code EC = lockFileThreadSafe(FD, sys::fs::LockKind::Exclusive))
+ return createFileError(PathBuf, EC);
+ auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(FD); });
+
+ SmallString<8> Bytes;
+ if (Error E = sys::fs::readNativeFileToEOF(File, Bytes))
+ return createFileError(PathBuf, std::move(E));
+
+ uint64_t ValidationBootTime = 0;
+ if (!Bytes.empty() &&
+ StringRef(Bytes).trim().getAsInteger(10, ValidationBootTime))
+ return createFileError(PathBuf, errc::illegal_byte_sequence,
+ "expected integer");
+
+ static uint64_t BootTime = 0;
+ if (BootTime == 0)
+ if (Error E = getBootTime().moveInto(BootTime))
+ return std::move(E);
+
+ if (ValidationBootTime == BootTime && !ForceValidation)
+ return ValidationResult::Skipped;
+
+ // Validate!
+ bool NeedsRecovery = false;
+ if (Error E =
+ LLVMCasBinaryPath
+ ? validateOutOfProcess(*LLVMCasBinaryPath, RootPath, CheckHash)
+ : validateInProcess(RootPath, HashName, HashByteSize,
+ CheckHash)) {
+ if (AllowRecovery) {
+ consumeError(std::move(E));
+ NeedsRecovery = true;
+ } else {
+ return std::move(E);
+ }
+ }
+
+ if (NeedsRecovery) {
+ sys::path::remove_filename(PathBuf);
+ sys::path::append(PathBuf, "lock");
+
+ int LockFD = -1;
+ if (std::error_code EC = sys::fs::openFileForReadWrite(
+ PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+ return createFileError(PathBuf, EC);
+ sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+ auto CloseLock = make_scope_exit([&]() { sys::fs::closeFile(LockFile); });
+ if (std::error_code EC = tryLockFileThreadSafe(LockFD)) {
+ if (EC == std::errc::no_lock_available)
+ return createFileError(
+ PathBuf, EC,
+ "CAS validation requires exclusive access but CAS was in use");
+ return createFileError(PathBuf, EC);
+ }
+ auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+ auto DBDirs = getAllDBDirs(RootPath);
+ if (!DBDirs)
+ return DBDirs.takeError();
+
+ for (StringRef DBDir : *DBDirs) {
+ sys::path::remove_filename(PathBuf);
+ sys::path::append(PathBuf, DBDir);
+ std::error_code EC;
+ int Attempt = 0, MaxAttempts = 100;
+ SmallString<128> GCPath;
+ for (; Attempt < MaxAttempts; ++Attempt) {
+ GCPath.assign(RootPath);
+ sys::path::append(GCPath, CorruptPrefix + std::to_string(Attempt) +
+ "." + DBDir);
+ EC = sys::fs::rename(PathBuf, GCPath);
+ // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST.
+ if (EC != errc::directory_not_empty && EC != errc::file_exists)
+ break;
+ }
+ if (Attempt == MaxAttempts)
+ return createStringError(
+ EC, "rename " + PathBuf +
+ " failed: too many CAS directories awaiting pruning");
+ if (EC)
+ return createStringError(EC, "rename " + PathBuf + " to " + GCPath +
+ " failed: " + EC.message());
+ }
+ }
+
+ if (ValidationBootTime != BootTime) {
+ // Fix filename in case we have error to report.
+ sys::path::remove_filename(PathBuf);
+ sys::path::append(PathBuf, ValidationFilename);
+ if (std::error_code EC = sys::fs::resize_file(FD, 0))
+ return createFileError(PathBuf, EC);
+ raw_fd_ostream OS(FD, /*shouldClose=*/false);
+ OS.seek(0); // resize does not reset position
+ OS << BootTime << '\n';
+ if (OS.has_error())
+ return createFileError(PathBuf, OS.error());
+ }
+
+ return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid;
+}
+
+Expected<std::unique_ptr<UnifiedOnDiskCache>>
+UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit,
+ StringRef HashName, unsigned HashByteSize,
+ OnDiskGraphDB::FaultInPolicy FaultInPolicy) {
+ if (std::error_code EC = sys::fs::create_directories(RootPath))
+ return createFileError(RootPath, EC);
+
+ SmallString<256> PathBuf(RootPath);
+ sys::path::append(PathBuf, "lock");
+ int LockFD = -1;
+ if (std::error_code EC = sys::fs::openFileForReadWrite(
+ PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+ return createFileError(PathBuf, EC);
+ assert(LockFD != -1);
+  // Lock the directory with a shared lock, which prevents other processes
+  // from creating a new chain (essentially, while a \p UnifiedOnDiskCache
+  // instance holds a shared lock, the storage for the primary directory can
+  // grow unrestricted).
+ if (std::error_code EC =
+ lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared))
+ return createFileError(PathBuf, EC);
+
+ auto DBDirs = getAllDBDirs(RootPath);
+ if (!DBDirs)
+ return DBDirs.takeError();
+ if (DBDirs->empty())
+ DBDirs->push_back((Twine(DBDirPrefix) + "1").str());
+
+ assert(!DBDirs->empty());
+
+  // If there is only one directory, open the databases on it. If there are
+  // two or more directories, chain the two most recent, with the most recent
+  // being the primary one. The remaining directories are unused data that can
+  // be garbage-collected.
+ auto UniDB = std::unique_ptr<UnifiedOnDiskCache>(new UnifiedOnDiskCache());
+ std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
+ std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
+ if (DBDirs->size() > 1) {
+ StringRef UpstreamDir = *(DBDirs->end() - 2);
+ PathBuf = RootPath;
+ sys::path::append(PathBuf, UpstreamDir);
+ if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+ /*UpstreamDB=*/nullptr, FaultInPolicy)
+ .moveInto(UpstreamGraphDB))
+ return std::move(E);
+ if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+ /*ValueName=*/"objectid",
+ /*ValueSize=*/sizeof(uint64_t))
+ .moveInto(UpstreamKVDB))
+ return std::move(E);
+ }
+
+ StringRef PrimaryDir = *(DBDirs->end() - 1);
+ PathBuf = RootPath;
+ sys::path::append(PathBuf, PrimaryDir);
+ std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
+ if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+ UpstreamGraphDB.get(), FaultInPolicy)
+ .moveInto(PrimaryGraphDB))
+ return std::move(E);
+ std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
+ // \p UnifiedOnDiskCache does manual chaining for key-value requests,
+ // including an extra translation step of the value during fault-in.
+ if (Error E =
+ OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+ /*ValueName=*/"objectid",
+ /*ValueSize=*/sizeof(uint64_t), UniDB.get())
+ .moveInto(PrimaryKVDB))
+ return std::move(E);
+
+ UniDB->RootPath = RootPath;
+ UniDB->SizeLimit = SizeLimit.value_or(0);
+ UniDB->LockFD = LockFD;
+ UniDB->NeedsGarbageCollection = DBDirs->size() > 2;
+ UniDB->PrimaryDBDir = PrimaryDir;
+ UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB);
+ UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB);
+ UniDB->UpstreamKVDB = std::move(UpstreamKVDB);
+ UniDB->PrimaryKVDB = std::move(PrimaryKVDB);
+
+ return std::move(UniDB);
+}
+
+void UnifiedOnDiskCache::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+ this->SizeLimit = SizeLimit.value_or(0);
+}
+
+uint64_t UnifiedOnDiskCache::getStorageSize() const {
+ uint64_t TotalSize = getPrimaryStorageSize();
+ if (UpstreamGraphDB)
+ TotalSize += UpstreamGraphDB->getStorageSize();
+ if (UpstreamKVDB)
+ TotalSize += UpstreamKVDB->getStorageSize();
+ return TotalSize;
+}
+
+uint64_t UnifiedOnDiskCache::getPrimaryStorageSize() const {
+ return PrimaryGraphDB->getStorageSize() + PrimaryKVDB->getStorageSize();
+}
+
+bool UnifiedOnDiskCache::hasExceededSizeLimit() const {
+ uint64_t CurSizeLimit = SizeLimit;
+ if (!CurSizeLimit)
+ return false;
+
+  // If utilization of the hard storage limit is beyond 85%, declare the cache
+  // above its limit and request cleanup.
+ unsigned CurrentPercent =
+ std::max(PrimaryGraphDB->getHardStorageLimitUtilization(),
+ PrimaryKVDB->getHardStorageLimitUtilization());
+ if (CurrentPercent > 85)
+ return true;
+
+  // We allow each of the directories in the chain to reach up to half the
+  // intended size limit. Check whether the primary directory has exceeded half
+  // the limit, in order to decide whether we need to start a new chain.
+  //
+  // We could check the size limit against the sum of the sizes of the primary
+  // and upstream directories, but then if the upstream is significantly larger
+  // than the intended limit, a new chain would be created before the primary
+  // has reached its own limit. Essentially, in such a situation we prefer
+  // reclaiming the storage later in order to have more consistent cache-hit
+  // behavior.
+ return (CurSizeLimit / 2) < getPrimaryStorageSize();
+}
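As a worked example of this policy (illustrative numbers, not values from the commit): with SizeLimit = 10 GB, a new chain is started once the primary directory alone exceeds 5 GB, even if the primary and upstream directories together already hold, say, 12 GB; the upstream data is reclaimed only after the next rotation, trading temporarily higher disk usage for more consistent cache hits.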
+
+Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
+ if (LockFD == -1)
+ return Error::success(); // already closed.
+ auto CloseLock = make_scope_exit([&]() {
+ assert(LockFD >= 0);
+ sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+ sys::fs::closeFile(LockFile);
+ LockFD = -1;
+ });
+
+ bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false;
+ UpstreamKVDB.reset();
+ PrimaryKVDB.reset();
+ UpstreamGraphDB.reset();
+ PrimaryGraphDB.reset();
+ if (std::error_code EC = unlockFileThreadSafe(LockFD))
+ return createFileError(RootPath, EC);
+
+ if (!ExceededSizeLimit)
+ return Error::success();
+
+ // The primary directory exceeded its intended size limit. Try to get an
+ // exclusive lock in order to create a new primary directory for next time
+ // this \p UnifiedOnDiskCache path is opened.
+
+ if (std::error_code EC = tryLockFileThreadSafe(
+ LockFD, std::chrono::milliseconds(0), sys::fs::LockKind::Exclusive)) {
+ if (EC == errc::no_lock_available)
+ return Error::success(); // couldn't get exclusive lock, give up.
+ return createFileError(RootPath, EC);
+ }
+ auto UnlockFile = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+  // We managed to get an exclusive lock, which means there are no other open
+  // \p UnifiedOnDiskCache instances for the same path, so we can safely start a
+ // new primary directory. To start a new primary directory we just have to
+ // create a new empty directory with the next consecutive index; since this is
+ // an atomic operation we will leave the top-level directory in a consistent
+ // state even if the process dies during this code-path.
+
+ SmallString<256> PathBuf(RootPath);
+ raw_svector_ostream OS(PathBuf);
+ OS << sys::path::get_separator();
+ getNextDBDirName(PrimaryDBDir, OS);
+ if (std::error_code EC = sys::fs::create_directory(PathBuf))
+ return createFileError(PathBuf, EC);
+
+ NeedsGarbageCollection = true;
+ return Error::success();
+}
+
+UnifiedOnDiskCache::UnifiedOnDiskCache() = default;
+
+UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); }
+
+Error UnifiedOnDiskCache::collectGarbage(StringRef Path) {
+ auto DBDirs = getAllGarbageDirs(Path);
+ if (!DBDirs)
+ return DBDirs.takeError();
+
+ SmallString<256> PathBuf(Path);
+ for (StringRef UnusedSubDir : *DBDirs) {
+ sys::path::append(PathBuf, UnusedSubDir);
+ if (std::error_code EC = sys::fs::remove_directories(PathBuf))
+ return createFileError(PathBuf, EC);
+ sys::path::remove_filename(PathBuf);
+ }
+ return Error::success();
+}
+
+Error UnifiedOnDiskCache::collectGarbage() { return collectGarbage(RootPath); }
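For orientation, here is a minimal usage sketch of the entry points this file introduces. It is not part of the commit: the root path, the "BLAKE3" hash name, the hash size, and the size limit are assumed example values, it relies on the default fault-in policy implied by the four-argument open() call seen in validateInProcess above, and it assumes collectGarbage(StringRef) is static, as its definition suggests.

// Illustrative sketch only; all concrete parameters are assumptions.
// Assumes: using namespace llvm; using namespace llvm::cas::ondisk;
Error useCAS() {
  // Once per boot, validate the on-disk data and recover if it is corrupt.
  Expected<ValidationResult> VR = UnifiedOnDiskCache::validateIfNeeded(
      "/tmp/cas", "BLAKE3", /*HashByteSize=*/32, /*CheckHash=*/true,
      /*AllowRecovery=*/true, /*ForceValidation=*/false,
      /*LLVMCasBinaryPath=*/std::nullopt);
  if (!VR)
    return VR.takeError();

  // Open (or create) the chain; a shared lock is held while it is open.
  std::unique_ptr<UnifiedOnDiskCache> UniDB;
  if (Error E = UnifiedOnDiskCache::open("/tmp/cas",
                                         /*SizeLimit=*/uint64_t(10) << 30,
                                         "BLAKE3", /*HashByteSize=*/32)
                    .moveInto(UniDB))
    return E;

  // ... use the primary OnDiskGraphDB / OnDiskKeyValueDB through UniDB ...

  // close() may rotate to a new primary directory if the limit was exceeded.
  if (Error E = UniDB->close())
    return E;

  // Unused chain directories can be pruned at any time, by any process.
  return UnifiedOnDiskCache::collectGarbage("/tmp/cas");
}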
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index e5c85d5..1ea30d8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -745,11 +745,6 @@ void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const {
Asm->emitInt32(QualifiedNameHash);
}
-constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[];
-constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[];
-constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[];
-constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[];
-
#ifndef NDEBUG
void AppleAccelTableWriter::Header::print(raw_ostream &OS) const {
OS << "Magic: " << format("0x%x", Magic) << "\n"
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 1fc90d0..4fd2204 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -294,6 +294,10 @@ void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
MachinePreds[Edge].push_back(NewPred);
}
+static bool targetSupportsBF16Type(const MachineFunction *MF) {
+ return MF->getTarget().getTargetTriple().isSPIRV();
+}
+
static bool containsBF16Type(const User &U) {
// BF16 cannot currently be represented by LLT, to avoid miscompiles we
// prevent any instructions using them. FIXME: This can be removed once LLT
@@ -306,7 +310,7 @@ static bool containsBF16Type(const User &U) {
bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
- if (containsBF16Type(U))
+ if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
return false;
// Get or create a virtual register for each value.
@@ -328,7 +332,7 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
- if (containsBF16Type(U))
+ if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
return false;
Register Op0 = getOrCreateVReg(*U.getOperand(0));
@@ -348,7 +352,7 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
- if (containsBF16Type(U))
+ if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
return false;
auto *CI = cast<CmpInst>(&U);
@@ -1569,7 +1573,7 @@ bool IRTranslator::translateBitCast(const User &U,
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
- if (containsBF16Type(U))
+ if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
return false;
uint32_t Flags = 0;
@@ -2688,7 +2692,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
bool IRTranslator::translateInlineAsm(const CallBase &CB,
MachineIRBuilder &MIRBuilder) {
- if (containsBF16Type(CB))
+ if (containsBF16Type(CB) && !targetSupportsBF16Type(MF))
return false;
const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();
@@ -2779,7 +2783,7 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
}
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
- if (!MF->getTarget().getTargetTriple().isSPIRV() && containsBF16Type(U))
+ if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
return false;
const CallInst &CI = cast<CallInst>(U);
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 4b4df98..637acd6 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -109,8 +109,10 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
if (auto *CI = dyn_cast<ConstantInt>(NumericConstant)) {
if (CI->getBitWidth() > 64)
MIB.addCImm(CI);
- else
+ else if (CI->getBitWidth() == 1)
MIB.addImm(CI->getZExtValue());
+ else
+ MIB.addImm(CI->getSExtValue());
} else if (auto *CFP = dyn_cast<ConstantFP>(NumericConstant)) {
MIB.addFPImm(CFP);
} else if (isa<ConstantPointerNull>(NumericConstant)) {
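Why the new i1 special case matters, as a small worked example (my reconstruction of the reasoning, not text from the commit):

// For a ConstantInt of type i1 holding 'true' (bit pattern 1):
//   CI->getZExtValue() == 1    // the desired encoding for a boolean
//   CI->getSExtValue() == -1   // the only bit is the sign bit
// Without the i1 check, sign extension would materialize 'true' as -1 in
// the DBG_VALUE, which is why i1 is zero-extended here (and in the
// analogous InstrEmitter change below).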
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index c31454a..b5d3092 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -129,6 +129,9 @@ static bool isColdBlock(const MachineBasicBlock &MBB,
}
bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
// Do not split functions when -basic-block-sections=all is specified.
if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All)
return false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index bb10cf6..d84c3fb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -733,6 +733,8 @@ MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64)
return MachineOperand::CreateCImm(CI);
+ if (CI->getBitWidth() == 1)
+ return MachineOperand::CreateImm(CI->getZExtValue());
return MachineOperand::CreateImm(CI->getSExtValue());
}
if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 431a810..316aacd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -163,6 +163,8 @@ private:
RTLIB::Libcall CallI128);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandSincosStretLibCall(SDNode *Node) const;
+
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
const SDLoc &dl);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
@@ -2423,6 +2425,101 @@ static bool useSinCos(SDNode *Node) {
return false;
}
+SDValue SelectionDAGLegalize::ExpandSincosStretLibCall(SDNode *Node) const {
+  // For Darwin targets, we want to call an alternative entry point:
+  // __sincos_stret, which returns the values in two S / D registers.
+ SDLoc dl(Node);
+ SDValue Arg = Node->getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT);
+ RTLIB::LibcallImpl SincosStret = TLI.getLibcallImpl(LC);
+ if (SincosStret == RTLIB::Unsupported)
+ return SDValue();
+
+  // There are 3 different ABI cases to handle:
+  //  - Direct return of separate fields in registers
+  //  - Single return as vector elements
+  //  - sret struct
+
+ const RTLIB::RuntimeLibcallsInfo &CallsInfo = TLI.getRuntimeLibcallsInfo();
+
+ const DataLayout &DL = DAG.getDataLayout();
+
+ auto [FuncTy, FuncAttrs] = CallsInfo.getFunctionTy(
+ *DAG.getContext(), TM.getTargetTriple(), DL, SincosStret);
+
+ Type *SincosStretRetTy = FuncTy->getReturnType();
+ CallingConv::ID CallConv = CallsInfo.getLibcallImplCallingConv(SincosStret);
+ StringRef LibcallImplName = CallsInfo.getLibcallImplName(SincosStret);
+
+ SDValue Callee = DAG.getExternalSymbol(LibcallImplName.data(),
+ TLI.getProgramPointerTy(DL));
+
+ TargetLowering::ArgListTy Args;
+ SDValue SRet;
+
+ int FrameIdx;
+ if (FuncTy->getParamType(0)->isPointerTy()) {
+ // Uses sret
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ AttributeSet PtrAttrs = FuncAttrs.getParamAttrs(0);
+ Type *StructTy = PtrAttrs.getStructRetType();
+ const uint64_t ByteSize = DL.getTypeAllocSize(StructTy);
+ const Align StackAlign = DL.getPrefTypeAlign(StructTy);
+
+ FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
+ SRet = DAG.getFrameIndex(FrameIdx, TLI.getFrameIndexTy(DL));
+
+ TargetLowering::ArgListEntry Entry(SRet, FuncTy->getParamType(0));
+ Entry.IsSRet = true;
+ Entry.IndirectType = StructTy;
+ Entry.Alignment = StackAlign;
+
+ Args.push_back(Entry);
+ Args.emplace_back(Arg, FuncTy->getParamType(1));
+ } else {
+ Args.emplace_back(Arg, FuncTy->getParamType(0));
+ }
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallConv, SincosStretRetTy, Callee, std::move(Args))
+ .setIsPostTypeLegalization();
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ if (SRet) {
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+ SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, PtrInfo);
+
+ TypeSize StoreSize = ArgVT.getStoreSize();
+
+ // Address of cos field.
+ SDValue Add = DAG.getObjectPtrOffset(dl, SRet, StoreSize);
+ SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
+ PtrInfo.getWithOffset(StoreSize));
+
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, LoadSin.getValue(0),
+ LoadCos.getValue(0));
+ }
+
+ if (!CallResult.first.getValueType().isVector())
+ return CallResult.first;
+
+ SDValue SinVal =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first,
+ DAG.getVectorIdxConstant(0, dl));
+ SDValue CosVal =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first,
+ DAG.getVectorIdxConstant(1, dl));
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
+}
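To make the three ABI cases concrete (the triples are my examples; the commit distinguishes the cases only structurally): on x86_64 Darwin, __sincosf_stret returns a <2 x float> in a single vector register, so the result is split with EXTRACT_VECTOR_ELT; on arm64 Darwin, __sincos_stret returns a {double, double} aggregate directly in two registers, which LowerCallTo already yields as a non-vector value; and on i386 Darwin or APCS ARM targets, the struct is returned through an sret pointer into a stack slot, from which the sin and cos fields are loaded back out.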
+
SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
@@ -4730,6 +4827,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::FSINCOS:
case ISD::FSINCOSPI: {
EVT VT = Node->getValueType(0);
+
+ if (Node->getOpcode() == ISD::FSINCOS) {
+ RTLIB::Libcall SincosStret = RTLIB::getSINCOS_STRET(VT);
+ if (SincosStret != RTLIB::UNKNOWN_LIBCALL) {
+ if (SDValue Expanded = ExpandSincosStretLibCall(Node)) {
+ Results.push_back(Expanded);
+ Results.push_back(Expanded.getValue(1));
+ break;
+ }
+ }
+ }
+
RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
? RTLIB::getSINCOS(VT)
: RTLIB::getSINCOSPI(VT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a522650..fa0c899 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8958,9 +8958,8 @@ bool SelectionDAGBuilder::canTailCall(const CallBase &CB) const {
// Avoid emitting tail calls in functions with the disable-tail-calls
// attribute.
const Function *Caller = CB.getParent()->getParent();
- if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
- "true" &&
- !isMustTailCall)
+ if (!isMustTailCall &&
+ Caller->getFnAttribute("disable-tail-calls").getValueAsBool())
return false;
// We can't tail call inside a function with a swifterror argument. Lowering
diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h
index 84757ae..970abdc 100644
--- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h
+++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h
@@ -28,7 +28,7 @@ using MacroOffset2UnitMapTy = DenseMap<uint64_t, DwarfUnit *>;
/// Base class for all Dwarf units(Compile unit/Type table unit).
class DwarfUnit : public OutputSections {
public:
- virtual ~DwarfUnit() {}
+ virtual ~DwarfUnit() = default;
DwarfUnit(LinkingGlobalData &GlobalData, unsigned ID,
StringRef ClangModuleName)
: OutputSections(GlobalData), ID(ID), ClangModuleName(ClangModuleName),
diff --git a/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h b/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h
index f67536e..8ccb4a5 100644
--- a/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h
+++ b/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h
@@ -22,7 +22,7 @@ class StringEntryToDwarfStringPoolEntryMap {
public:
StringEntryToDwarfStringPoolEntryMap(LinkingGlobalData &GlobalData)
: GlobalData(GlobalData) {}
- ~StringEntryToDwarfStringPoolEntryMap() {}
+ ~StringEntryToDwarfStringPoolEntryMap() = default;
/// Create DwarfStringPoolEntry for specified StringEntry if necessary.
/// Initialize DwarfStringPoolEntry with initial values.
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 6c78ef0..7496c5a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -704,7 +704,9 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent,
DIDumpOptions ChildDumpOpts = DumpOpts;
ChildDumpOpts.ShowParents = false;
while (Child) {
- Child.dump(OS, Indent + 2, ChildDumpOpts);
+ if (DumpOpts.FilterChildTag.empty() ||
+ llvm::is_contained(DumpOpts.FilterChildTag, Child.getTag()))
+ Child.dump(OS, Indent + 2, ChildDumpOpts);
Child = Child.getSibling();
}
}
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
index 7e606c6a..4e7db82 100644
--- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -27,7 +27,7 @@
namespace llvm {
namespace orc {
-MemoryMapper::~MemoryMapper() {}
+MemoryMapper::~MemoryMapper() = default;
InProcessMemoryMapper::InProcessMemoryMapper(size_t PageSize)
: PageSize(PageSize) {}
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 0e5926f..fff9a81 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -528,7 +528,7 @@ void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
Value *Version = Builder.getInt32(OMP_KERNEL_ARG_VERSION);
Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
auto Int32Ty = Type::getInt32Ty(Builder.getContext());
- constexpr const size_t MaxDim = 3;
+ constexpr size_t MaxDim = 3;
Value *ZeroArray = Constant::getNullValue(ArrayType::get(Int32Ty, MaxDim));
Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 3b8fde8..cd39970 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -4171,6 +4171,16 @@ SwitchInstProfUpdateWrapper::removeCase(SwitchInst::CaseIt I) {
return SI.removeCase(I);
}
+void SwitchInstProfUpdateWrapper::replaceDefaultDest(SwitchInst::CaseIt I) {
+ auto *DestBlock = I->getCaseSuccessor();
+ if (Weights) {
+ auto Weight = getSuccessorWeight(I->getCaseIndex() + 1);
+ (*Weights)[0] = Weight.value();
+ }
+
+ SI.setDefaultDest(DestBlock);
+}
+
void SwitchInstProfUpdateWrapper::addCase(
ConstantInt *OnVal, BasicBlock *Dest,
SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 77af29b..2ce5719 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -7,7 +7,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/RuntimeLibcalls.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/StringTable.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/xxhash.h"
#include "llvm/TargetParser/ARMTargetParser.h"
@@ -72,3 +74,80 @@ bool RuntimeLibcallsInfo::darwinHasExp10(const Triple &TT) {
return false;
}
}
+
+std::pair<FunctionType *, AttributeList>
+RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT,
+ const DataLayout &DL,
+ RTLIB::LibcallImpl LibcallImpl) const {
+ static constexpr Attribute::AttrKind CommonFnAttrs[] = {
+ Attribute::NoCallback, Attribute::NoFree, Attribute::NoSync,
+ Attribute::NoUnwind, Attribute::WillReturn};
+
+ switch (LibcallImpl) {
+ case RTLIB::impl___sincos_stret:
+ case RTLIB::impl___sincosf_stret: {
+ if (!darwinHasSinCosStret(TT)) // Non-darwin currently unexpected
+ return {};
+
+ Type *ScalarTy = LibcallImpl == RTLIB::impl___sincosf_stret
+ ? Type::getFloatTy(Ctx)
+ : Type::getDoubleTy(Ctx);
+
+ AttrBuilder FuncAttrBuilder(Ctx);
+ for (Attribute::AttrKind Attr : CommonFnAttrs)
+ FuncAttrBuilder.addAttribute(Attr);
+
+ const bool UseSret =
+ TT.isX86_32() || ((TT.isARM() || TT.isThumb()) &&
+ ARM::computeTargetABI(TT) == ARM::ARM_ABI_APCS);
+
+ FuncAttrBuilder.addMemoryAttr(MemoryEffects::argumentOrErrnoMemOnly(
+ UseSret ? ModRefInfo::Mod : ModRefInfo::NoModRef, ModRefInfo::Mod));
+
+ AttributeList Attrs;
+ Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder);
+
+ if (UseSret) {
+ AttrBuilder AttrBuilder(Ctx);
+ StructType *StructTy = StructType::get(ScalarTy, ScalarTy);
+ AttrBuilder.addStructRetAttr(StructTy);
+ AttrBuilder.addAlignmentAttr(DL.getABITypeAlign(StructTy));
+ FunctionType *FuncTy = FunctionType::get(
+ Type::getVoidTy(Ctx), {DL.getAllocaPtrType(Ctx), ScalarTy}, false);
+
+ return {FuncTy, Attrs.addParamAttributes(Ctx, 0, AttrBuilder)};
+ }
+
+ Type *RetTy =
+ LibcallImpl == RTLIB::impl___sincosf_stret && TT.isX86_64()
+ ? static_cast<Type *>(FixedVectorType::get(ScalarTy, 2))
+ : static_cast<Type *>(StructType::get(ScalarTy, ScalarTy));
+
+ return {FunctionType::get(RetTy, {ScalarTy}, false), Attrs};
+ }
+ case RTLIB::impl_sqrtf:
+ case RTLIB::impl_sqrt: {
+ AttrBuilder FuncAttrBuilder(Ctx);
+
+ for (Attribute::AttrKind Attr : CommonFnAttrs)
+ FuncAttrBuilder.addAttribute(Attr);
+ FuncAttrBuilder.addMemoryAttr(MemoryEffects::errnoMemOnly(ModRefInfo::Mod));
+
+ AttributeList Attrs;
+ Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder);
+
+ Type *ScalarTy = LibcallImpl == RTLIB::impl_sqrtf ? Type::getFloatTy(Ctx)
+ : Type::getDoubleTy(Ctx);
+ FunctionType *FuncTy = FunctionType::get(ScalarTy, {ScalarTy}, false);
+
+ Attrs = Attrs.addRetAttribute(
+ Ctx, Attribute::getWithNoFPClass(Ctx, fcNegInf | fcNegSubnormal |
+ fcNegNormal));
+ return {FuncTy, Attrs};
+ }
+ default:
+ return {};
+ }
+
+ return {};
+}
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index b775cbb..95d61a9 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -148,18 +148,10 @@ void Value::destroyValueName() {
}
bool Value::hasNUses(unsigned N) const {
- if (!UseList)
- return N == 0;
-
- // TODO: Disallow for ConstantData and remove !UseList check?
return hasNItems(use_begin(), use_end(), N);
}
bool Value::hasNUsesOrMore(unsigned N) const {
- // TODO: Disallow for ConstantData and remove !UseList check?
- if (!UseList)
- return N == 0;
-
return hasNItemsOrMore(use_begin(), use_end(), N);
}
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index b618222..23be42f 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1076,63 +1076,59 @@ Expected<ArrayRef<SymbolResolution>>
LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
ArrayRef<SymbolResolution> Res) {
llvm::TimeTraceScope timeScope("LTO add thin LTO");
+ const auto BMID = BM.getModuleIdentifier();
ArrayRef<SymbolResolution> ResTmp = Res;
for (const InputFile::Symbol &Sym : Syms) {
assert(!ResTmp.empty());
const SymbolResolution &R = ResTmp.consume_front();
- if (!Sym.getIRName().empty()) {
+ if (!Sym.getIRName().empty() && R.Prevailing) {
auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
GlobalValue::getGlobalIdentifier(Sym.getIRName(),
GlobalValue::ExternalLinkage, ""));
- if (R.Prevailing)
- ThinLTO.setPrevailingModuleForGUID(GUID, BM.getModuleIdentifier());
+ ThinLTO.setPrevailingModuleForGUID(GUID, BMID);
}
}
- if (Error Err =
- BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(),
- [&](GlobalValue::GUID GUID) {
- return ThinLTO.isPrevailingModuleForGUID(
- GUID, BM.getModuleIdentifier());
- }))
+ if (Error Err = BM.readSummary(
+ ThinLTO.CombinedIndex, BMID, [&](GlobalValue::GUID GUID) {
+ return ThinLTO.isPrevailingModuleForGUID(GUID, BMID);
+ }))
return Err;
- LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n");
+ LLVM_DEBUG(dbgs() << "Module " << BMID << "\n");
for (const InputFile::Symbol &Sym : Syms) {
assert(!Res.empty());
const SymbolResolution &R = Res.consume_front();
- if (!Sym.getIRName().empty()) {
+ if (!Sym.getIRName().empty() &&
+ (R.Prevailing || R.FinalDefinitionInLinkageUnit)) {
auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
GlobalValue::getGlobalIdentifier(Sym.getIRName(),
GlobalValue::ExternalLinkage, ""));
if (R.Prevailing) {
- assert(
- ThinLTO.isPrevailingModuleForGUID(GUID, BM.getModuleIdentifier()));
+ assert(ThinLTO.isPrevailingModuleForGUID(GUID, BMID));
// For linker redefined symbols (via --wrap or --defsym) we want to
// switch the linkage to `weak` to prevent IPOs from happening.
// Find the summary in the module for this very GV and record the new
// linkage so that we can switch it when we import the GV.
if (R.LinkerRedefined)
- if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(
- GUID, BM.getModuleIdentifier()))
+ if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(GUID, BMID))
S->setLinkage(GlobalValue::WeakAnyLinkage);
}
// If the linker resolved the symbol to a local definition then mark it
// as local in the summary for the module we are adding.
if (R.FinalDefinitionInLinkageUnit) {
- if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(
- GUID, BM.getModuleIdentifier())) {
+ if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(GUID, BMID)) {
S->setDSOLocal(true);
}
}
}
}
- if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second)
+ if (!ThinLTO.ModuleMap.insert({BMID, BM}).second)
return make_error<StringError>(
"Expected at most one ThinLTO module per bitcode file",
inconvertibleErrorCode());
@@ -1143,10 +1139,10 @@ LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
// This is a fuzzy name matching where only modules with name containing the
// specified switch values are going to be compiled.
for (const std::string &Name : Conf.ThinLTOModulesToCompile) {
- if (BM.getModuleIdentifier().contains(Name)) {
- ThinLTO.ModulesToCompile->insert({BM.getModuleIdentifier(), BM});
- LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BM.getModuleIdentifier()
- << " to compile\n");
+ if (BMID.contains(Name)) {
+ ThinLTO.ModulesToCompile->insert({BMID, BM});
+ LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BMID << " to compile\n");
+ break;
}
}
}
diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp
index 71bd397..a3eaaa7 100644
--- a/llvm/lib/MC/GOFFObjectWriter.cpp
+++ b/llvm/lib/MC/GOFFObjectWriter.cpp
@@ -520,7 +520,7 @@ GOFFObjectWriter::GOFFObjectWriter(
std::unique_ptr<MCGOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
: TargetObjectWriter(std::move(MOTW)), OS(OS) {}
-GOFFObjectWriter::~GOFFObjectWriter() {}
+GOFFObjectWriter::~GOFFObjectWriter() = default;
uint64_t GOFFObjectWriter::writeObject() {
uint64_t Size = GOFFWriter(OS, *Asm).writeObject();
diff --git a/llvm/lib/MC/MCDXContainerWriter.cpp b/llvm/lib/MC/MCDXContainerWriter.cpp
index 5eda039..ebed411 100644
--- a/llvm/lib/MC/MCDXContainerWriter.cpp
+++ b/llvm/lib/MC/MCDXContainerWriter.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
-MCDXContainerTargetWriter::~MCDXContainerTargetWriter() {}
+MCDXContainerTargetWriter::~MCDXContainerTargetWriter() = default;
uint64_t DXContainerObjectWriter::writeObject() {
auto &Asm = *this->Asm;
diff --git a/llvm/lib/MC/MCGOFFStreamer.cpp b/llvm/lib/MC/MCGOFFStreamer.cpp
index 8b228db..ad6397b 100644
--- a/llvm/lib/MC/MCGOFFStreamer.cpp
+++ b/llvm/lib/MC/MCGOFFStreamer.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
-MCGOFFStreamer::~MCGOFFStreamer() {}
+MCGOFFStreamer::~MCGOFFStreamer() = default;
GOFFObjectWriter &MCGOFFStreamer::getWriter() {
return static_cast<GOFFObjectWriter &>(getAssembler().getWriter());
diff --git a/llvm/lib/MC/SPIRVObjectWriter.cpp b/llvm/lib/MC/SPIRVObjectWriter.cpp
index 5e37137..d693ea3 100644
--- a/llvm/lib/MC/SPIRVObjectWriter.cpp
+++ b/llvm/lib/MC/SPIRVObjectWriter.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSPIRVObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCValue.h"
@@ -17,8 +18,10 @@ using namespace llvm;
void SPIRVObjectWriter::writeHeader(const MCAssembler &Asm) {
constexpr uint32_t MagicNumber = 0x07230203;
constexpr uint32_t GeneratorID = 43;
- constexpr uint32_t GeneratorMagicNumber =
- (GeneratorID << 16) | (LLVM_VERSION_MAJOR);
+ const uint32_t GeneratorMagicNumber =
+ Asm.getContext().getTargetTriple().getVendor() == Triple::AMD
+ ? UINT16_MAX
+ : ((GeneratorID << 16) | (LLVM_VERSION_MAJOR));
constexpr uint32_t Schema = 0;
W.write<uint32_t>(MagicNumber);
diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.h b/llvm/lib/ObjCopy/COFF/COFFWriter.h
index 66d7f01..3ee0e06 100644
--- a/llvm/lib/ObjCopy/COFF/COFFWriter.h
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.h
@@ -50,7 +50,7 @@ class COFFWriter {
Expected<uint32_t> virtualAddressToFileAddress(uint32_t RVA);
public:
- virtual ~COFFWriter() {}
+ virtual ~COFFWriter() = default;
Error write();
COFFWriter(Object &Obj, raw_ostream &Out)
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 4f6473f..2783ef27 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -134,7 +134,7 @@ private:
using Elf_Sym = typename ELFT::Sym;
public:
- ~ELFSectionWriter() override {}
+ ~ELFSectionWriter() override = default;
Error visit(const SymbolTableSection &Sec) override;
Error visit(const RelocationSection &Sec) override;
Error visit(const GnuDebugLinkSection &Sec) override;
@@ -180,7 +180,7 @@ public:
class BinarySectionWriter : public SectionWriter {
public:
- ~BinarySectionWriter() override {}
+ ~BinarySectionWriter() override = default;
Error visit(const SymbolTableSection &Sec) override;
Error visit(const RelocationSection &Sec) override;
@@ -346,7 +346,7 @@ private:
size_t totalSize() const;
public:
- ~ELFWriter() override {}
+ ~ELFWriter() override = default;
bool WriteSectionHeaders;
// For --only-keep-debug, select an alternative section/segment layout
@@ -367,7 +367,7 @@ private:
uint64_t TotalSize = 0;
public:
- ~BinaryWriter() override {}
+ ~BinaryWriter() override = default;
Error finalize() override;
Error write() override;
BinaryWriter(Object &Obj, raw_ostream &Out, const CommonConfig &Config)
@@ -784,7 +784,7 @@ private:
SymbolTableSection *Symbols = nullptr;
public:
- ~SectionIndexSection() override {}
+ ~SectionIndexSection() override = default;
void addIndex(uint32_t Index) {
assert(Size > 0);
Indexes.push_back(Index);
diff --git a/llvm/lib/ObjCopy/MachO/MachOReader.h b/llvm/lib/ObjCopy/MachO/MachOReader.h
index e315e6fd..940ba4c 100644
--- a/llvm/lib/ObjCopy/MachO/MachOReader.h
+++ b/llvm/lib/ObjCopy/MachO/MachOReader.h
@@ -23,7 +23,7 @@ namespace macho {
// raw binaries and regular MachO object files.
class Reader {
public:
- virtual ~Reader(){};
+ virtual ~Reader() = default;
virtual Expected<std::unique_ptr<Object>> create() const = 0;
};
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
index 8620548..47639ad 100644
--- a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
@@ -20,7 +20,7 @@ namespace xcoff {
class XCOFFWriter {
public:
- virtual ~XCOFFWriter() {}
+ virtual ~XCOFFWriter() = default;
XCOFFWriter(Object &Obj, raw_ostream &Out) : Obj(Obj), Out(Out) {}
Error write();
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index e09dc94..c2f4560 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -1978,20 +1978,42 @@ uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
return SectSize;
}
-ArrayRef<uint8_t> MachOObjectFile::getSectionContents(uint32_t Offset,
+ArrayRef<uint8_t> MachOObjectFile::getSectionContents(uint64_t Offset,
uint64_t Size) const {
return arrayRefFromStringRef(getData().substr(Offset, Size));
}
Expected<ArrayRef<uint8_t>>
MachOObjectFile::getSectionContents(DataRefImpl Sec) const {
- uint32_t Offset;
+ uint64_t Offset;
uint64_t Size;
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
Offset = Sect.offset;
Size = Sect.size;
+      // Check for large Mach-O files where the section contents might exceed
+      // 4GB. MachO::section_64 objects only have 32-bit file offsets to the
+      // section contents, which can overflow in dSYM files. We track this and
+      // adjust the section offset to be 64-bit safe. If section offsets
+      // overflow, section ordering is enforced; if sections are not ordered,
+      // an error is returned, stopping invalid section data from being
+      // returned.
+ uint64_t PrevTrueOffset = 0;
+ uint64_t SectOffsetAdjust = 0;
+ for (uint32_t SectIdx = 0; SectIdx < Sec.d.a; ++SectIdx) {
+ MachO::section_64 CurrSect =
+ getStruct<MachO::section_64>(*this, Sections[SectIdx]);
+ uint64_t CurrTrueOffset = (uint64_t)CurrSect.offset + SectOffsetAdjust;
+ if ((SectOffsetAdjust > 0) && (PrevTrueOffset > CurrTrueOffset))
+ return malformedError("section data exceeds 4GB and section file "
+ "offsets are not ordered");
+ const uint64_t EndSectFileOffset =
+ (uint64_t)CurrSect.offset + CurrSect.size;
+ if (EndSectFileOffset > UINT32_MAX)
+ SectOffsetAdjust += EndSectFileOffset & 0xFFFFFFFF00000000ull;
+ PrevTrueOffset = CurrTrueOffset;
+ }
+ Offset += SectOffsetAdjust;
} else {
MachO::section Sect = getSection(Sec);
Offset = Sect.offset;
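A worked example of the offset reconstruction in the 64-bit path above (hypothetical numbers, not from the commit):

// Suppose a dSYM has, in on-disk order:
//   section 0: offset = 0xF0000000, size = 0x20000000
//     -> end = 0x110000000 > UINT32_MAX, so SectOffsetAdjust += 0x100000000
//   section 1: stored 32-bit offset = 0x00000100 (wrapped)
//     -> recovered 64-bit offset = 0x100000100
// If, after an overflow, a section's adjusted offset is smaller than its
// predecessor's, the layout is ambiguous and malformedError() is returned.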
diff --git a/llvm/lib/ObjectYAML/GOFFYAML.cpp b/llvm/lib/ObjectYAML/GOFFYAML.cpp
index 60bc1f7..ecd7fb6 100644
--- a/llvm/lib/ObjectYAML/GOFFYAML.cpp
+++ b/llvm/lib/ObjectYAML/GOFFYAML.cpp
@@ -15,7 +15,7 @@
namespace llvm {
namespace GOFFYAML {
-Object::Object() {}
+Object::Object() = default;
} // namespace GOFFYAML
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index bd03ac0..3f41618 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -228,7 +228,7 @@ static cl::opt<bool> EnableLoopHeaderDuplication(
static cl::opt<bool>
EnableDFAJumpThreading("enable-dfa-jump-thread",
cl::desc("Enable DFA jump threading"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
EnableHotColdSplit("hot-cold-split",
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 7290a86..6b7e980 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -537,7 +537,7 @@ void IRChangedPrinter::handleAfter(StringRef PassID, std::string &Name,
Out << "*** IR Dump After " << PassID << " on " << Name << " ***\n" << After;
}
-IRChangedTester::~IRChangedTester() {}
+IRChangedTester::~IRChangedTester() = default;
void IRChangedTester::registerCallbacks(PassInstrumentationCallbacks &PIC) {
if (TestChanged != "")
@@ -1566,7 +1566,7 @@ void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
TextChangeReporter<IRDataT<EmptyData>>::registerRequiredCallbacks(PIC);
}
-TimeProfilingPassesHandler::TimeProfilingPassesHandler() {}
+TimeProfilingPassesHandler::TimeProfilingPassesHandler() = default;
void TimeProfilingPassesHandler::registerCallbacks(
PassInstrumentationCallbacks &PIC) {
diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp
index fb6ff62..6f5d072 100644
--- a/llvm/lib/SandboxIR/Context.cpp
+++ b/llvm/lib/SandboxIR/Context.cpp
@@ -637,7 +637,7 @@ Context::Context(LLVMContext &LLVMCtx)
: LLVMCtx(LLVMCtx), IRTracker(*this),
LLVMIRBuilder(LLVMCtx, ConstantFolder()) {}
-Context::~Context() {}
+Context::~Context() = default;
void Context::clear() {
// TODO: Ideally we should clear only function-scope objects, and keep global
diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp
index 1914f4c..d859abd 100644
--- a/llvm/lib/Support/BalancedPartitioning.cpp
+++ b/llvm/lib/Support/BalancedPartitioning.cpp
@@ -231,7 +231,7 @@ unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,
}
// Compute move gains
- typedef std::pair<float, BPFunctionNode *> GainPair;
+ using GainPair = std::pair<float, BPFunctionNode *>;
std::vector<GainPair> Gains;
for (auto &N : Nodes) {
bool FromLeftToRight = (N.Bucket == LeftBucket);
diff --git a/llvm/lib/Support/BranchProbability.cpp b/llvm/lib/Support/BranchProbability.cpp
index ea42f34..143e58a 100644
--- a/llvm/lib/Support/BranchProbability.cpp
+++ b/llvm/lib/Support/BranchProbability.cpp
@@ -20,8 +20,6 @@
using namespace llvm;
-constexpr uint32_t BranchProbability::D;
-
raw_ostream &BranchProbability::print(raw_ostream &OS) const {
if (isUnknown())
return OS << "?%";
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index de5bd79..dab8bee 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -2343,10 +2343,10 @@ namespace {
class HelpPrinter {
protected:
const bool ShowHidden;
- typedef SmallVector<std::pair<const char *, Option *>, 128>
- StrOptionPairVector;
- typedef SmallVector<std::pair<const char *, SubCommand *>, 128>
- StrSubCommandPairVector;
+ using StrOptionPairVector =
+ SmallVector<std::pair<const char *, Option *>, 128>;
+ using StrSubCommandPairVector =
+ SmallVector<std::pair<const char *, SubCommand *>, 128>;
// Print the options. Opts is assumed to be alphabetically sorted.
virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) {
for (const auto &Opt : Opts)
diff --git a/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/llvm/lib/Support/DAGDeltaAlgorithm.cpp
index 98153647..3bfae14 100644
--- a/llvm/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/llvm/lib/Support/DAGDeltaAlgorithm.cpp
@@ -47,16 +47,16 @@ class DAGDeltaAlgorithmImpl {
friend class DeltaActiveSetHelper;
public:
- typedef DAGDeltaAlgorithm::change_ty change_ty;
- typedef DAGDeltaAlgorithm::changeset_ty changeset_ty;
- typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty;
- typedef DAGDeltaAlgorithm::edge_ty edge_ty;
+ using change_ty = DAGDeltaAlgorithm::change_ty;
+ using changeset_ty = DAGDeltaAlgorithm::changeset_ty;
+ using changesetlist_ty = DAGDeltaAlgorithm::changesetlist_ty;
+ using edge_ty = DAGDeltaAlgorithm::edge_ty;
private:
- typedef std::vector<change_ty>::iterator pred_iterator_ty;
- typedef std::vector<change_ty>::iterator succ_iterator_ty;
- typedef std::set<change_ty>::iterator pred_closure_iterator_ty;
- typedef std::set<change_ty>::iterator succ_closure_iterator_ty;
+ using pred_iterator_ty = std::vector<change_ty>::iterator;
+ using succ_iterator_ty = std::vector<change_ty>::iterator;
+ using pred_closure_iterator_ty = std::set<change_ty>::iterator;
+ using succ_closure_iterator_ty = std::set<change_ty>::iterator;
DAGDeltaAlgorithm &DDA;
diff --git a/llvm/lib/Support/DynamicLibrary.cpp b/llvm/lib/Support/DynamicLibrary.cpp
index f1c15c0..61566d3 100644
--- a/llvm/lib/Support/DynamicLibrary.cpp
+++ b/llvm/lib/Support/DynamicLibrary.cpp
@@ -23,7 +23,7 @@ using namespace llvm::sys;
// All methods for HandleSet should be used holding SymbolsMutex.
class DynamicLibrary::HandleSet {
- typedef std::vector<void *> HandleList;
+ using HandleList = std::vector<void *>;
HandleList Handles;
void *Process = &Invalid;
diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 708e79d..012e1ff 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -51,7 +51,7 @@ static Accessor splitMustacheString(StringRef Str, MustacheContext &Ctx) {
std::tie(Part, Str) = Str.split('.');
// Each part of the accessor needs to be saved to the arena
// to ensure it has a stable address.
- Tokens.push_back(Ctx.Saver.save(Part.trim()));
+ Tokens.push_back(Part.trim());
}
}
// Now, allocate memory for the array of StringRefs in the arena.
@@ -368,141 +368,99 @@ struct Tag {
llvm_unreachable("Unknown json::Value::Kind");
}
-static Tag findNextTag(StringRef Template, size_t StartPos, StringRef Open,
- StringRef Close) {
- const StringLiteral TripleOpen("{{{");
- const StringLiteral TripleClose("}}}");
-
- size_t NormalOpenPos = Template.find(Open, StartPos);
- size_t TripleOpenPos = Template.find(TripleOpen, StartPos);
-
- Tag Result;
-
- // Determine which tag comes first.
- if (TripleOpenPos != StringRef::npos &&
- (NormalOpenPos == StringRef::npos || TripleOpenPos <= NormalOpenPos)) {
- // Found a triple mustache tag.
- size_t EndPos =
- Template.find(TripleClose, TripleOpenPos + TripleOpen.size());
- if (EndPos == StringRef::npos)
- return Result; // No closing tag found.
-
- Result.TagKind = Tag::Kind::Triple;
- Result.StartPosition = TripleOpenPos;
- size_t ContentStart = TripleOpenPos + TripleOpen.size();
- Result.Content = Template.substr(ContentStart, EndPos - ContentStart);
- Result.FullMatch = Template.substr(
- TripleOpenPos, (EndPos + TripleClose.size()) - TripleOpenPos);
- } else if (NormalOpenPos != StringRef::npos) {
- // Found a normal mustache tag.
- size_t EndPos = Template.find(Close, NormalOpenPos + Open.size());
- if (EndPos == StringRef::npos)
- return Result; // No closing tag found.
-
- Result.TagKind = Tag::Kind::Normal;
- Result.StartPosition = NormalOpenPos;
- size_t ContentStart = NormalOpenPos + Open.size();
- Result.Content = Template.substr(ContentStart, EndPos - ContentStart);
- Result.FullMatch =
- Template.substr(NormalOpenPos, (EndPos + Close.size()) - NormalOpenPos);
- }
-
- return Result;
-}
-
-static std::optional<std::pair<StringRef, StringRef>>
-processTag(const Tag &T, SmallVectorImpl<Token> &Tokens, MustacheContext &Ctx) {
- LLVM_DEBUG(dbgs() << "[Tag] " << T.FullMatch << ", Content: " << T.Content
- << ", Kind: " << tagKindToString(T.TagKind) << "\n");
- if (T.TagKind == Tag::Kind::Triple) {
- Tokens.emplace_back(T.FullMatch, Ctx.Saver.save("&" + T.Content), '&', Ctx);
- return std::nullopt;
- }
- StringRef Interpolated = T.Content;
- if (!Interpolated.trim().starts_with("=")) {
- char Front = Interpolated.empty() ? ' ' : Interpolated.trim().front();
- Tokens.emplace_back(T.FullMatch, Interpolated, Front, Ctx);
- return std::nullopt;
- }
- Tokens.emplace_back(T.FullMatch, Interpolated, '=', Ctx);
- StringRef DelimSpec = Interpolated.trim();
- DelimSpec = DelimSpec.drop_front(1);
- DelimSpec = DelimSpec.take_until([](char C) { return C == '='; });
- DelimSpec = DelimSpec.trim();
-
- std::pair<StringRef, StringRef> Ret = DelimSpec.split(' ');
- LLVM_DEBUG(dbgs() << "[Set Delimiter] NewOpen: " << Ret.first
- << ", NewClose: " << Ret.second << "\n");
- return Ret;
-}
-
// Simple tokenizer that splits the template into tokens.
-// The mustache spec allows {{{ }}} to unescape variables,
-// but we don't support that here. An unescape variable
-// is represented only by {{& variable}}.
static SmallVector<Token> tokenize(StringRef Template, MustacheContext &Ctx) {
LLVM_DEBUG(dbgs() << "[Tokenize Template] \"" << Template << "\"\n");
SmallVector<Token> Tokens;
SmallString<8> Open("{{");
SmallString<8> Close("}}");
- size_t Start = 0;
+ size_t Cursor = 0;
+ size_t TextStart = 0;
- while (Start < Template.size()) {
- LLVM_DEBUG(dbgs() << "[Tokenize Loop] Start:" << Start << ", Open:'" << Open
- << "', Close:'" << Close << "'\n");
- Tag T = findNextTag(Template, Start, Open, Close);
+ const StringLiteral TripleOpen("{{{");
+ const StringLiteral TripleClose("}}}");
- if (T.TagKind == Tag::Kind::None) {
- // No more tags, the rest is text.
- Tokens.emplace_back(Template.substr(Start));
- break;
+ while (Cursor < Template.size()) {
+ StringRef TemplateSuffix = Template.substr(Cursor);
+ StringRef TagOpen, TagClose;
+ Tag::Kind Kind;
+
+ // Determine which tag we've encountered.
+ if (TemplateSuffix.starts_with(TripleOpen)) {
+ Kind = Tag::Kind::Triple;
+ TagOpen = TripleOpen;
+ TagClose = TripleClose;
+ } else if (TemplateSuffix.starts_with(Open)) {
+ Kind = Tag::Kind::Normal;
+ TagOpen = Open;
+ TagClose = Close;
+ } else {
+ // Not at a tag, continue scanning.
+ ++Cursor;
+ continue;
}
- // Add the text before the tag.
- if (T.StartPosition > Start) {
- StringRef Text = Template.substr(Start, T.StartPosition - Start);
- Tokens.emplace_back(Text);
+ // Found a tag, first add the preceding text.
+ if (Cursor > TextStart)
+ Tokens.emplace_back(Template.slice(TextStart, Cursor));
+
+ // Find the closing tag.
+ size_t EndPos = Template.find(TagClose, Cursor + TagOpen.size());
+ if (EndPos == StringRef::npos) {
+ // No closing tag, the rest is text.
+ Tokens.emplace_back(Template.substr(Cursor));
+ TextStart = Cursor = Template.size();
+ break;
}
- if (auto NewDelims = processTag(T, Tokens, Ctx)) {
- std::tie(Open, Close) = *NewDelims;
+ // Extract tag content and full match.
+ size_t ContentStart = Cursor + TagOpen.size();
+ StringRef Content = Template.substr(ContentStart, EndPos - ContentStart);
+ StringRef FullMatch =
+ Template.substr(Cursor, (EndPos + TagClose.size()) - Cursor);
+
+ // Process the tag (inlined logic from processTag).
+ LLVM_DEBUG(dbgs() << "[Tag] " << FullMatch << ", Content: " << Content
+ << ", Kind: " << tagKindToString(Kind) << "\n");
+ if (Kind == Tag::Kind::Triple) {
+ Tokens.emplace_back(FullMatch, Ctx.Saver.save("&" + Content), '&', Ctx);
+ } else { // Normal Tag
+ StringRef Interpolated = Content;
+ if (!Interpolated.trim().starts_with("=")) {
+ char Front = Interpolated.empty() ? ' ' : Interpolated.trim().front();
+ Tokens.emplace_back(FullMatch, Interpolated, Front, Ctx);
+ } else { // Set Delimiter
+ Tokens.emplace_back(FullMatch, Interpolated, '=', Ctx);
+ StringRef DelimSpec = Interpolated.trim();
+ DelimSpec = DelimSpec.drop_front(1);
+ DelimSpec = DelimSpec.take_until([](char C) { return C == '='; });
+ DelimSpec = DelimSpec.trim();
+
+ auto [NewOpen, NewClose] = DelimSpec.split(' ');
+ LLVM_DEBUG(dbgs() << "[Set Delimiter] NewOpen: " << NewOpen
+ << ", NewClose: " << NewClose << "\n");
+ Open = NewOpen;
+ Close = NewClose;
+ }
}
- // Move past the tag.
- Start = T.StartPosition + T.FullMatch.size();
+ // Move past the tag for the next iteration.
+ Cursor += FullMatch.size();
+ TextStart = Cursor;
}
- // Fix up white spaces for:
- // - open sections
- // - inverted sections
- // - close sections
- // - comments
- //
- // This loop attempts to find standalone tokens and tries to trim out
- // the surrounding whitespace.
- // For example:
- // if you have the template string
- // {{#section}} \n Example \n{{/section}}
- // The output should would be
- // For example:
- // \n Example \n
+ // Add any remaining text after the last tag.
+ if (TextStart < Template.size())
+ Tokens.emplace_back(Template.substr(TextStart));
+
+ // Fix up whitespace for standalone tags.
size_t LastIdx = Tokens.size() - 1;
for (size_t Idx = 0, End = Tokens.size(); Idx < End; ++Idx) {
Token &CurrentToken = Tokens[Idx];
Token::Type CurrentType = CurrentToken.getType();
- // Check if token type requires cleanup.
- bool RequiresCleanUp = requiresCleanUp(CurrentType);
-
- if (!RequiresCleanUp)
+ if (!requiresCleanUp(CurrentType))
continue;
- // We adjust the token body if there's no text behind or ahead.
- // A token is considered to have no text ahead if the right of the previous
- // token is a newline followed by spaces.
- // A token is considered to have no text behind if the left of the next
- // token is spaces followed by a newline.
- // eg.
- // "Line 1\n {{#section}} \n Line 2 \n {{/section}} \n Line 3"
bool HasTextBehind = hasTextBehind(Idx, Tokens);
bool HasTextAhead = hasTextAhead(Idx, Tokens);
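As a rough illustration (not part of the patch), the set-delimiter branch above boils down to the following sketch, with std::string_view standing in for llvm::StringRef:

    #include <cassert>
    #include <cctype>
    #include <string_view>

    // Trim ASCII whitespace from both ends of a view.
    static std::string_view trim(std::string_view S) {
      while (!S.empty() && std::isspace((unsigned char)S.front()))
        S.remove_prefix(1);
      while (!S.empty() && std::isspace((unsigned char)S.back()))
        S.remove_suffix(1);
      return S;
    }

    int main() {
      // Content of a set-delimiter tag "{{=<% %>=}}" as the tokenizer sees it.
      std::string_view Spec = trim("=<% %>=");
      Spec.remove_prefix(1);                 // drop the leading '='
      Spec = Spec.substr(0, Spec.find('=')); // take until the trailing '='
      Spec = trim(Spec);
      size_t Space = Spec.find(' ');
      std::string_view NewOpen = Spec.substr(0, Space);
      std::string_view NewClose = Spec.substr(Space + 1);
      assert(NewOpen == "<%" && NewClose == "%>");
      return 0;
    }

Later tags are then matched against the new delimiters, which is why Open and Close are mutable buffers in tokenize().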
diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp
index 9d45096..b08f508 100644
--- a/llvm/lib/Support/Timer.cpp
+++ b/llvm/lib/Support/Timer.cpp
@@ -207,7 +207,7 @@ void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
namespace {
-typedef StringMap<Timer> Name2TimerMap;
+using Name2TimerMap = StringMap<Timer>;
class Name2PairMap {
StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map;
diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
index 6f8e091..8f0d24e 100644
--- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp
+++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
@@ -251,10 +251,10 @@ constexpr const char *const HangulSyllables[][3] = {
// Unicode 15.0
// 3.12 Conjoining Jamo Behavior Common constants
-constexpr const char32_t SBase = 0xAC00;
-constexpr const uint32_t LCount = 19;
-constexpr const uint32_t VCount = 21;
-constexpr const uint32_t TCount = 28;
+constexpr char32_t SBase = 0xAC00;
+constexpr uint32_t LCount = 19;
+constexpr uint32_t VCount = 21;
+constexpr uint32_t TCount = 28;
static std::size_t findSyllable(StringRef Name, bool Strict,
char &PreviousInName, int &Pos, int Column) {
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index afce803..8ad20b4 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -46,12 +46,11 @@ using namespace llvm;
// Context
//===----------------------------------------------------------------------===//
-namespace llvm::detail {
/// This class represents the internal implementation of the RecordKeeper.
/// It contains all of the contextual static state of the Record classes. It is
/// kept out-of-line to simplify dependencies, and also make it easier for
/// internal classes to access the uniquer state of the keeper.
-struct RecordKeeperImpl {
+struct detail::RecordKeeperImpl {
RecordKeeperImpl(RecordKeeper &RK)
: SharedBitRecTy(RK), SharedIntRecTy(RK), SharedStringRecTy(RK),
SharedDagRecTy(RK), AnyRecord(RK, {}), TheUnsetInit(RK),
@@ -99,7 +98,6 @@ struct RecordKeeperImpl {
void dumpAllocationStats(raw_ostream &OS) const;
};
-} // namespace llvm::detail
void detail::RecordKeeperImpl::dumpAllocationStats(raw_ostream &OS) const {
// Dump memory allocation related stats.
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 1169f26..97298f9 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -655,16 +655,10 @@ Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
BasicBlock *BB = BasicBlock::Create(M->getContext(), "", GuestExit);
IRBuilder<> B(BB);
- // Load the global symbol as a pointer to the check function.
- Value *GuardFn;
- if (cfguard_module_flag == 2 && !F->hasFnAttribute("guard_nocf"))
- GuardFn = GuardFnCFGlobal;
- else
- GuardFn = GuardFnGlobal;
- LoadInst *GuardCheckLoad = B.CreateLoad(PtrTy, GuardFn);
-
- // Create new call instruction. The CFGuard check should always be a call,
- // even if the original CallBase is an Invoke or CallBr instruction.
+ // Create new call instruction. The check should always be a call,
+ // even if the original CallBase is an Invoke or CallBr instruction.
+ // This is treated as a direct call, so do not use GuardFnCFGlobal.
+ LoadInst *GuardCheckLoad = B.CreateLoad(PtrTy, GuardFnGlobal);
Function *Thunk = buildExitThunk(F->getFunctionType(), F->getAttributes());
CallInst *GuardCheck = B.CreateCall(
GuardFnType, GuardCheckLoad, {F, Thunk});
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 1b5a713..34c85d5 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -601,6 +601,12 @@ def CSR_Win_AArch64_AAPCS_SwiftError
def CSR_Win_AArch64_AAPCS_SwiftTail
: CalleeSavedRegs<(sub CSR_Win_AArch64_AAPCS, X20, X22)>;
+def CSR_Win_AArch64_RT_MostRegs
+ : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS, (sequence "X%u", 9, 15))>;
+
+def CSR_Win_AArch64_RT_AllRegs
+ : CalleeSavedRegs<(add CSR_Win_AArch64_RT_MostRegs, (sequence "Q%u", 8, 31))>;
+
// The Control Flow Guard check call uses a custom calling convention that also
// preserves X0-X8 and Q0-Q7.
def CSR_Win_AArch64_CFGuard_Check : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS,
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index cf34498..18e246e 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -81,10 +81,7 @@ namespace {
class AArch64FastISel final : public FastISel {
class Address {
public:
- using BaseKind = enum {
- RegBase,
- FrameIndexBase
- };
+ enum BaseKind { RegBase, FrameIndexBase };
private:
BaseKind Kind = RegBase;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 0f7b34c..3ee4d58 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2380,13 +2380,6 @@ void AArch64FrameLowering::determineStackHazardSlot(
return;
}
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (MFI.hasVarSizedObjects() || TRI->hasStackRealignment(MF)) {
- LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with variable "
- "sized objects or realignment\n");
- return;
- }
-
// If another calling convention is explicitly set FPRs can't be promoted to
// ZPR callee-saves.
if (!is_contained({CallingConv::C, CallingConv::Fast,
@@ -2402,6 +2395,7 @@ void AArch64FrameLowering::determineStackHazardSlot(
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Expected SVE to be available for PPRs");
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
// With SplitSVEObjects the CS hazard padding is placed between the
// PPRs and ZPRs. If there are any FPR CS there would be a hazard between
// them and the CS GRPs. Avoid this by promoting all FPR CS to ZPRs.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60aa61e..d08f9b9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1052,15 +1052,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Lower READCYCLECOUNTER using an mrs from CNTVCT_EL0.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
- if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
- getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
- // Issue __sincos_stret if available.
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
- } else {
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
- }
+ // FSINCOS is expanded; the expansion can use __sincos_stret where available.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Make floating-point constants legal for the large code model, so they don't
// become loads from the constant pool.
@@ -5346,35 +5340,6 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
return SDValue();
}
-SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
- SelectionDAG &DAG) const {
- // For iOS, we want to call an alternative entry point: __sincos_stret,
- // which returns the values in two S / D registers.
- SDLoc DL(Op);
- SDValue Arg = Op.getOperand(0);
- EVT ArgVT = Arg.getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-
- ArgListTy Args;
- Args.emplace_back(Arg, ArgTy);
-
- RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
- : RTLIB::SINCOS_STRET_F32;
- const char *LibcallName = getLibcallName(LC);
- SDValue Callee =
- DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
-
- StructType *RetTy = StructType::get(ArgTy, ArgTy);
- TargetLowering::CallLoweringInfo CLI(DAG);
- CallingConv::ID CC = getLibcallCallingConv(LC);
- CLI.setDebugLoc(DL)
- .setChain(DAG.getEntryNode())
- .setLibCallee(CC, RetTy, Callee, std::move(Args));
-
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
- return CallResult.first;
-}
-
static MVT getSVEContainerType(EVT ContentTy);
SDValue
@@ -7723,8 +7688,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return LowerFP_TO_INT_SAT(Op, DAG);
- case ISD::FSINCOS:
- return LowerFSINCOS(Op, DAG);
case ISD::GET_ROUNDING:
return LowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 2cb8ed2..70bfae7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -745,7 +745,6 @@ private:
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOOP_DEPENDENCE_MASK(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index e69fa32..2ab7bf1 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1386,6 +1386,25 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (MOP.isReg() && MOP.isKill())
DefinedInBB.addReg(MOP.getReg());
+ // Copy over any implicit-def operands. This is like MI.copyImplicitOps, but
+ // only copies implicit defs and makes sure that each operand is only added
+ // once in case of duplicates.
+ auto CopyImplicitOps = [&](MachineBasicBlock::iterator MI1,
+ MachineBasicBlock::iterator MI2) {
+ SmallSetVector<Register, 4> Ops;
+ for (const MachineOperand &MO :
+ llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))
+ if (MO.isReg() && MO.isImplicit() && MO.isDef())
+ Ops.insert(MO.getReg());
+ for (const MachineOperand &MO :
+ llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))
+ if (MO.isReg() && MO.isImplicit() && MO.isDef())
+ Ops.insert(MO.getReg());
+ for (auto Op : Ops)
+ MIB.addDef(Op, RegState::Implicit);
+ };
+ CopyImplicitOps(I, Paired);
+
// Erase the old instructions.
I->eraseFromParent();
Paired->eraseFromParent();
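The SmallSetVector in CopyImplicitOps is what enforces the "only added once" guarantee; a minimal standalone sketch (plain unsigned values standing in for Registers):

    #include "llvm/ADT/SetVector.h"
    #include <cassert>

    int main() {
      // Implicit defs collected from two paired instructions may overlap.
      llvm::SmallSetVector<unsigned, 4> Ops;
      for (unsigned R : {1u, 2u})
        Ops.insert(R); // defs of the first instruction
      for (unsigned R : {2u, 3u})
        Ops.insert(R); // defs of the second; the duplicate 2 is dropped
      // Each register appears exactly once, in first-seen order.
      assert(Ops.size() == 3 && Ops[0] == 1 && Ops[1] == 2 && Ops[2] == 3);
      return 0;
    }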
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 7e03b97..4df4d54 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -370,6 +370,22 @@ SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {
{ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
}
+SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations(
+ const SVEFrameSizes &SVE) {
+ StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
+ StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
+ StackOffset AfterPPRs = {};
+ if (SVELayout == SVEStackLayout::Split) {
+ BeforePPRs = SVE.PPR.CalleeSavesSize;
+ // If there are ZPR CSRs, place PPR locals (and the ZPR CSRs) after the
+ // PPRs; otherwise, group all local allocations after the ZPRs.
+ if (SVE.ZPR.CalleeSavesSize)
+ AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
+ else
+ AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
+ }
+ return {BeforePPRs, AfterPPRs, AfterZPRs};
+}
+
struct SVEPartitions {
struct {
MachineBasicBlock::iterator Begin, End;
@@ -687,16 +703,19 @@ void AArch64PrologueEmitter::emitPrologue() {
// All of the remaining stack allocations are for locals.
determineLocalsStackSize(NumBytes, PrologueSaveSize);
+ auto [PPR, ZPR] = getSVEStackFrameSizes();
+ SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
+
MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
+ assert(!SVEAllocs.AfterPPRs &&
+ "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
// If we're doing SVE saves first, we need to immediately allocate space
// for fixed objects, then space for the SVE callee saves.
//
// Windows unwind requires that the scalable size is a multiple of 16;
// that's handled when the callee-saved size is computed.
- auto SaveSize =
- StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
- StackOffset::getFixed(FixedObject);
+ auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
/*FollowupAllocs=*/true);
NumBytes -= FixedObject;
@@ -764,12 +783,11 @@ void AArch64PrologueEmitter::emitPrologue() {
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
- auto [PPR, ZPR] = getSVEStackFrameSizes();
- StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
+ SVEAllocs.AfterZPRs += NonSVELocalsSize;
+
StackOffset CFAOffset =
StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
-
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
// Allocate space for the callee saves and PPR locals (if any).
if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
@@ -780,31 +798,23 @@ void AArch64PrologueEmitter::emitPrologue() {
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(AfterSVESavesI);
- StackOffset AllocateBeforePPRs = SVECalleeSavesSize;
- StackOffset AllocateAfterPPRs = PPR.LocalsSize;
- if (SVELayout == SVEStackLayout::Split) {
- AllocateBeforePPRs = PPR.CalleeSavesSize;
- AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize;
- }
- allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs,
+ allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || AllocateAfterPPRs ||
- ZPR.LocalsSize || NonSVELocalsSize);
- CFAOffset += AllocateBeforePPRs;
+ MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
+ SVEAllocs.AfterZPRs);
+ CFAOffset += SVEAllocs.BeforePPRs;
assert(PPRRange.End == ZPRRange.Begin &&
"Expected ZPR callee saves after PPR locals");
- allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs,
+ allocateStackSpace(PPRRange.End, 0, SVEAllocs.AfterPPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || ZPR.LocalsSize ||
- NonSVELocalsSize);
- CFAOffset += AllocateAfterPPRs;
+ MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
+ CFAOffset += SVEAllocs.AfterPPRs;
} else {
assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord);
- // Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been
- // allocated (and separate PPR locals are not supported, all SVE locals,
- // both PPR and ZPR, are within the ZPR locals area).
- assert(!PPR.LocalsSize && "Unexpected PPR locals!");
- CFAOffset += SVECalleeSavesSize;
+ // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
+ // already been allocated. PPR locals (included in AfterPPRs) are not
+ // supported (asserted above).
+ CFAOffset += SVEAllocs.BeforePPRs;
}
// Allocate space for the rest of the frame including ZPR locals. Align the
@@ -815,9 +825,9 @@ void AArch64PrologueEmitter::emitPrologue() {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
// correct value here, as NumBytes also includes padding bytes, which
// shouldn't be counted here.
- allocateStackSpace(
- AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize,
- EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects());
+ allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects());
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -1308,6 +1318,26 @@ AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
SEHEpilogueStartI = MBB.end();
}
+void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
+ StackOffset Offset) {
+ // Other combinations could be supported, but are not currently needed.
+ assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
+ "expected negative offset (with optional fixed portion)");
+ Register Base = AArch64::FP;
+ if (int64_t FixedOffset = Offset.getFixed()) {
+ // If we have a negative fixed offset, we need to subtract it in a
+ // temporary register first (to avoid briefly deallocating the scalable
+ // portion of the offset).
+ Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
+ StackOffset::getFixed(FixedOffset), TII,
+ MachineInstr::FrameDestroy);
+ }
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
+ StackOffset::getScalable(Offset.getScalable()), TII,
+ MachineInstr::FrameDestroy);
+}
+
void AArch64EpilogueEmitter::emitEpilogue() {
MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
if (MBB.end() != EpilogueEndI) {
@@ -1408,6 +1438,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
AfterCSRPopSize += ProloguePopSize;
}
}
+
// Move past the restores of the callee-saved registers.
// If we plan on combining the sp bump of the local stack size and the callee
// save stack size, we might need to adjust the CSR save and restore offsets.
@@ -1472,27 +1503,25 @@ void AArch64EpilogueEmitter::emitEpilogue() {
assert(NumBytes >= 0 && "Negative stack allocation size!?");
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
- StackOffset SVEStackSize =
- SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
- MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin;
- MachineBasicBlock::iterator RestoreEnd = PPRRange.End;
+ SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
// Deallocate the SVE area.
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
- StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize;
+ assert(!SVEAllocs.AfterPPRs &&
+ "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
// If the callee-save area is before FP, restoring the FP implicitly
- // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
+ // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
// explicitly.
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
- SVELocalsSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI);
}
// Deallocate callee-save SVE registers.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI);
} else if (AFI->hasSVEStackSize()) {
// If we have stack realignment or variable-sized objects we must use the FP
// to restore SVE callee saves (as there is an unknown amount of
@@ -1501,69 +1530,53 @@ void AArch64EpilogueEmitter::emitEpilogue() {
(AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
: AArch64::SP;
if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
- // TODO: Support stack realigment and variable-sized objects.
- assert(
- SVELayout != SVEStackLayout::Split &&
- "unexpected stack realignment or variable sized objects with split "
- "SVE stack objects");
-
- Register CalleeSaveBase = AArch64::FP;
- if (int64_t CalleeSaveBaseOffset =
- AFI->getCalleeSaveBaseToFrameRecordOffset()) {
- // If we have have an non-zero offset to the non-SVE CS base we need to
- // compute the base address by subtracting the offest in a temporary
- // register first (to avoid briefly deallocating the SVE CS).
- CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
- &AArch64::GPR64RegClass);
- emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
- StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
- MachineInstr::FrameDestroy);
+ if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
+ // The offset from the frame-pointer to the start of the ZPR saves.
+ StackOffset FPOffsetZPR =
+ -SVECalleeSavesSize - PPR.LocalsSize -
+ StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
+ // Deallocate the stack space by moving the SP to the start of the
+ // ZPR/PPR callee-save area.
+ moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
}
- // The code below will deallocate the stack space space by moving the SP
- // to the start of the SVE callee-save area.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
- -SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);
- } else if (BaseForSVEDealloc == AArch64::SP) {
- auto CFAOffset =
- SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
-
- if (SVECalleeSavesSize) {
- // Deallocate the non-SVE locals first before we can deallocate (and
- // restore callee saves) from the SVE area.
- auto NonSVELocals = StackOffset::getFixed(NumBytes);
- emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
- NonSVELocals, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
- CFAOffset -= NonSVELocals;
- NumBytes = 0;
+ // With split SVE, the predicates are stored in a separate area above the
+ // ZPR saves, so we must adjust the stack to the start of the PPRs.
+ if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
+ // The offset from the frame-pointer to the start of the PPR saves.
+ StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
+ // Move to the start of the PPR area.
+ assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
+ emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::FP,
+ FPOffsetPPR, TII, MachineInstr::FrameDestroy);
}
-
- if (ZPR.LocalsSize) {
- emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
- ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
- CFAOffset -= ZPR.LocalsSize;
- }
-
- StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize;
- if (SVELayout == SVEStackLayout::Split &&
- (PPR.LocalsSize || ZPR.CalleeSavesSize)) {
- assert(PPRRange.Begin == ZPRRange.End &&
- "Expected PPR restores after ZPR");
- emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
- PPR.LocalsSize + ZPR.CalleeSavesSize, TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI,
- &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
- CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize;
- SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize;
+ } else if (BaseForSVEDealloc == AArch64::SP) {
+ auto NonSVELocals = StackOffset::getFixed(NumBytes);
+ auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +
+ SVEAllocs.totalSize();
+
+ if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
+ // Deallocate non-SVE locals now. This is needed to reach the SVE callee
+ // saves, but may also allow combining stack hazard bumps for split SVE.
+ SVEAllocs.AfterZPRs += NonSVELocals;
+ NumBytes -= NonSVELocals.getFixed();
}
-
- // If split SVE is on, this dealloc PPRs, otherwise, deallocs ZPRs + PPRs:
- if (SVECalleeSavesToDealloc)
- emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
- SVECalleeSavesToDealloc, TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI,
- &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
+ // To deallocate the SVE stack, adjust by the allocations in reverse.
+ emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ CFAOffset);
+ CFAOffset -= SVEAllocs.AfterZPRs;
+ assert(PPRRange.Begin == ZPRRange.End &&
+ "Expected PPR restores after ZPR");
+ emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.AfterPPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ CFAOffset);
+ CFAOffset -= SVEAllocs.AfterPPRs;
+ emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
+ SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ CFAOffset);
}
if (EmitCFI)
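The ordering constraint behind moveSPBelowFP can be seen with a toy model (ints standing in for StackOffset, a fixed vscale assumed): applying the fixed part of the offset straight to SP would leave SP above its final target, transiently deallocating the scalable region below it.

    #include <cassert>

    int main() {
      // Both offset parts are negative: the target is below the FP.
      const int FP = 1024;
      const int Fixed = -16, Scalable = -128;
      const int Target = FP + Fixed + Scalable;

      // Naive: bumping SP by the fixed part first leaves SP above Target.
      assert(FP + Fixed > Target);

      // Instead, materialize FP + Fixed in a temporary base register, then
      // move SP in one step by the scalable part; SP never rises above Target.
      int Base = FP + Fixed;
      int SP = Base + Scalable;
      assert(SP == Target);
      return 0;
    }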
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
index bccadda..7f297b5 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
@@ -33,6 +33,11 @@ struct SVEFrameSizes {
} PPR, ZPR;
};
+struct SVEStackAllocations {
+ StackOffset BeforePPRs, AfterPPRs, AfterZPRs;
+ StackOffset totalSize() const { return BeforePPRs + AfterPPRs + AfterZPRs; }
+};
+
class AArch64PrologueEpilogueCommon {
public:
AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -66,6 +71,7 @@ protected:
bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;
SVEFrameSizes getSVEStackFrameSizes() const;
+ SVEStackAllocations getSVEStackAllocations(const SVEFrameSizes &);
MachineFunction &MF;
MachineBasicBlock &MBB;
@@ -174,6 +180,10 @@ public:
private:
bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;
+ /// A helper for moving the SP to a negative offset from the FP, without
+ /// deallocating any stack in the range FP to FP + Offset.
+ void moveSPBelowFP(MachineBasicBlock::iterator MBBI, StackOffset Offset);
+
void emitSwiftAsyncContextFramePointer(MachineBasicBlock::iterator MBBI,
const DebugLoc &DL) const;
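The three fields of SVEStackAllocations encode prologue allocation order; the epilogue must undo them in reverse, which a toy model makes explicit (ints standing in for StackOffset, a fixed vscale assumed):

    #include <cassert>

    struct SVEStackAllocs {
      int BeforePPRs, AfterPPRs, AfterZPRs;
      int totalSize() const { return BeforePPRs + AfterPPRs + AfterZPRs; }
    };

    int main() {
      SVEStackAllocs A{32, 16, 64};
      int SP = 0;
      // Prologue: allocate in declaration order.
      SP -= A.BeforePPRs;
      SP -= A.AfterPPRs;
      SP -= A.AfterZPRs;
      assert(SP == -A.totalSize());
      // Epilogue: deallocate in reverse (AfterZPRs, AfterPPRs, BeforePPRs).
      SP += A.AfterZPRs;
      SP += A.AfterPPRs;
      SP += A.BeforePPRs;
      assert(SP == 0);
      return 0;
    }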
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 5bfb19d9..a5048b9 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -90,6 +90,16 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getSubtarget<AArch64Subtarget>().isTargetDarwin())
return getDarwinCalleeSavedRegs(MF);
+ if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
+ return MF->getSubtarget<AArch64Subtarget>().isTargetWindows()
+ ? CSR_Win_AArch64_RT_MostRegs_SaveList
+ : CSR_AArch64_RT_MostRegs_SaveList;
+
+ if (MF->getFunction().getCallingConv() == CallingConv::PreserveAll)
+ return MF->getSubtarget<AArch64Subtarget>().isTargetWindows()
+ ? CSR_Win_AArch64_RT_AllRegs_SaveList
+ : CSR_AArch64_RT_AllRegs_SaveList;
+
if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
return CSR_Win_AArch64_CFGuard_Check_SaveList;
if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows()) {
@@ -138,10 +148,6 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_AArch64_AAPCS_SwiftError_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail)
return CSR_AArch64_AAPCS_SwiftTail_SaveList;
- if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
- return CSR_AArch64_RT_MostRegs_SaveList;
- if (MF->getFunction().getCallingConv() == CallingConv::PreserveAll)
- return CSR_AArch64_RT_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::Win64)
// This is for OSes other than Windows; Windows is a separate case further
// above.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e8352be..197aae6 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3007,9 +3007,9 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
llvm_unreachable("Unsupported register kind");
}
-bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
- ArrayRef<const Value *> Args,
- Type *SrcOverrideTy) const {
+bool AArch64TTIImpl::isSingleExtWideningInstruction(
+ unsigned Opcode, Type *DstTy, ArrayRef<const Value *> Args,
+ Type *SrcOverrideTy) const {
// A helper that returns a vector type from the given type. The number of
// elements in type Ty determines the vector width.
auto toVectorTy = [&](Type *ArgTy) {
@@ -3027,48 +3027,29 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
(DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
return false;
- // Determine if the operation has a widening variant. We consider both the
- // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
- // instructions.
- //
- // TODO: Add additional widening operations (e.g., shl, etc.) once we
- // verify that their extending operands are eliminated during code
- // generation.
Type *SrcTy = SrcOverrideTy;
switch (Opcode) {
- case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
- case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
+ case Instruction::Add: // UADDW(2), SADDW(2).
+ case Instruction::Sub: { // USUBW(2), SSUBW(2).
// The second operand needs to be an extend
if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {
if (!SrcTy)
SrcTy =
toVectorTy(cast<Instruction>(Args[1])->getOperand(0)->getType());
- } else
+ break;
+ }
+
+ if (Opcode == Instruction::Sub)
return false;
- break;
- case Instruction::Mul: { // SMULL(2), UMULL(2)
- // Both operands need to be extends of the same type.
- if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
- (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
+
+ // UADDW(2), SADDW(2) can be commuted.
+ if (isa<SExtInst>(Args[0]) || isa<ZExtInst>(Args[0])) {
if (!SrcTy)
SrcTy =
toVectorTy(cast<Instruction>(Args[0])->getOperand(0)->getType());
- } else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {
- // If one of the operands is a Zext and the other has enough zero bits to
- // be treated as unsigned, we can still general a umull, meaning the zext
- // is free.
- KnownBits Known =
- computeKnownBits(isa<ZExtInst>(Args[0]) ? Args[1] : Args[0], DL);
- if (Args[0]->getType()->getScalarSizeInBits() -
- Known.Zero.countLeadingOnes() >
- DstTy->getScalarSizeInBits() / 2)
- return false;
- if (!SrcTy)
- SrcTy = toVectorTy(Type::getIntNTy(DstTy->getContext(),
- DstTy->getScalarSizeInBits() / 2));
- } else
- return false;
- break;
+ break;
+ }
+ return false;
}
default:
return false;
@@ -3099,6 +3080,73 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
}
+Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
+ ArrayRef<const Value *> Args,
+ Type *SrcOverrideTy) const {
+ if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
+ Opcode != Instruction::Mul)
+ return nullptr;
+
+ // Exit early if DstTy is not a vector type whose elements are one of [i16,
+ // i32, i64]. SVE doesn't generally have the same set of instructions to
+ // perform an extend with the add/sub/mul. There are SMULLB style
+ // instructions, but they operate on top/bottom, requiring some sort of lane
+ // interleaving to be used with zext/sext.
+ unsigned DstEltSize = DstTy->getScalarSizeInBits();
+ if (!useNeonVector(DstTy) || Args.size() != 2 ||
+ (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
+ return nullptr;
+
+ auto getScalarSizeWithOverride = [&](const Value *V) {
+ if (SrcOverrideTy)
+ return SrcOverrideTy->getScalarSizeInBits();
+ return cast<Instruction>(V)
+ ->getOperand(0)
+ ->getType()
+ ->getScalarSizeInBits();
+ };
+
+ unsigned MaxEltSize = 0;
+ if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
+ (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
+ unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
+ unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
+ MaxEltSize = std::max(EltSize0, EltSize1);
+ } else if (isa<SExtInst, ZExtInst>(Args[0]) &&
+ isa<SExtInst, ZExtInst>(Args[1])) {
+ unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
+ unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
+ // mul(sext, zext) will become smull(sext, zext) if the extends are large
+ // enough.
+ if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
+ return nullptr;
+ MaxEltSize = DstEltSize / 2;
+ } else if (Opcode == Instruction::Mul &&
+ (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1]))) {
+ // If one of the operands is a Zext and the other has enough zero bits
+ // to be treated as unsigned, we can still generate a umull, meaning the
+ // zext is free.
+ KnownBits Known =
+ computeKnownBits(isa<ZExtInst>(Args[0]) ? Args[1] : Args[0], DL);
+ if (Args[0]->getType()->getScalarSizeInBits() -
+ Known.Zero.countLeadingOnes() >
+ DstTy->getScalarSizeInBits() / 2)
+ return nullptr;
+
+ MaxEltSize =
+ getScalarSizeWithOverride(isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
+ } else
+ return nullptr;
+
+ if (MaxEltSize * 2 > DstEltSize)
+ return nullptr;
+
+ Type *ExtTy = DstTy->getWithNewBitWidth(MaxEltSize * 2);
+ if (ExtTy->getPrimitiveSizeInBits() <= 64)
+ return nullptr;
+ return ExtTy;
+}
+
// s/urhadd instructions implement the following pattern, making the
// extends free:
// %x = add ((zext i8 -> i16), 1)
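The "minimal widened type" rule in isBinExtWideningInstruction reduces to a few integer checks; a worked sketch (not part of the patch) for mul <8 x i64> with both operands sign-extended from <8 x i16>:

    #include <cassert>

    int main() {
      const unsigned DstEltSize = 64; // i64 destination elements
      const unsigned MaxEltSize = 16; // widest extend source is i16
      const unsigned NumElts = 8;

      // Both extends must fit a widening multiply (smull doubles the width).
      assert(MaxEltSize * 2 <= DstEltSize);
      const unsigned ExtEltSize = MaxEltSize * 2; // operate on i32 elements

      // Only worthwhile when the widened vector exceeds 64 bits.
      assert(NumElts * ExtEltSize > 64);

      // The mul is then costed as a <8 x i32> mul plus one extend
      // <8 x i32> -> <8 x i64>, instead of a scalarized <8 x i64> mul.
      assert(ExtEltSize == 32);
      return 0;
    }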
@@ -3159,7 +3207,24 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
if (I && I->hasOneUser()) {
auto *SingleUser = cast<Instruction>(*I->user_begin());
SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
- if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
+ if (Type *ExtTy = isBinExtWideningInstruction(
+ SingleUser->getOpcode(), Dst, Operands,
+ Src != I->getOperand(0)->getType() ? Src : nullptr)) {
+ // The cost from Src->Src*2 needs to be added if required; the cost from
+ // Src*2->ExtTy is free.
+ if (ExtTy->getScalarSizeInBits() > Src->getScalarSizeInBits() * 2) {
+ Type *DoubleSrcTy =
+ Src->getWithNewBitWidth(Src->getScalarSizeInBits() * 2);
+ return getCastInstrCost(Opcode, DoubleSrcTy, Src,
+ TTI::CastContextHint::None, CostKind);
+ }
+
+ return 0;
+ }
+
+ if (isSingleExtWideningInstruction(
+ SingleUser->getOpcode(), Dst, Operands,
+ Src != I->getOperand(0)->getType() ? Src : nullptr)) {
// For adds only count the second operand as free if both operands are
// extends but not the same operation. (i.e both operands are not free in
// add(sext, zext)).
@@ -3168,8 +3233,11 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
(isa<CastInst>(SingleUser->getOperand(1)) &&
cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
return 0;
- } else // Others are free so long as isWideningInstruction returned true.
+ } else {
+ // Others are free so long as isSingleExtWideningInstruction
+ // returned true.
return 0;
+ }
}
// The cast will be free for the s/urhadd instructions
@@ -4148,6 +4216,18 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
}))
return *PromotedCost;
+ // If the operation is a widening instruction (smull or umull) and both
+ // operands are extends, the cost can be cheaper: the operation works on
+ // the narrowest possible type (double the largest input size), followed
+ // by a further extend.
+ if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
+ if (ExtTy != Ty)
+ return getArithmeticInstrCost(Opcode, ExtTy, CostKind) +
+ getCastInstrCost(Instruction::ZExt, Ty, ExtTy,
+ TTI::CastContextHint::None, CostKind);
+ return LT.first;
+ }
+
switch (ISD) {
default:
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
@@ -4381,10 +4461,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
// - two 2-cost i64 inserts, and
// - two 1-cost muls.
// So, for a v2i64 with LT.First = 1 the cost is 14, and for a v4i64 with
- // LT.first = 2 the cost is 28. If both operands are extensions it will not
- // need to scalarize so the cost can be cheaper (smull or umull).
- // so the cost can be cheaper (smull or umull).
- if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
+ // LT.first = 2 the cost is 28.
+ if (LT.second != MVT::v2i64)
return LT.first;
return cast<VectorType>(Ty)->getElementCount().getKnownMinValue() *
(getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind) +
@@ -6129,7 +6207,8 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
}
static bool containsDecreasingPointers(Loop *TheLoop,
- PredicatedScalarEvolution *PSE) {
+ PredicatedScalarEvolution *PSE,
+ const DominatorTree &DT) {
const auto &Strides = DenseMap<Value *, const SCEV *>();
for (BasicBlock *BB : TheLoop->blocks()) {
// Scan the instructions in the block and look for addresses that are
@@ -6138,8 +6217,8 @@ static bool containsDecreasingPointers(Loop *TheLoop,
if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {
Value *Ptr = getLoadStorePointerOperand(&I);
Type *AccessTy = getLoadStoreType(&I);
- if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, Strides, /*Assume=*/true,
- /*ShouldCheckWrap=*/false)
+ if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
+ /*Assume=*/true, /*ShouldCheckWrap=*/false)
.value_or(0) < 0)
return true;
}
@@ -6184,7 +6263,8 @@ bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
// negative strides. This will require extra work to reverse the loop
// predicate, which may be expensive.
if (containsDecreasingPointers(TFI->LVL->getLoop(),
- TFI->LVL->getPredicatedScalarEvolution()))
+ TFI->LVL->getPredicatedScalarEvolution(),
+ *TFI->LVL->getDominatorTree()))
Required |= TailFoldingOpts::Reverse;
if (Required == TailFoldingOpts::Disabled)
Required |= TailFoldingOpts::Simple;
@@ -6657,10 +6737,15 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
Ops.push_back(&Ext->getOperandUse(0));
Ops.push_back(&Op);
- if (isa<SExtInst>(Ext))
+ if (isa<SExtInst>(Ext)) {
NumSExts++;
- else
+ } else {
NumZExts++;
+ // When the extend more than doubles the width, zext(a) can also be
+ // treated as sext(zext(a)), since the intermediate zext clears the
+ // sign bit.
+ if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
+ I->getType()->getScalarSizeInBits())
+ NumSExts++;
+ }
continue;
}
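The zext-counts-as-sext observation above holds because the intermediate zext clears the sign bit; an exhaustive check for the i8 -> i16 -> i32 case:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned V = 0; V < 256; ++V) {
        uint8_t A = (uint8_t)V;
        uint32_t ZExt32 = A; // zext i8 -> i32
        // zext i8 -> i16, then sext i16 -> i32: the i16 value is always
        // non-negative, so sign extension adds only zero bits.
        uint16_t ZExt16 = A;
        uint32_t SExtOfZExt = (uint32_t)(int32_t)(int16_t)ZExt16;
        assert(ZExt32 == SExtOfZExt);
      }
      return 0;
    }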
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index b39546a..e3b0a1b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -59,9 +59,17 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
VECTOR_LDST_FOUR_ELEMENTS
};
- bool isWideningInstruction(Type *DstTy, unsigned Opcode,
- ArrayRef<const Value *> Args,
- Type *SrcOverrideTy = nullptr) const;
+ /// Given an add/sub/mul operation, detect a widening addl/subl/mull pattern
+ /// where both operands can be treated like extends. Returns the minimal type
+ /// needed to compute the operation.
+ Type *isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
+ ArrayRef<const Value *> Args,
+ Type *SrcOverrideTy = nullptr) const;
+ /// Given an add/sub operation with a single extend operand, detect a
+ /// widening addw/subw pattern.
+ bool isSingleExtWideningInstruction(unsigned Opcode, Type *DstTy,
+ ArrayRef<const Value *> Args,
+ Type *SrcOverrideTy = nullptr) const;
// A helper function called by 'getVectorInstrCost'.
//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index cd8b249..67042b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -69,7 +69,7 @@ FunctionPass *createAMDGPUPreloadKernArgPrologLegacyPass();
ModulePass *createAMDGPUPreloadKernelArgumentsLegacyPass(const TargetMachine *);
struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
- AMDGPUSimplifyLibCallsPass() {}
+ AMDGPUSimplifyLibCallsPass() = default;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
@@ -371,13 +371,13 @@ public:
class AMDGPUAnnotateUniformValuesPass
: public PassInfoMixin<AMDGPUAnnotateUniformValuesPass> {
public:
- AMDGPUAnnotateUniformValuesPass() {}
+ AMDGPUAnnotateUniformValuesPass() = default;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
public:
- SIModeRegisterPass() {}
+ SIModeRegisterPass() = default;
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 1064e57..dad94b8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -96,7 +96,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {
}
struct KernArgPreloadDescriptor : public ArgDescriptor {
- KernArgPreloadDescriptor() {}
+ KernArgPreloadDescriptor() = default;
SmallVector<MCRegister> Regs;
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 9907c88f..8669978 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1555,7 +1555,7 @@ private:
AMDGPU::ClusterDimsAttr Attr;
- static constexpr const char AttrName[] = "amdgpu-cluster-dims";
+ static constexpr char AttrName[] = "amdgpu-cluster-dims";
};
AAAMDGPUClusterDims &
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index cf2ab825..a3be0f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -48,7 +48,7 @@ private:
FuncInfoMap FIM;
public:
- AMDGPUPerfHintAnalysis() {}
+ AMDGPUPerfHintAnalysis() = default;
// OldPM
bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 103cdec..1e5885a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -202,7 +202,7 @@ bool PredicateMapping::match(const MachineInstr &MI,
return true;
}
-SetOfRulesForOpcode::SetOfRulesForOpcode() {}
+SetOfRulesForOpcode::SetOfRulesForOpcode() = default;
SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
: FastTypes(FastTypes) {}
@@ -913,6 +913,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
+ addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
+
addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
.Uni(S64, {{Sgpr64}, {}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 733c5d5..fe81a5e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
return NewRetBlock;
}
+static BasicBlock *
+createDummyReturnBlock(Function &F,
+ SmallVector<BasicBlock *, 4> &ReturningBlocks) {
+ BasicBlock *DummyReturnBB =
+ BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
+ Type *RetTy = F.getReturnType();
+ Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
+ ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
+ ReturningBlocks.push_back(DummyReturnBB);
+ return DummyReturnBB;
+}
+
+/// Handle conditional branch instructions (-> 2 targets) and callbr
+/// instructions with N targets.
+static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
+ BasicBlock *DummyReturnBB,
+ std::vector<DominatorTree::UpdateType> &Updates) {
+ SmallVector<BasicBlock *, 2> Successors(successors(BB));
+
+ // Create a new transition block to hold the conditional branch.
+ BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
+
+ Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
+
+ // 'Successors' become successors of TransitionBB instead of BB,
+ // and TransitionBB becomes a single successor of BB.
+ Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
+ for (BasicBlock *Successor : Successors) {
+ Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
+ Updates.emplace_back(DominatorTree::Delete, BB, Successor);
+ }
+
+ // Create a branch that always branches to the transition block while
+ // also referencing DummyReturnBB as the never-taken target.
+ BB->getTerminator()->eraseFromParent();
+ BranchInst::Create(TransitionBB, DummyReturnBB,
+ ConstantInt::getTrue(F.getContext()), BB);
+ Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
+}
+
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
const PostDominatorTree &PDT,
const UniformityInfo &UA) {
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
-
if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
- !isa<BranchInst>(PDT.getRoot()->getTerminator())))
+ !isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
return false;
// Loop over all of the blocks in a function, tracking all of the blocks that
@@ -222,46 +260,28 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
if (HasDivergentExitBlock)
UnreachableBlocks.push_back(BB);
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
-
- ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
- if (DummyReturnBB == nullptr) {
- DummyReturnBB = BasicBlock::Create(F.getContext(),
- "DummyReturnBlock", &F);
- Type *RetTy = F.getReturnType();
- Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
- ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
- ReturningBlocks.push_back(DummyReturnBB);
- }
+ if (!DummyReturnBB)
+ DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
if (BI->isUnconditional()) {
BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
BI->eraseFromParent(); // Delete the unconditional branch.
// Add a new conditional branch with a dummy edge to the return block.
- BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
- Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
- } else { // Conditional branch.
- SmallVector<BasicBlock *, 2> Successors(successors(BB));
-
- // Create a new transition block to hold the conditional branch.
- BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
-
- Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
-
- // 'Successors' become successors of TransitionBB instead of BB,
- // and TransitionBB becomes a single successor of BB.
- Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
- for (BasicBlock *Successor : Successors) {
- Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
- Updates.emplace_back(DominatorTree::Delete, BB, Successor);
- }
-
- // Create a branch that will always branch to the transition block and
- // references DummyReturnBB.
- BB->getTerminator()->eraseFromParent();
- BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
+ BranchInst::Create(LoopHeaderBB, DummyReturnBB,
+ ConstantInt::getTrue(F.getContext()), BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
+ } else {
+ handleNBranch(F, BB, BI, DummyReturnBB, Updates);
}
Changed = true;
+ } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
+ if (!DummyReturnBB)
+ DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
+
+ handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
+ Changed = true;
+ } else {
+ llvm_unreachable("unsupported block terminator");
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 61c5dcd..ded2f5a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -54,7 +54,7 @@ public:
bool CullSGPRHazardsAtMemWait;
unsigned CullSGPRHazardsMemWaitThreshold;
- AMDGPUWaitSGPRHazards() {}
+ AMDGPUWaitSGPRHazards() = default;
// Return the numeric ID 0-127 for a given SGPR.
static std::optional<unsigned> sgprNumber(Register Reg,
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 52cc4ca..1a14629 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -435,7 +435,7 @@ void GCNHazardRecognizer::RecedeCycle() {
// Helper Functions
//===----------------------------------------------------------------------===//
-using HazardFnResult = enum { HazardFound, HazardExpired, NoHazardFound };
+enum HazardFnResult { HazardFound, HazardExpired, NoHazardFound };
using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 959ce69..1682abb 100644
--- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -43,7 +43,7 @@ public:
bool run(MachineFunction &MF);
private:
- using NSA_Status = enum {
+ enum NSA_Status {
NOT_NSA, // Not an NSA instruction
FIXED, // NSA which we cannot modify
NON_CONTIGUOUS, // NSA with non-sequential address which we can try
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 975781f..f357981 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -183,7 +183,7 @@ class ScheduleMetrics {
unsigned BubbleCycles;
public:
- ScheduleMetrics() {}
+ ScheduleMetrics() = default;
ScheduleMetrics(unsigned L, unsigned BC)
: ScheduleLength(L), BubbleCycles(BC) {}
unsigned getLength() const { return ScheduleLength; }
@@ -217,7 +217,7 @@ class RegionPressureMap {
bool IsLiveOut;
public:
- RegionPressureMap() {}
+ RegionPressureMap() = default;
RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
: DAG(GCNDAG), IsLiveOut(LiveOut) {}
// Build the Instr->LiveReg and RegionIdx->Instr maps
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 2aa54c9..31eca04 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1129,12 +1129,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
- // TODO: can the chain be replaced without creating a new store?
- SDValue NewStore = DAG.getTruncStore(
- NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
- StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
- StoreNode->getAAInfo());
- StoreNode = cast<StoreSDNode>(NewStore);
+ SmallVector<SDValue, 4> NewOps(StoreNode->ops());
+ NewOps[0] = NewChain;
+ StoreNode = cast<StoreSDNode>(DAG.UpdateNodeOperands(StoreNode, NewOps));
}
return scalarizeVectorStore(StoreNode, DAG);
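
Rather than building a replacement truncating store just to carry the isolated chain, the node's operand list is now rewritten in place (the chain is operand 0 of a store node), which answers the old TODO; UpdateNodeOperands may CSE to an existing equivalent node, hence the re-cast. A toy sketch of the shape of that update, with illustrative types rather than the real SelectionDAG API:

    #include <utility>
    #include <vector>

    struct Node { std::vector<Node *> Ops; };

    // Reuse the node and splice in new operands; existing users stay valid.
    Node *updateNodeOperands(Node *N, std::vector<Node *> NewOps) {
      N->Ops = std::move(NewOps);
      return N;
    }
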
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 5c39f7a..aa5ea77 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -2170,7 +2170,9 @@ bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
return MFI.getStackSize() != 0;
}
- return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
+ return (frameTriviallyRequiresSP(MFI) &&
+ !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) ||
+ MFI.isFrameAddressTaken() ||
MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
MF) ||
mayReserveScratchForCWSR(MF) ||
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d9f76c9..45f5919 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6153,7 +6153,7 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
// information.
if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&
MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) {
- constexpr const AMDGPU::OpName OpNames[] = {
+ constexpr AMDGPU::OpName OpNames[] = {
AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
for (auto [I, OpName] : enumerate(OpNames)) {
@@ -6215,8 +6215,8 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(
const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
const MachineOperand *MO) const {
- constexpr const unsigned NumOps = 3;
- constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
+ constexpr unsigned NumOps = 3;
+ constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
AMDGPU::OpName::src0, AMDGPU::OpName::src1,
AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
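
The dropped const qualifiers were redundant: constexpr already const-qualifies the object it declares. A standalone check:

    #include <type_traits>

    constexpr int Table[3] = {1, 2, 3};
    static_assert(std::is_same_v<decltype(Table), const int[3]>,
                  "constexpr objects are implicitly const-qualified");
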
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 7431e11..abefa32 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -296,7 +296,7 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
for (MachineBasicBlock::instr_iterator I = std::next(First.getIterator()),
E = MI.getIterator();
I != E; ++I) {
- if (I->isBundle())
+ if (I->isBundle() || I->isDebugInstr())
continue;
switch (I->getOpcode()) {
case AMDGPU::S_SET_GPR_IDX_MODE:
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 4ae2c1e..31d8bce4 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1707,7 +1707,7 @@ multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string Pse
defvar WMMAConstraints2Addr = !if(DiffVdstSrc2, "@earlyclobber $vdst", "@earlyclobber $vdst,$vdst = $src2");
defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
- let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
+ let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0, isConvergent = 1 in {
let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in
def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
let PseudoInstr = Instr#PseudoInstrSuffix;
@@ -1734,7 +1734,7 @@ multiclass SWMMACInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string P
let mayRaiseFPException = 0;
let ReadsModeReg = 0;
let AsmMatchConverter = "cvtSWMMAC";
-
+ let isConvergent = 1;
let Constraints = "@earlyclobber $vdst,$vdst = $srcTiedDef";
}
}
@@ -1906,8 +1906,10 @@ defm V_WMMA_SCALE_F32_32X16X128_F4_w32 : WMMAInstGFX12<"v_wmma_scale_f32_32x16
defm V_WMMA_SCALE16_F32_32X16X128_F4_w32 : WMMAInstGFX12<"v_wmma_scale16_f32_32x16x128_f4", F32_32X16X128_F4_SCALE16_w32, "_w32">;
} // End is_wmma_xdl = 1.
-defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3PInst<"v_wmma_ld_scale_paired_b32", VOP_WMMA_LD_SCALE<i32, VCSrc_b32_Lo256>>;
-defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3PInst<"v_wmma_ld_scale16_paired_b64", VOP_WMMA_LD_SCALE<i64, VCSrc_b64_Lo256>>;
+let isConvergent = 1 in {
+ defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3PInst<"v_wmma_ld_scale_paired_b32", VOP_WMMA_LD_SCALE<i32, VCSrc_b32_Lo256>>;
+ defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3PInst<"v_wmma_ld_scale16_paired_b64", VOP_WMMA_LD_SCALE<i64, VCSrc_b64_Lo256>>;
+}
} // End SubtargetPredicate = isGFX125xOnly
} // End WaveSizePredicate = isWave32
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 14e1160..88d3b6f 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -86,7 +86,7 @@ namespace {
// All possible address modes, plus some.
class Address {
public:
- using BaseKind = enum { RegBase, FrameIndexBase };
+ enum BaseKind { RegBase, FrameIndexBase };
private:
BaseKind Kind = RegBase;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6b06534..92fae71 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1312,8 +1312,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
}
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// FP-ARMv8 implements a lot of rounding-like FP operations.
if (Subtarget->hasFPARMv8Base()) {
@@ -9855,76 +9855,6 @@ static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
}
-SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
- // For iOS, we want to call an alternative entry point: __sincos_stret,
- // return values are passed via sret.
- SDLoc dl(Op);
- SDValue Arg = Op.getOperand(0);
- EVT ArgVT = Arg.getValueType();
- RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT);
- RTLIB::LibcallImpl SincosStret = getLibcallImpl(LC);
- if (SincosStret == RTLIB::Unsupported)
- return SDValue();
-
- assert(Subtarget->isTargetDarwin());
-
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- auto PtrVT = getPointerTy(DAG.getDataLayout());
-
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
-
- // Pair of floats / doubles used to pass the result.
- Type *RetTy = StructType::get(ArgTy, ArgTy);
- auto &DL = DAG.getDataLayout();
-
- ArgListTy Args;
- bool ShouldUseSRet = getTM().isAPCS_ABI();
- SDValue SRet;
- if (ShouldUseSRet) {
- // Create stack object for sret.
- const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
- const Align StackAlign = DL.getPrefTypeAlign(RetTy);
- int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
- SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));
-
- ArgListEntry Entry(SRet, PointerType::getUnqual(RetTy->getContext()));
- Entry.IsSExt = false;
- Entry.IsZExt = false;
- Entry.IsSRet = true;
- Args.push_back(Entry);
- RetTy = Type::getVoidTy(*DAG.getContext());
- }
-
- Args.emplace_back(Arg, ArgTy);
-
- StringRef LibcallName = getLibcallImplName(SincosStret);
- CallingConv::ID CC = getLibcallImplCallingConv(SincosStret);
- SDValue Callee = DAG.getExternalSymbol(LibcallName.data(), getPointerTy(DL));
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(DAG.getEntryNode())
- .setCallee(CC, RetTy, Callee, std::move(Args))
- .setDiscardResult(ShouldUseSRet);
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
-
- if (!ShouldUseSRet)
- return CallResult.first;
-
- SDValue LoadSin =
- DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
-
- // Address of cos field.
- SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
- DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
- SDValue LoadCos =
- DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
-
- SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
- return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
- LoadSin.getValue(0), LoadCos.getValue(0));
-}
-
SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
bool Signed,
SDValue &Chain) const {
@@ -10726,8 +10656,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECREDUCE_SMAX:
return LowerVecReduceMinMax(Op, DAG, Subtarget);
case ISD::ATOMIC_LOAD:
- case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
- case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
+ case ISD::ATOMIC_STORE:
+ return LowerAtomicLoadStore(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
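
With the custom lowering deleted, marking FSINCOS as Expand lets the generic legalizer emit the sincos libcall where one is available, or fall back to separate sin and cos calls. The removed code targeted Darwin's __sincos_stret, which hands both results back through one sret-style aggregate that the lowering then loaded field by field; a plain C++ sketch of that pair-return convention (illustrative names, not the runtime's actual entry point):

    #include <cmath>

    struct SinCosPair { double Sin, Cos; };

    // Both results come back in a single aggregate, mirroring the
    // pair-in-sret return the deleted lowering reconstructed by hand.
    SinCosPair sincosStret(double X) { return {std::sin(X), std::cos(X)}; }
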
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index bf3438b..bc2fec3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -901,7 +901,6 @@ class VectorType;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 9b250e6..24f58a6 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2448,7 +2448,8 @@ static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) {
//
static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
const DataLayout &DL,
- const LoopAccessInfo *LAI) {
+ const LoopAccessInfo *LAI,
+ const DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
// If there are live-out values, it is probably a reduction. We can predicate
@@ -2498,7 +2499,8 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
Value *Ptr = getLoadStorePointerOperand(&I);
Type *AccessTy = getLoadStoreType(&I);
- int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L).value_or(0);
+ int64_t NextStride =
+ getPtrStride(PSE, AccessTy, Ptr, L, DT).value_or(0);
if (NextStride == 1) {
// TODO: for now only allow consecutive strides of 1. We could support
// other strides as long as it is uniform, but let's keep it simple
@@ -2585,7 +2587,8 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
return false;
}
- return canTailPredicateLoop(L, LI, *SE, DL, LVL->getLAI());
+ return canTailPredicateLoop(L, LI, *SE, DL, LVL->getLAI(),
+ *LVL->getDominatorTree());
}
TailFoldingStyle
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index 77dc4a7..378a72a 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -88,6 +88,16 @@ bool BPFAsmPrinter::doFinalization(Module &M) {
}
}
+ for (GlobalObject &GO : M.global_objects()) {
+ if (!GO.hasExternalWeakLinkage())
+ continue;
+
+ if (!SawTrapCall && GO.getName() == BPF_TRAP) {
+ GO.eraseFromParent();
+ break;
+ }
+ }
+
return AsmPrinter::doFinalization(M);
}
@@ -160,6 +170,20 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ if (MI->isCall()) {
+ for (const MachineOperand &Op : MI->operands()) {
+ if (Op.isGlobal()) {
+ if (const GlobalValue *GV = Op.getGlobal())
+ if (GV->getName() == BPF_TRAP)
+ SawTrapCall = true;
+      } else if (Op.isSymbol()) {
+        // MO_ExternalSymbol operands carry a C-string name, not an MCSymbol.
+        if (StringRef(Op.getSymbolName()) == BPF_TRAP)
+          SawTrapCall = true;
+ }
+ }
+ }
+
BPF_MC::verifyInstructionPredicates(MI->getOpcode(),
getSubtargetInfo().getFeatureBits());
@@ -195,6 +219,10 @@ void BPFAsmPrinter::emitJumpTableInfo() {
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
const Function &F = MF->getFunction();
+
+ MCSection *Sec = OutStreamer->getCurrentSectionOnly();
+ MCSymbol *SecStart = Sec->getBeginSymbol();
+
MCSection *JTS = TLOF.getSectionForJumpTable(F, TM);
assert(MJTI->getEntryKind() == MachineJumpTableInfo::EK_BlockAddress);
unsigned EntrySize = MJTI->getEntrySize(getDataLayout());
@@ -207,8 +235,10 @@ void BPFAsmPrinter::emitJumpTableInfo() {
MCSymbol *JTStart = getJTPublicSymbol(JTI);
OutStreamer->emitLabel(JTStart);
for (const MachineBasicBlock *MBB : JTBBs) {
- const MCExpr *LHS = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
- OutStreamer->emitValue(LHS, EntrySize);
+ const MCExpr *Diff = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(MBB->getSymbol(), OutContext),
+ MCSymbolRefExpr::create(SecStart, OutContext), OutContext);
+ OutStreamer->emitValue(Diff, EntrySize);
}
const MCExpr *JTSize =
MCConstantExpr::create(JTBBs.size() * EntrySize, OutContext);
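
The BPF jump table entries switch from absolute block addresses to offsets from the start of the current section, which stay valid wherever the section ends up being loaded. A sketch of the encode/decode arithmetic, with illustrative names:

    #include <cstdint>

    // Encode: store each target as a delta from the section start.
    uint32_t encodeEntry(uintptr_t Target, uintptr_t SectionStart) {
      return static_cast<uint32_t>(Target - SectionStart);
    }

    // Decode: the consumer adds the (possibly relocated) section base back.
    uintptr_t decodeEntry(uint32_t Entry, uintptr_t SectionStart) {
      return SectionStart + Entry;
    }
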
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.h b/llvm/lib/Target/BPF/BPFAsmPrinter.h
index 90ef207..75a1d7e 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.h
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.h
@@ -39,6 +39,7 @@ public:
private:
BTFDebug *BTF;
TargetMachine &TM;
+ bool SawTrapCall = false;
const BPFTargetMachine &getBTM() const;
};
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index 8ace2d2..eb4c884 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -194,9 +194,10 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {
dxbc::PSV::v2::ResourceBindInfo BindInfo;
BindInfo.Type = Type;
BindInfo.LowerBound = Binding.LowerBound;
- assert(Binding.Size == UINT32_MAX ||
- (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX &&
- "Resource range is too large");
+ assert(
+ (Binding.Size == UINT32_MAX ||
+ (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX) &&
+ "Resource range is too large");
BindInfo.UpperBound = (Binding.Size == UINT32_MAX)
? UINT32_MAX
: Binding.LowerBound + Binding.Size - 1;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7ae500a..67437f6 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1079,6 +1079,15 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> {
let attributes = [Attributes<DXIL1_0, []>];
}
+def LegacyF16ToF32 : DXILOp<131, legacyF16ToF32> {
+  let Doc = "returns the float16 stored in the low half of the uint, "
+            "converted to a float";
+ let intrinsics = [IntrinSelect<int_dx_legacyf16tof32>];
+ let arguments = [Int32Ty];
+ let result = FloatTy;
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
def WaveAllBitCount : DXILOp<135, waveAllOp> {
let Doc = "returns the count of bits set to 1 across the wave";
let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>];
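
LegacyF16ToF32 decodes an IEEE half from the low 16 bits of its i32 argument. A self-contained scalar model of that conversion (a sketch for reference; the DXIL op is only emitted, not implemented, here):

    #include <cmath>
    #include <cstdint>

    float legacyF16ToF32(uint32_t U) {
      uint16_t H = static_cast<uint16_t>(U & 0xFFFFu); // only the low half
      uint32_t Exp = (H >> 10) & 0x1F;
      uint32_t Frac = H & 0x3FF;
      float V;
      if (Exp == 0)       // zero or subnormal: Frac * 2^-24
        V = std::ldexp(static_cast<float>(Frac), -24);
      else if (Exp == 31) // infinity or NaN
        V = Frac ? std::nanf("") : INFINITY;
      else                // normal: (Frac | implicit 1) * 2^(Exp - 25)
        V = std::ldexp(static_cast<float>(Frac | 0x400),
                       static_cast<int>(Exp) - 25);
      return (H >> 15) ? -V : V;
    }
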
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 60dfd96..6cacbf6 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -29,11 +29,12 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
int OpdIdx) const {
switch (ID) {
case Intrinsic::dx_asdouble:
- case Intrinsic::dx_isinf:
- case Intrinsic::dx_isnan:
case Intrinsic::dx_firstbitlow:
- case Intrinsic::dx_firstbituhigh:
case Intrinsic::dx_firstbitshigh:
+ case Intrinsic::dx_firstbituhigh:
+ case Intrinsic::dx_isinf:
+ case Intrinsic::dx_isnan:
+ case Intrinsic::dx_legacyf16tof32:
return OpdIdx == 0;
default:
return OpdIdx == -1;
@@ -50,6 +51,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_frac:
case Intrinsic::dx_isinf:
case Intrinsic::dx_isnan:
+ case Intrinsic::dx_legacyf16tof32:
case Intrinsic::dx_rsqrt:
case Intrinsic::dx_saturate:
case Intrinsic::dx_splitdouble:
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 7ee280d..eadf020 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -1815,7 +1815,7 @@ struct WeightedLeaf {
int Weight;
int InsertionOrder;
- WeightedLeaf() {}
+ WeightedLeaf() = default;
WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 904aabed..fe700e1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -375,6 +375,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
}
setOperationAction(ISD::CTPOP, GRLenVT, Legal);
setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
@@ -461,6 +463,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
}
}
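
Marking FMINNUM/FMAXNUM legal lets these nodes map straight onto the VFMIN/VFMAX patterns added in the accompanying .td changes below. The nodes carry IEEE-754 minNum/maxNum semantics, where a quiet NaN loses to a number; a scalar reference model:

    #include <cmath>

    // ISD::FMINNUM semantics: if exactly one operand is NaN, return the other.
    double fminnum(double A, double B) {
      if (std::isnan(A)) return B;
      if (std::isnan(B)) return A;
      return A < B ? A : B;
    }
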
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 610ba05..b502b056 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1558,6 +1558,10 @@ defm : PatXrXrF<fmul, "XVFMUL">;
// XVFDIV_{S/D}
defm : PatXrXrF<fdiv, "XVFDIV">;
+// XVFMAX_{S/D}, XVFMIN_{S/D}
+defm : PatXrXrF<fmaxnum, "XVFMAX">;
+defm : PatXrXrF<fminnum, "XVFMIN">;
+
// XVFMADD_{S/D}
def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
(XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 6470842..6b74a4b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1760,6 +1760,10 @@ defm : PatVrVrF<fmul, "VFMUL">;
// VFDIV_{S/D}
defm : PatVrVrF<fdiv, "VFDIV">;
+// VFMAX_{S/D}, VFMIN_{S/D}
+defm : PatVrVrF<fmaxnum, "VFMAX">;
+defm : PatVrVrF<fminnum, "VFMIN">;
+
// VFMADD_{S/D}
def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),
(VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
index 7d54565..6d69af5 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -39,7 +39,7 @@ LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)
: MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,
/*HasRelocationAddend=*/true) {}
-LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {}
+LoongArchELFObjectWriter::~LoongArchELFObjectWriter() = default;
unsigned LoongArchELFObjectWriter::getRelocType(const MCFixup &Fixup,
const MCValue &Target,
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index f0e2bc4..08fa51d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -38,7 +38,7 @@ public:
LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
: Ctx(ctx), MCII(MCII) {}
- ~LoongArchMCCodeEmitter() override {}
+ ~LoongArchMCCodeEmitter() override = default;
void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index df0c8c1..06210b6 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -82,7 +82,7 @@ class MipsFastISel final : public FastISel {
// All possible address modes.
class Address {
public:
- using BaseKind = enum { RegBase, FrameIndexBase };
+ enum BaseKind { RegBase, FrameIndexBase };
private:
BaseKind Kind = RegBase;
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
index caef8fe7..b832b82 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
@@ -20,7 +20,7 @@ class MemoryLocation;
class NVPTXAAResult : public AAResultBase {
public:
- NVPTXAAResult() {}
+ NVPTXAAResult() = default;
NVPTXAAResult(NVPTXAAResult &&Arg) : AAResultBase(std::move(Arg)) {}
/// Handle invalidation events from the new pass manager.
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index c667a09..996d653 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1836,7 +1836,7 @@ bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {
return true;
}
-NVPTXScopes::NVPTXScopes(LLVMContext &C) {
+NVPTXScopes::NVPTXScopes(LLVMContext &C) : Context(&C) {
Scopes[C.getOrInsertSyncScopeID("singlethread")] = NVPTX::Scope::Thread;
Scopes[C.getOrInsertSyncScopeID("")] = NVPTX::Scope::System;
Scopes[C.getOrInsertSyncScopeID("block")] = NVPTX::Scope::Block;
@@ -1851,11 +1851,21 @@ NVPTX::Scope NVPTXScopes::operator[](SyncScope::ID ID) const {
auto S = Scopes.find(ID);
if (S == Scopes.end()) {
- // TODO:
- // - Add API to LLVMContext to get the name of a single scope.
- // - Use that API here to print an error containing the name
- // of this Unknown ID.
- report_fatal_error(formatv("Could not find scope ID={}.", int(ID)));
+ auto scopeName = Context->getSyncScopeName(ID);
+ assert(scopeName.has_value() && "Scope name must exist.");
+
+      // Build the list of supported syncscopes programmatically.
+ SmallVector<StringRef> supportedScopes;
+ for (const auto &Entry : Scopes) {
+ if (auto name = Context->getSyncScopeName(Entry.first))
+ supportedScopes.push_back(name->empty() ? "<empty string>" : *name);
+ }
+
+ reportFatalUsageError(
+ formatv("NVPTX backend does not support syncscope \"{0}\" (ID={1}).\n"
+ "Supported syncscopes are: {2}.",
+ scopeName.value(), int(ID),
+ make_range(supportedScopes.begin(), supportedScopes.end())));
}
return S->second;
}
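
The replacement diagnostic enumerates every syncscope the backend registered instead of printing a bare numeric ID. A standalone sketch of the same lookup-or-report shape (illustrative container and names, not the NVPTX types):

    #include <map>
    #include <stdexcept>
    #include <string>

    int lookupScope(const std::map<std::string, int> &Scopes,
                    const std::string &Name) {
      if (auto It = Scopes.find(Name); It != Scopes.end())
        return It->second;
      std::string Msg = "unsupported syncscope \"" + Name + "\"; supported:";
      for (const auto &[Key, Unused] : Scopes)
        Msg += " \"" + (Key.empty() ? std::string("<empty string>") : Key) + "\"";
      throw std::runtime_error(Msg);
    }
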
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 1cb579b..d525531 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -35,6 +35,7 @@ struct NVPTXScopes {
private:
SmallMapVector<SyncScope::ID, NVPTX::Scope, 8> Scopes{};
+ LLVMContext *Context = nullptr;
};
class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index b260221..f0bdf47 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2267,7 +2267,7 @@ def : Pat<(f32 (fpround f64:$a)), (CVT_f32_f64 $a, CvtRN)>;
def : Pat<(f32 (fpextend f16:$a)), (CVT_f32_f16 $a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fpextend f16:$a)), (CVT_f32_f16 $a, CvtNONE)>;
// fpextend bf16 -> f32
-def : Pat<(f32 (fpextend bf16:$a)), (CVT_f32_bf16 $a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f32 (fpextend bf16:$a)), (CVT_f32_bf16 $a, CvtNONE_FTZ)>, Requires<[doF32FTZ, hasPTX<78>, hasSM<90>]>;
def : Pat<(f32 (fpextend bf16:$a)), (CVT_f32_bf16 $a, CvtNONE)>, Requires<[hasPTX<71>, hasSM<80>]>;
// fpextend f16 -> f64
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index da3efdc..0c2e44e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -360,6 +360,10 @@ let Predicates = [HasVSX, IsISAFuture] in {
def LXVPRLL : XForm_XTp5_RAB5<31, 621, (outs vsrprc:$XTp),
(ins (memr $RA):$addr, g8rc:$RB),
"lxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>;
+ def LXVPB32X
+ : XForm_XTp5_RAB5<31, 877, (outs vsrprc:$XTp),
+ (ins (memr $RA):$addr, g8rc:$RB),
+ "lxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;
}
let mayStore = 1 in {
@@ -376,6 +380,10 @@ let Predicates = [HasVSX, IsISAFuture] in {
: XForm_XTp5_RAB5<31, 749, (outs),
(ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB),
"stxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>;
+ def STXVPB32X
+ : XForm_XTp5_RAB5<31, 1005, (outs),
+ (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB),
+ "stxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;
}
def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
index b38dd4a..fc3cde3 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -202,7 +202,7 @@ multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#WPP :
MMIRR_XX3Form_XY4P2_XAB6<
- opcode, !or(xo, 0x20), (outs acc:$AT),
+ opcode, !or(xo, 0x20), (outs wacc:$AT),
!con((ins wacc:$ATi),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
@@ -765,7 +765,7 @@ let Predicates = [MMA, IsISAFuture] in {
def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERWPN $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
- (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
+ (XVF64GERWNP $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERWNN $ATi, $XA, RCCp.BToVSRC)>;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 282cf5d..3d5a55c 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -95,7 +95,8 @@ private:
void addVectorLoadStoreOperands(MachineInstr &I,
SmallVectorImpl<SrcOp> &SrcOps,
unsigned &CurOp, bool IsMasked,
- bool IsStrided) const;
+ bool IsStridedOrIndexed,
+ LLT *IndexVT = nullptr) const;
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineIRBuilder &MIB) const;
@@ -722,15 +723,17 @@ static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {
void RISCVInstructionSelector::addVectorLoadStoreOperands(
MachineInstr &I, SmallVectorImpl<SrcOp> &SrcOps, unsigned &CurOp,
- bool IsMasked, bool IsStrided) const {
+ bool IsMasked, bool IsStridedOrIndexed, LLT *IndexVT) const {
// Base Pointer
auto PtrReg = I.getOperand(CurOp++).getReg();
SrcOps.push_back(PtrReg);
- // Stride
- if (IsStrided) {
+ // Stride or Index
+ if (IsStridedOrIndexed) {
auto StrideReg = I.getOperand(CurOp++).getReg();
SrcOps.push_back(StrideReg);
+ if (IndexVT)
+ *IndexVT = MRI->getType(StrideReg);
}
// Mask
@@ -805,6 +808,70 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
I.eraseFromParent();
return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
}
+ case Intrinsic::riscv_vloxei:
+ case Intrinsic::riscv_vloxei_mask:
+ case Intrinsic::riscv_vluxei:
+ case Intrinsic::riscv_vluxei_mask: {
+ bool IsMasked = IntrinID == Intrinsic::riscv_vloxei_mask ||
+ IntrinID == Intrinsic::riscv_vluxei_mask;
+ bool IsOrdered = IntrinID == Intrinsic::riscv_vloxei ||
+ IntrinID == Intrinsic::riscv_vloxei_mask;
+ LLT VT = MRI->getType(I.getOperand(0).getReg());
+ unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
+
+ // Result vector
+ const Register DstReg = I.getOperand(0).getReg();
+
+ // Sources
+    bool HasPassthruOperand = IntrinID != Intrinsic::riscv_vlm; // Always true.
+ unsigned CurOp = 2;
+ SmallVector<SrcOp, 4> SrcOps; // Source registers.
+
+ // Passthru
+ if (HasPassthruOperand) {
+ auto PassthruReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PassthruReg);
+ } else {
+ // Use NoRegister if there is no specified passthru.
+ SrcOps.push_back(Register());
+ }
+ LLT IndexVT;
+ addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, true, &IndexVT);
+
+ RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
+ RISCVVType::VLMUL IndexLMUL =
+ RISCVTargetLowering::getLMUL(getMVTForLLT(IndexVT));
+ unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
+ if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
+ reportFatalUsageError("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
+ }
+ const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
+ IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(IndexLMUL));
+
+ auto PseudoMI = MIB.buildInstr(P->Pseudo, {DstReg}, SrcOps);
+
+ // Select VL
+ auto VLOpFn = renderVLOp(I.getOperand(CurOp++));
+ for (auto &RenderFn : *VLOpFn)
+ RenderFn(PseudoMI);
+
+ // SEW
+ PseudoMI.addImm(Log2SEW);
+
+ // Policy
+ uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
+ if (IsMasked)
+ Policy = I.getOperand(CurOp++).getImm();
+ PseudoMI.addImm(Policy);
+
+ // Memref
+ PseudoMI.cloneMemRefs(I);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
+ }
case Intrinsic::riscv_vsm:
case Intrinsic::riscv_vse:
case Intrinsic::riscv_vse_mask:
@@ -847,6 +914,56 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
I.eraseFromParent();
return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
}
+ case Intrinsic::riscv_vsoxei:
+ case Intrinsic::riscv_vsoxei_mask:
+ case Intrinsic::riscv_vsuxei:
+ case Intrinsic::riscv_vsuxei_mask: {
+ bool IsMasked = IntrinID == Intrinsic::riscv_vsoxei_mask ||
+ IntrinID == Intrinsic::riscv_vsuxei_mask;
+ bool IsOrdered = IntrinID == Intrinsic::riscv_vsoxei ||
+ IntrinID == Intrinsic::riscv_vsoxei_mask;
+ LLT VT = MRI->getType(I.getOperand(1).getReg());
+ unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
+
+ // Sources
+ unsigned CurOp = 1;
+ SmallVector<SrcOp, 4> SrcOps; // Source registers.
+
+    // Store value
+    auto StoreValReg = I.getOperand(CurOp++).getReg();
+    SrcOps.push_back(StoreValReg);
+
+ LLT IndexVT;
+ addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, true, &IndexVT);
+
+ RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
+ RISCVVType::VLMUL IndexLMUL =
+ RISCVTargetLowering::getLMUL(getMVTForLLT(IndexVT));
+ unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
+ if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
+ reportFatalUsageError("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
+ }
+ const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
+ IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+ static_cast<unsigned>(IndexLMUL));
+
+ auto PseudoMI = MIB.buildInstr(P->Pseudo, {}, SrcOps);
+
+ // Select VL
+ auto VLOpFn = renderVLOp(I.getOperand(CurOp++));
+ for (auto &RenderFn : *VLOpFn)
+ RenderFn(PseudoMI);
+
+ // SEW
+ PseudoMI.addImm(Log2SEW);
+
+ // Memref
+ PseudoMI.cloneMemRefs(I);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
+ }
}
}
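
Both new indexed cases recompute the index element width from the index operand's type and reject EEW=64 indices on RV32, mirroring the existing SelectionDAG path. The legality rule in isolation (a sketch under assumed names, not the selector itself):

    #include <bit>
    #include <cstdint>
    #include <stdexcept>

    // Index EEW is a power of two in [8, 64]; log2(64) == 6 needs XLEN=64.
    void checkIndexEEW(uint32_t IndexScalarBits, bool IsRV64) {
      unsigned IndexLog2EEW = std::countr_zero(IndexScalarBits);
      if (IndexLog2EEW == 6 && !IsRV64)
        throw std::invalid_argument(
            "the V extension does not support EEW=64 index values when XLEN=32");
    }
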
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index e75dfe3..5b8cfb2 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -407,7 +407,6 @@ enum OperandType : unsigned {
OPERAND_SIMM5_PLUS1,
OPERAND_SIMM6,
OPERAND_SIMM6_NONZERO,
- OPERAND_SIMM8,
OPERAND_SIMM8_UNSIGNED,
OPERAND_SIMM10,
OPERAND_SIMM10_LSB0000_NONZERO,
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 526675a..b0453fc 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -131,6 +131,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCMAXU:
case RISCV::PseudoCCMIN:
case RISCV::PseudoCCMINU:
+ case RISCV::PseudoCCMUL:
case RISCV::PseudoCCADDW:
case RISCV::PseudoCCSUBW:
case RISCV::PseudoCCSLL:
@@ -237,6 +238,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCMIN: NewOpc = RISCV::MIN; break;
case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break;
case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break;
+ case RISCV::PseudoCCMUL: NewOpc = RISCV::MUL; break;
case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break;
case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break;
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index cfee6ab..5b72334 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1856,6 +1856,11 @@ def TuneShortForwardBranchIMinMax
"true", "Enable short forward branch optimization for min,max instructions in Zbb",
[TuneShortForwardBranchOpt]>;
+def TuneShortForwardBranchIMul
+ : SubtargetFeature<"short-forward-branch-i-mul", "HasShortForwardBranchIMul",
+                       "true", "Enable short forward branch optimization for the mul instruction",
+ [TuneShortForwardBranchOpt]>;
+
// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
def TuneNoSinkSplatOperands
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index b25a054..9078335 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -371,8 +371,8 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
- report_fatal_error("The V extension does not support EEW=64 for index "
- "values when XLEN=32");
+ reportFatalUsageError("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
}
const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
@@ -444,8 +444,8 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
- report_fatal_error("The V extension does not support EEW=64 for index "
- "values when XLEN=32");
+ reportFatalUsageError("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
}
const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
@@ -2223,8 +2223,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
- report_fatal_error("The V extension does not support EEW=64 for index "
- "values when XLEN=32");
+ reportFatalUsageError("The V extension does not support EEW=64 for "
+ "index values when XLEN=32");
}
const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
@@ -2457,8 +2457,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
- report_fatal_error("The V extension does not support EEW=64 for index "
- "values when XLEN=32");
+ reportFatalUsageError("The V extension does not support EEW=64 for "
+ "index values when XLEN=32");
}
const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
IsMasked, IsOrdered, IndexLog2EEW,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e0cf739..b860562 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9186,7 +9186,7 @@ static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
unsigned ShAmount = Log2_64(TrueM1);
if (Subtarget.hasShlAdd(ShAmount))
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
- DAG.getConstant(ShAmount, DL, VT), CondV);
+ DAG.getTargetConstant(ShAmount, DL, VT), CondV);
}
}
// (select c, y, 0) -> -c & y
@@ -15463,7 +15463,7 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
- DAG.getConstant(Diff, DL, VT), NS);
+ DAG.getTargetConstant(Diff, DL, VT), NS);
return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
}
@@ -15501,7 +15501,7 @@ static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
int64_t AddConst = AddVal.getSExtValue();
SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
- DAG.getConstant(ShlConst, DL, VT), Other);
+ DAG.getTargetConstant(ShlConst, DL, VT), Other);
return DAG.getNode(ISD::ADD, DL, VT, SHADD,
DAG.getSignedConstant(AddConst, DL, VT));
}
@@ -16495,6 +16495,35 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Op, DL, VT, Shift1, Shift2);
}
+static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
+ unsigned ShY) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue X = N->getOperand(0);
+ SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getTargetConstant(ShY, DL, VT), X);
+ return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
+ DAG.getTargetConstant(ShX, DL, VT), Mul359);
+}
+
+static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
+ uint64_t MulAmt) {
+ switch (MulAmt) {
+ case 5 * 3:
+ return getShlAddShlAdd(N, DAG, 2, 1);
+ case 9 * 3:
+ return getShlAddShlAdd(N, DAG, 3, 1);
+ case 5 * 5:
+ return getShlAddShlAdd(N, DAG, 2, 2);
+ case 9 * 5:
+ return getShlAddShlAdd(N, DAG, 3, 2);
+ case 9 * 9:
+ return getShlAddShlAdd(N, DAG, 3, 3);
+ default:
+ return SDValue();
+ }
+}
+
// Try to expand a scalar multiply to a faster sequence.
static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -16524,18 +16553,17 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
return SDValue();
- // WARNING: The code below is knowingly incorrect with regards to undef semantics.
- // We're adding additional uses of X here, and in principle, we should be freezing
- // X before doing so. However, adding freeze here causes real regressions, and no
- // other target properly freezes X in these cases either.
- SDValue X = N->getOperand(0);
-
+ // WARNING: The code below is knowingly incorrect with regards to undef
+ // semantics. We're adding additional uses of X here, and in principle, we
+ // should be freezing X before doing so. However, adding freeze here causes
+ // real regressions, and no other target properly freezes X in these cases
+ // either.
if (Subtarget.hasShlAdd(3)) {
+ SDValue X = N->getOperand(0);
int Shift;
if (int ShXAmount = isShifted359(MulAmt, Shift)) {
// 3/5/9 * 2^N -> shl (shXadd X, X), N
SDLoc DL(N);
- SDValue X = N->getOperand(0);
// Put the shift first if we can fold a zext into the shift forming
// a slli.uw.
if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
@@ -16543,49 +16571,19 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
- DAG.getConstant(ShXAmount, DL, VT), Shl);
+ DAG.getTargetConstant(ShXAmount, DL, VT), Shl);
}
// Otherwise, put the shl second so that it can fold with following
// instructions (e.g. sext or add).
SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(ShXAmount, DL, VT), X);
+ DAG.getTargetConstant(ShXAmount, DL, VT), X);
return DAG.getNode(ISD::SHL, DL, VT, Mul359,
DAG.getConstant(Shift, DL, VT));
}
// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
- int ShX;
- int ShY;
- switch (MulAmt) {
- case 3 * 5:
- ShY = 1;
- ShX = 2;
- break;
- case 3 * 9:
- ShY = 1;
- ShX = 3;
- break;
- case 5 * 5:
- ShX = ShY = 2;
- break;
- case 5 * 9:
- ShY = 2;
- ShX = 3;
- break;
- case 9 * 9:
- ShX = ShY = 3;
- break;
- default:
- ShX = ShY = 0;
- break;
- }
- if (ShX) {
- SDLoc DL(N);
- SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(ShY, DL, VT), X);
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getConstant(ShX, DL, VT), Mul359);
- }
+ if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt))
+ return V;
// If this is a power 2 + 2/4/8, we can use a shift followed by a single
// shXadd. First check if this a sum of two power of 2s because that's
@@ -16598,7 +16596,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
SDValue Shift1 =
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(ScaleShift, DL, VT), Shift1);
+ DAG.getTargetConstant(ScaleShift, DL, VT), Shift1);
}
}
@@ -16611,10 +16609,11 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
assert(Shift != 0 && "MulAmt=4,6,10 handled before");
if (Shift <= 3) {
SDLoc DL(N);
- SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(ShXAmount, DL, VT), X);
+ SDValue Mul359 =
+ DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getTargetConstant(ShXAmount, DL, VT), X);
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getConstant(Shift, DL, VT), X);
+ DAG.getTargetConstant(Shift, DL, VT), X);
}
}
@@ -16626,9 +16625,10 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
SDValue Shift1 =
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
- return DAG.getNode(ISD::ADD, DL, VT, Shift1,
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(ScaleShift, DL, VT), X));
+ return DAG.getNode(
+ ISD::ADD, DL, VT, Shift1,
+ DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getTargetConstant(ScaleShift, DL, VT), X));
}
}
@@ -16643,28 +16643,17 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
SDValue Mul359 =
DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
+ DAG.getTargetConstant(Log2_64(Offset - 1), DL, VT), X);
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
}
}
- for (uint64_t Divisor : {3, 5, 9}) {
- if (MulAmt % Divisor != 0)
- continue;
- uint64_t MulAmt2 = MulAmt / Divisor;
- // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
- // of 25 which happen to be quite common.
- if (int ShBAmount = isShifted359(MulAmt2, Shift)) {
- SDLoc DL(N);
- SDValue Mul359A =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
- SDValue Mul359B =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359A,
- DAG.getConstant(ShBAmount, DL, VT), Mul359A);
- return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
- DAG.getConstant(Shift, DL, VT));
- }
+ // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
+ // of 25 which happen to be quite common.
+ Shift = llvm::countr_zero(MulAmt);
+ if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
}
}
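
A shXadd instruction computes (X << ShAmt) + X, i.e. a multiply by 3, 5, or 9, so two of them compose into any product of {3,5,9}x{3,5,9}, and trailing zero bits of the multiplier peel off as a final shift. A quick standalone check of the identities the new helpers encode:

    #include <cassert>
    #include <cstdint>

    uint64_t shlAdd(uint64_t X, unsigned Sh) { return (X << Sh) + X; }

    int main() {
      uint64_t X = 12345;
      assert(shlAdd(shlAdd(X, 1), 2) == 15 * X);         // 3 * 5
      assert(shlAdd(shlAdd(X, 3), 3) == 81 * X);         // 9 * 9
      assert((shlAdd(shlAdd(X, 2), 2) << 3) == 200 * X); // 25 * 2^3
      return 0;
    }
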
diff --git a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
index a1c8e23..c58a5c0 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
@@ -48,7 +48,7 @@ class VXRMInfo {
} State = Uninitialized;
public:
- VXRMInfo() {}
+ VXRMInfo() = default;
static VXRMInfo getUnknown() {
VXRMInfo Info;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index c9df787..b8ab70b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1703,6 +1703,7 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
case RISCV::MAXU: return RISCV::PseudoCCMAXU;
case RISCV::MIN: return RISCV::PseudoCCMIN;
case RISCV::MINU: return RISCV::PseudoCCMINU;
+ case RISCV::MUL: return RISCV::PseudoCCMUL;
case RISCV::ADDI: return RISCV::PseudoCCADDI;
case RISCV::SLLI: return RISCV::PseudoCCSLLI;
@@ -1754,6 +1755,9 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
return nullptr;
+ if (!STI.hasShortForwardBranchIMul() && MI->getOpcode() == RISCV::MUL)
+ return nullptr;
+
// Check if MI can be predicated and folded into the CCMOV.
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
return nullptr;
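
With TuneShortForwardBranchIMul set, a mul guarded by a short forward branch can be folded into the new predicated pseudo and expanded later. The effective value semantics in scalar form (a sketch of the select, not the MI-level rewrite):

    #include <cstdint>

    // PseudoCCMUL: x = cond ? a * b : x, replacing "branch over a mul".
    uint64_t ccMul(bool Cond, uint64_t FalseVal, uint64_t A, uint64_t B) {
      return Cond ? A * B : FalseVal;
    }
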
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index 5a67a5a..494b1c9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -110,6 +110,7 @@ def PseudoCCMAX : SFBALU_rr;
def PseudoCCMIN : SFBALU_rr;
def PseudoCCMAXU : SFBALU_rr;
def PseudoCCMINU : SFBALU_rr;
+def PseudoCCMUL : SFBALU_rr;
def PseudoCCADDI : SFBALU_ri;
def PseudoCCANDI : SFBALU_ri;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index b37ceaae..c2b25c6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -60,6 +60,8 @@ def immfour : RISCVOp {
let DecoderMethod = "decodeImmFourOperand";
}
+def tuimm2 : TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]>;
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -557,8 +559,8 @@ multiclass VPatTernaryVMAQA_VV_VX<string intrinsic, string instruction,
let Predicates = [HasVendorXTHeadBa] in {
def : Pat<(add_like_non_imm12 (shl GPR:$rs2, uimm2:$uimm2), (XLenVT GPR:$rs1)),
(TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
-def : Pat<(XLenVT (riscv_shl_add GPR:$rs2, uimm2:$uimm2, GPR:$rs1)),
- (TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
+def : Pat<(XLenVT (riscv_shl_add GPR:$rs2, tuimm2:$uimm2, GPR:$rs1)),
+ (TH_ADDSL GPR:$rs1, GPR:$rs2, tuimm2:$uimm2)>;
// Reuse complex patterns from StdExtZba
def : Pat<(add_like_non_imm12 sh1add_op:$rs2, (XLenVT GPR:$rs1)),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 4537bfe..8376da5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -53,6 +53,8 @@ def uimm5gt3 : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
let OperandType = "OPERAND_UIMM5_GT3";
}
+def tuimm5gt3 : TImmLeaf<XLenVT, [{return (Imm > 3) && isUInt<5>(Imm);}]>;
+
def UImm5Plus1AsmOperand : AsmOperandClass {
let Name = "UImm5Plus1";
let RenderMethod = "addImmOperands";
@@ -1419,8 +1421,8 @@ def : Pat<(i32 (add GPRNoX0:$rd, (mul GPRNoX0:$rs1, simm12_lo:$imm12))),
(QC_MULIADD GPRNoX0:$rd, GPRNoX0:$rs1, simm12_lo:$imm12)>;
def : Pat<(i32 (add_like_non_imm12 (shl GPRNoX0:$rs1, (i32 uimm5gt3:$imm)), GPRNoX0:$rs2)),
(QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>;
-def : Pat<(i32 (riscv_shl_add GPRNoX0:$rs1, (i32 uimm5gt3:$imm), GPRNoX0:$rs2)),
- (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>;
+def : Pat<(i32 (riscv_shl_add GPRNoX0:$rs1, (i32 tuimm5gt3:$imm), GPRNoX0:$rs2)),
+ (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, tuimm5gt3:$imm)>;
} // Predicates = [HasVendorXqciac, IsRV32]
/// Simple arithmetic operations
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index c31713e..1c6a5af 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -90,6 +90,7 @@ defvar ZfhminDExts = [ZfhminDExt, ZhinxminZdinxExt, ZhinxminZdinx32Ext];
//===----------------------------------------------------------------------===//
let Predicates = [HasHalfFPLoadStoreMove] in {
+let canFoldAsLoad = 1 in
def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;
// Operands for stores are in the order srcreg, base, offset rather than
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
index 0a318e0..ed6d355 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
@@ -15,4 +15,4 @@
using namespace llvm;
SPIRVTargetStreamer::SPIRVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
-SPIRVTargetStreamer::~SPIRVTargetStreamer() {}
+SPIRVTargetStreamer::~SPIRVTargetStreamer() = default;
diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
index 640b014..0175f2f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
@@ -577,6 +577,11 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) {
if (MDNode *Node = F.getMetadata("intel_reqd_sub_group_size"))
outputExecutionModeFromMDNode(FReg, Node,
SPIRV::ExecutionMode::SubgroupSize, 0, 0);
+ if (MDNode *Node = F.getMetadata("max_work_group_size")) {
+ if (ST->canUseExtension(SPIRV::Extension::SPV_INTEL_kernel_attributes))
+ outputExecutionModeFromMDNode(
+ FReg, Node, SPIRV::ExecutionMode::MaxWorkgroupSizeINTEL, 3, 1);
+ }
if (MDNode *Node = F.getMetadata("vec_type_hint")) {
MCInst Inst;
Inst.setOpcode(SPIRV::OpExecutionMode);
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 9e11c3a..dd57b74 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -149,23 +149,23 @@ static FunctionType *getOriginalFunctionType(const Function &F) {
return isa<MDString>(N->getOperand(0)) &&
cast<MDString>(N->getOperand(0))->getString() == F.getName();
});
- // TODO: probably one function can have numerous type mutations,
- // so we should support this.
if (ThisFuncMDIt != NamedMD->op_end()) {
auto *ThisFuncMD = *ThisFuncMDIt;
- MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(1));
- assert(MD && "MDNode operand is expected");
- ConstantInt *Const = getConstInt(MD, 0);
- if (Const) {
- auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1));
- assert(CMeta && "ConstantAsMetadata operand is expected");
- assert(Const->getSExtValue() >= -1);
- // Currently -1 indicates return value, greater values mean
- // argument numbers.
- if (Const->getSExtValue() == -1)
- RetTy = CMeta->getType();
- else
- ArgTypes[Const->getSExtValue()] = CMeta->getType();
+ for (unsigned I = 1; I != ThisFuncMD->getNumOperands(); ++I) {
+ MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(I));
+ assert(MD && "MDNode operand is expected");
+ ConstantInt *Const = getConstInt(MD, 0);
+ if (Const) {
+ auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1));
+ assert(CMeta && "ConstantAsMetadata operand is expected");
+ assert(Const->getSExtValue() >= -1);
+ // Currently -1 indicates return value, greater values mean
+ // argument numbers.
+ if (Const->getSExtValue() == -1)
+ RetTy = CMeta->getType();
+ else
+ ArgTypes[Const->getSExtValue()] = CMeta->getType();
+ }
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 96f5dee..43b2869 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -107,6 +107,8 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
SPIRV::Extension::Extension::SPV_INTEL_inline_assembly},
{"SPV_INTEL_bindless_images",
SPIRV::Extension::Extension::SPV_INTEL_bindless_images},
+ {"SPV_INTEL_bfloat16_arithmetic",
+ SPIRV::Extension::Extension::SPV_INTEL_bfloat16_arithmetic},
{"SPV_INTEL_bfloat16_conversion",
SPIRV::Extension::Extension::SPV_INTEL_bfloat16_conversion},
{"SPV_KHR_subgroup_rotate",
@@ -155,7 +157,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
{"SPV_INTEL_predicated_io",
SPIRV::Extension::Extension::SPV_INTEL_predicated_io},
{"SPV_KHR_maximal_reconvergence",
- SPIRV::Extension::Extension::SPV_KHR_maximal_reconvergence}};
+ SPIRV::Extension::Extension::SPV_KHR_maximal_reconvergence},
+ {"SPV_INTEL_kernel_attributes",
+ SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}};
bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName,
StringRef ArgValue,
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 3f0424f..245e5a2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3516,6 +3516,10 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_resource_nonuniformindex: {
return selectResourceNonUniformIndex(ResVReg, ResType, I);
}
+ case Intrinsic::spv_unpackhalf2x16: {
+ return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16);
+ }
+
default: {
std::string DiagMsg;
raw_string_ostream OS(DiagMsg);
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index db036a5..e5ac76c4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1435,6 +1435,8 @@ void addInstrRequirements(const MachineInstr &MI,
addPrintfRequirements(MI, Reqs, ST);
break;
}
+ // TODO: handle bfloat16 extended instructions when
+ // SPV_INTEL_bfloat16_arithmetic is enabled.
break;
}
case SPIRV::OpAliasDomainDeclINTEL:
@@ -2060,7 +2062,64 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(SPIRV::Capability::PredicatedIOINTEL);
break;
}
-
+ case SPIRV::OpFAddS:
+ case SPIRV::OpFSubS:
+ case SPIRV::OpFMulS:
+ case SPIRV::OpFDivS:
+ case SPIRV::OpFRemS:
+ case SPIRV::OpFMod:
+ case SPIRV::OpFNegate:
+ case SPIRV::OpFAddV:
+ case SPIRV::OpFSubV:
+ case SPIRV::OpFMulV:
+ case SPIRV::OpFDivV:
+ case SPIRV::OpFRemV:
+ case SPIRV::OpFNegateV: {
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ SPIRVType *TypeDef = MRI.getVRegDef(MI.getOperand(1).getReg());
+ if (TypeDef->getOpcode() == SPIRV::OpTypeVector)
+ TypeDef = MRI.getVRegDef(TypeDef->getOperand(1).getReg());
+ if (isBFloat16Type(TypeDef)) {
+ if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_bfloat16_arithmetic))
+ report_fatal_error(
+ "Arithmetic instructions with bfloat16 arguments require the "
+ "following SPIR-V extension: SPV_INTEL_bfloat16_arithmetic",
+ false);
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_bfloat16_arithmetic);
+ Reqs.addCapability(SPIRV::Capability::BFloat16ArithmeticINTEL);
+ }
+ break;
+ }
+ case SPIRV::OpOrdered:
+ case SPIRV::OpUnordered:
+ case SPIRV::OpFOrdEqual:
+ case SPIRV::OpFOrdNotEqual:
+ case SPIRV::OpFOrdLessThan:
+ case SPIRV::OpFOrdLessThanEqual:
+ case SPIRV::OpFOrdGreaterThan:
+ case SPIRV::OpFOrdGreaterThanEqual:
+ case SPIRV::OpFUnordEqual:
+ case SPIRV::OpFUnordNotEqual:
+ case SPIRV::OpFUnordLessThan:
+ case SPIRV::OpFUnordLessThanEqual:
+ case SPIRV::OpFUnordGreaterThan:
+ case SPIRV::OpFUnordGreaterThanEqual: {
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ MachineInstr *OperandDef = MRI.getVRegDef(MI.getOperand(2).getReg());
+ SPIRVType *TypeDef = MRI.getVRegDef(OperandDef->getOperand(1).getReg());
+ if (TypeDef->getOpcode() == SPIRV::OpTypeVector)
+ TypeDef = MRI.getVRegDef(TypeDef->getOperand(1).getReg());
+ if (isBFloat16Type(TypeDef)) {
+ if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_bfloat16_arithmetic))
+ report_fatal_error(
+ "Relational instructions with bfloat16 arguments require the "
+ "following SPIR-V extension: SPV_INTEL_bfloat16_arithmetic",
+ false);
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_bfloat16_arithmetic);
+ Reqs.addCapability(SPIRV::Capability::BFloat16ArithmeticINTEL);
+ }
+ break;
+ }
default:
break;
}
@@ -2180,6 +2239,10 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI,
MAI.Reqs.getAndAddRequirements(
SPIRV::OperandCategory::ExecutionModeOperand,
SPIRV::ExecutionMode::SubgroupSize, ST);
+ if (F.getMetadata("max_work_group_size"))
+ MAI.Reqs.getAndAddRequirements(
+ SPIRV::OperandCategory::ExecutionModeOperand,
+ SPIRV::ExecutionMode::MaxWorkgroupSizeINTEL, ST);
if (F.getMetadata("vec_type_hint"))
MAI.Reqs.getAndAddRequirements(
SPIRV::OperandCategory::ExecutionModeOperand,
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index 2d19f6de..44b6c66 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -81,7 +81,7 @@ private:
void initAvailableCapabilitiesForVulkan(const SPIRVSubtarget &ST);
public:
- RequirementHandler() {}
+ RequirementHandler() = default;
void clear() {
MinimalCaps.clear();
AllCaps.clear();
diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
index ba09692..ad6c9cd 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
@@ -70,7 +70,6 @@ SPIRVSubtarget::SPIRVSubtarget(const Triple &TT, const std::string &CPU,
SPIRVVersion = VersionTuple(1, 3);
break;
case Triple::SPIRVSubArch_v14:
- default:
SPIRVVersion = VersionTuple(1, 4);
break;
case Triple::SPIRVSubArch_v15:
@@ -79,13 +78,19 @@ SPIRVSubtarget::SPIRVSubtarget(const Triple &TT, const std::string &CPU,
case Triple::SPIRVSubArch_v16:
SPIRVVersion = VersionTuple(1, 6);
break;
+ default:
+ if (TT.getVendor() == Triple::AMD)
+ SPIRVVersion = VersionTuple(1, 6);
+ else
+ SPIRVVersion = VersionTuple(1, 4);
}
OpenCLVersion = VersionTuple(2, 2);
// Set the environment based on the target triple.
if (TargetTriple.getOS() == Triple::Vulkan)
Env = Shader;
- else if (TargetTriple.getEnvironment() == Triple::OpenCL)
+ else if (TargetTriple.getEnvironment() == Triple::OpenCL ||
+ TargetTriple.getVendor() == Triple::AMD)
Env = Kernel;
else
Env = Unknown;
@@ -93,6 +98,8 @@ SPIRVSubtarget::SPIRVSubtarget(const Triple &TT, const std::string &CPU,
// Set the default extensions based on the target triple.
if (TargetTriple.getVendor() == Triple::Intel)
Extensions.insert(SPIRV::Extension::SPV_INTEL_function_pointers);
+ if (TargetTriple.getVendor() == Triple::AMD)
+ Extensions = SPIRVExtensionsParser::getValidExtensions(TargetTriple);
// The order of initialization is important.
initAvailableExtensions(Extensions);
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 7d08b29..1b4b29b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -387,6 +387,8 @@ defm SPV_INTEL_tensor_float32_conversion : ExtensionOperand<125, [EnvOpenCL]>;
defm SPV_KHR_bfloat16 : ExtensionOperand<126, [EnvVulkan, EnvOpenCL]>;
defm SPV_INTEL_predicated_io : ExtensionOperand<127, [EnvOpenCL]>;
defm SPV_KHR_maximal_reconvergence : ExtensionOperand<128, [EnvVulkan]>;
+defm SPV_INTEL_bfloat16_arithmetic
+ : ExtensionOperand<129, [EnvVulkan, EnvOpenCL]>;
//===----------------------------------------------------------------------===//
// Multiclass used to define Capabilities enum values and at the same time
@@ -570,6 +572,7 @@ defm AtomicFloat64MinMaxEXT : CapabilityOperand<5613, 0, 0, [SPV_EXT_shader_atom
defm VariableLengthArrayINTEL : CapabilityOperand<5817, 0, 0, [SPV_INTEL_variable_length_array], []>;
defm GroupUniformArithmeticKHR : CapabilityOperand<6400, 0, 0, [SPV_KHR_uniform_group_instructions], []>;
defm USMStorageClassesINTEL : CapabilityOperand<5935, 0, 0, [SPV_INTEL_usm_storage_classes], [Kernel]>;
+defm BFloat16ArithmeticINTEL : CapabilityOperand<6226, 0, 0, [SPV_INTEL_bfloat16_arithmetic], []>;
defm BFloat16ConversionINTEL : CapabilityOperand<6115, 0, 0, [SPV_INTEL_bfloat16_conversion], []>;
defm GlobalVariableHostAccessINTEL : CapabilityOperand<6187, 0, 0, [SPV_INTEL_global_variable_host_access], []>;
defm HostAccessINTEL : CapabilityOperand<6188, 0, 0, [SPV_INTEL_global_variable_host_access], []>;
@@ -587,6 +590,11 @@ defm CooperativeMatrixBFloat16ComponentTypeINTEL : CapabilityOperand<6437, 0, 0,
defm RoundToInfinityINTEL : CapabilityOperand<5582, 0, 0, [SPV_INTEL_float_controls2], []>;
defm FloatingPointModeINTEL : CapabilityOperand<5583, 0, 0, [SPV_INTEL_float_controls2], []>;
defm FunctionFloatControlINTEL : CapabilityOperand<5821, 0, 0, [SPV_INTEL_float_controls2], []>;
+defm KernelAttributesINTEL : CapabilityOperand<5892, 0, 0, [SPV_INTEL_kernel_attributes], [Kernel]>;
+// TODO-SPIRV: add these once they are used / tested.
+// defm FPGAKernelAttributesINTEL : CapabilityOperand<5897, 0, 0, [SPV_INTEL_kernel_attributes], [Kernel]>;
+// defm FPGAKernelAttributesv2INTEL : CapabilityOperand<6161, 0, 0, [SPV_INTEL_kernel_attributes], [Kernel]>;
+// END TODO-SPIRV
defm LongCompositesINTEL : CapabilityOperand<6089, 0, 0, [SPV_INTEL_long_composites], []>;
defm BindlessImagesINTEL : CapabilityOperand<6528, 0, 0, [SPV_INTEL_bindless_images], []>;
defm MemoryAccessAliasingINTEL : CapabilityOperand<5910, 0, 0, [SPV_INTEL_memory_access_aliasing], []>;
@@ -805,6 +813,15 @@ defm RoundingModeRTPINTEL : ExecutionModeOperand<5620, [RoundToInfinityINTEL]>;
defm RoundingModeRTNINTEL : ExecutionModeOperand<5621, [RoundToInfinityINTEL]>;
defm FloatingPointModeALTINTEL : ExecutionModeOperand<5622, [FloatingPointModeINTEL]>;
defm FloatingPointModeIEEEINTEL : ExecutionModeOperand<5623, [FloatingPointModeINTEL]>;
+defm MaxWorkgroupSizeINTEL : ExecutionModeOperand<5893, [KernelAttributesINTEL]>;
+// TODO-SPIRV: Add the following once they are used / tested.
+// defm MaxWorkDimINTEL : ExecutionModeOperand<5894, [KernelAttributesINTEL]>;
+// defm NoGlobalOffsetINTEL : ExecutionModeOperand<5895, [KernelAttributesINTEL]>;
+// defm NumSIMDWorkitemsINTEL : ExecutionModeOperand<5896, [FPGAKernelAttributesINTEL]>;
+// defm SchedulerTargetFmaxMhzINTEL : ExecutionModeOperand<5903, [FPGAKernelAttributesINTEL]>;
+// defm StreamingInterfaceINTEL : ExecutionModeOperand<6154, [FPGAKernelAttributesv2INTEL]>;
+// defm RegisterMapInterfaceINTEL : ExecutionModeOperand<6160, [FPGAKernelAttributesv2INTEL]>;
+// END TODO-SPIRV
defm FPFastMathDefault : ExecutionModeOperand<6028, [FloatControls2]>;
defm MaximallyReconvergesKHR : ExecutionModeOperand<6023, [Shader]>;
@@ -1919,7 +1936,7 @@ defm GenericCastToPtr : SpecConstantOpOperandsOperand<122, [], [Kernel]>;
defm PtrCastToGeneric : SpecConstantOpOperandsOperand<121, [], [Kernel]>;
defm Bitcast : SpecConstantOpOperandsOperand<124, [], []>;
defm QuantizeToF16 : SpecConstantOpOperandsOperand<116, [], [Shader]>;
-// Arithmetic
+// Arithmetic
defm SNegate : SpecConstantOpOperandsOperand<126, [], []>;
defm Not : SpecConstantOpOperandsOperand<200, [], []>;
defm IAdd : SpecConstantOpOperandsOperand<128, [], []>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index 7dd0b95..2951a4b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -69,7 +69,7 @@ static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
}
// Pin SPIRVTargetObjectFile's vtables to this file.
-SPIRVTargetObjectFile::~SPIRVTargetObjectFile() {}
+SPIRVTargetObjectFile::~SPIRVTargetObjectFile() = default;
SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -244,7 +244,8 @@ static cl::opt<bool> SPVEnableNonSemanticDI(
cl::Optional, cl::init(false));
void SPIRVPassConfig::addPreEmitPass() {
- if (SPVEnableNonSemanticDI) {
+ if (SPVEnableNonSemanticDI ||
+ getSPIRVTargetMachine().getTargetTriple().getVendor() == Triple::AMD) {
addPass(createSPIRVEmitNonSemanticDIPass(&getTM<SPIRVTargetMachine>()));
}
}
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h
index 9d0adbb..87ec256 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h
@@ -16,7 +16,7 @@ namespace llvm {
/// This implementation is used for SystemZ ELF targets.
class SystemZELFTargetObjectFile : public TargetLoweringObjectFileELF {
public:
- SystemZELFTargetObjectFile() {}
+ SystemZELFTargetObjectFile() = default;
/// Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 2666342..66ed8b0 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -46,7 +46,7 @@ class WebAssemblyFastISel final : public FastISel {
// All possible address modes.
class Address {
public:
- using BaseKind = enum { RegBase, FrameIndexBase };
+ enum BaseKind { RegBase, FrameIndexBase };
private:
BaseKind Kind = RegBase;
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 51b540a..fa23656 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -158,7 +158,16 @@ FunctionPass *createX86InsertX87waitPass();
/// This pass optimizes arithmetic based on knowledge that is only used by
/// a reduction sequence and is therefore safe to reassociate in interesting
/// ways.
-FunctionPass *createX86PartialReductionPass();
+class X86PartialReductionPass : public PassInfoMixin<X86PartialReductionPass> {
+private:
+ const X86TargetMachine *TM;
+
+public:
+ X86PartialReductionPass(const X86TargetMachine *TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+
+FunctionPass *createX86PartialReductionLegacyPass();
/// Analyzes and emits pseudos to support Win x64 Unwind V2.
FunctionPass *createX86WinEHUnwindV2Pass();
@@ -179,7 +188,18 @@ FunctionPass *createX86LowerAMXTypeLegacyPass();
/// The pass transforms amx intrinsics to scalar operations if the function
/// has the optnone attribute or is compiled at O0.
-FunctionPass *createX86LowerAMXIntrinsicsPass();
+class X86LowerAMXIntrinsicsPass
+ : public PassInfoMixin<X86LowerAMXIntrinsicsPass> {
+private:
+ const TargetMachine *TM;
+
+public:
+ X86LowerAMXIntrinsicsPass(const TargetMachine *TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ static bool isRequired() { return true; }
+};
+
+FunctionPass *createX86LowerAMXIntrinsicsLegacyPass();
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
const X86Subtarget &,
@@ -220,7 +240,7 @@ void initializeX86LowerAMXIntrinsicsLegacyPassPass(PassRegistry &);
void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &);
void initializeX86LowerTileCopyPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
-void initializeX86PartialReductionPass(PassRegistry &);
+void initializeX86PartialReductionLegacyPass(PassRegistry &);
void initializeX86PreTileConfigPass(PassRegistry &);
void initializeX86ReturnThunksPass(PassRegistry &);
void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86CompressEVEX.cpp b/llvm/lib/Target/X86/X86CompressEVEX.cpp
index c0c7f5a..ddbd10d 100644
--- a/llvm/lib/Target/X86/X86CompressEVEX.cpp
+++ b/llvm/lib/Target/X86/X86CompressEVEX.cpp
@@ -272,7 +272,7 @@ static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
const MachineOperand &Src2 = MI.getOperand(2);
bool Is32BitReg = Opc == X86::ADD32ri_ND || Opc == X86::ADD32rr_ND;
const MCInstrDesc &NewDesc =
- ST.getInstrInfo()->get(Is32BitReg ? X86::LEA32r : X86::LEA64r);
+ ST.getInstrInfo()->get(Is32BitReg ? X86::LEA64_32r : X86::LEA64r);
if (Is32BitReg)
Src1 = getX86SubSuperRegister(Src1, 64);
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), NewDesc, Dst)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 133406b..06b8f7614 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2572,8 +2572,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Combine sin / cos into _sincos_stret if it is available.
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
if (Subtarget.isTargetWin64()) {
setOperationAction(ISD::SDIV, MVT::i128, Custom);
@@ -30908,6 +30908,63 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR);
}
+ if (VT == MVT::v64i8 && Subtarget.canExtendTo512BW()) {
+ // On AVX512BW, we can use variable 16-bit shifts to implement variable
+ // 8-bit shifts. For this, we split the input into two vectors, RLo and RHi.
+ // The i-th lane of RLo contains the (2*i)-th lane of R, and the i-th lane
+ // of RHi contains the (2*i+1)-th lane of R. After shifting, these vectors
+ // can efficiently be merged together using a masked move.
+ MVT ExtVT = MVT::v32i16;
+
+ SDValue RLo, RHi;
+  // Isolate the per-lane shift amounts: AmtLo masks each 16-bit lane down to
+  // its low byte (the even lanes), while AmtHi right-shifts each lane by 8 to
+  // extract the odd lanes.
+ SDValue AmtLo = DAG.getNode(ISD::AND, dl, ExtVT, DAG.getBitcast(ExtVT, Amt),
+ DAG.getConstant(0x00ff, dl, ExtVT));
+ SDValue AmtHi = getTargetVShiftByConstNode(
+ X86ISD::VSRLI, dl, ExtVT, DAG.getBitcast(ExtVT, Amt), 8, DAG);
+ switch (Opc) {
+ case ISD::SHL:
+ // Because we shift left, no bits from the high half can influence the low
+ // half, so we don't need to mask RLo. We do however need to mask RHi, to
+ // prevent high bits of an even lane overflowing into low bits of an odd
+ // lane.
+ RLo = DAG.getBitcast(ExtVT, R);
+ RHi = DAG.getNode(ISD::AND, dl, ExtVT, RLo,
+ DAG.getConstant(0xff00, dl, ExtVT));
+ break;
+ case ISD::SRL:
+ // Same idea as above, but this time we need to make sure no low bits of
+ // an odd lane can overflow into high bits of an even lane.
+ RHi = DAG.getBitcast(ExtVT, R);
+ RLo = DAG.getNode(ISD::AND, dl, ExtVT, RHi,
+ DAG.getConstant(0x00ff, dl, ExtVT));
+ break;
+ case ISD::SRA:
+ // For arithmetic right shifts, we want to sign extend each even lane of R
+ // such that the upper half of the corresponding lane of RLo is 0 or -1
+ // depending on the sign bit of the original lane. We do this using 2
+ // immediate shifts.
+ RHi = DAG.getBitcast(ExtVT, R);
+ RLo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, RHi, 8, DAG);
+ RLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExtVT, RLo, 8, DAG);
+ break;
+ default:
+ llvm_unreachable("Unexpected Shift Op");
+ }
+
+ SDValue ShiftedLo =
+ DAG.getBitcast(VT, DAG.getNode(Opc, dl, ExtVT, RLo, AmtLo));
+ SDValue ShiftedHi =
+ DAG.getBitcast(VT, DAG.getNode(Opc, dl, ExtVT, RHi, AmtHi));
+
+ // To merge the shifted vectors back together, we select even lanes
+ // from ShiftedLo and odd lanes from ShiftedHi.
+ SDValue SelectMask = DAG.getBitcast(
+ MVT::v64i1, DAG.getConstant(0x5555555555555555, dl, MVT::i64));
+ return DAG.getSelect(dl, VT, SelectMask, ShiftedLo, ShiftedHi);
+ }
+
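// A standalone sketch of the even/odd-lane trick above (illustrative, not
// part of the patch): two i8 lanes packed into one uint16_t, each shifted
// left by its own amount, then merged by selecting even/odd byte lanes.
#include <cstdint>

static uint16_t shl2xI8(uint16_t R, uint16_t Amt) {
  uint16_t AmtLo = Amt & 0x00ff; // even-lane (low byte) shift amount
  uint16_t AmtHi = Amt >> 8;     // odd-lane (high byte) shift amount
  uint16_t RLo = R;              // SHL: high-byte bits cannot shift downward
  uint16_t RHi = R & 0xff00;     // mask so even-lane bits cannot leak upward
  uint16_t Lo = (uint16_t)(RLo << AmtLo);
  uint16_t Hi = (uint16_t)(RHi << AmtHi);
  return (uint16_t)((Lo & 0x00ff) | (Hi & 0xff00)); // the masked-move merge
}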
if (VT == MVT::v16i8 ||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) {
@@ -33004,60 +33061,6 @@ static SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
-static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue Arg = Op.getOperand(0);
- EVT ArgVT = Arg.getValueType();
- bool isF64 = ArgVT == MVT::f64;
-
- RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
- const char *LibcallName = TLI.getLibcallName(LC);
- if (!LibcallName)
- return SDValue();
-
- assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());
-
- // For MacOSX, we want to call an alternative entry point: __sincos_stret,
- // which returns the values as { float, float } (in XMM0) or
- // { double, double } (which is returned in XMM0, XMM1).
- SDLoc dl(Op);
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-
- TargetLowering::ArgListTy Args;
- Args.emplace_back(Arg, ArgTy);
-
- // Only optimize x86_64 for now. i386 is a bit messy. For f32,
- // the small struct {f32, f32} is returned in (eax, edx). For f64,
- // the results are returned via SRet in memory.
- SDValue Callee =
- DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
-
- Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
- : (Type *)FixedVectorType::get(ArgTy, 4);
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(DAG.getEntryNode())
- .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-
- if (isF64)
- // Returned in xmm0 and xmm1.
- return CallResult.first;
-
- // Returned in bits 0:31 and 32:64 xmm0.
- SDValue SinVal =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first,
- DAG.getVectorIdxConstant(0, dl));
- SDValue CosVal =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first,
- DAG.getVectorIdxConstant(1, dl));
- SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
- return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
-}
-
/// Widen a vector input to a vector of NVT. The
/// input vector must have the same element type as NVT.
static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
@@ -33662,7 +33665,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ABDS:
case ISD::ABDU: return LowerABD(Op, Subtarget, DAG);
case ISD::AVGCEILU: return LowerAVG(Op, Subtarget, DAG);
- case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
@@ -53347,6 +53349,103 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Look for a RMW operation that only touches one bit of a larger-than-legal
+// type and fold it to a BTC/BTR/BTS or bit-insertion pattern acting on a
+// single i32 subvalue.
+static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ using namespace SDPatternMatch;
+ SDValue StoredVal = St->getValue();
+ EVT VT = StoredVal.getValueType();
+
+ // Only narrow normal stores of larger than legal scalar integers.
+ if (!ISD::isNormalStore(St) || !St->isSimple() || !VT.isScalarInteger() ||
+ VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32))
+ return SDValue();
+
+ // BTR: X & ~(1 << ShAmt)
+ // BTS: X | (1 << ShAmt)
+ // BTC: X ^ (1 << ShAmt)
+ //
+ // BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt)
+ SDValue SrcVal, InsertBit, ShAmt;
+ if (!StoredVal.hasOneUse() ||
+ !(sd_match(StoredVal, m_And(m_Value(SrcVal),
+ m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
+ sd_match(StoredVal,
+ m_Or(m_Value(SrcVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
+ sd_match(StoredVal,
+ m_Xor(m_Value(SrcVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
+ sd_match(
+ StoredVal,
+ m_Or(m_And(m_Value(SrcVal), m_Not(m_Shl(m_One(), m_Value(ShAmt)))),
+ m_Shl(m_Value(InsertBit), m_Deferred(ShAmt))))))
+ return SDValue();
+
+ // SrcVal must be a matching normal load further up the chain.
+ auto *Ld = dyn_cast<LoadSDNode>(SrcVal);
+ if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
+ Ld->getBasePtr() != St->getBasePtr() ||
+ Ld->getOffset() != St->getOffset() ||
+ !St->getChain().reachesChainWithoutSideEffects(SDValue(Ld, 1)))
+ return SDValue();
+
+ // Ensure the shift amount is in bounds.
+ KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
+ if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
+ return SDValue();
+
+  // If we're inserting a bit, the inserted value must be known to be 0 or 1,
+  // i.e. only its LSB may possibly be set.
+ if (InsertBit) {
+ KnownBits KnownInsert = DAG.computeKnownBits(InsertBit);
+ if (KnownInsert.countMinLeadingZeros() < (VT.getSizeInBits() - 1))
+ return SDValue();
+ }
+
+ // Split the shift into an alignment shift that moves the active i32 block to
+ // the bottom bits for truncation and a modulo shift that can act on the i32.
+ EVT AmtVT = ShAmt.getValueType();
+ SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+ DAG.getSignedConstant(-32LL, DL, AmtVT));
+ SDValue ModuloAmt =
+ DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
+ ModuloAmt = DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8);
+
+ // Compute the byte offset for the i32 block that is changed by the RMW.
+ // combineTruncate will adjust the load for us in a similar way.
+ EVT PtrVT = St->getBasePtr().getValueType();
+ SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT);
+ SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs,
+ DAG.getShiftAmountConstant(3, PtrVT, DL));
+ SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL,
+ SDNodeFlags::NoUnsignedWrap);
+
+ // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store.
+ SDValue X = DAG.getNode(ISD::SRL, DL, VT, SrcVal, AlignAmt);
+ X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+
+ SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getConstant(1, DL, MVT::i32), ModuloAmt);
+
+ SDValue Res;
+ if (InsertBit) {
+ SDValue BitMask =
+ DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getZExtOrTrunc(InsertBit, DL, MVT::i32), ModuloAmt);
+ Res =
+ DAG.getNode(ISD::AND, DL, MVT::i32, X, DAG.getNOT(DL, Mask, MVT::i32));
+ Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, BitMask);
+ } else {
+ if (StoredVal.getOpcode() == ISD::AND)
+ Mask = DAG.getNOT(DL, Mask, MVT::i32);
+ Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
+ }
+
+ return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
+ Align(), St->getMemOperand()->getFlags());
+}
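// A standalone sketch of the narrowing above (illustrative, not part of the
// patch): a BTS-style RMW on a wide integer in memory only has to load,
// modify, and store the single i32 word containing the bit. Little-endian
// word order is assumed, matching x86.
#include <cstdint>

static void setBitWide(uint32_t *Words, unsigned ShAmt) {
  unsigned AlignAmt = ShAmt & ~31u;        // bit offset of the i32 block
  unsigned ModuloAmt = ShAmt & 31u;        // bit position within that block
  uint32_t *Ptr = Words + (AlignAmt >> 5); // word index = AlignAmt / 32
  *Ptr |= UINT32_C(1) << ModuloAmt;        // BTS on a single 32-bit word
}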
+
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -53573,6 +53672,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
}
+ if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
+ return R;
+
// Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
// store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)
if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
@@ -54505,8 +54607,9 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
// truncation, see if we can convert the shift into a pointer offset instead.
// Limit this to normal (non-ext) scalar integer loads.
if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL &&
- Src.hasOneUse() && Src.getOperand(0).hasOneUse() &&
- ISD::isNormalLoad(Src.getOperand(0).getNode())) {
+ Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
+ (Src.getOperand(0).hasOneUse() ||
+ !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) {
auto *Ld = cast<LoadSDNode>(Src.getOperand(0));
if (Ld->isSimple() && VT.isByteSized() &&
isPowerOf2_64(VT.getSizeInBits())) {
@@ -54529,8 +54632,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue NewLoad =
DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),
Align(), Ld->getMemOperand()->getFlags());
- DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1),
- NewLoad.getValue(1));
+ DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return NewLoad;
}
}
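// What the pointer-offset rewrite above relies on (illustrative, not part of
// the patch): on a little-endian target, trunc(load-i64(p) >> 32) reads the
// same bytes as a direct i32 load at p + 4.
#include <cstdint>
#include <cstring>

static uint32_t highWordViaShift(const void *P) {
  uint64_t V;
  std::memcpy(&V, P, sizeof(V));
  return (uint32_t)(V >> 32);
}

static uint32_t highWordViaOffset(const void *P) {
  uint32_t V;
  std::memcpy(&V, (const char *)P + 4, sizeof(V));
  return V; // equals highWordViaShift(P) on little-endian targets
}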
@@ -56306,6 +56408,7 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ using namespace SDPatternMatch;
const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
const SDValue LHS = N->getOperand(0);
const SDValue RHS = N->getOperand(1);
@@ -56364,6 +56467,37 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
+ // If we're performing a bit test on a larger than legal type, attempt
+ // to (aligned) shift down the value to the bottom 32-bits and then
+ // perform the bittest on the i32 value.
+ // ICMP_ZERO(AND(X,SHL(1,IDX)))
+ // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31))))
+ if (isNullConstant(RHS) &&
+ OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) {
+ SDValue X, ShAmt;
+ if (sd_match(LHS, m_OneUse(m_And(m_Value(X),
+ m_Shl(m_One(), m_Value(ShAmt)))))) {
+ // Only attempt this if the shift amount is known to be in bounds.
+ KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
+ if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) {
+ EVT AmtVT = ShAmt.getValueType();
+ SDValue AlignAmt =
+ DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+ DAG.getSignedConstant(-32LL, DL, AmtVT));
+ SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+ DAG.getConstant(31, DL, AmtVT));
+ SDValue Mask = DAG.getNode(
+ ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
+ DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
+ X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt);
+ X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+ X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask);
+ return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32),
+ CC);
+ }
+ }
+ }
+
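// The same narrowing in scalar form (illustrative, not part of the patch),
// using unsigned __int128 (a GCC/Clang extension) to stand in for a
// larger-than-legal integer type:
#include <cstdint>

static bool testBitWide(unsigned __int128 X, unsigned Idx) { // Idx < 128
  uint32_t Word = (uint32_t)(X >> (Idx & ~31u)); // aligned shift + truncate
  return (Word & (UINT32_C(1) << (Idx & 31u))) != 0; // i32 bit test
}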
// cmpeq(trunc(x),C) --> cmpeq(x,C)
// cmpne(trunc(x),C) --> cmpne(x,C)
// iff x upper bits are zero.
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index b655183..3b96e70 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -227,9 +227,6 @@ struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
} // end namespace llvm
-constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel;
-constexpr int MachineGadgetGraph::GadgetEdgeSentinel;
-
char X86LoadValueInjectionLoadHardeningPass::ID = 0;
void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
diff --git a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
index 7f33939..662aec2 100644
--- a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
@@ -23,12 +23,15 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Analysis.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -40,7 +43,7 @@
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "lower-amx-intrinsics"
+#define DEBUG_TYPE "x86-lower-amx-intrinsics"
#ifndef NDEBUG
static bool isV256I32Ty(Type *Ty) {
@@ -627,6 +630,37 @@ bool X86LowerAMXIntrinsics::visit() {
}
namespace {
+bool shouldRunLowerAMXIntrinsics(const Function &F, const TargetMachine *TM) {
+ return X86ScalarizeAMX && (F.hasFnAttribute(Attribute::OptimizeNone) ||
+ TM->getOptLevel() == CodeGenOptLevel::None);
+}
+
+bool runLowerAMXIntrinsics(Function &F, DominatorTree *DT, LoopInfo *LI) {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ X86LowerAMXIntrinsics LAT(F, DTU, LI);
+ return LAT.visit();
+}
+} // namespace
+
+PreservedAnalyses X86LowerAMXIntrinsicsPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ if (!shouldRunLowerAMXIntrinsics(F, TM))
+ return PreservedAnalyses::all();
+
+ DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
+ bool Changed = runLowerAMXIntrinsics(F, &DT, &LI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
+
+namespace {
class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass {
public:
static char ID;
@@ -634,21 +668,15 @@ public:
X86LowerAMXIntrinsicsLegacyPass() : FunctionPass(ID) {}
bool runOnFunction(Function &F) override {
- if (!X86ScalarizeAMX)
- return false;
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
- if (!F.hasFnAttribute(Attribute::OptimizeNone) &&
- TM->getOptLevel() != CodeGenOptLevel::None)
+ if (!shouldRunLowerAMXIntrinsics(F, TM))
return false;
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
-
- X86LowerAMXIntrinsics LAT(F, DTU, LI);
- return LAT.visit();
+ return runLowerAMXIntrinsics(F, DT, LI);
}
StringRef getPassName() const override { return "Lower AMX intrinsics"; }
@@ -668,6 +696,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(X86LowerAMXIntrinsicsLegacyPass, DEBUG_TYPE, PassName,
false, false)
-FunctionPass *llvm::createX86LowerAMXIntrinsicsPass() {
+FunctionPass *llvm::createX86LowerAMXIntrinsicsLegacyPass() {
return new X86LowerAMXIntrinsicsLegacyPass();
}
diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp
index a25e4e0..898c83c 100644
--- a/llvm/lib/Target/X86/X86PartialReduction.cpp
+++ b/llvm/lib/Target/X86/X86PartialReduction.cpp
@@ -16,10 +16,12 @@
#include "X86TargetMachine.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Analysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
@@ -30,39 +32,44 @@ using namespace llvm;
namespace {
-class X86PartialReduction : public FunctionPass {
+class X86PartialReduction {
+ const X86TargetMachine *TM;
const DataLayout *DL = nullptr;
const X86Subtarget *ST = nullptr;
public:
+ X86PartialReduction(const X86TargetMachine *TM) : TM(TM) {}
+ bool run(Function &F);
+
+private:
+ bool tryMAddReplacement(Instruction *Op, bool ReduceInOneBB);
+ bool trySADReplacement(Instruction *Op);
+};
+
+class X86PartialReductionLegacy : public FunctionPass {
+public:
static char ID; // Pass identification, replacement for typeid.
- X86PartialReduction() : FunctionPass(ID) { }
+ X86PartialReductionLegacy() : FunctionPass(ID) {}
- bool runOnFunction(Function &Fn) override;
+ bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
- StringRef getPassName() const override {
- return "X86 Partial Reduction";
- }
-
-private:
- bool tryMAddReplacement(Instruction *Op, bool ReduceInOneBB);
- bool trySADReplacement(Instruction *Op);
+ StringRef getPassName() const override { return "X86 Partial Reduction"; }
};
}
-FunctionPass *llvm::createX86PartialReductionPass() {
- return new X86PartialReduction();
+FunctionPass *llvm::createX86PartialReductionLegacyPass() {
+ return new X86PartialReductionLegacy();
}
-char X86PartialReduction::ID = 0;
+char X86PartialReductionLegacy::ID = 0;
-INITIALIZE_PASS(X86PartialReduction, DEBUG_TYPE,
- "X86 Partial Reduction", false, false)
+INITIALIZE_PASS(X86PartialReductionLegacy, DEBUG_TYPE, "X86 Partial Reduction",
+ false, false)
// This function should be kept in sync with detectExtMul() in X86ISelLowering.cpp.
static bool matchVPDPBUSDPattern(const X86Subtarget *ST, BinaryOperator *Mul,
@@ -494,17 +501,8 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
}
}
-bool X86PartialReduction::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- auto &TM = TPC->getTM<X86TargetMachine>();
- ST = TM.getSubtargetImpl(F);
-
+bool X86PartialReduction::run(Function &F) {
+ ST = TM->getSubtargetImpl(F);
DL = &F.getDataLayout();
bool MadeChange = false;
@@ -540,3 +538,25 @@ bool X86PartialReduction::runOnFunction(Function &F) {
return MadeChange;
}
+
+bool X86PartialReductionLegacy::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ return X86PartialReduction(&TPC->getTM<X86TargetMachine>()).run(F);
+}
+
+PreservedAnalyses X86PartialReductionPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ bool Changed = X86PartialReduction(TM).run(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
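// The general shape of such a new-PM port (illustrative sketch, not part of
// the patch; ExamplePass is a made-up name): a PassInfoMixin class whose
// run() reports which analyses survived the transform.
#include "llvm/IR/Analysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

namespace llvm {
class ExamplePass : public PassInfoMixin<ExamplePass> {
public:
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    bool Changed = false; // ... transform F here ...
    if (!Changed)
      return PreservedAnalyses::all();
    PreservedAnalyses PA = PreservedAnalyses::none();
    PA.preserveSet<CFGAnalyses>(); // the transform left the CFG intact
    return PA;
  }
};
} // namespace llvm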
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index fc25d55..db25594 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -15,14 +15,14 @@
#ifndef FUNCTION_PASS
#define FUNCTION_PASS(NAME, CREATE_PASS)
#endif
+FUNCTION_PASS("x86-lower-amx-intrinsics", X86LowerAMXIntrinsicsPass(this))
FUNCTION_PASS("x86-lower-amx-type", X86LowerAMXTypePass(this))
+FUNCTION_PASS("x86-partial-reduction", X86PartialReductionPass(this))
#undef FUNCTION_PASS
#ifndef DUMMY_FUNCTION_PASS
#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
-DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this))
-DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction())
DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
#undef DUMMY_FUNCTION_PASS
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 9a76abc..5f0bcab 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -97,7 +97,7 @@ extern "C" LLVM_C_ABI void LLVMInitializeX86Target() {
initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
initializeX86LoadValueInjectionRetHardeningPassPass(PR);
initializeX86OptimizeLEAPassPass(PR);
- initializeX86PartialReductionPass(PR);
+ initializeX86PartialReductionLegacyPass(PR);
initializePseudoProbeInserterPass(PR);
initializeX86ReturnThunksPass(PR);
initializeX86DAGToDAGISelLegacyPass(PR);
@@ -422,14 +422,14 @@ void X86PassConfig::addIRPasses() {
// We add both passes anyway; when they run, each pass skips itself based on
// the opt level and the optnone attribute.
- addPass(createX86LowerAMXIntrinsicsPass());
+ addPass(createX86LowerAMXIntrinsicsLegacyPass());
addPass(createX86LowerAMXTypeLegacyPass());
TargetPassConfig::addIRPasses();
if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createInterleavedAccessPass());
- addPass(createX86PartialReductionPass());
+ addPass(createX86PartialReductionLegacyPass());
}
// Add passes that handle indirect branch removal and insertion of a retpoline
diff --git a/llvm/lib/Target/X86/X86VZeroUpper.cpp b/llvm/lib/Target/X86/X86VZeroUpper.cpp
index f6f7e92..2f28ab3 100644
--- a/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -66,7 +66,7 @@ namespace {
MachineBasicBlock &MBB);
void addDirtySuccessor(MachineBasicBlock &MBB);
- using BlockExitState = enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
+ enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
static const char* getBlockExitStateName(BlockExitState ST);
diff --git a/llvm/lib/TextAPI/RecordVisitor.cpp b/llvm/lib/TextAPI/RecordVisitor.cpp
index d333b33..24971a7 100644
--- a/llvm/lib/TextAPI/RecordVisitor.cpp
+++ b/llvm/lib/TextAPI/RecordVisitor.cpp
@@ -15,7 +15,7 @@
using namespace llvm;
using namespace llvm::MachO;
-RecordVisitor::~RecordVisitor() {}
+RecordVisitor::~RecordVisitor() = default;
void RecordVisitor::visitObjCInterface(const ObjCInterfaceRecord &) {}
void RecordVisitor::visitObjCCategory(const ObjCCategoryRecord &) {}
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 5ed47ae..a6ac761 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5185,6 +5185,7 @@ struct AADereferenceableCallSiteReturned final
// ------------------------ Align Argument Attribute ------------------------
namespace {
+
static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
Value &AssociatedValue, const Use *U,
const Instruction *I, bool &TrackUse) {
@@ -5200,6 +5201,28 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
TrackUse = true;
return 0;
}
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::ptrmask: {
+      // TODO: Is it appropriate to pull in the attribute during
+      // initialization?
+ const auto *ConstVals = A.getAAFor<AAPotentialConstantValues>(
+ QueryingAA, IRPosition::value(*II->getOperand(1)), DepClassTy::NONE);
+ const auto *AlignAA = A.getAAFor<AAAlign>(
+ QueryingAA, IRPosition::value(*II), DepClassTy::NONE);
+ if (ConstVals && ConstVals->isValidState() && ConstVals->isAtFixpoint()) {
+ unsigned ShiftValue = std::min(ConstVals->getAssumedMinTrailingZeros(),
+ Value::MaxAlignmentExponent);
+ Align ConstAlign(UINT64_C(1) << ShiftValue);
+ if (ConstAlign >= AlignAA->getKnownAlign())
+ return Align(1).value();
+ }
+ if (AlignAA)
+ return AlignAA->getKnownAlign().value();
+ break;
+ }
+ default:
+ break;
+ }
MaybeAlign MA;
if (const auto *CB = dyn_cast<CallBase>(I)) {
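// A standalone sketch of the ptrmask-implied alignment above (illustrative,
// not part of the patch): with a constant mask, the result pointer is
// aligned to 1 << min(trailing zeros of the mask, MaxAlignmentExponent).
// Requires C++20 for std::countr_zero.
#include <bit>
#include <cstdint>

static uint64_t ptrmaskAlign(uint64_t Mask, unsigned MaxAlignExp) {
  unsigned TZ = std::countr_zero(Mask); // countr_zero(0) == 64
  unsigned Shift = TZ < MaxAlignExp ? TZ : MaxAlignExp;
  return UINT64_C(1) << Shift;
}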
@@ -5499,6 +5522,44 @@ struct AAAlignCallSiteReturned final
AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
+ ChangeStatus updateImpl(Attributor &A) override {
+ Instruction *I = getIRPosition().getCtxI();
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::ptrmask: {
+ Align Alignment;
+ bool Valid = false;
+
+ const auto *ConstVals = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*II->getOperand(1)), DepClassTy::REQUIRED);
+ if (ConstVals && ConstVals->isValidState()) {
+ unsigned ShiftValue =
+ std::min(ConstVals->getAssumedMinTrailingZeros(),
+ Value::MaxAlignmentExponent);
+ Alignment = Align(UINT64_C(1) << ShiftValue);
+ Valid = true;
+ }
+
+ const auto *AlignAA =
+ A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
+ DepClassTy::REQUIRED);
+ if (AlignAA && AlignAA->isValidState()) {
+ Alignment = std::max(AlignAA->getAssumedAlign(), Alignment);
+ Valid = true;
+ }
+
+ if (Valid)
+ return clampStateAndIndicateChange<StateType>(
+ this->getState(),
+ std::min(this->getAssumedAlign(), Alignment).value());
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ return Base::updateImpl(A);
+  }
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
};
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index d7eb745..2a87a0f 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -208,7 +208,7 @@ namespace KernelInfo {
// };
#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX) \
- constexpr const unsigned MEMBER##Idx = IDX;
+ constexpr unsigned MEMBER##Idx = IDX;
KERNEL_ENVIRONMENT_IDX(Configuration, 0)
KERNEL_ENVIRONMENT_IDX(Ident, 1)
@@ -216,7 +216,7 @@ KERNEL_ENVIRONMENT_IDX(Ident, 1)
#undef KERNEL_ENVIRONMENT_IDX
#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX) \
- constexpr const unsigned MEMBER##Idx = IDX;
+ constexpr unsigned MEMBER##Idx = IDX;
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1)
@@ -258,7 +258,7 @@ KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)
GlobalVariable *
getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) {
- constexpr const int InitKernelEnvironmentArgNo = 0;
+ constexpr int InitKernelEnvironmentArgNo = 0;
return cast<GlobalVariable>(
KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)
->stripPointerCasts());
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3ddf182..cbaff29 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3997,6 +3997,27 @@ static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I,
return nullptr;
}
+/// Fold select(X >s 0, 0, -X) | smax(X, 0) --> abs(X)
+/// select(X <s 0, -X, 0) | smax(X, 0) --> abs(X)
+static Value *FoldOrOfSelectSmaxToAbs(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *X;
+ Value *Sel;
+ if (match(&I,
+ m_c_Or(m_Value(Sel), m_OneUse(m_SMax(m_Value(X), m_ZeroInt()))))) {
+ auto NegX = m_Neg(m_Specific(X));
+ if (match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(X),
+ m_ZeroInt()),
+ m_ZeroInt(), NegX)) ||
+ match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(X),
+ m_ZeroInt()),
+ NegX, m_ZeroInt())))
+ return Builder.CreateBinaryIntrinsic(Intrinsic::abs, X,
+ Builder.getFalse());
+ }
+ return nullptr;
+}
+
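// Exhaustive check of the identity being folded (illustrative, not part of
// the patch): select(X >s 0, 0, -X) | smax(X, 0) == abs(X) for all i8
// values where the negation cannot overflow.
#include <cassert>
#include <cstdlib>

static void checkAbsIdentity() {
  for (int X = -127; X <= 127; ++X) { // -128 excluded: -X would overflow i8
    int Sel = X > 0 ? 0 : -X;  // select(X >s 0, 0, -X)
    int SMax = X > 0 ? X : 0;  // smax(X, 0)
    assert((Sel | SMax) == std::abs(X));
  }
}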
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -4545,6 +4566,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyAddWithRemainder(I))
return replaceInstUsesWith(I, V);
+ if (Value *Res = FoldOrOfSelectSmaxToAbs(I, Builder))
+ return replaceInstUsesWith(I, Res);
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index f5130da..9572f9d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3599,6 +3599,21 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
m_Not(m_Specific(SelCond->getTrueValue())));
if (MayNeedFreeze)
C = Builder.CreateFreeze(C);
+ if (!ProfcheckDisableMetadataFixes) {
+ Value *C2 = nullptr, *A2 = nullptr, *B2 = nullptr;
+ if (match(CondVal, m_LogicalAnd(m_Specific(C), m_Value(A2))) &&
+ SelCond) {
+ return SelectInst::Create(C, A, B, "", nullptr, SelCond);
+ } else if (match(FalseVal,
+ m_LogicalAnd(m_Not(m_Value(C2)), m_Value(B2))) &&
+ SelFVal) {
+ SelectInst *NewSI = SelectInst::Create(C, A, B, "", nullptr, SelFVal);
+ NewSI->swapProfMetadata();
+ return NewSI;
+ } else {
+ return createSelectInstWithUnknownProfile(C, A, B);
+ }
+ }
return SelectInst::Create(C, A, B);
}
@@ -3615,6 +3630,20 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
m_Not(m_Specific(SelFVal->getTrueValue())));
if (MayNeedFreeze)
C = Builder.CreateFreeze(C);
+ if (!ProfcheckDisableMetadataFixes) {
+ Value *C2 = nullptr, *A2 = nullptr, *B2 = nullptr;
+ if (match(CondVal, m_LogicalAnd(m_Not(m_Value(C2)), m_Value(A2))) &&
+ SelCond) {
+ SelectInst *NewSI = SelectInst::Create(C, B, A, "", nullptr, SelCond);
+ NewSI->swapProfMetadata();
+ return NewSI;
+ } else if (match(FalseVal, m_LogicalAnd(m_Specific(C), m_Value(B2))) &&
+ SelFVal) {
+ return SelectInst::Create(C, B, A, "", nullptr, SelFVal);
+ } else {
+ return createSelectInstWithUnknownProfile(C, B, A);
+ }
+ }
return SelectInst::Create(C, B, A);
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
index 80e77e09..a2fad02 100644
--- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -161,7 +161,7 @@ template <char NsanTypeId>
class ShadowTypeConfigImpl : public ShadowTypeConfig {
public:
char getNsanTypeId() const override { return NsanTypeId; }
- static constexpr const char kNsanTypeId = NsanTypeId;
+ static constexpr char kNsanTypeId = NsanTypeId;
};
// `double` (`d`) shadow type.
diff --git a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
index 89980d5..a577f51 100644
--- a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
+++ b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
@@ -122,7 +122,8 @@ DropUnnecessaryAssumesPass::run(Function &F, FunctionAnalysisManager &FAM) {
Value *Cond = Assume->getArgOperand(0);
// Don't drop type tests, which have special semantics.
- if (match(Cond, m_Intrinsic<Intrinsic::type_test>()))
+ if (match(Cond, m_Intrinsic<Intrinsic::type_test>()) ||
+ match(Cond, m_Intrinsic<Intrinsic::public_type_test>()))
continue;
SmallVector<Value *> Affected;
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 19eccb9..9ffa602 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1796,14 +1796,16 @@ private:
// mergeLatch may remove the only block in FC1.
SE.forgetLoop(FC1.L);
SE.forgetLoop(FC0.L);
- // Forget block dispositions as well, so that there are no dangling
- // pointers to erased/free'ed blocks.
- SE.forgetBlockAndLoopDispositions();
// Move instructions from FC0.Latch to FC1.Latch.
// Note: mergeLatch requires an updated DT.
mergeLatch(FC0, FC1);
+ // Forget block dispositions as well, so that there are no dangling
+  // pointers to erased/freed blocks. This must be done after mergeLatch(),
+  // since merging the latches may affect the dispositions.
+ SE.forgetBlockAndLoopDispositions();
+
// Merge the loops.
SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
for (BasicBlock *BB : Blocks) {
@@ -2092,14 +2094,16 @@ private:
// mergeLatch may remove the only block in FC1.
SE.forgetLoop(FC1.L);
SE.forgetLoop(FC0.L);
- // Forget block dispositions as well, so that there are no dangling
- // pointers to erased/free'ed blocks.
- SE.forgetBlockAndLoopDispositions();
// Move instructions from FC0.Latch to FC1.Latch.
// Note: mergeLatch requires an updated DT.
mergeLatch(FC0, FC1);
+ // Forget block dispositions as well, so that there are no dangling
+  // pointers to erased/freed blocks. This must be done after mergeLatch(),
+  // since merging the latches may affect the dispositions.
+ SE.forgetBlockAndLoopDispositions();
+
// Merge the loops.
SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
for (BasicBlock *BB : Blocks) {
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index a883998..1b770be 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -89,8 +89,8 @@ struct StoreToLoadForwardingCandidate {
/// Return true if the dependence from the store to the load has an
/// absolute distance of one.
/// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
- bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
- Loop *L) const {
+ bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE, Loop *L,
+ const DominatorTree &DT) const {
Value *LoadPtr = Load->getPointerOperand();
Value *StorePtr = Store->getPointerOperand();
Type *LoadType = getLoadStoreType(Load);
@@ -102,8 +102,10 @@ struct StoreToLoadForwardingCandidate {
DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
"Should be a known dependence");
- int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
- int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
+ int64_t StrideLoad =
+ getPtrStride(PSE, LoadType, LoadPtr, L, DT).value_or(0);
+ int64_t StrideStore =
+ getPtrStride(PSE, LoadType, StorePtr, L, DT).value_or(0);
if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
return false;
@@ -287,8 +289,8 @@ public:
// so deciding which one forwards is easy. The later one forwards as
// long as they both have a dependence distance of one to the load.
if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
- Cand.isDependenceDistanceOfOne(PSE, L) &&
- OtherCand->isDependenceDistanceOfOne(PSE, L)) {
+ Cand.isDependenceDistanceOfOne(PSE, L, *DT) &&
+ OtherCand->isDependenceDistanceOfOne(PSE, L, *DT)) {
// They are in the same block, the later one will forward to the load.
if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
OtherCand = &Cand;
@@ -538,7 +540,7 @@ public:
// Check whether the SCEV difference is the same as the induction step,
// thus we load the value in the next iteration.
- if (!Cand.isDependenceDistanceOfOne(PSE, L))
+ if (!Cand.isDependenceDistanceOfOne(PSE, L, *DT))
continue;
assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 2bda9d8..802ae4e 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1327,7 +1327,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
}
// Do not attempt partial/runtime unrolling in FullLoopUnrolling
- if (OnlyFullUnroll && (UP.Count < TripCount || UP.Count < MaxTripCount)) {
+ if (OnlyFullUnroll && ((!TripCount && !MaxTripCount) ||
+ UP.Count < TripCount || UP.Count < MaxTripCount)) {
LLVM_DEBUG(
dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
return LoopUnrollResult::Unmodified;
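// The fixed guard in isolation (illustrative, not part of the patch): when
// both trip counts are unknown (zero), the old condition was false, so a
// full-unroll-only run could fall through to partial/runtime unrolling.
static bool skipPartialUnroll(unsigned Count, unsigned TripCount,
                              unsigned MaxTripCount) {
  return (TripCount == 0 && MaxTripCount == 0) || Count < TripCount ||
         Count < MaxTripCount;
}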
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index bb6c879..239526e 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -337,7 +337,7 @@ static void buildPartialUnswitchConditionalBranch(
static void buildPartialInvariantUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
ValueToValueMapTy VMap;
for (auto *Val : reverse(ToDuplicate)) {
Instruction *Inst = cast<Instruction>(Val);
@@ -377,8 +377,19 @@ static void buildPartialInvariantUnswitchConditionalBranch(
IRBuilder<> IRB(&BB);
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
Value *Cond = VMap[ToDuplicate[0]];
- IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
- Direction ? &NormalSucc : &UnswitchedSucc);
+  // The expectation is that ToDuplicate[0] is the condition used by
+  // OriginalBranch, in which case we can clone its profile metadata.
+ auto *ProfData =
+ !ProfcheckDisableMetadataFixes &&
+ ToDuplicate[0] == skipTrivialSelect(OriginalBranch.getCondition())
+ ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
+ : nullptr;
+ auto *BR =
+ IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+ Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
+ if (!ProfData)
+ setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(),
+ DEBUG_TYPE);
}
/// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -2515,7 +2526,7 @@ static void unswitchNontrivialInvariants(
// the branch in the split block.
if (PartiallyInvariant)
buildPartialInvariantUnswitchConditionalBranch(
- *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
else {
buildPartialUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 5f6f66a..0a8f5ea 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -558,11 +558,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
} else {
// Test for successors as back edge
BasicBlock *BB = N->getNodeAs<BasicBlock>();
- BranchInst *Term = cast<BranchInst>(BB->getTerminator());
-
- for (BasicBlock *Succ : Term->successors())
- if (Visited.count(Succ))
- Loops[Succ] = BB;
+ if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()))
+ for (BasicBlock *Succ : Term->successors())
+ if (Visited.count(Succ))
+ Loops[Succ] = BB;
}
}
@@ -594,7 +593,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
for (BasicBlock *P : predecessors(BB)) {
// Ignore it if it's a branch from outside into our region entry
- if (!ParentRegion->contains(P))
+    if (!ParentRegion->contains(P) || !isa<BranchInst>(P->getTerminator()))
continue;
Region *R = RI->getRegionFor(P);
@@ -1402,13 +1401,17 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
/// Run the transformation for each region found
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
const TargetTransformInfo *TTI) {
- if (R->isTopLevelRegion())
+ // CallBr and its corresponding direct target blocks are for now ignored by
+  // this pass. This is not a limitation for the currently intended use cases
+ // of callbr in the AMDGPU backend.
+ // Parent and child regions are not affected by this (current) restriction.
+ // See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
+ if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
return false;
this->DT = DT;
this->TTI = TTI;
Func = R->getEntry()->getParent();
- assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
ParentRegion = R;
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 5ba6f95f..6086615 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -933,6 +933,7 @@ Function *CodeExtractor::constructFunctionDeclaration(
case Attribute::CoroDestroyOnlyWhenComplete:
case Attribute::CoroElideSafe:
case Attribute::NoDivergenceSource:
+ case Attribute::NoCreateUndefOrPoison:
continue;
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
diff --git a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp
index 0642d51..6d4436b 100644
--- a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp
+++ b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp
@@ -16,22 +16,62 @@
using namespace llvm;
+static void mergeAttributes(LLVMContext &Ctx, const Module &M,
+ const DataLayout &DL, const Triple &TT,
+ Function *Func, FunctionType *FuncTy,
+ AttributeList FuncAttrs) {
+ AttributeList OldAttrs = Func->getAttributes();
+ AttributeList NewAttrs = OldAttrs;
+
+ {
+ AttrBuilder OldBuilder(Ctx, OldAttrs.getFnAttrs());
+ AttrBuilder NewBuilder(Ctx, FuncAttrs.getFnAttrs());
+ OldBuilder.merge(NewBuilder);
+ NewAttrs = NewAttrs.addFnAttributes(Ctx, OldBuilder);
+ }
+
+ {
+ AttrBuilder OldBuilder(Ctx, OldAttrs.getRetAttrs());
+ AttrBuilder NewBuilder(Ctx, FuncAttrs.getRetAttrs());
+ OldBuilder.merge(NewBuilder);
+ NewAttrs = NewAttrs.addRetAttributes(Ctx, OldBuilder);
+ }
+
+ for (unsigned I = 0, E = FuncTy->getNumParams(); I != E; ++I) {
+ AttrBuilder OldBuilder(Ctx, OldAttrs.getParamAttrs(I));
+ AttrBuilder NewBuilder(Ctx, FuncAttrs.getParamAttrs(I));
+ OldBuilder.merge(NewBuilder);
+ NewAttrs = NewAttrs.addParamAttributes(Ctx, I, OldBuilder);
+ }
+
+ Func->setAttributes(NewAttrs);
+}
+
PreservedAnalyses DeclareRuntimeLibcallsPass::run(Module &M,
ModuleAnalysisManager &MAM) {
RTLIB::RuntimeLibcallsInfo RTLCI(M.getTargetTriple());
LLVMContext &Ctx = M.getContext();
+ const DataLayout &DL = M.getDataLayout();
+ const Triple &TT = M.getTargetTriple();
for (RTLIB::LibcallImpl Impl : RTLCI.getLibcallImpls()) {
if (Impl == RTLIB::Unsupported)
continue;
- // TODO: Declare with correct type, calling convention, and attributes.
+ auto [FuncTy, FuncAttrs] = RTLCI.getFunctionTy(Ctx, TT, DL, Impl);
- FunctionType *FuncTy =
- FunctionType::get(Type::getVoidTy(Ctx), {}, /*IsVarArgs=*/true);
+ // TODO: Declare with correct type, calling convention, and attributes.
+ if (!FuncTy)
+ FuncTy = FunctionType::get(Type::getVoidTy(Ctx), {}, /*IsVarArgs=*/true);
StringRef FuncName = RTLCI.getLibcallImplName(Impl);
- M.getOrInsertFunction(FuncName, FuncTy);
+
+ Function *Func =
+ cast<Function>(M.getOrInsertFunction(FuncName, FuncTy).getCallee());
+ if (Func->getFunctionType() == FuncTy) {
+ mergeAttributes(Ctx, M, DL, TT, Func, FuncTy, FuncAttrs);
+ Func->setCallingConv(RTLCI.getLibcallImplCallingConv(Impl));
+ }
}
return PreservedAnalyses::none();
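A minimal sketch of the merge direction in mergeAttributes above, using only
the public AttrBuilder API (the attribute kinds are illustrative, not taken
from the patch):

    AttrBuilder Old(Ctx);                  // existing declaration, e.g. {noinline}
    Old.addAttribute(Attribute::NoInline);
    AttrBuilder New(Ctx);                  // libcall table entry, e.g. {nounwind}
    New.addAttribute(Attribute::NoUnwind);
    Old.merge(New);                        // union: {noinline, nounwind}
    // mergeAttributes applies this per function, return and parameter slot.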
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 46f2903..a03cf6e 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3416,7 +3416,11 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C,
// Create integer constant expression.
auto createIntegerExpression = [&DIB](const Constant &CV) -> DIExpression * {
const APInt &API = cast<ConstantInt>(&CV)->getValue();
- std::optional<int64_t> InitIntOpt = API.trySExtValue();
+ std::optional<int64_t> InitIntOpt;
+ if (API.getBitWidth() == 1)
+ InitIntOpt = API.tryZExtValue();
+ else
+ InitIntOpt = API.trySExtValue();
return InitIntOpt ? DIB.createConstantValueExpression(
static_cast<uint64_t>(*InitIntOpt))
: nullptr;
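Why the i1 special case matters, as a small sketch (plain APInt, no DIBuilder
involved):

    APInt BoolTrue(/*numBits=*/1, /*val=*/1);   // an i1 'true'
    // trySExtValue() treats the single bit as a sign bit:
    assert(*BoolTrue.trySExtValue() == -1);
    // tryZExtValue() yields the value debug-info consumers expect for a bool:
    assert(*BoolTrue.tryZExtValue() == 1);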
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 1e8f6cc..6c9467b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -202,6 +202,27 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
/// probability of executing at least one more iteration?
static BranchProbability
probOfNextInRemainder(BranchProbability OriginalLoopProb, unsigned N) {
+ // OriginalLoopProb == 1 would produce a division by zero in the calculation
+ // below. That case indicates an always-infinite loop, but a remainder
+ // iteration count cannot be computed at run time when the original loop is
+ // infinite, as infinity % UnrollCount is undefined. We therefore choose
+ // probabilities indicating that all remainder loop iterations will always
+ // execute.
+ //
+ // Currently, the remainder loop here is an epilogue, which cannot be reached
+ // if the original loop is infinite, so the aforementioned choice is
+ // arbitrary.
+ //
+ // FIXME: Branch weights still need to be fixed in the case of prologues
+ // (issue #135812). In that case, the aforementioned choice seems reasonable
+ // for the goal of maintaining the original loop's block frequencies. That
+ // is, an infinite loop's initial iterations are not skipped, and the prologue
+ // loop body might have unique blocks that execute a finite number of times
+ // if, for example, the original loop body contains conditionals like i <
+ // UnrollCount.
+ if (OriginalLoopProb == BranchProbability::getOne())
+ return BranchProbability::getOne();
+
// Each of these variables holds the original loop's probability that the
// number of iterations it will execute is some m in the specified range.
BranchProbability ProbOne = OriginalLoopProb; // 1 <= m
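A hedged illustration of the new guard: with OriginalLoopProb == 1 the
geometric terms computed below would divide by zero, so the function pins the
result instead (the call is hypothetical, mirroring the code above):

    assert(probOfNextInRemainder(BranchProbability::getOne(), /*N=*/3) ==
           BranchProbability::getOne());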
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 8be471b..6e60b94 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -992,9 +992,12 @@ BranchProbability llvm::getBranchProbability(BranchInst *B,
uint64_t Weight0, Weight1;
if (!extractBranchWeights(*B, Weight0, Weight1))
return BranchProbability::getUnknown();
+ uint64_t Denominator = Weight0 + Weight1;
+ if (Denominator == 0)
+ return BranchProbability::getUnknown();
if (!ForFirstTarget)
std::swap(Weight0, Weight1);
- return BranchProbability::getBranchProbability(Weight0, Weight0 + Weight1);
+ return BranchProbability::getBranchProbability(Weight0, Denominator);
}
bool llvm::setBranchProbability(BranchInst *B, BranchProbability P,
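A short sketch of the degenerate-weights case the new guard handles; metadata
such as !{!"branch_weights", i32 0, i32 0} previously reached
getBranchProbability(0, 0):

    uint64_t Weight0 = 0, Weight1 = 0;        // both weights zero
    BranchProbability P =
        (Weight0 + Weight1 == 0)
            ? BranchProbability::getUnknown() // avoid dividing by zero
            : BranchProbability::getBranchProbability(Weight0,
                                                      Weight0 + Weight1);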
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index cbc604e..3a3e3ad 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -778,8 +778,10 @@ private:
return false;
// Add all values from the range to the set
- for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+ APInt Tmp = Span.getLower();
+ do
Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
+ while (++Tmp != Span.getUpper());
UsedICmps++;
return true;
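The do/while matters for ranges that cover the whole domain, where
getLower() == getUpper() and the old for-loop body never executed. A minimal
sketch with ConstantRange:

    ConstantRange Span = ConstantRange::getFull(/*BitWidth=*/2);
    assert(Span.getLower() == Span.getUpper()); // full ranges are [X, X)
    // The do/while visits every i2 value exactly once and only then sees
    // ++Tmp wrap back to the upper bound, so all four constants are added.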
@@ -6020,6 +6022,8 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
const DataLayout &DL) {
Value *Cond = SI->getCondition();
KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
+ SmallPtrSet<const Constant *, 4> KnownValues;
+ bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);
// We can also eliminate cases by determining that their values are outside of
// the limited range of the condition based on how many significant (non-sign)
@@ -6039,15 +6043,18 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
UniqueSuccessors.push_back(Successor);
++It->second;
}
- const APInt &CaseVal = Case.getCaseValue()->getValue();
+ ConstantInt *CaseC = Case.getCaseValue();
+ const APInt &CaseVal = CaseC->getValue();
if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
- (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
- DeadCases.push_back(Case.getCaseValue());
+ (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
+ (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
+ DeadCases.push_back(CaseC);
if (DTU)
--NumPerSuccessorCases[Successor];
LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
<< " is dead.\n");
- }
+ } else if (IsKnownValuesValid)
+ KnownValues.erase(CaseC);
}
// If we can prove that the cases must cover all possible values, the
@@ -6058,33 +6065,41 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
const unsigned NumUnknownBits =
Known.getBitWidth() - (Known.Zero | Known.One).popcount();
assert(NumUnknownBits <= Known.getBitWidth());
- if (HasDefault && DeadCases.empty() &&
- NumUnknownBits < 64 /* avoid overflow */) {
- uint64_t AllNumCases = 1ULL << NumUnknownBits;
- if (SI->getNumCases() == AllNumCases) {
+ if (HasDefault && DeadCases.empty()) {
+ if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
createUnreachableSwitchDefault(SI, DTU);
return true;
}
- // When only one case value is missing, replace default with that case.
- // Eliminating the default branch will provide more opportunities for
- // optimization, such as lookup tables.
- if (SI->getNumCases() == AllNumCases - 1) {
- assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
- IntegerType *CondTy = cast<IntegerType>(Cond->getType());
- if (CondTy->getIntegerBitWidth() > 64 ||
- !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
- return false;
- uint64_t MissingCaseVal = 0;
- for (const auto &Case : SI->cases())
- MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
- auto *MissingCase =
- cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
- SwitchInstProfUpdateWrapper SIW(*SI);
- SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
- createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
- SIW.setSuccessorWeight(0, 0);
- return true;
+ if (NumUnknownBits < 64 /* avoid overflow */) {
+ uint64_t AllNumCases = 1ULL << NumUnknownBits;
+ if (SI->getNumCases() == AllNumCases) {
+ createUnreachableSwitchDefault(SI, DTU);
+ return true;
+ }
+ // When only one case value is missing, replace default with that case.
+ // Eliminating the default branch will provide more opportunities for
+ // optimization, such as lookup tables.
+ if (SI->getNumCases() == AllNumCases - 1) {
+ assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
+ IntegerType *CondTy = cast<IntegerType>(Cond->getType());
+ if (CondTy->getIntegerBitWidth() > 64 ||
+ !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
+ return false;
+
+ uint64_t MissingCaseVal = 0;
+ for (const auto &Case : SI->cases())
+ MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
+ auto *MissingCase = cast<ConstantInt>(
+ ConstantInt::get(Cond->getType(), MissingCaseVal));
+ SwitchInstProfUpdateWrapper SIW(*SI);
+ SIW.addCase(MissingCase, SI->getDefaultDest(),
+ SIW.getSuccessorWeight(0));
+ createUnreachableSwitchDefault(SI, DTU,
+ /*RemoveOrigDefaultBlock*/ false);
+ SIW.setSuccessorWeight(0, 0);
+ return true;
+ }
}
}
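A hedged sketch of the new pruning, reusing the names above (the phi shape is
illustrative): if collectPossibleValues can enumerate every value Cond may
take, any case outside that set is dead, and once each remaining possible
value has a matching case the default becomes unreachable.

    // e.g. Cond defined as: %c = phi i8 [ 0, %a ], [ 7, %b ]
    SmallPtrSet<const Constant *, 4> KnownValues;
    if (collectPossibleValues(Cond, KnownValues, 4)) {
      // KnownValues == {i8 0, i8 7}: every case other than 0 and 7 is
      // dead, and with both present the default cannot be reached.
    }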
@@ -7570,6 +7585,81 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
return true;
}
+/// Tries to transform the switch when the condition is a umin with a constant.
+/// In that case, the default branch can be replaced by the constant's branch.
+/// This method also removes dead cases when the simplification cannot replace
+/// the default branch.
+///
+/// For example:
+/// switch(umin(a, 3)) {
+/// case 0:
+/// case 1:
+/// case 2:
+/// case 3:
+/// case 4:
+/// // ...
+/// default:
+/// unreachable
+/// }
+///
+/// Transforms into:
+///
+/// switch(a) {
+/// case 0:
+/// case 1:
+/// case 2:
+/// default:
+/// // This is case 3
+/// }
+static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU) {
+ Value *A;
+ ConstantInt *Constant;
+
+ if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
+ return false;
+
+ SmallVector<DominatorTree::UpdateType> Updates;
+ SwitchInstProfUpdateWrapper SIW(*SI);
+ BasicBlock *BB = SIW->getParent();
+
+ // Dead cases are removed even when the simplification fails.
+ // A case is dead when its value is higher than the Constant.
+ for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
+ if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
+ ++I;
+ continue;
+ }
+ BasicBlock *DeadCaseBB = I->getCaseSuccessor();
+ DeadCaseBB->removePredecessor(BB);
+ Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
+ I = SIW->removeCase(I);
+ E = SIW->case_end();
+ }
+
+ auto Case = SI->findCaseValue(Constant);
+ // If the case value is not found, `findCaseValue` returns the default case.
+ // In this scenario, since there is no explicit `case 3:`, the simplification
+ // fails. The simplification also fails when the switch's default destination
+ // is reachable.
+ if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ return !Updates.empty();
+ }
+
+ BasicBlock *Unreachable = SI->getDefaultDest();
+ SIW.replaceDefaultDest(Case);
+ SIW.removeCase(Case);
+ SIW->setCondition(A);
+
+ Updates.push_back({DominatorTree::Delete, BB, Unreachable});
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ return true;
+}
+
/// Tries to transform switch of powers of two to reduce switch range.
/// For example, switch like:
/// switch (C) { case 1: case 2: case 64: case 128: }
@@ -8037,6 +8127,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (simplifyDuplicateSwitchArms(SI, DTU))
return requestResimplify();
+ if (simplifySwitchWhenUMin(SI, DTU))
+ return requestResimplify();
+
return false;
}
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 94c5c170..e86ab13 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -158,6 +158,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;
// Redirect exiting edges through a control flow hub.
ControlFlowHub CHub;
+ bool Changed = false;
for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
BasicBlock *BB = ExitingBlocks[I];
@@ -182,6 +183,10 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
bool UpdatedLI = false;
BasicBlock *NewSucc =
SplitCallBrEdge(BB, Succ, J, &DTU, nullptr, &LI, &UpdatedLI);
+ // SplitCallBrEdge modifies the CFG because it creates an intermediate
+ // block. So we need to set the changed flag no matter what the
+ // ControlFlowHub is going to do later.
+ Changed = true;
// Even if CallBr and Succ do not have a common parent loop, we need to
// add the new target block to the parent loop of the current loop.
if (!UpdatedLI)
@@ -207,6 +212,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
bool ChangedCFG;
std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(
&DTU, GuardBlocks, "loop.exit", MaxBooleansInControlFlowHub.getValue());
+ ChangedCFG |= Changed;
if (!ChangedCFG)
return false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index fdfff16..03112c6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -462,8 +462,9 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
bool CanAddPredicate = !llvm::shouldOptimizeForSize(
TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
- int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
- CanAddPredicate, false).value_or(0);
+ int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
+ CanAddPredicate, false)
+ .value_or(0);
if (Stride == 1 || Stride == -1)
return Stride;
return 0;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 34b405c..bf3f52c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20975,6 +20975,27 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
if (isa<PHINode>(S.getMainOp()) ||
isVectorLikeInstWithConstOps(S.getMainOp()))
return nullptr;
+ // If the parent node is non-schedulable, the current node is copyable, and
+ // any of the parent's instructions are used in multiple basic blocks or in
+ // a binary-operator node, cancel scheduling: it may cause wrong def-use
+ // dependencies in the analysis, leading to a crash.
+ // Non-scheduled nodes may not have an associated ScheduleData model, which
+ // may lead to a skipped dependency analysis.
+ if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
+ EI.UserTE->doesNotNeedToSchedule() &&
+ EI.UserTE->getOpcode() != Instruction::PHI &&
+ any_of(EI.UserTE->Scalars, [](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || I->hasOneUser())
+ return false;
+ for (User *U : I->users()) {
+ auto *UI = cast<Instruction>(U);
+ if (isa<BinaryOperator>(UI))
+ return true;
+ }
+ return false;
+ }))
+ return std::nullopt;
bool HasCopyables = S.areInstructionsWithCopyableElements();
if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
index 9c869dd..d354933 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
@@ -92,7 +92,7 @@ void MemDGNode::print(raw_ostream &OS, bool PrintDeps) const {
DGNode::print(OS, false);
if (PrintDeps) {
// Print memory preds.
- static constexpr const unsigned Indent = 4;
+ static constexpr unsigned Indent = 4;
for (auto *Pred : MemPreds)
OS.indent(Indent) << "<-" << *Pred->getInstruction() << "\n";
}
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index 86dbd21..5534da9 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -25,14 +25,14 @@ static cl::opt<bool>
"emit new instructions (*very* expensive)."));
#endif // NDEBUG
-static constexpr const unsigned long StopAtDisabled =
+static constexpr unsigned long StopAtDisabled =
std::numeric_limits<unsigned long>::max();
static cl::opt<unsigned long>
StopAt("sbvec-stop-at", cl::init(StopAtDisabled), cl::Hidden,
cl::desc("Vectorize if the invocation count is < than this. 0 "
"disables vectorization."));
-static constexpr const unsigned long StopBundleDisabled =
+static constexpr unsigned long StopBundleDisabled =
std::numeric_limits<unsigned long>::max();
static cl::opt<unsigned long>
StopBundle("sbvec-stop-bndl", cl::init(StopBundleDisabled), cl::Hidden,
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
index ed2f80b..2de6921 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
@@ -43,7 +43,7 @@ cl::opt<std::string> AllowFiles(
"sbvec-allow-files", cl::init(".*"), cl::Hidden,
cl::desc("Run the vectorizer only on file paths that match any in the "
"list of comma-separated regex's."));
-static constexpr const char AllowFilesDelim = ',';
+static constexpr char AllowFilesDelim = ',';
SandboxVectorizerPass::SandboxVectorizerPass() : FPM("fpm") {
if (UserDefinedPassPipeline == DefaultPipelineMagicStr) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index aba6d35..cfe1f1e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1110,9 +1110,8 @@ public:
VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
VPInstruction *clone() override {
- SmallVector<VPValue *, 2> Operands(operands());
- auto *New =
- new VPInstruction(Opcode, Operands, *this, *this, getDebugLoc(), Name);
+ auto *New = new VPInstruction(Opcode, operands(), *this, *this,
+ getDebugLoc(), Name);
if (getUnderlyingValue())
New->setUnderlyingValue(getUnderlyingInstr());
return New;
@@ -1226,10 +1225,9 @@ public:
}
VPInstruction *clone() override {
- SmallVector<VPValue *, 2> Operands(operands());
auto *New =
- new VPInstructionWithType(getOpcode(), Operands, getResultType(), *this,
- getDebugLoc(), getName());
+ new VPInstructionWithType(getOpcode(), operands(), getResultType(),
+ *this, getDebugLoc(), getName());
New->setUnderlyingValue(getUnderlyingValue());
return New;
}
@@ -3211,6 +3209,9 @@ protected:
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
Alignment(Alignment), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+ assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
+        "Reversed access without VPVectorEndPointerRecipe address?");
}
public:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b5b98c6..b57c448 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -313,7 +313,8 @@ private:
// Check for recipes that do not have opcodes.
if constexpr (std::is_same_v<RecipeTy, VPScalarIVStepsRecipe> ||
std::is_same_v<RecipeTy, VPCanonicalIVPHIRecipe> ||
- std::is_same_v<RecipeTy, VPDerivedIVRecipe>)
+ std::is_same_v<RecipeTy, VPDerivedIVRecipe> ||
+ std::is_same_v<RecipeTy, VPVectorEndPointerRecipe>)
return DefR;
else
return DefR && DefR->getOpcode() == Opcode;
@@ -686,6 +687,64 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});
}
+template <typename Addr_t, typename Mask_t> struct Load_match {
+ Addr_t Addr;
+ Mask_t Mask;
+
+ Load_match(Addr_t Addr, Mask_t Mask) : Addr(Addr), Mask(Mask) {}
+
+ template <typename OpTy> bool match(const OpTy *V) const {
+ auto *Load = dyn_cast<VPWidenLoadRecipe>(V);
+ if (!Load || !Addr.match(Load->getAddr()) || !Load->isMasked() ||
+ !Mask.match(Load->getMask()))
+ return false;
+ return true;
+ }
+};
+
+/// Match a (possibly reversed) masked load.
+template <typename Addr_t, typename Mask_t>
+inline Load_match<Addr_t, Mask_t> m_MaskedLoad(const Addr_t &Addr,
+ const Mask_t &Mask) {
+ return Load_match<Addr_t, Mask_t>(Addr, Mask);
+}
+
+template <typename Addr_t, typename Val_t, typename Mask_t> struct Store_match {
+ Addr_t Addr;
+ Val_t Val;
+ Mask_t Mask;
+
+ Store_match(Addr_t Addr, Val_t Val, Mask_t Mask)
+ : Addr(Addr), Val(Val), Mask(Mask) {}
+
+ template <typename OpTy> bool match(const OpTy *V) const {
+ auto *Store = dyn_cast<VPWidenStoreRecipe>(V);
+ if (!Store || !Addr.match(Store->getAddr()) ||
+ !Val.match(Store->getStoredValue()) || !Store->isMasked() ||
+ !Mask.match(Store->getMask()))
+ return false;
+ return true;
+ }
+};
+
+/// Match a (possibly reversed) masked store.
+template <typename Addr_t, typename Val_t, typename Mask_t>
+inline Store_match<Addr_t, Val_t, Mask_t>
+m_MaskedStore(const Addr_t &Addr, const Val_t &Val, const Mask_t &Mask) {
+ return Store_match<Addr_t, Val_t, Mask_t>(Addr, Val, Mask);
+}
+
+template <typename Op0_t, typename Op1_t>
+using VectorEndPointerRecipe_match =
+ Recipe_match<std::tuple<Op0_t, Op1_t>, 0,
+ /*Commutative*/ false, VPVectorEndPointerRecipe>;
+
+template <typename Op0_t, typename Op1_t>
+VectorEndPointerRecipe_match<Op0_t, Op1_t> m_VecEndPtr(const Op0_t &Op0,
+ const Op1_t &Op1) {
+ return VectorEndPointerRecipe_match<Op0_t, Op1_t>(Op0, Op1);
+}
+
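A usage sketch for the new matchers (recipe R and Plan are assumed to be in
scope; the bindings mirror how optimizeMaskToEVL uses them below):

    VPValue *Addr, *Mask;
    if (match(&R, m_MaskedLoad(m_VPValue(Addr), m_VPValue(Mask)))) {
      // R is a masked VPWidenLoadRecipe; Addr and Mask are bound.
    }
    if (match(&R, m_MaskedStore(m_VecEndPtr(m_VPValue(Addr),
                                            m_Specific(&Plan->getVF())),
                                m_VPValue(), m_VPValue(Mask)))) {
      // A masked store through a vector-end pointer whose VF operand is
      // the plan's VF, i.e. the reversed-access pattern.
    }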
/// Match a call argument at a given argument index.
template <typename Opnd_t> struct Argument_match {
/// Call argument index to match.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3757a59..2588c87 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -151,59 +151,65 @@ static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
static bool sinkScalarOperands(VPlan &Plan) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
+ bool ScalarVFOnly = Plan.hasScalarVFOnly();
bool Changed = false;
+
+ SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
+ auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
+ VPBasicBlock *SinkTo, VPValue *Op) {
+ auto *Candidate =
+ dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe());
+ if (!Candidate)
+ return;
+
+ // We only know how to sink VPReplicateRecipes and VPScalarIVStepsRecipes
+ // for now.
+ if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Candidate))
+ return;
+
+ if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() ||
+ Candidate->mayReadOrWriteMemory())
+ return;
+
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(Candidate))
+ if (!ScalarVFOnly && RepR->isSingleScalar())
+ return;
+
+ WorkList.insert({SinkTo, Candidate});
+ };
+
// First, collect the operands of all recipes in replicate blocks as seeds for
// sinking.
- SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
continue;
- VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
- if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
+ VPBasicBlock *VPBB = cast<VPBasicBlock>(EntryVPBB->getSuccessors().front());
+ if (VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
continue;
- for (auto &Recipe : *VPBB) {
+ for (auto &Recipe : *VPBB)
for (VPValue *Op : Recipe.operands())
- if (auto *Def =
- dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
- WorkList.insert({VPBB, Def});
- }
+ InsertIfValidSinkCandidate(VPBB, Op);
}
- bool ScalarVFOnly = Plan.hasScalarVFOnly();
// Try to sink each replicate or scalar IV steps recipe in the worklist.
for (unsigned I = 0; I != WorkList.size(); ++I) {
VPBasicBlock *SinkTo;
VPSingleDefRecipe *SinkCandidate;
std::tie(SinkTo, SinkCandidate) = WorkList[I];
- if (SinkCandidate->getParent() == SinkTo ||
- SinkCandidate->mayHaveSideEffects() ||
- SinkCandidate->mayReadOrWriteMemory())
- continue;
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
- if (!ScalarVFOnly && RepR->isSingleScalar())
- continue;
- } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
- continue;
- bool NeedsDuplicating = false;
- // All recipe users of the sink candidate must be in the same block SinkTo
- // or all users outside of SinkTo must be uniform-after-vectorization (
- // i.e., only first lane is used) . In the latter case, we need to duplicate
- // SinkCandidate.
- auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
- SinkCandidate](VPUser *U) {
- auto *UI = cast<VPRecipeBase>(U);
- if (UI->getParent() == SinkTo)
- return true;
- NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
- // We only know how to duplicate VPReplicateRecipes and
- // VPScalarIVStepsRecipes for now.
- return NeedsDuplicating &&
- isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(SinkCandidate);
- };
- if (!all_of(SinkCandidate->users(), CanSinkWithUser))
+ // All recipe users of SinkCandidate must be in the same block SinkTo or all
+ // users outside of SinkTo must only use the first lane of SinkCandidate. In
+ // the latter case, we need to duplicate SinkCandidate.
+ auto UsersOutsideSinkTo =
+ make_filter_range(SinkCandidate->users(), [SinkTo](VPUser *U) {
+ return cast<VPRecipeBase>(U)->getParent() != SinkTo;
+ });
+ if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
+ return !U->onlyFirstLaneUsed(SinkCandidate);
+ }))
continue;
+ bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
if (NeedsDuplicating) {
if (ScalarVFOnly)
@@ -228,9 +234,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
}
SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
for (VPValue *Op : SinkCandidate->operands())
- if (auto *Def =
- dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
- WorkList.insert({SinkTo, Def});
+ InsertIfValidSinkCandidate(SinkTo, Op);
Changed = true;
}
return Changed;
@@ -1056,13 +1060,9 @@ static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
return nullptr;
}
-/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
- VPlan *Plan = R.getParent()->getPlan();
-
- auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
- if (!Def)
- return;
+/// Try to simplify VPSingleDefRecipe \p Def.
+static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
+ VPlan *Plan = Def->getParent()->getPlan();
// Simplification of live-in IR values for SingleDef recipes using
// InstSimplifyFolder.
@@ -1072,7 +1072,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(V);
// Fold PredPHI LiveIn -> LiveIn.
- if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
+ if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(Def)) {
VPValue *Op = PredPHI->getOperand(0);
if (Op->isLiveIn())
PredPHI->replaceAllUsesWith(Op);
@@ -1091,12 +1091,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
- unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
+ unsigned ExtOpcode = match(Def->getOperand(0), m_SExt(m_VPValue()))
? Instruction::SExt
: Instruction::ZExt;
auto *Ext = Builder.createWidenCast(Instruction::CastOps(ExtOpcode), A,
TruncTy);
- if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
+ if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
// UnderlyingExt has distinct return type, used to retain legacy cost.
Ext->setUnderlyingValue(UnderlyingExt);
}
@@ -1159,7 +1159,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
// x && !x -> 0
- if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
+ if (match(Def, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
return Def->replaceAllUsesWith(Plan->getFalse());
if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
@@ -1187,8 +1187,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(A);
if (match(Def, m_c_Mul(m_VPValue(A), m_ZeroInt())))
- return Def->replaceAllUsesWith(R.getOperand(0) == A ? R.getOperand(1)
- : R.getOperand(0));
+ return Def->replaceAllUsesWith(
+ Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
if (match(Def, m_Not(m_VPValue(A)))) {
if (match(A, m_Not(m_VPValue(A))))
@@ -1217,8 +1217,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}
// If Cmp doesn't have a debug location, use the one from the negation,
// to preserve the location.
- if (!Cmp->getDebugLoc() && R.getDebugLoc())
- Cmp->setDebugLoc(R.getDebugLoc());
+ if (!Cmp->getDebugLoc() && Def->getDebugLoc())
+ Cmp->setDebugLoc(Def->getDebugLoc());
}
}
}
@@ -1244,7 +1244,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (match(Def, m_Intrinsic<Intrinsic::vp_merge>(m_True(), m_VPValue(A),
m_VPValue(X), m_VPValue())) &&
match(A, m_c_BinaryOr(m_Specific(X), m_VPValue(Y))) &&
- TypeInfo.inferScalarType(R.getVPSingleValue())->isIntegerTy(1)) {
+ TypeInfo.inferScalarType(Def)->isIntegerTy(1)) {
Def->setOperand(1, Def->getOperand(0));
Def->setOperand(0, Y);
return;
@@ -1252,36 +1252,36 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {
if (Phi->getOperand(0) == Phi->getOperand(1))
- Def->replaceAllUsesWith(Phi->getOperand(0));
+ Phi->replaceAllUsesWith(Phi->getOperand(0));
return;
}
// Look through ExtractLastElement (BuildVector ....).
- if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
- m_ExtractLastLanePerPart(m_BuildVector())))) {
- auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ if (match(Def, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
+ m_ExtractLastLanePerPart(m_BuildVector())))) {
+ auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 1));
return;
}
// Look through ExtractPenultimateElement (BuildVector ....).
- if (match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
- m_BuildVector()))) {
- auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ if (match(Def, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
+ m_BuildVector()))) {
+ auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 2));
return;
}
uint64_t Idx;
- if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
- auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ if (match(Def, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
+ auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
return;
}
- if (match(Def, m_BuildVector()) && all_equal(R.operands())) {
+ if (match(Def, m_BuildVector()) && all_equal(Def->operands())) {
Def->replaceAllUsesWith(
Builder.createNaryOp(VPInstruction::Broadcast, Def->getOperand(0)));
return;
@@ -1303,7 +1303,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
isa<VPPhi>(X)) {
auto *Phi = cast<VPPhi>(X);
if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
- Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) {
+ Phi->getNumUsers() == 1 && (*Phi->user_begin() == Def)) {
Phi->setOperand(0, Y);
Def->replaceAllUsesWith(Phi);
return;
@@ -1311,7 +1311,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}
// VPVectorPointer for part 0 can be replaced by their start pointer.
- if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
+ if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(Def)) {
if (VecPtr->isFirstPart()) {
VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
return;
@@ -1366,9 +1366,9 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
Plan.getEntry());
VPTypeAnalysis TypeInfo(Plan);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
- for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- simplifyRecipe(R, TypeInfo);
- }
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB))
+ if (auto *Def = dyn_cast<VPSingleDefRecipe>(&R))
+ simplifyRecipe(Def, TypeInfo);
}
}
@@ -2521,90 +2521,102 @@ void VPlanTransforms::addActiveLaneMask(
HeaderMask->eraseFromParent();
}
+template <typename Op0_t, typename Op1_t> struct RemoveMask_match {
+ Op0_t In;
+ Op1_t &Out;
+
+ RemoveMask_match(const Op0_t &In, Op1_t &Out) : In(In), Out(Out) {}
+
+ template <typename OpTy> bool match(OpTy *V) const {
+ if (m_Specific(In).match(V)) {
+ Out = nullptr;
+ return true;
+ }
+ if (m_LogicalAnd(m_Specific(In), m_VPValue(Out)).match(V))
+ return true;
+ return false;
+ }
+};
+
+/// Match a specific mask \p In, or a logical-and of \p In with another
+/// operand. On a successful match, \p Out is set to the remaining operand,
+/// or to nullptr if the matched value is exactly \p In.
+template <typename Op0_t, typename Op1_t>
+static inline RemoveMask_match<Op0_t, Op1_t> m_RemoveMask(const Op0_t &In,
+ Op1_t &Out) {
+ return RemoveMask_match<Op0_t, Op1_t>(In, Out);
+}
+
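A small usage sketch (MaskOp is a hypothetical operand value):

    VPValue *Remainder = nullptr;
    if (match(MaskOp, m_RemoveMask(HeaderMask, Remainder))) {
      // MaskOp was either HeaderMask itself (Remainder == nullptr) or
      // (logical-and HeaderMask, Remainder); either way the header mask
      // can be dropped and Remainder used as the EVL recipe's mask.
    }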
/// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
/// EVL-based recipe without the header mask. Returns nullptr if no EVL-based
/// recipe could be created.
/// \p HeaderMask Header Mask.
/// \p CurRecipe Recipe to be transformed.
/// \p TypeInfo VPlan-based type analysis.
-/// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
/// \p EVL The explicit vector length parameter of vector-predication
/// intrinsics.
static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
VPRecipeBase &CurRecipe,
- VPTypeAnalysis &TypeInfo,
- VPValue &AllOneMask, VPValue &EVL) {
- // FIXME: Don't transform recipes to EVL recipes if they're not masked by the
- // header mask.
- auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
- assert(OrigMask && "Unmasked recipe when folding tail");
- // HeaderMask will be handled using EVL.
- VPValue *Mask;
- if (match(OrigMask, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask))))
- return Mask;
- return HeaderMask == OrigMask ? nullptr : OrigMask;
- };
+ VPTypeAnalysis &TypeInfo, VPValue &EVL) {
+ VPlan *Plan = CurRecipe.getParent()->getPlan();
+ VPValue *Addr, *Mask, *EndPtr;
/// Adjust any end pointers so that they point to the end of EVL lanes not VF.
- auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * {
- auto *EndPtr = dyn_cast<VPVectorEndPointerRecipe>(Addr);
- if (!EndPtr)
- return Addr;
- assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() &&
- "VPVectorEndPointerRecipe with non-VF VF operand?");
- assert(
- all_of(EndPtr->users(),
- [](VPUser *U) {
- return cast<VPWidenMemoryRecipe>(U)->isReverse();
- }) &&
- "VPVectorEndPointRecipe not used by reversed widened memory recipe?");
- VPVectorEndPointerRecipe *EVLAddr = EndPtr->clone();
- EVLAddr->insertBefore(&CurRecipe);
- EVLAddr->setOperand(1, &EVL);
- return EVLAddr;
+ auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
+ auto *EVLEndPtr = cast<VPVectorEndPointerRecipe>(EndPtr)->clone();
+ EVLEndPtr->insertBefore(&CurRecipe);
+ EVLEndPtr->setOperand(1, &EVL);
+ return EVLEndPtr;
};
- return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe)
- .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
- VPValue *NewMask = GetNewMask(L->getMask());
- VPValue *NewAddr = GetNewAddr(L->getAddr());
- return new VPWidenLoadEVLRecipe(*L, NewAddr, EVL, NewMask);
- })
- .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
- VPValue *NewMask = GetNewMask(S->getMask());
- VPValue *NewAddr = GetNewAddr(S->getAddr());
- return new VPWidenStoreEVLRecipe(*S, NewAddr, EVL, NewMask);
- })
- .Case<VPInterleaveRecipe>([&](VPInterleaveRecipe *IR) {
- VPValue *NewMask = GetNewMask(IR->getMask());
- return new VPInterleaveEVLRecipe(*IR, EVL, NewMask);
- })
- .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
- VPValue *NewMask = GetNewMask(Red->getCondOp());
- return new VPReductionEVLRecipe(*Red, EVL, NewMask);
- })
- .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
- VPValue *LHS, *RHS;
- // Transform select with a header mask condition
- // select(header_mask, LHS, RHS)
- // into vector predication merge.
- // vp.merge(all-true, LHS, RHS, EVL)
- if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS),
- m_VPValue(RHS))))
- return nullptr;
- // Use all true as the condition because this transformation is
- // limited to selects whose condition is a header mask.
- return new VPWidenIntrinsicRecipe(
- Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
- TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
- })
- .Default([&](VPRecipeBase *R) { return nullptr; });
+ if (match(&CurRecipe,
+ m_MaskedLoad(m_VPValue(Addr), m_RemoveMask(HeaderMask, Mask))) &&
+ !cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
+ return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
+ EVL, Mask);
+
+ if (match(&CurRecipe,
+ m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
+ match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+ cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
+ return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
+ AdjustEndPtr(EndPtr), EVL, Mask);
+
+ if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
+ m_RemoveMask(HeaderMask, Mask))) &&
+ !cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+ return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
+ EVL, Mask);
+
+ if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
+ m_RemoveMask(HeaderMask, Mask))) &&
+ match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+ cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+ return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
+ AdjustEndPtr(EndPtr), EVL, Mask);
+
+ if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
+ if (Rdx->isConditional() &&
+ match(Rdx->getCondOp(), m_RemoveMask(HeaderMask, Mask)))
+ return new VPReductionEVLRecipe(*Rdx, EVL, Mask);
+
+ if (auto *Interleave = dyn_cast<VPInterleaveRecipe>(&CurRecipe))
+ if (Interleave->getMask() &&
+ match(Interleave->getMask(), m_RemoveMask(HeaderMask, Mask)))
+ return new VPInterleaveEVLRecipe(*Interleave, EVL, Mask);
+
+ VPValue *LHS, *RHS;
+ if (match(&CurRecipe,
+ m_Select(m_Specific(HeaderMask), m_VPValue(LHS), m_VPValue(RHS))))
+ return new VPWidenIntrinsicRecipe(
+ Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL},
+ TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc());
+
+ return nullptr;
}
/// Replace recipes with their EVL variants.
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPTypeAnalysis TypeInfo(Plan);
- VPValue *AllOneMask = Plan.getTrue();
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
@@ -2664,7 +2676,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
Intrinsic::experimental_vp_splice,
- {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
+ {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
VPSplice->insertBefore(&R);
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
@@ -2698,7 +2710,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
for (VPUser *U : collectUsersRecursively(EVLMask)) {
auto *CurRecipe = cast<VPRecipeBase>(U);
VPRecipeBase *EVLRecipe =
- optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
+ optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
if (!EVLRecipe)
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 8c23e78..c6380d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -32,22 +32,17 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
}
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
- VPValue *Expanded = nullptr;
if (auto *E = dyn_cast<SCEVConstant>(Expr))
- Expanded = Plan.getOrAddLiveIn(E->getValue());
- else {
- auto *U = dyn_cast<SCEVUnknown>(Expr);
- // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
- // value. Otherwise the value may be defined in a loop and using it directly
- // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
- // form.
- if (U && !isa<Instruction>(U->getValue())) {
- Expanded = Plan.getOrAddLiveIn(U->getValue());
- } else {
- Expanded = new VPExpandSCEVRecipe(Expr);
- Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
- }
- }
+ return Plan.getOrAddLiveIn(E->getValue());
+ // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
+ // value. Otherwise the value may be defined in a loop and using it directly
+ // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
+ // form.
+ auto *U = dyn_cast<SCEVUnknown>(Expr);
+ if (U && !isa<Instruction>(U->getValue()))
+ return Plan.getOrAddLiveIn(U->getValue());
+ auto *Expanded = new VPExpandSCEVRecipe(Expr);
+ Plan.getEntry()->appendRecipe(Expanded);
return Expanded;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 91734a1..34754a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -252,6 +252,13 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
for (const VPUser *U : V->users()) {
auto *UI = cast<VPRecipeBase>(U);
+ if (isa<VPIRPhi>(UI) &&
+ UI->getNumOperands() != UI->getParent()->getNumPredecessors()) {
+ errs() << "Phi-like recipe with different number of operands and "
+ "predecessors.\n";
+ return false;
+ }
+
if (auto *Phi = dyn_cast<VPPhiAccessors>(UI)) {
for (const auto &[IncomingVPV, IncomingVPBB] :
Phi->incoming_values_and_blocks()) {
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d6eb00d..27a8bbd 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2017,8 +2017,31 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
Value *ScalarV = Ext->getOperand(0);
if (!isGuaranteedNotToBePoison(ScalarV, &AC, dyn_cast<Instruction>(ScalarV),
- &DT))
- ScalarV = Builder.CreateFreeze(ScalarV);
+ &DT)) {
+ // Check whether all lanes are extracted, all extracts trigger UB
+ // on poison, and the last extract (and hence all previous ones)
+ // is guaranteed to execute if Ext executes. If so, we do not
+ // need to insert a freeze.
+ SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
+ bool AllExtractsTriggerUB = true;
+ ExtractElementInst *LastExtract = nullptr;
+ BasicBlock *ExtBB = Ext->getParent();
+ for (User *U : Ext->users()) {
+ auto *Extract = cast<ExtractElementInst>(U);
+ if (Extract->getParent() != ExtBB || !programUndefinedIfPoison(Extract)) {
+ AllExtractsTriggerUB = false;
+ break;
+ }
+ ExtractedLanes.insert(cast<ConstantInt>(Extract->getIndexOperand()));
+ if (!LastExtract || LastExtract->comesBefore(Extract))
+ LastExtract = Extract;
+ }
+ if (ExtractedLanes.size() != DstTy->getNumElements() ||
+ !AllExtractsTriggerUB ||
+ !isGuaranteedToTransferExecutionToSuccessor(Ext->getIterator(),
+ LastExtract->getIterator()))
+ ScalarV = Builder.CreateFreeze(ScalarV);
+ }
ScalarV = Builder.CreateBitCast(
ScalarV,
IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));