Diffstat (limited to 'llvm/lib')
-rwxr-xr-x  llvm/lib/Analysis/ConstantFolding.cpp | 56
-rw-r--r--  llvm/lib/Analysis/DXILResource.cpp | 47
-rw-r--r--  llvm/lib/Analysis/LazyValueInfo.cpp | 10
-rw-r--r--  llvm/lib/Analysis/LoopInfo.cpp | 4
-rw-r--r--  llvm/lib/Analysis/MLInlineAdvisor.cpp | 18
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 193
-rw-r--r--  llvm/lib/Analysis/StaticDataProfileInfo.cpp | 70
-rw-r--r--  llvm/lib/AsmParser/LLParser.cpp | 10
-rw-r--r--  llvm/lib/BinaryFormat/XCOFF.cpp | 30
-rw-r--r--  llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 6
-rw-r--r--  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 1
-rw-r--r--  llvm/lib/CAS/OnDiskTrieRawHashMap.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 55
-rw-r--r--  llvm/lib/CodeGen/BranchRelaxation.cpp | 14
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/MachineCopyPropagation.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 10
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 4
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/CMakeLists.txt | 1
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp | 22
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp | 18
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp | 10
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp | 6
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp | 104
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp | 367
-rw-r--r--  llvm/lib/IR/AsmWriter.cpp | 11
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp | 59
-rw-r--r--  llvm/lib/IR/DebugInfo.cpp | 43
-rw-r--r--  llvm/lib/IR/Type.cpp | 4
-rw-r--r--  llvm/lib/IR/Verifier.cpp | 5
-rw-r--r--  llvm/lib/LTO/LTO.cpp | 34
-rw-r--r--  llvm/lib/LTO/LTOBackend.cpp | 1
-rw-r--r--  llvm/lib/MC/MCObjectFileInfo.cpp | 7
-rw-r--r--  llvm/lib/ObjectYAML/ELFYAML.cpp | 18
-rw-r--r--  llvm/lib/Passes/PassBuilder.cpp | 33
-rw-r--r--  llvm/lib/Passes/PassBuilderPipelines.cpp | 56
-rw-r--r--  llvm/lib/Passes/PassRegistry.def | 1
-rw-r--r--  llvm/lib/ProfileData/InstrProf.cpp | 2
-rw-r--r--  llvm/lib/Remarks/BitstreamRemarkParser.h | 4
-rw-r--r--  llvm/lib/Support/APFloat.cpp | 538
-rw-r--r--  llvm/lib/Support/PrettyStackTrace.cpp | 2
-rw-r--r--  llvm/lib/Support/SourceMgr.cpp | 26
-rw-r--r--  llvm/lib/Support/Windows/Signals.inc | 4
-rw-r--r--  llvm/lib/TableGen/Main.cpp | 2
-rw-r--r--  llvm/lib/TableGen/Parser.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 40
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/DSInstructions.td | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 130
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/FLATInstructions.td | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNRegPressure.h | 9
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 180
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 19
-rw-r--r--  llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 61
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h | 9
-rw-r--r--  llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 49
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3
-rw-r--r--  llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 29
-rw-r--r--  llvm/lib/Target/DirectX/DXIL.td | 8
-rw-r--r--  llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 8
-rw-r--r--  llvm/lib/Target/DirectX/DXILOpLowering.cpp | 25
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp | 15
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp | 8
-rw-r--r--  llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 16
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp | 12
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp | 10
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 4
-rw-r--r--  llvm/lib/Target/Mips/MipsFastISel.cpp | 7
-rw-r--r--  llvm/lib/Target/Mips/MipsISelLowering.cpp | 7
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 12
-rw-r--r--  llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 22
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 28
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 726
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 20
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.h | 5
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp | 20
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 4
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 10
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 1
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 32
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td | 4
-rw-r--r--  llvm/lib/Target/Sparc/SparcFrameLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp | 2
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 32
-rw-r--r--  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 11
-rw-r--r--  llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/X86/X86.td | 7
-rw-r--r--  llvm/lib/Target/X86/X86FloatingPoint.cpp | 3
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 81
-rw-r--r--  llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 20
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td | 13
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp | 22
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp | 51
-rw-r--r--  llvm/lib/TargetParser/ARMTargetParserCommon.cpp | 10
-rw-r--r--  llvm/lib/TargetParser/Host.cpp | 18
-rw-r--r--  llvm/lib/TargetParser/RISCVISAInfo.cpp | 2
-rw-r--r--  llvm/lib/TargetParser/TargetDataLayout.cpp | 5
-rw-r--r--  llvm/lib/TargetParser/Triple.cpp | 166
-rw-r--r--  llvm/lib/TargetParser/Unix/Host.inc | 22
-rw-r--r--  llvm/lib/TargetParser/X86TargetParser.cpp | 4
-rw-r--r--  llvm/lib/Transforms/CFGuard/CFGuard.cpp | 25
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp | 22
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 39
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 6
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 4
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 42
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 36
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp | 24
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 3
-rw-r--r--  llvm/lib/Transforms/ObjCARC/PtrState.h | 3
-rw-r--r--  llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 36
-rw-r--r--  llvm/lib/Transforms/Scalar/GVNSink.cpp | 48
-rw-r--r--  llvm/lib/Transforms/Scalar/GuardWidening.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 54
-rw-r--r--  llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Scalar/LICM.cpp | 16
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp | 11
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopFuse.cpp | 34
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopPassManager.cpp | 9
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 59
-rw-r--r--  llvm/lib/Transforms/Scalar/NewGVN.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Scalar/Reassociate.cpp | 42
-rw-r--r--  llvm/lib/Transforms/Scalar/Reg2Mem.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp | 40
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalarizer.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 40
-rw-r--r--  llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp | 7
-rw-r--r--  llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUnroll.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Utils/SCCPSolver.cpp | 186
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 30
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 111
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 39
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 15
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 36
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 30
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 121
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 13
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.h | 4
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 15
169 files changed, 3629 insertions, 1665 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 45c889c..a5ba197 100755
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2177,16 +2177,13 @@ Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
return PoisonValue::get(VT->getElementType());
// TODO: Handle undef.
- if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op))
- return nullptr;
-
- auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
+ auto *EltC = dyn_cast_or_null<ConstantInt>(Op->getAggregateElement(0U));
if (!EltC)
return nullptr;
APInt Acc = EltC->getValue();
for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
- if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
+ if (!(EltC = dyn_cast_or_null<ConstantInt>(Op->getAggregateElement(I))))
return nullptr;
const APInt &X = EltC->getValue();
switch (IID) {
@@ -3059,35 +3056,25 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
Val = Val | Val << 1;
return ConstantInt::get(Ty, Val);
}
-
- default:
- return nullptr;
}
}
- switch (IntrinsicID) {
- default: break;
- case Intrinsic::vector_reduce_add:
- case Intrinsic::vector_reduce_mul:
- case Intrinsic::vector_reduce_and:
- case Intrinsic::vector_reduce_or:
- case Intrinsic::vector_reduce_xor:
- case Intrinsic::vector_reduce_smin:
- case Intrinsic::vector_reduce_smax:
- case Intrinsic::vector_reduce_umin:
- case Intrinsic::vector_reduce_umax:
- if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))
- return C;
- break;
- }
-
- // Support ConstantVector in case we have an Undef in the top.
- if (isa<ConstantVector>(Operands[0]) ||
- isa<ConstantDataVector>(Operands[0]) ||
- isa<ConstantAggregateZero>(Operands[0])) {
+ if (Operands[0]->getType()->isVectorTy()) {
auto *Op = cast<Constant>(Operands[0]);
switch (IntrinsicID) {
default: break;
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_umax:
+ if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))
+ return C;
+ break;
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse2_cvtsd2si:
@@ -3116,10 +3103,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
case Intrinsic::wasm_alltrue:
// Check each element individually
unsigned E = cast<FixedVectorType>(Op->getType())->getNumElements();
- for (unsigned I = 0; I != E; ++I)
- if (Constant *Elt = Op->getAggregateElement(I))
- if (Elt->isZeroValue())
- return ConstantInt::get(Ty, 0);
+ for (unsigned I = 0; I != E; ++I) {
+ Constant *Elt = Op->getAggregateElement(I);
+ // Return false as soon as we find a non-true element.
+ if (Elt && Elt->isZeroValue())
+ return ConstantInt::get(Ty, 0);
+ // Bail as soon as we find an element we cannot prove to be true.
+ if (!Elt || !isa<ConstantInt>(Elt))
+ return nullptr;
+ }
return ConstantInt::get(Ty, 1);
}
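
Note: the rewritten wasm_alltrue loop above folds to false on any known-zero lane and bails out on any lane it cannot prove to be a true constant. A minimal standalone sketch of that three-way decision over a toy lane model, using std::optional for "lane value unknown"; the names here are illustrative and not LLVM's API.

#include <optional>
#include <vector>

// Sketch of the folding rule in the hunk above: each lane is either a
// known integer or unknown (std::nullopt). Return 0 if any lane is known
// zero, 1 if every lane is a known non-zero constant, and std::nullopt
// when the fold must give up.
static std::optional<int>
foldAllTrue(const std::vector<std::optional<long>> &Lanes) {
  for (const std::optional<long> &L : Lanes) {
    if (L && *L == 0)
      return 0;            // a known-false lane decides the result
    if (!L)
      return std::nullopt; // cannot prove this lane true: bail out
  }
  return 1;
}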
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index b78cc03e..f9bf092 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -281,6 +281,38 @@ static StructType *getOrCreateElementStruct(Type *ElemType, StringRef Name) {
return StructType::create(ElemType, Name);
}
+static Type *getTypeWithoutPadding(Type *Ty) {
+ // Recursively remove padding from structures.
+ if (auto *ST = dyn_cast<StructType>(Ty)) {
+ LLVMContext &Ctx = Ty->getContext();
+ SmallVector<Type *> ElementTypes;
+ ElementTypes.reserve(ST->getNumElements());
+ for (Type *ElTy : ST->elements()) {
+ if (isa<PaddingExtType>(ElTy))
+ continue;
+ ElementTypes.push_back(getTypeWithoutPadding(ElTy));
+ }
+
+ // Handle explicitly padded cbuffer arrays like { [ n x paddedty ], ty }
+ if (ElementTypes.size() == 2)
+ if (auto *AT = dyn_cast<ArrayType>(ElementTypes[0]))
+ if (ElementTypes[1] == AT->getElementType())
+ return ArrayType::get(ElementTypes[1], AT->getNumElements() + 1);
+
+ // If we only have a single element, don't wrap it in a struct.
+ if (ElementTypes.size() == 1)
+ return ElementTypes[0];
+
+ return StructType::get(Ctx, ElementTypes, /*IsPacked=*/false);
+ }
+ // Arrays just need to have their element type adjusted.
+ if (auto *AT = dyn_cast<ArrayType>(Ty))
+ return ArrayType::get(getTypeWithoutPadding(AT->getElementType()),
+ AT->getNumElements());
+ // Anything else should be good as is.
+ return Ty;
+}
+
StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) {
SmallString<64> TypeName;
@@ -334,14 +366,21 @@ StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) {
}
case ResourceKind::CBuffer: {
auto *RTy = cast<CBufferExtType>(HandleTy);
- LayoutExtType *LayoutType = cast<LayoutExtType>(RTy->getResourceType());
- StructType *Ty = cast<StructType>(LayoutType->getWrappedType());
SmallString<64> Name = getResourceKindName(Kind);
if (!CBufferName.empty()) {
Name.append(".");
Name.append(CBufferName);
}
- return StructType::create(Ty->elements(), Name);
+
+ // TODO: Remove this when we update the frontend to use explicit padding.
+ if (LayoutExtType *LayoutType =
+ dyn_cast<LayoutExtType>(RTy->getResourceType())) {
+ StructType *Ty = cast<StructType>(LayoutType->getWrappedType());
+ return StructType::create(Ty->elements(), Name);
+ }
+
+ return getOrCreateElementStruct(
+ getTypeWithoutPadding(RTy->getResourceType()), Name);
}
case ResourceKind::Sampler: {
auto *RTy = cast<SamplerExtType>(HandleTy);
@@ -454,10 +493,10 @@ uint32_t ResourceTypeInfo::getCBufferSize(const DataLayout &DL) const {
Type *ElTy = cast<CBufferExtType>(HandleTy)->getResourceType();
+ // TODO: Remove this when we update the frontend to use explicit padding.
if (auto *LayoutTy = dyn_cast<LayoutExtType>(ElTy))
return LayoutTy->getSize();
- // TODO: What should we do with unannotated arrays?
return DL.getTypeAllocSize(ElTy);
}
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 0e5bc48..df75999 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -947,9 +947,8 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) {
/*UseBlockValue*/ false));
}
- ValueLatticeElement Result = TrueVal;
- Result.mergeIn(FalseVal);
- return Result;
+ TrueVal.mergeIn(FalseVal);
+ return TrueVal;
}
std::optional<ConstantRange>
@@ -1778,9 +1777,8 @@ ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
assert(OptResult && "Value not available after solving");
}
- ValueLatticeElement Result = *OptResult;
- LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
- return Result;
+ LLVM_DEBUG(dbgs() << " Result = " << *OptResult << "\n");
+ return *OptResult;
}
ValueLatticeElement LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index a8c3173..d84721b 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -986,8 +986,8 @@ PreservedAnalyses LoopPrinterPass::run(Function &F,
return PreservedAnalyses::all();
}
-void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) {
-
+void llvm::printLoop(const Loop &L, raw_ostream &OS,
+ const std::string &Banner) {
if (forcePrintModuleIR()) {
// handling -print-module-scope
OS << Banner << " (loop: ";
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index f90717d..1d1a5560 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -61,6 +61,9 @@ static cl::opt<SkipMLPolicyCriteria> SkipPolicy(
static cl::opt<std::string> ModelSelector("ml-inliner-model-selector",
cl::Hidden, cl::init(""));
+static cl::opt<bool> StopImmediatelyForTest("ml-inliner-stop-immediately",
+ cl::Hidden);
+
#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT
@@ -214,6 +217,7 @@ MLInlineAdvisor::MLInlineAdvisor(
return;
}
ModelRunner->switchContext("");
+ ForceStop = StopImmediatelyForTest;
}
unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
@@ -379,9 +383,17 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
if (SkipPolicy == SkipMLPolicyCriteria::IfCallerIsNotCold) {
- if (!PSI.isFunctionEntryCold(&Caller))
- return std::make_unique<InlineAdvice>(this, CB, ORE,
- GetDefaultAdvice(CB));
+ if (!PSI.isFunctionEntryCold(&Caller)) {
+ // Return a MLInlineAdvice, despite delegating to the default advice,
+ // because we need to keep track of the internal state. This is different
+ // from the other instances where we return a "default" InlineAdvice,
+ // which happen at points we won't come back to the MLAdvisor for
+ // decisions requiring that state.
+ return ForceStop ? std::make_unique<InlineAdvice>(this, CB, ORE,
+ GetDefaultAdvice(CB))
+ : std::make_unique<MLInlineAdvice>(this, CB, ORE,
+ GetDefaultAdvice(CB));
+ }
}
auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
// If this is a "never inline" case, there won't be any changes to internal
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index a64b93d..425420f 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1840,19 +1840,19 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM
// = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>.
//
- if (SM->getNumOperands() == 2)
- if (auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0)))
- if (MulLHS->getAPInt().isPowerOf2())
- if (auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) {
- int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) -
- MulLHS->getAPInt().logBase2();
- Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits);
- return getMulExpr(
- getZeroExtendExpr(MulLHS, Ty),
- getZeroExtendExpr(
- getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty),
- SCEV::FlagNUW, Depth + 1);
- }
+ const APInt *C;
+ const SCEV *TruncRHS;
+ if (match(SM,
+ m_scev_Mul(m_scev_APInt(C), m_scev_Trunc(m_SCEV(TruncRHS)))) &&
+ C->isPowerOf2()) {
+ int NewTruncBits =
+ getTypeSizeInBits(SM->getOperand(1)->getType()) - C->logBase2();
+ Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits);
+ return getMulExpr(
+ getZeroExtendExpr(SM->getOperand(0), Ty),
+ getZeroExtendExpr(getTruncateExpr(TruncRHS, NewTruncTy), Ty),
+ SCEV::FlagNUW, Depth + 1);
+ }
}
// zext(umin(x, y)) -> umin(zext(x), zext(y))
@@ -3144,20 +3144,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
if (Ops.size() == 2) {
// C1*(C2+V) -> C1*C2 + C1*V
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
- // If any of Add's ops are Adds or Muls with a constant, apply this
- // transformation as well.
- //
- // TODO: There are some cases where this transformation is not
- // profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of
- // this transformation should be narrowed down.
- if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add)) {
- const SCEV *LHS = getMulExpr(LHSC, Add->getOperand(0),
- SCEV::FlagAnyWrap, Depth + 1);
- const SCEV *RHS = getMulExpr(LHSC, Add->getOperand(1),
- SCEV::FlagAnyWrap, Depth + 1);
- return getAddExpr(LHS, RHS, SCEV::FlagAnyWrap, Depth + 1);
- }
+ // If any of Add's ops are Adds or Muls with a constant, apply this
+ // transformation as well.
+ //
+ // TODO: There are some cases where this transformation is not
+ // profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of
+ // this transformation should be narrowed down.
+ const SCEV *Op0, *Op1;
+ if (match(Ops[1], m_scev_Add(m_SCEV(Op0), m_SCEV(Op1))) &&
+ containsConstantInAddMulChain(Ops[1])) {
+ const SCEV *LHS = getMulExpr(LHSC, Op0, SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *RHS = getMulExpr(LHSC, Op1, SCEV::FlagAnyWrap, Depth + 1);
+ return getAddExpr(LHS, RHS, SCEV::FlagAnyWrap, Depth + 1);
+ }
if (Ops[0]->isAllOnesValue()) {
// If we have a mul by -1 of an add, try distributing the -1 among the
@@ -3578,20 +3577,12 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
}
// ((-C + (C smax %x)) /u %x) evaluates to zero, for any positive constant C.
- if (const auto *AE = dyn_cast<SCEVAddExpr>(LHS);
- AE && AE->getNumOperands() == 2) {
- if (const auto *VC = dyn_cast<SCEVConstant>(AE->getOperand(0))) {
- const APInt &NegC = VC->getAPInt();
- if (NegC.isNegative() && !NegC.isMinSignedValue()) {
- const auto *MME = dyn_cast<SCEVSMaxExpr>(AE->getOperand(1));
- if (MME && MME->getNumOperands() == 2 &&
- isa<SCEVConstant>(MME->getOperand(0)) &&
- cast<SCEVConstant>(MME->getOperand(0))->getAPInt() == -NegC &&
- MME->getOperand(1) == RHS)
- return getZero(LHS->getType());
- }
- }
- }
+ const APInt *NegC, *C;
+ if (match(LHS,
+ m_scev_Add(m_scev_APInt(NegC),
+ m_scev_SMax(m_scev_APInt(C), m_scev_Specific(RHS)))) &&
+ NegC->isNegative() && !NegC->isMinSignedValue() && *C == -*NegC)
+ return getZero(LHS->getType());
// TODO: Generalize to handle any common factors.
// udiv (mul nuw a, vscale), (mul nuw b, vscale) --> udiv a, b
@@ -4623,17 +4614,11 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
/// If Expr computes ~A, return A else return nullptr
static const SCEV *MatchNotExpr(const SCEV *Expr) {
- const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
- if (!Add || Add->getNumOperands() != 2 ||
- !Add->getOperand(0)->isAllOnesValue())
- return nullptr;
-
- const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
- if (!AddRHS || AddRHS->getNumOperands() != 2 ||
- !AddRHS->getOperand(0)->isAllOnesValue())
- return nullptr;
-
- return AddRHS->getOperand(1);
+ const SCEV *MulOp;
+ if (match(Expr, m_scev_Add(m_scev_AllOnes(),
+ m_scev_Mul(m_scev_AllOnes(), m_SCEV(MulOp)))))
+ return MulOp;
+ return nullptr;
}
/// Return a SCEV corresponding to ~V = -1-V
@@ -10797,19 +10782,15 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {
}
static bool MatchBinarySub(const SCEV *S, const SCEV *&LHS, const SCEV *&RHS) {
- const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S);
- if (!Add || Add->getNumOperands() != 2)
+ const SCEV *Op0, *Op1;
+ if (!match(S, m_scev_Add(m_SCEV(Op0), m_SCEV(Op1))))
return false;
- if (auto *ME = dyn_cast<SCEVMulExpr>(Add->getOperand(0));
- ME && ME->getNumOperands() == 2 && ME->getOperand(0)->isAllOnesValue()) {
- LHS = Add->getOperand(1);
- RHS = ME->getOperand(1);
+ if (match(Op0, m_scev_Mul(m_scev_AllOnes(), m_SCEV(RHS)))) {
+ LHS = Op1;
return true;
}
- if (auto *ME = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
- ME && ME->getNumOperands() == 2 && ME->getOperand(0)->isAllOnesValue()) {
- LHS = Add->getOperand(0);
- RHS = ME->getOperand(1);
+ if (match(Op1, m_scev_Mul(m_scev_AllOnes(), m_SCEV(RHS)))) {
+ LHS = Op0;
return true;
}
return false;
@@ -12172,13 +12153,10 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
const SCEV *&L, const SCEV *&R,
SCEV::NoWrapFlags &Flags) {
- const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
- if (!AE || AE->getNumOperands() != 2)
+ if (!match(Expr, m_scev_Add(m_SCEV(L), m_SCEV(R))))
return false;
- L = AE->getOperand(0);
- R = AE->getOperand(1);
- Flags = AE->getNoWrapFlags();
+ Flags = cast<SCEVAddExpr>(Expr)->getNoWrapFlags();
return true;
}
@@ -12220,12 +12198,11 @@ ScalarEvolution::computeConstantDifference(const SCEV *More, const SCEV *Less) {
// Try to match a common constant multiply.
auto MatchConstMul =
[](const SCEV *S) -> std::optional<std::pair<const SCEV *, APInt>> {
- auto *M = dyn_cast<SCEVMulExpr>(S);
- if (!M || M->getNumOperands() != 2 ||
- !isa<SCEVConstant>(M->getOperand(0)))
- return std::nullopt;
- return {
- {M->getOperand(1), cast<SCEVConstant>(M->getOperand(0))->getAPInt()}};
+ const APInt *C;
+ const SCEV *Op;
+ if (match(S, m_scev_Mul(m_scev_APInt(C), m_SCEV(Op))))
+ return {{Op, *C}};
+ return std::nullopt;
};
if (auto MatchedMore = MatchConstMul(More)) {
if (auto MatchedLess = MatchConstMul(Less)) {
@@ -15557,19 +15534,10 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
auto IsMinMaxSCEVWithNonNegativeConstant =
[&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS,
const SCEV *&RHS) {
- if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) {
- if (MinMax->getNumOperands() != 2)
- return false;
- if (auto *C = dyn_cast<SCEVConstant>(MinMax->getOperand(0))) {
- if (C->getAPInt().isNegative())
- return false;
- SCTy = MinMax->getSCEVType();
- LHS = MinMax->getOperand(0);
- RHS = MinMax->getOperand(1);
- return true;
- }
- }
- return false;
+ const APInt *C;
+ SCTy = Expr->getSCEVType();
+ return match(Expr, m_scev_MinMax(m_SCEV(LHS), m_SCEV(RHS))) &&
+ match(LHS, m_scev_APInt(C)) && C->isNonNegative();
};
// Return a new SCEV that modifies \p Expr to the closest number divides by
@@ -15772,19 +15740,26 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
GetNextSCEVDividesByDivisor(One, DividesBy);
To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
} else {
+ // LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS),
+ // but creating the subtraction eagerly is expensive. Track the
+ // inequalities in a separate map, and materialize the rewrite lazily
+ // when encountering a suitable subtraction while re-writing.
if (LHS->getType()->isPointerTy()) {
LHS = SE.getLosslessPtrToIntExpr(LHS);
RHS = SE.getLosslessPtrToIntExpr(RHS);
if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS))
break;
}
- auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) {
- const SCEV *Sub = SE.getMinusSCEV(A, B);
- AddRewrite(Sub, Sub,
- SE.getUMaxExpr(Sub, SE.getOne(From->getType())));
- };
- AddSubRewrite(LHS, RHS);
- AddSubRewrite(RHS, LHS);
+ const SCEVConstant *C;
+ const SCEV *A, *B;
+ if (match(RHS, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) &&
+ match(LHS, m_scev_Add(m_scev_Specific(C), m_SCEV(B)))) {
+ RHS = A;
+ LHS = B;
+ }
+ if (LHS > RHS)
+ std::swap(LHS, RHS);
+ Guards.NotEqual.insert({LHS, RHS});
continue;
}
break;
@@ -15918,13 +15893,15 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
class SCEVLoopGuardRewriter
: public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
const DenseMap<const SCEV *, const SCEV *> &Map;
+ const SmallDenseSet<std::pair<const SCEV *, const SCEV *>> &NotEqual;
SCEV::NoWrapFlags FlagMask = SCEV::FlagAnyWrap;
public:
SCEVLoopGuardRewriter(ScalarEvolution &SE,
const ScalarEvolution::LoopGuards &Guards)
- : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap) {
+ : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap),
+ NotEqual(Guards.NotEqual) {
if (Guards.PreserveNUW)
FlagMask = ScalarEvolution::setFlags(FlagMask, SCEV::FlagNUW);
if (Guards.PreserveNSW)
@@ -15979,14 +15956,36 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ // Helper to check if S is a subtraction (A - B) where A != B, and if so,
+ // return UMax(S, 1).
+ auto RewriteSubtraction = [&](const SCEV *S) -> const SCEV * {
+ const SCEV *LHS, *RHS;
+ if (MatchBinarySub(S, LHS, RHS)) {
+ if (LHS > RHS)
+ std::swap(LHS, RHS);
+ if (NotEqual.contains({LHS, RHS}))
+ return SE.getUMaxExpr(S, SE.getOne(S->getType()));
+ }
+ return nullptr;
+ };
+
+ // Check if Expr itself is a subtraction pattern with guard info.
+ if (const SCEV *Rewritten = RewriteSubtraction(Expr))
+ return Rewritten;
+
// Trip count expressions sometimes consist of adding 3 operands, i.e.
// (Const + A + B). There may be guard info for A + B, and if so, apply
// it.
// TODO: Could more generally apply guards to Add sub-expressions.
if (isa<SCEVConstant>(Expr->getOperand(0)) &&
Expr->getNumOperands() == 3) {
- if (const SCEV *S = Map.lookup(
- SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
+ const SCEV *Add =
+ SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2));
+ if (const SCEV *Rewritten = RewriteSubtraction(Add))
+ return SE.getAddExpr(
+ Expr->getOperand(0), Rewritten,
+ ScalarEvolution::maskFlags(Expr->getNoWrapFlags(), FlagMask));
+ if (const SCEV *S = Map.lookup(Add))
return SE.getAddExpr(Expr->getOperand(0), S);
}
SmallVector<const SCEV *, 2> Operands;
@@ -16021,7 +16020,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
};
- if (RewriteMap.empty())
+ if (RewriteMap.empty() && NotEqual.empty())
return Expr;
SCEVLoopGuardRewriter Rewriter(SE, *this);
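
Note: the LoopGuards changes above stop eagerly materializing LHS - RHS for every LHS != RHS guard and instead record the pair in a NotEqual set, applying the umax-with-1 rewrite only when a matching subtraction shows up during rewriting. The underlying fact is ordinary wrapping unsigned arithmetic: if A != B, then A - B is non-zero, so clamping it to at least 1 changes nothing. A small self-contained check of that identity; no SCEV types involved, and the names are illustrative.

#include <algorithm>
#include <cassert>
#include <cstdint>

// If A != B, the wrapping difference A - B can never be zero, so
// umax(A - B, 1) == A - B. This is the fact the NotEqual guards recorded
// in the hunk above let SCEVLoopGuardRewriter exploit lazily.
static uint64_t umaxWithOne(uint64_t X) { return std::max<uint64_t>(X, 1); }

int main() {
  const uint64_t Samples[] = {0, 1, 7, 1u << 20, ~uint64_t(0)};
  for (uint64_t A : Samples)
    for (uint64_t B : Samples)
      if (A != B)
        assert(umaxWithOne(A - B) == A - B);
}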
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index e7f0b2c..61d4935 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -1,10 +1,14 @@
#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
+#define DEBUG_TYPE "static-data-profile-info"
+
using namespace llvm;
namespace llvm {
@@ -79,6 +83,17 @@ StaticDataProfileInfo::getConstantHotnessUsingProfileCount(
return StaticDataHotness::LukewarmOrUnknown;
}
+StaticDataProfileInfo::StaticDataHotness
+StaticDataProfileInfo::getSectionHotnessUsingDataAccessProfile(
+ std::optional<StringRef> MaybeSectionPrefix) const {
+ if (!MaybeSectionPrefix)
+ return StaticDataHotness::LukewarmOrUnknown;
+ StringRef Prefix = *MaybeSectionPrefix;
+ assert((Prefix == "hot" || Prefix == "unlikely") &&
+ "Expect section_prefix to be one of hot or unlikely");
+ return Prefix == "hot" ? StaticDataHotness::Hot : StaticDataHotness::Cold;
+}
+
StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const {
switch (Hotness) {
case StaticDataHotness::Cold:
@@ -101,13 +116,66 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
StringRef StaticDataProfileInfo::getConstantSectionPrefix(
const Constant *C, const ProfileSummaryInfo *PSI) const {
std::optional<uint64_t> Count = getConstantProfileCount(C);
+
+#ifndef NDEBUG
+ auto DbgPrintPrefix = [](StringRef Prefix) {
+ return Prefix.empty() ? "<empty>" : Prefix;
+ };
+#endif
+
+ if (EnableDataAccessProf) {
+ // Module flag `HasDataAccessProf` is 1 -> empty section prefix means
+ // unknown hotness except for string literals.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C);
+ GV && llvm::memprof::IsAnnotationOK(*GV) &&
+ !GV->getName().starts_with(".str")) {
+ auto HotnessFromDataAccessProf =
+ getSectionHotnessUsingDataAccessProfile(GV->getSectionPrefix());
+
+ if (!Count) {
+ StringRef Prefix = hotnessToStr(HotnessFromDataAccessProf);
+ LLVM_DEBUG(dbgs() << GV->getName() << " has section prefix "
+ << DbgPrintPrefix(Prefix)
+ << ", solely from data access profiles\n");
+ return Prefix;
+ }
+
+ // Both data access profiles and PGO counters are available. Use the
+ // hotter one.
+ auto HotnessFromPGO = getConstantHotnessUsingProfileCount(C, PSI, *Count);
+ StaticDataHotness GlobalVarHotness = StaticDataHotness::LukewarmOrUnknown;
+ if (HotnessFromDataAccessProf == StaticDataHotness::Hot ||
+ HotnessFromPGO == StaticDataHotness::Hot) {
+ GlobalVarHotness = StaticDataHotness::Hot;
+ } else if (HotnessFromDataAccessProf ==
+ StaticDataHotness::LukewarmOrUnknown ||
+ HotnessFromPGO == StaticDataHotness::LukewarmOrUnknown) {
+ GlobalVarHotness = StaticDataHotness::LukewarmOrUnknown;
+ } else {
+ GlobalVarHotness = StaticDataHotness::Cold;
+ }
+ StringRef Prefix = hotnessToStr(GlobalVarHotness);
+ LLVM_DEBUG(
+ dbgs() << GV->getName() << " has section prefix "
+ << DbgPrintPrefix(Prefix)
+ << ", the max from data access profiles as "
+ << DbgPrintPrefix(hotnessToStr(HotnessFromDataAccessProf))
+ << " and PGO counters as "
+ << DbgPrintPrefix(hotnessToStr(HotnessFromPGO)) << "\n");
+ return Prefix;
+ }
+ }
if (!Count)
return "";
return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count));
}
bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
- Info.reset(new StaticDataProfileInfo());
+ bool EnableDataAccessProf = false;
+ if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("EnableDataAccessProf")))
+ EnableDataAccessProf = MD->getZExtValue();
+ Info.reset(new StaticDataProfileInfo(EnableDataAccessProf));
return false;
}
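
Note: the getConstantSectionPrefix change above combines two hotness sources when both data access profiles and PGO counters are available: hot wins if either source says hot, a lukewarm/unknown verdict from either side keeps the result unknown, and cold requires both to agree. A standalone sketch of that merge rule, with an illustrative enum rather than LLVM's StaticDataHotness.

#include <cassert>

// Toy model of the hotness merge in the hunk above. Enum and function
// names are illustrative, not LLVM's.
enum class Hotness { Cold, LukewarmOrUnknown, Hot };

static Hotness mergeHotness(Hotness FromDataAccessProf, Hotness FromPGO) {
  if (FromDataAccessProf == Hotness::Hot || FromPGO == Hotness::Hot)
    return Hotness::Hot;
  if (FromDataAccessProf == Hotness::LukewarmOrUnknown ||
      FromPGO == Hotness::LukewarmOrUnknown)
    return Hotness::LukewarmOrUnknown;
  return Hotness::Cold;
}

int main() {
  assert(mergeHotness(Hotness::Cold, Hotness::Hot) == Hotness::Hot);
  assert(mergeHotness(Hotness::Cold, Hotness::LukewarmOrUnknown) ==
         Hotness::LukewarmOrUnknown);
  assert(mergeHotness(Hotness::Cold, Hotness::Cold) == Hotness::Cold);
}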
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 1dd470b..cf63285 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1270,7 +1270,7 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, unsigned NameID,
if (parseToken(lltok::StringConstant, "expected partition string"))
return true;
} else if (!IsAlias && Lex.getKind() == lltok::MetadataVar) {
- if (parseGlobalObjectMetadataAttachment(*GI.get()))
+ if (parseGlobalObjectMetadataAttachment(*GI))
return true;
} else {
return tokError("unknown alias or ifunc property!");
@@ -5876,6 +5876,7 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
REQUIRED(file, MDField, (/* AllowNull */ false)); \
OPTIONAL(language, DwarfLangField, ); \
OPTIONAL(sourceLanguageName, DwarfSourceLangNameField, ); \
+ OPTIONAL(sourceLanguageVersion, MDUnsignedField, (0, UINT32_MAX)); \
OPTIONAL(producer, MDStringField, ); \
OPTIONAL(isOptimized, MDBoolField, ); \
OPTIONAL(flags, MDStringField, ); \
@@ -5905,10 +5906,15 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
return error(Loc, "can only specify one of 'language' and "
"'sourceLanguageName' on !DICompileUnit");
+ if (sourceLanguageVersion.Seen && !sourceLanguageName.Seen)
+ return error(Loc, "'sourceLanguageVersion' requires an associated "
+ "'sourceLanguageName' on !DICompileUnit");
+
Result = DICompileUnit::getDistinct(
Context,
language.Seen ? DISourceLanguageName(language.Val)
- : DISourceLanguageName(sourceLanguageName.Val, 0),
+ : DISourceLanguageName(sourceLanguageName.Val,
+ sourceLanguageVersion.Val),
file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val,
splitDebugFilename.Val, emissionKind.Val, enums.Val, retainedTypes.Val,
globals.Val, imports.Val, macros.Val, dwoId.Val, splitDebugInlining.Val,
diff --git a/llvm/lib/BinaryFormat/XCOFF.cpp b/llvm/lib/BinaryFormat/XCOFF.cpp
index e0a4471..19d5b98 100644
--- a/llvm/lib/BinaryFormat/XCOFF.cpp
+++ b/llvm/lib/BinaryFormat/XCOFF.cpp
@@ -112,26 +112,26 @@ StringRef XCOFF::getNameForTracebackTableLanguageId(
XCOFF::CFileCpuId XCOFF::getCpuID(StringRef CPUName) {
StringRef CPU = PPC::normalizeCPUName(CPUName);
return StringSwitch<XCOFF::CFileCpuId>(CPU)
- .Cases("generic", "COM", XCOFF::TCPU_COM)
+ .Cases({"generic", "COM"}, XCOFF::TCPU_COM)
.Case("601", XCOFF::TCPU_601)
- .Cases("602", "603", "603e", "603ev", XCOFF::TCPU_603)
- .Cases("604", "604e", XCOFF::TCPU_604)
+ .Cases({"602", "603", "603e", "603ev"}, XCOFF::TCPU_603)
+ .Cases({"604", "604e"}, XCOFF::TCPU_604)
.Case("620", XCOFF::TCPU_620)
.Case("970", XCOFF::TCPU_970)
- .Cases("a2", "g3", "g4", "g5", "e500", XCOFF::TCPU_COM)
- .Cases("pwr3", "pwr4", XCOFF::TCPU_COM)
- .Cases("pwr5", "PWR5", XCOFF::TCPU_PWR5)
- .Cases("pwr5x", "PWR5X", XCOFF::TCPU_PWR5X)
- .Cases("pwr6", "PWR6", XCOFF::TCPU_PWR6)
- .Cases("pwr6x", "PWR6E", XCOFF::TCPU_PWR6E)
- .Cases("pwr7", "PWR7", XCOFF::TCPU_PWR7)
- .Cases("pwr8", "PWR8", XCOFF::TCPU_PWR8)
- .Cases("pwr9", "PWR9", XCOFF::TCPU_PWR9)
- .Cases("pwr10", "PWR10", XCOFF::TCPU_PWR10)
- .Cases("ppc", "PPC", "ppc32", "ppc64", XCOFF::TCPU_COM)
+ .Cases({"a2", "g3", "g4", "g5", "e500"}, XCOFF::TCPU_COM)
+ .Cases({"pwr3", "pwr4"}, XCOFF::TCPU_COM)
+ .Cases({"pwr5", "PWR5"}, XCOFF::TCPU_PWR5)
+ .Cases({"pwr5x", "PWR5X"}, XCOFF::TCPU_PWR5X)
+ .Cases({"pwr6", "PWR6"}, XCOFF::TCPU_PWR6)
+ .Cases({"pwr6x", "PWR6E"}, XCOFF::TCPU_PWR6E)
+ .Cases({"pwr7", "PWR7"}, XCOFF::TCPU_PWR7)
+ .Cases({"pwr8", "PWR8"}, XCOFF::TCPU_PWR8)
+ .Cases({"pwr9", "PWR9"}, XCOFF::TCPU_PWR9)
+ .Cases({"pwr10", "PWR10"}, XCOFF::TCPU_PWR10)
+ .Cases({"ppc", "PPC", "ppc32", "ppc64"}, XCOFF::TCPU_COM)
.Case("ppc64le", XCOFF::TCPU_PWR8)
.Case("future", XCOFF::TCPU_PWR10)
- .Cases("any", "ANY", XCOFF::TCPU_ANY)
+ .Cases({"any", "ANY"}, XCOFF::TCPU_ANY)
.Default(XCOFF::TCPU_INVALID);
}
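
Note: the XCOFF::getCpuID change above migrates StringSwitch::Cases from a variadic argument list to a brace-enclosed list of case strings. A minimal usage sketch of the new form, assuming an LLVM tree recent enough to provide the initializer-list Cases overload used in that hunk; the CPU names and return values below are illustrative only.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Group equivalent spellings with one brace-enclosed Cases() list instead
// of a long variadic argument list (illustrative values, not XCOFF's).
static int classifyCpu(llvm::StringRef CPU) {
  return llvm::StringSwitch<int>(CPU)
      .Cases({"generic", "COM"}, 0)
      .Cases({"pwr9", "PWR9"}, 9)
      .Case("ppc64le", 8)
      .Default(-1);
}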
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index cdcf7a8..ed0443f 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1860,7 +1860,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_COMPILE_UNIT: {
- if (Record.size() < 14 || Record.size() > 22)
+ if (Record.size() < 14 || Record.size() > 23)
return error("Invalid record");
// Ignore Record[0], which indicates whether this compile unit is
@@ -1869,11 +1869,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
const auto LangVersionMask = (uint64_t(1) << 63);
const bool HasVersionedLanguage = Record[1] & LangVersionMask;
+ const uint32_t LanguageVersion = Record.size() > 22 ? Record[22] : 0;
auto *CU = DICompileUnit::getDistinct(
Context,
HasVersionedLanguage
- ? DISourceLanguageName(Record[1] & ~LangVersionMask, 0)
+ ? DISourceLanguageName(Record[1] & ~LangVersionMask,
+ LanguageVersion)
: DISourceLanguageName(Record[1]),
getMDOrNull(Record[2]), getMDString(Record[3]), Record[4],
getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8],
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 54e916e..8ff3aa9 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2142,6 +2142,7 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
Record.push_back(N->getRangesBaseAddress());
Record.push_back(VE.getMetadataOrNullID(N->getRawSysRoot()));
Record.push_back(VE.getMetadataOrNullID(N->getRawSDK()));
+ Record.push_back(Lang.hasVersionedName() ? Lang.getVersion() : 0);
Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
Record.clear();
diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
index 323b21e..4e6f93e 100644
--- a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
+++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
@@ -1102,8 +1102,6 @@ void TrieRawHashMapHandle::print(
if (auto Err = Printer.printRecords())
OS << "error: " << toString(std::move(Err)) << "\n";
-
- return;
}
Error TrieRawHashMapHandle::validate(
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 05fffe9..e2af0c5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -119,6 +119,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/VCSRevision.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -476,6 +477,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool AsmPrinter::doInitialization(Module &M) {
+ VFS = vfs::getRealFileSystem();
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
HasSplitStack = false;
@@ -1683,7 +1685,7 @@ static ConstantInt *extractNumericCGTypeId(const Function &F) {
return nullptr;
}
-/// Emits .callgraph section.
+/// Emits .llvm.callgraph section.
void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
FunctionCallGraphInfo &FuncCGInfo) {
if (!MF.getTarget().Options.EmitCallGraphSection)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c364ffc..8dd8b9da 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -98,6 +99,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode);
SourceMgr &SrcMgr = *MMI->getContext().getInlineSourceManager();
SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+ SrcMgr.setVirtualFileSystem(VFS);
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 433877f..567acf7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1039,12 +1039,17 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
} else
NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
- if (auto Lang = DIUnit->getSourceLanguage(); Lang.hasVersionedName())
+ if (auto Lang = DIUnit->getSourceLanguage(); Lang.hasVersionedName()) {
NewCU.addUInt(Die, dwarf::DW_AT_language_name, dwarf::DW_FORM_data2,
Lang.getName());
- else
+
+ if (uint32_t LangVersion = Lang.getVersion(); LangVersion != 0)
+ NewCU.addUInt(Die, dwarf::DW_AT_language_version, /*Form=*/std::nullopt,
+ LangVersion);
+ } else {
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
Lang.getName());
+ }
NewCU.addString(Die, dwarf::DW_AT_name, FN);
StringRef SysRoot = DIUnit->getSysRoot();
@@ -2066,11 +2071,36 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
if (NoDebug)
return;
+ auto RecordLineZero = [&]() {
+ // Preserve the file and column numbers, if we can, to save space in
+ // the encoded line table.
+ // Do not update PrevInstLoc, it remembers the last non-0 line.
+ const MDNode *Scope = nullptr;
+ unsigned Column = 0;
+ if (PrevInstLoc) {
+ Scope = PrevInstLoc.getScope();
+ Column = PrevInstLoc.getCol();
+ }
+ recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0);
+ };
+
+ // When we emit a line-0 record, we don't update PrevInstLoc; so look at
+ // the last line number actually emitted, to see if it was line 0.
+ unsigned LastAsmLine =
+ Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
+
// Check if source location changes, but ignore DBG_VALUE and CFI locations.
// If the instruction is part of the function frame setup code, do not emit
// any line record, as there is no correspondence with any user code.
- if (MI->isMetaInstruction() || MI->getFlag(MachineInstr::FrameSetup))
+ if (MI->isMetaInstruction())
+ return;
+ if (MI->getFlag(MachineInstr::FrameSetup)) {
+ // Prevent a loc from the previous block leaking into frame setup instrs.
+ if (LastAsmLine && PrevInstBB && PrevInstBB != MI->getParent())
+ RecordLineZero();
return;
+ }
+
const DebugLoc &DL = MI->getDebugLoc();
unsigned Flags = 0;
@@ -2093,11 +2123,6 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
LocationString);
};
- // When we emit a line-0 record, we don't update PrevInstLoc; so look at
- // the last line number actually emitted, to see if it was line 0.
- unsigned LastAsmLine =
- Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
-
// There may be a mixture of scopes using and not using Key Instructions.
// Not-Key-Instructions functions inlined into Key Instructions functions
// should use not-key is_stmt handling. Key Instructions functions inlined
@@ -2163,18 +2188,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// - Instruction is at the top of a block; we don't want to inherit the
// location from the physically previous (maybe unrelated) block.
if (UnknownLocations == Enable || PrevLabel ||
- (PrevInstBB && PrevInstBB != MI->getParent())) {
- // Preserve the file and column numbers, if we can, to save space in
- // the encoded line table.
- // Do not update PrevInstLoc, it remembers the last non-0 line.
- const MDNode *Scope = nullptr;
- unsigned Column = 0;
- if (PrevInstLoc) {
- Scope = PrevInstLoc.getScope();
- Column = PrevInstLoc.getCol();
- }
- recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0);
- }
+ (PrevInstBB && PrevInstBB != MI->getParent()))
+ RecordLineZero();
return;
}
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 2d50167..fae952e 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -491,6 +491,20 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
return true;
}
if (FBB) {
+ // If we get here with a MBB which ends like this:
+ //
+ // bb.1:
+ // successors: %bb.2;
+ // ...
+ // BNE $x1, $x0, %bb.2
+ // PseudoBR %bb.2
+ //
+ // Just remove conditional branch.
+ if (TBB == FBB) {
+ removeBranch(MBB);
+ insertUncondBranch(MBB, TBB);
+ return true;
+ }
// We need to split the basic block here to obtain two long-range
// unconditional branches.
NewBB = createNewBlockAfter(*MBB);
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 4320b1d..9e78ec9 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -819,7 +819,7 @@ void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
}
// Verify BFI has been updated correctly by recomputing BFI and comparing them.
-void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
+[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
DominatorTree NewDT(F);
LoopInfo NewLI(NewDT);
BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index 25c1db9..ded4df4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -55,12 +55,10 @@ LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
}
LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
- LLT NewEltTy) {
+ ElementCount EC) {
return [=](const LegalityQuery &Query) {
const LLT OldTy = Query.Types[TypeIdx];
- ElementCount NewEltCount = NewEltTy.isVector() ? NewEltTy.getElementCount()
- : ElementCount::getFixed(1);
- return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+ return std::make_pair(TypeIdx, OldTy.changeElementCount(EC));
};
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index cffaf7c..38ec83f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3292,8 +3292,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
if (TypeIdx != 2)
return UnableToLegalize;
Observer.changingInstr(MI);
- // TODO: Probably should be zext
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
}
@@ -3325,8 +3324,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
if (TypeIdx == 2) {
Observer.changingInstr(MI);
- // TODO: Probably should be zext
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 055fdc6..ca82857 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -818,8 +818,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (!DefMI)
return false;
- const TargetMachine& TM = DefMI->getMF()->getTarget();
- if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
+ if (DefMI->getFlag(MachineInstr::FmNoNans))
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index e359831..ea08365 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -1257,7 +1257,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
Tracker.clear();
}
-static void LLVM_ATTRIBUTE_UNUSED printSpillReloadChain(
+[[maybe_unused]] static void printSpillReloadChain(
DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &SpillChain,
DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &ReloadChain,
MachineInstr *Leader) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 358e060..c97300d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17759,7 +17759,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -17825,7 +17824,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool AllowNewConst = (Level < AfterLegalizeDAG);
// If nnan is enabled, fold lots of things.
- if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
+ if (Flags.hasNoNaNs() && AllowNewConst) {
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);
@@ -17974,7 +17973,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -18002,7 +18000,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0 == N1) {
// (fsub x, x) -> 0.0
- if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
+ if (Flags.hasNoNaNs())
return DAG.getConstantFP(0.0f, DL, VT);
}
@@ -18313,7 +18311,6 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
// FMA nodes have flags that propagate to the created nodes.
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
MatchContextClass matcher(DAG, TLI, N);
@@ -18339,8 +18336,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
}
- if ((Options.NoNaNsFPMath && N->getFlags().hasNoInfs()) ||
- (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
+ if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
if (N->getFlags().hasNoSignedZeros() ||
(N2CFP && !N2CFP->isExactlyValue(-0.0))) {
if (N0CFP && N0CFP->isZero())
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0f2b518..cb0038c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3971,8 +3971,14 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) {
}
void SelectionDAGBuilder::visitPtrToAddr(const User &I) {
- // FIXME: this is not correct for pointers with addr width != pointer width
- visitPtrToInt(I);
+ SDValue N = getValue(I.getOperand(0));
+ // By definition the type of the ptrtoaddr must be equal to the address type.
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ EVT AddrVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ // The address width must be smaller or equal to the pointer representation
+ // width, so we lower ptrtoaddr as a truncate (possibly folded to a no-op).
+ N = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), AddrVT, N);
+ setValue(&I, N);
}
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
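
Note: the new visitPtrToAddr above lowers ptrtoaddr as a TRUNCATE from the pointer-representation type to the address type, which folds to a no-op when the two widths match. A standalone sketch of that width relationship; the 64-bit representation carrying a 32-bit address below is a hypothetical configuration, not tied to any in-tree target.

#include <cassert>
#include <cstdint>

// ptrtoaddr keeps only the low "address width" bits of the pointer
// representation; when the address width equals the pointer width this
// truncation is the identity, matching the TRUNCATE node in the hunk above.
static uint64_t truncateToAddrWidth(uint64_t PtrRep, unsigned AddrBits) {
  assert(AddrBits >= 1 && AddrBits <= 64);
  if (AddrBits == 64)
    return PtrRep;
  return PtrRep & ((uint64_t(1) << AddrBits) - 1);
}

int main() {
  // 64-bit representation with metadata in the high bits, 32-bit address.
  assert(truncateToAddrWidth(0xAABBCCDD11223344ull, 32) == 0x11223344u);
  // Address width equal to pointer width: identity.
  assert(truncateToAddrWidth(0x11223344u, 64) == 0x11223344u);
}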
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 6610eef..c61f757 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -181,8 +181,8 @@ DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch,
DWARFDebugFrame::~DWARFDebugFrame() = default;
-static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
- uint64_t Offset, int Length) {
+[[maybe_unused]] static void dumpDataAux(DataExtractor Data, uint64_t Offset,
+ int Length) {
errs() << "DUMP: ";
for (int i = 0; i < Length; ++i) {
uint8_t c = Data.getU8(&Offset);
diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
index 0ffe3ae..f343925 100644
--- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
@@ -56,6 +56,7 @@ add_llvm_component_library(LLVMOrcJIT
SectCreate.cpp
SelfExecutorProcessControl.cpp
SimpleRemoteEPC.cpp
+ SimpleRemoteMemoryMapper.cpp
Speculation.cpp
SpeculateAnalyses.cpp
ExecutorProcessControl.cpp
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
index 50e6b25..0833af7 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
@@ -57,16 +57,17 @@ public:
std::swap(FR.Actions, G.allocActions());
Parent.EPC.callSPSWrapperAsync<
- rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
- Parent.SAs.Finalize,
+ rt::SPSSimpleExecutorMemoryManagerInitializeSignature>(
+ Parent.SAs.Initialize,
[OnFinalize = std::move(OnFinalize), AllocAddr = this->AllocAddr](
- Error SerializationErr, Error FinalizeErr) mutable {
+ Error SerializationErr,
+ Expected<ExecutorAddr> InitializeKey) mutable {
// FIXME: Release abandoned alloc.
if (SerializationErr) {
- cantFail(std::move(FinalizeErr));
+ cantFail(InitializeKey.takeError());
OnFinalize(std::move(SerializationErr));
- } else if (FinalizeErr)
- OnFinalize(std::move(FinalizeErr));
+ } else if (!InitializeKey)
+ OnFinalize(InitializeKey.takeError());
else
OnFinalize(FinalizedAlloc(AllocAddr));
},
@@ -76,8 +77,8 @@ public:
void abandon(OnAbandonedFunction OnAbandoned) override {
// FIXME: Return memory to pool instead.
Parent.EPC.callSPSWrapperAsync<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
- Parent.SAs.Deallocate,
+ rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
+ Parent.SAs.Release,
[OnAbandoned = std::move(OnAbandoned)](Error SerializationErr,
Error DeallocateErr) mutable {
if (SerializationErr) {
@@ -123,9 +124,8 @@ void EPCGenericJITLinkMemoryManager::allocate(const JITLinkDylib *JD,
void EPCGenericJITLinkMemoryManager::deallocate(
std::vector<FinalizedAlloc> Allocs, OnDeallocatedFunction OnDeallocated) {
- EPC.callSPSWrapperAsync<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
- SAs.Deallocate,
+ EPC.callSPSWrapperAsync<rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
+ SAs.Release,
[OnDeallocated = std::move(OnDeallocated)](Error SerErr,
Error DeallocErr) mutable {
if (SerErr) {
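
With finalize renamed to initialize, the executor-side wrapper now hands back an Expected<ExecutorAddr> key instead of a bare Error, so the async callbacks above have to consume the unused result before reporting a serialization failure (an unchecked Expected asserts on destruction in checked builds). A minimal sketch of that discipline, using Expected<uint64_t> in place of the ExecutorAddr key so it needs only llvm/Support/Error.h; the helper and callback names are illustrative, not Orc API:

#include "llvm/Support/Error.h"
#include <utility>

template <typename OnDoneFn>
static void reportInitializeResult(OnDoneFn &&OnDone,
                                   llvm::Error SerializationErr,
                                   llvm::Expected<uint64_t> InitializeKey) {
  if (SerializationErr) {
    // The key was never produced (the real code assumes it carries no error
    // here); mark it checked before discarding it.
    llvm::cantFail(InitializeKey.takeError());
    return OnDone(std::move(SerializationErr));
  }
  // Otherwise forward either the key or the executor-side error.
  OnDone(std::move(InitializeKey));
}
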
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
index fec7062..cc72488 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
@@ -25,9 +25,9 @@ EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(
if (auto Err = EPC.getBootstrapSymbols(
{{SAs.Instance, rt::SimpleExecutorMemoryManagerInstanceName},
{SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
- {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
- {SAs.Deallocate,
- rt::SimpleExecutorMemoryManagerDeallocateWrapperName},
+ {SAs.Initialize,
+ rt::SimpleExecutorMemoryManagerInitializeWrapperName},
+ {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName},
{SAs.RegisterEHFrame, rt::RegisterEHFrameSectionAllocActionName},
{SAs.DeregisterEHFrame,
rt::DeregisterEHFrameSectionAllocActionName}}))
@@ -48,7 +48,7 @@ EPCGenericRTDyldMemoryManager::~EPCGenericRTDyldMemoryManager() {
Error Err = Error::success();
if (auto Err2 = EPC.callSPSWrapper<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
+ rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
SAs.Reserve, Err, SAs.Instance, FinalizedAllocs)) {
// FIXME: Report errors through EPC once that functionality is available.
logAllUnhandledErrors(std::move(Err2), errs(), "");
@@ -267,10 +267,10 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
// We'll also need to make an extra allocation for the eh-frame wrapper call
// arguments.
- Error FinalizeErr = Error::success();
+ Expected<ExecutorAddr> InitializeKey((ExecutorAddr()));
if (auto Err = EPC.callSPSWrapper<
- rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
- SAs.Finalize, FinalizeErr, SAs.Instance, std::move(FR))) {
+ rt::SPSSimpleExecutorMemoryManagerInitializeSignature>(
+ SAs.Initialize, InitializeKey, SAs.Instance, std::move(FR))) {
std::lock_guard<std::mutex> Lock(M);
this->ErrMsg = toString(std::move(Err));
dbgs() << "Serialization error: " << this->ErrMsg << "\n";
@@ -278,9 +278,9 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
*ErrMsg = this->ErrMsg;
return true;
}
- if (FinalizeErr) {
+ if (!InitializeKey) {
std::lock_guard<std::mutex> Lock(M);
- this->ErrMsg = toString(std::move(FinalizeErr));
+ this->ErrMsg = toString(InitializeKey.takeError());
dbgs() << "Finalization error: " << this->ErrMsg << "\n";
if (ErrMsg)
*ErrMsg = this->ErrMsg;
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
index 26e8f53..cc99d3c 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
@@ -23,10 +23,12 @@ const char *SimpleExecutorMemoryManagerInstanceName =
"__llvm_orc_SimpleExecutorMemoryManager_Instance";
const char *SimpleExecutorMemoryManagerReserveWrapperName =
"__llvm_orc_SimpleExecutorMemoryManager_reserve_wrapper";
-const char *SimpleExecutorMemoryManagerFinalizeWrapperName =
- "__llvm_orc_SimpleExecutorMemoryManager_finalize_wrapper";
-const char *SimpleExecutorMemoryManagerDeallocateWrapperName =
- "__llvm_orc_SimpleExecutorMemoryManager_deallocate_wrapper";
+const char *SimpleExecutorMemoryManagerInitializeWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_initialize_wrapper";
+const char *SimpleExecutorMemoryManagerDeinitializeWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_deinitialize_wrapper";
+const char *SimpleExecutorMemoryManagerReleaseWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_release_wrapper";
const char *ExecutorSharedMemoryMapperServiceInstanceName =
"__llvm_orc_ExecutorSharedMemoryMapperService_Instance";
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
index 87d7578..dec1df7 100644
--- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
@@ -216,9 +216,9 @@ SimpleRemoteEPC::createDefaultMemoryManager(SimpleRemoteEPC &SREPC) {
if (auto Err = SREPC.getBootstrapSymbols(
{{SAs.Allocator, rt::SimpleExecutorMemoryManagerInstanceName},
{SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
- {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
- {SAs.Deallocate,
- rt::SimpleExecutorMemoryManagerDeallocateWrapperName}}))
+ {SAs.Initialize,
+ rt::SimpleExecutorMemoryManagerInitializeWrapperName},
+ {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName}}))
return std::move(Err);
return std::make_unique<EPCGenericJITLinkMemoryManager>(SREPC, SAs);
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp
new file mode 100644
index 0000000..b82de3f
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp
@@ -0,0 +1,104 @@
+//===---- SimpleRemoteMemoryMapper.cpp - Remote memory mapper ----*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.h"
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+
+namespace llvm::orc {
+
+SimpleRemoteMemoryMapper::SimpleRemoteMemoryMapper(ExecutorProcessControl &EPC,
+ SymbolAddrs SAs)
+ : EPC(EPC), SAs(SAs) {}
+
+void SimpleRemoteMemoryMapper::reserve(size_t NumBytes,
+ OnReservedFunction OnReserved) {
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapReserveSignature>(
+ SAs.Reserve,
+ [NumBytes, OnReserved = std::move(OnReserved)](
+ Error SerializationErr, Expected<ExecutorAddr> Result) mutable {
+ if (SerializationErr) {
+ cantFail(Result.takeError());
+ return OnReserved(std::move(SerializationErr));
+ }
+
+ if (Result)
+ OnReserved(ExecutorAddrRange(*Result, NumBytes));
+ else
+ OnReserved(Result.takeError());
+ },
+ SAs.Instance, static_cast<uint64_t>(NumBytes));
+}
+
+char *SimpleRemoteMemoryMapper::prepare(jitlink::LinkGraph &G,
+ ExecutorAddr Addr, size_t ContentSize) {
+ return G.allocateBuffer(ContentSize).data();
+}
+
+void SimpleRemoteMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
+ OnInitializedFunction OnInitialized) {
+
+ tpctypes::FinalizeRequest FR;
+
+ std::swap(FR.Actions, AI.Actions);
+ FR.Segments.reserve(AI.Segments.size());
+
+ for (auto Seg : AI.Segments)
+ FR.Segments.push_back({Seg.AG, AI.MappingBase + Seg.Offset,
+ Seg.ContentSize + Seg.ZeroFillSize,
+ ArrayRef<char>(Seg.WorkingMem, Seg.ContentSize)});
+
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapInitializeSignature>(
+ SAs.Initialize,
+ [OnInitialized = std::move(OnInitialized)](
+ Error SerializationErr, Expected<ExecutorAddr> Result) mutable {
+ if (SerializationErr) {
+ cantFail(Result.takeError());
+ return OnInitialized(std::move(SerializationErr));
+ }
+
+ OnInitialized(std::move(Result));
+ },
+ SAs.Instance, std::move(FR));
+}
+
+void SimpleRemoteMemoryMapper::deinitialize(
+ ArrayRef<ExecutorAddr> Allocations,
+ MemoryMapper::OnDeinitializedFunction OnDeinitialized) {
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapDeinitializeSignature>(
+ SAs.Deinitialize,
+ [OnDeinitialized = std::move(OnDeinitialized)](Error SerializationErr,
+ Error Result) mutable {
+ if (SerializationErr) {
+ cantFail(std::move(Result));
+ return OnDeinitialized(std::move(SerializationErr));
+ }
+
+ OnDeinitialized(std::move(Result));
+ },
+ SAs.Instance, Allocations);
+}
+
+void SimpleRemoteMemoryMapper::release(ArrayRef<ExecutorAddr> Bases,
+ OnReleasedFunction OnReleased) {
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapReleaseSignature>(
+ SAs.Release,
+ [OnReleased = std::move(OnReleased)](Error SerializationErr,
+ Error Result) mutable {
+ if (SerializationErr) {
+ cantFail(std::move(Result));
+ return OnReleased(std::move(SerializationErr));
+ }
+
+ return OnReleased(std::move(Result));
+ },
+ SAs.Instance, Bases);
+}
+
+} // namespace llvm::orc
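
The new mapper forwards each MemoryMapper operation (reserve, initialize, deinitialize, release) to a single executor-side wrapper call, with prepare() writing content straight into the LinkGraph buffer. A hedged usage sketch follows; it assumes the class exposes a nested SymbolAddrs struct carrying the Instance/Reserve/Initialize/Deinitialize/Release fields used above, mirroring the other EPC-based managers:

#include "llvm/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::orc;

// Reserve 1 MiB in the executor and report the outcome.
static void reserveOneMiB(ExecutorProcessControl &EPC,
                          SimpleRemoteMemoryMapper::SymbolAddrs SAs) {
  SimpleRemoteMemoryMapper Mapper(EPC, SAs);
  Mapper.reserve(1 << 20, [](Expected<ExecutorAddrRange> R) {
    if (!R)
      return logAllUnhandledErrors(R.takeError(), errs(), "reserve: ");
    dbgs() << "reserved " << R->size() << " bytes\n";
  });
}
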
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
index 3cdffb8..fe881a1 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
@@ -8,6 +8,7 @@
#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
#include "llvm/Support/FormatVariadic.h"
@@ -18,166 +19,167 @@ namespace orc {
namespace rt_bootstrap {
SimpleExecutorMemoryManager::~SimpleExecutorMemoryManager() {
- assert(Allocations.empty() && "shutdown not called?");
+ assert(Slabs.empty() && "shutdown not called?");
}
-Expected<ExecutorAddr> SimpleExecutorMemoryManager::allocate(uint64_t Size) {
+Expected<ExecutorAddr> SimpleExecutorMemoryManager::reserve(uint64_t Size) {
std::error_code EC;
auto MB = sys::Memory::allocateMappedMemory(
Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
if (EC)
return errorCodeToError(EC);
std::lock_guard<std::mutex> Lock(M);
- assert(!Allocations.count(MB.base()) && "Duplicate allocation addr");
- Allocations[MB.base()].Size = Size;
+ assert(!Slabs.count(MB.base()) && "Duplicate allocation addr");
+ Slabs[MB.base()].Size = Size;
return ExecutorAddr::fromPtr(MB.base());
}
-Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) {
- ExecutorAddr Base(~0ULL);
+Expected<ExecutorAddr>
+SimpleExecutorMemoryManager::initialize(tpctypes::FinalizeRequest &FR) {
std::vector<shared::WrapperFunctionCall> DeallocationActions;
- size_t SuccessfulFinalizationActions = 0;
if (FR.Segments.empty()) {
- // NOTE: Finalizing nothing is currently a no-op. Should it be an error?
if (FR.Actions.empty())
- return Error::success();
+ return make_error<StringError>("Finalization request is empty",
+ inconvertibleErrorCode());
else
return make_error<StringError>("Finalization actions attached to empty "
"finalization request",
inconvertibleErrorCode());
}
- for (auto &Seg : FR.Segments)
- Base = std::min(Base, Seg.Addr);
-
- for (auto &ActPair : FR.Actions)
- if (ActPair.Dealloc)
- DeallocationActions.push_back(ActPair.Dealloc);
-
- // Get the Allocation for this finalization.
- size_t AllocSize = 0;
- {
- std::lock_guard<std::mutex> Lock(M);
- auto I = Allocations.find(Base.toPtr<void *>());
- if (I == Allocations.end())
- return make_error<StringError>("Attempt to finalize unrecognized "
- "allocation " +
- formatv("{0:x}", Base.getValue()),
- inconvertibleErrorCode());
- AllocSize = I->second.Size;
- I->second.DeallocationActions = std::move(DeallocationActions);
- }
- ExecutorAddr AllocEnd = Base + ExecutorAddrDiff(AllocSize);
-
- // Bail-out function: this will run deallocation actions corresponding to any
- // completed finalization actions, then deallocate memory.
- auto BailOut = [&](Error Err) {
- std::pair<void *, Allocation> AllocToDestroy;
-
- // Get allocation to destroy.
- {
- std::lock_guard<std::mutex> Lock(M);
- auto I = Allocations.find(Base.toPtr<void *>());
-
- // Check for missing allocation (effective a double free).
- if (I == Allocations.end())
- return joinErrors(
- std::move(Err),
- make_error<StringError>("No allocation entry found "
- "for " +
- formatv("{0:x}", Base.getValue()),
- inconvertibleErrorCode()));
- AllocToDestroy = std::move(*I);
- Allocations.erase(I);
- }
+ ExecutorAddrRange RR(FR.Segments.front().Addr, FR.Segments.front().Addr);
- // Run deallocation actions for all completed finalization actions.
- while (SuccessfulFinalizationActions)
- Err =
- joinErrors(std::move(Err), FR.Actions[--SuccessfulFinalizationActions]
- .Dealloc.runWithSPSRetErrorMerged());
-
- // Deallocate memory.
- sys::MemoryBlock MB(AllocToDestroy.first, AllocToDestroy.second.Size);
- if (auto EC = sys::Memory::releaseMappedMemory(MB))
- Err = joinErrors(std::move(Err), errorCodeToError(EC));
-
- return Err;
- };
+ std::vector<sys::MemoryBlock> MBsToReset;
+ auto ResetMBs = make_scope_exit([&]() {
+ for (auto &MB : MBsToReset)
+ sys::Memory::protectMappedMemory(MB, sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE);
+ sys::Memory::InvalidateInstructionCache(RR.Start.toPtr<void *>(),
+ RR.size());
+ });
// Copy content and apply permissions.
for (auto &Seg : FR.Segments) {
+ RR.Start = std::min(RR.Start, Seg.Addr);
+ RR.End = std::max(RR.End, Seg.Addr + Seg.Size);
// Check segment ranges.
if (LLVM_UNLIKELY(Seg.Size < Seg.Content.size()))
- return BailOut(make_error<StringError>(
+ return make_error<StringError>(
formatv("Segment {0:x} content size ({1:x} bytes) "
"exceeds segment size ({2:x} bytes)",
Seg.Addr.getValue(), Seg.Content.size(), Seg.Size),
- inconvertibleErrorCode()));
+ inconvertibleErrorCode());
ExecutorAddr SegEnd = Seg.Addr + ExecutorAddrDiff(Seg.Size);
- if (LLVM_UNLIKELY(Seg.Addr < Base || SegEnd > AllocEnd))
- return BailOut(make_error<StringError>(
+ if (LLVM_UNLIKELY(Seg.Addr < RR.Start || SegEnd > RR.End))
+ return make_error<StringError>(
formatv("Segment {0:x} -- {1:x} crosses boundary of "
"allocation {2:x} -- {3:x}",
- Seg.Addr.getValue(), SegEnd.getValue(), Base.getValue(),
- AllocEnd.getValue()),
- inconvertibleErrorCode()));
+ Seg.Addr, SegEnd, RR.Start, RR.End),
+ inconvertibleErrorCode());
char *Mem = Seg.Addr.toPtr<char *>();
if (!Seg.Content.empty())
memcpy(Mem, Seg.Content.data(), Seg.Content.size());
memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size());
assert(Seg.Size <= std::numeric_limits<size_t>::max());
+
+ sys::MemoryBlock MB(Mem, Seg.Size);
if (auto EC = sys::Memory::protectMappedMemory(
- {Mem, static_cast<size_t>(Seg.Size)},
- toSysMemoryProtectionFlags(Seg.RAG.Prot)))
- return BailOut(errorCodeToError(EC));
+ MB, toSysMemoryProtectionFlags(Seg.RAG.Prot)))
+ return errorCodeToError(EC);
+
+ MBsToReset.push_back(MB);
+
if ((Seg.RAG.Prot & MemProt::Exec) == MemProt::Exec)
sys::Memory::InvalidateInstructionCache(Mem, Seg.Size);
}
- // Run finalization actions.
- for (auto &ActPair : FR.Actions) {
- if (auto Err = ActPair.Finalize.runWithSPSRetErrorMerged())
- return BailOut(std::move(Err));
- ++SuccessfulFinalizationActions;
+ auto DeallocActions = runFinalizeActions(FR.Actions);
+ if (!DeallocActions)
+ return DeallocActions.takeError();
+
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ auto Region = createRegionInfo(RR, "In initialize");
+ if (!Region)
+ return Region.takeError();
+ Region->DeallocActions = std::move(*DeallocActions);
}
- return Error::success();
+ // Successful initialization.
+ ResetMBs.release();
+
+ return RR.Start;
}
-Error SimpleExecutorMemoryManager::deallocate(
- const std::vector<ExecutorAddr> &Bases) {
- std::vector<std::pair<void *, Allocation>> AllocPairs;
- AllocPairs.reserve(Bases.size());
+Error SimpleExecutorMemoryManager::deinitialize(
+ const std::vector<ExecutorAddr> &InitKeys) {
+ Error Err = Error::success();
- // Get allocation to destroy.
+ for (auto &KeyAddr : llvm::reverse(InitKeys)) {
+ std::vector<shared::WrapperFunctionCall> DeallocActions;
+ {
+ std::scoped_lock<std::mutex> Lock(M);
+ auto Slab = getSlabInfo(KeyAddr, "In deinitialize");
+ if (!Slab) {
+ Err = joinErrors(std::move(Err), Slab.takeError());
+ continue;
+ }
+
+ auto RI = getRegionInfo(*Slab, KeyAddr, "In deinitialize");
+ if (!RI) {
+ Err = joinErrors(std::move(Err), RI.takeError());
+ continue;
+ }
+
+ DeallocActions = std::move(RI->DeallocActions);
+ }
+
+ Err = joinErrors(std::move(Err),
+ runDeallocActions(std::move(DeallocActions)));
+ }
+
+ return Err;
+}
+
+Error SimpleExecutorMemoryManager::release(
+ const std::vector<ExecutorAddr> &Bases) {
Error Err = Error::success();
- {
- std::lock_guard<std::mutex> Lock(M);
- for (auto &Base : Bases) {
- auto I = Allocations.find(Base.toPtr<void *>());
-
- // Check for missing allocation (effective a double free).
- if (I != Allocations.end()) {
- AllocPairs.push_back(std::move(*I));
- Allocations.erase(I);
- } else
+
+ // TODO: Prohibit new initializations within the slabs being removed?
+ for (auto &Base : llvm::reverse(Bases)) {
+ std::vector<shared::WrapperFunctionCall> DeallocActions;
+ sys::MemoryBlock MB;
+
+ {
+ std::scoped_lock<std::mutex> Lock(M);
+
+ auto SlabI = Slabs.find(Base.toPtr<void *>());
+ if (SlabI == Slabs.end()) {
Err = joinErrors(
std::move(Err),
- make_error<StringError>("No allocation entry found "
- "for " +
- formatv("{0:x}", Base.getValue()),
+ make_error<StringError>("In release, " + formatv("{0:x}", Base) +
+ " is not part of any reserved "
+ "address range",
inconvertibleErrorCode()));
+ continue;
+ }
+
+ auto &Slab = SlabI->second;
+
+ for (auto &[Addr, Region] : Slab.Regions)
+ llvm::copy(Region.DeallocActions, back_inserter(DeallocActions));
+
+ MB = {Base.toPtr<void *>(), Slab.Size};
+
+ Slabs.erase(SlabI);
}
- }
- while (!AllocPairs.empty()) {
- auto &P = AllocPairs.back();
- Err = joinErrors(std::move(Err), deallocateImpl(P.first, P.second));
- AllocPairs.pop_back();
+ Err = joinErrors(std::move(Err), runDeallocActions(DeallocActions));
+ if (auto EC = sys::Memory::releaseMappedMemory(MB))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
}
return Err;
@@ -185,16 +187,15 @@ Error SimpleExecutorMemoryManager::deallocate(
Error SimpleExecutorMemoryManager::shutdown() {
- AllocationsMap AM;
+ // TODO: Prevent new allocations during shutdown.
+ std::vector<ExecutorAddr> Bases;
{
- std::lock_guard<std::mutex> Lock(M);
- AM = std::move(Allocations);
+ std::scoped_lock<std::mutex> Lock(M);
+ for (auto &[Base, Slab] : Slabs)
+ Bases.push_back(ExecutorAddr::fromPtr(Base));
}
- Error Err = Error::success();
- for (auto &KV : AM)
- Err = joinErrors(std::move(Err), deallocateImpl(KV.first, KV.second));
- return Err;
+ return release(Bases);
}
void SimpleExecutorMemoryManager::addBootstrapSymbols(
@@ -202,58 +203,150 @@ void SimpleExecutorMemoryManager::addBootstrapSymbols(
M[rt::SimpleExecutorMemoryManagerInstanceName] = ExecutorAddr::fromPtr(this);
M[rt::SimpleExecutorMemoryManagerReserveWrapperName] =
ExecutorAddr::fromPtr(&reserveWrapper);
- M[rt::SimpleExecutorMemoryManagerFinalizeWrapperName] =
- ExecutorAddr::fromPtr(&finalizeWrapper);
- M[rt::SimpleExecutorMemoryManagerDeallocateWrapperName] =
- ExecutorAddr::fromPtr(&deallocateWrapper);
+ M[rt::SimpleExecutorMemoryManagerInitializeWrapperName] =
+ ExecutorAddr::fromPtr(&initializeWrapper);
+ M[rt::SimpleExecutorMemoryManagerDeinitializeWrapperName] =
+ ExecutorAddr::fromPtr(&deinitializeWrapper);
+ M[rt::SimpleExecutorMemoryManagerReleaseWrapperName] =
+ ExecutorAddr::fromPtr(&releaseWrapper);
}
-Error SimpleExecutorMemoryManager::deallocateImpl(void *Base, Allocation &A) {
- Error Err = Error::success();
+Expected<SimpleExecutorMemoryManager::SlabInfo &>
+SimpleExecutorMemoryManager::getSlabInfo(ExecutorAddr A, StringRef Context) {
+ auto MakeBadSlabError = [&]() {
+ return make_error<StringError>(
+ Context + ", address " + formatv("{0:x}", A) +
+ " is not part of any reserved address range",
+ inconvertibleErrorCode());
+ };
- while (!A.DeallocationActions.empty()) {
- Err = joinErrors(std::move(Err),
- A.DeallocationActions.back().runWithSPSRetErrorMerged());
- A.DeallocationActions.pop_back();
+ auto I = Slabs.upper_bound(A.toPtr<void *>());
+ if (I == Slabs.begin())
+ return MakeBadSlabError();
+ --I;
+ if (!ExecutorAddrRange(ExecutorAddr::fromPtr(I->first), I->second.Size)
+ .contains(A))
+ return MakeBadSlabError();
+
+ return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::SlabInfo &>
+SimpleExecutorMemoryManager::getSlabInfo(ExecutorAddrRange R,
+ StringRef Context) {
+ auto MakeBadSlabError = [&]() {
+ return make_error<StringError>(
+ Context + ", range " + formatv("{0:x}", R) +
+ " is not part of any reserved address range",
+ inconvertibleErrorCode());
+ };
+
+ auto I = Slabs.upper_bound(R.Start.toPtr<void *>());
+ if (I == Slabs.begin())
+ return MakeBadSlabError();
+ --I;
+ if (!ExecutorAddrRange(ExecutorAddr::fromPtr(I->first), I->second.Size)
+ .contains(R))
+ return MakeBadSlabError();
+
+ return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::createRegionInfo(ExecutorAddrRange R,
+ StringRef Context) {
+
+ auto Slab = getSlabInfo(R, Context);
+ if (!Slab)
+ return Slab.takeError();
+
+ auto MakeBadRegionError = [&](ExecutorAddrRange Other, bool Prev) {
+ return make_error<StringError>(Context + ", region " + formatv("{0:x}", R) +
+ " overlaps " +
+ (Prev ? "previous" : "following") +
+ " region " + formatv("{0:x}", Other),
+ inconvertibleErrorCode());
+ };
+
+ auto I = Slab->Regions.upper_bound(R.Start);
+ if (I != Slab->Regions.begin()) {
+ auto J = std::prev(I);
+ ExecutorAddrRange PrevRange(J->first, J->second.Size);
+ if (PrevRange.overlaps(R))
+ return MakeBadRegionError(PrevRange, true);
+ }
+ if (I != Slab->Regions.end()) {
+ ExecutorAddrRange NextRange(I->first, I->second.Size);
+ if (NextRange.overlaps(R))
+ return MakeBadRegionError(NextRange, false);
}
- sys::MemoryBlock MB(Base, A.Size);
- if (auto EC = sys::Memory::releaseMappedMemory(MB))
- Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ auto &RInfo = Slab->Regions[R.Start];
+ RInfo.Size = R.size();
+ return RInfo;
+}
- return Err;
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::getRegionInfo(SlabInfo &Slab, ExecutorAddr A,
+ StringRef Context) {
+ auto I = Slab.Regions.find(A);
+ if (I == Slab.Regions.end())
+ return make_error<StringError>(
+ Context + ", address " + formatv("{0:x}", A) +
+ " does not correspond to the start of any initialized region",
+ inconvertibleErrorCode());
+
+ return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::getRegionInfo(ExecutorAddr A, StringRef Context) {
+ auto Slab = getSlabInfo(A, Context);
+ if (!Slab)
+ return Slab.takeError();
+
+ return getRegionInfo(*Slab, A, Context);
}
llvm::orc::shared::CWrapperFunctionResult
SimpleExecutorMemoryManager::reserveWrapper(const char *ArgData,
size_t ArgSize) {
- return shared::WrapperFunction<
- rt::SPSSimpleExecutorMemoryManagerReserveSignature>::
+ return shared::WrapperFunction<rt::SPSSimpleRemoteMemoryMapReserveSignature>::
handle(ArgData, ArgSize,
shared::makeMethodWrapperHandler(
- &SimpleExecutorMemoryManager::allocate))
+ &SimpleExecutorMemoryManager::reserve))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorMemoryManager::initializeWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::
+ WrapperFunction<rt::SPSSimpleRemoteMemoryMapInitializeSignature>::handle(
+ ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorMemoryManager::initialize))
.release();
}
llvm::orc::shared::CWrapperFunctionResult
-SimpleExecutorMemoryManager::finalizeWrapper(const char *ArgData,
- size_t ArgSize) {
+SimpleExecutorMemoryManager::deinitializeWrapper(const char *ArgData,
+ size_t ArgSize) {
return shared::WrapperFunction<
- rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>::
+ rt::SPSSimpleRemoteMemoryMapDeinitializeSignature>::
handle(ArgData, ArgSize,
shared::makeMethodWrapperHandler(
- &SimpleExecutorMemoryManager::finalize))
+ &SimpleExecutorMemoryManager::deinitialize))
.release();
}
llvm::orc::shared::CWrapperFunctionResult
-SimpleExecutorMemoryManager::deallocateWrapper(const char *ArgData,
- size_t ArgSize) {
- return shared::WrapperFunction<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>::
+SimpleExecutorMemoryManager::releaseWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<rt::SPSSimpleRemoteMemoryMapReleaseSignature>::
handle(ArgData, ArgSize,
shared::makeMethodWrapperHandler(
- &SimpleExecutorMemoryManager::deallocate))
+ &SimpleExecutorMemoryManager::release))
.release();
}
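
The executor-side manager now keys reserved slabs by base address and records initialized regions inside each slab; getSlabInfo resolves an arbitrary address with the classic upper_bound-then-decrement containment check. A standalone sketch of that lookup pattern (plain std::map, not the Orc types):

#include <cstdint>
#include <map>

struct Slab { uint64_t Size = 0; };

// Find the slab containing Addr, or nullptr: take the first slab whose base
// is greater than Addr, step back one entry, and check containment.
static Slab *findSlab(std::map<uint64_t, Slab> &Slabs, uint64_t Addr) {
  auto I = Slabs.upper_bound(Addr);
  if (I == Slabs.begin())
    return nullptr;                        // Addr precedes every slab base.
  --I;
  if (Addr - I->first >= I->second.Size)
    return nullptr;                        // Addr lies past this slab's end.
  return &I->second;
}
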
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 2430d98..3908a78 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2374,16 +2374,21 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
Out << "!DICompileUnit(";
MDFieldPrinter Printer(Out, WriterCtx);
- auto Lang = N->getSourceLanguage();
- if (Lang.hasVersionedName())
+ DISourceLanguageName Lang = N->getSourceLanguage();
+
+ if (Lang.hasVersionedName()) {
Printer.printDwarfEnum(
"sourceLanguageName",
static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
dwarf::SourceLanguageNameString,
/* ShouldSkipZero */ false);
- else
+
+ Printer.printInt("sourceLanguageVersion", Lang.getVersion(),
+ /*ShouldSkipZero=*/true);
+ } else {
Printer.printDwarfEnum("language", Lang.getName(), dwarf::LanguageString,
/* ShouldSkipZero */ false);
+ }
Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
Printer.printString("producer", N->getProducer());
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index f28b989..d8374b6 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -6041,8 +6041,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
Triple T(TT);
// The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
// the address space of globals to 1. This does not apply to SPIRV Logical.
- if (((T.isAMDGPU() && !T.isAMDGCN()) ||
- (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
+ if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
!DL.contains("-G") && !DL.starts_with("G")) {
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}
@@ -6055,35 +6054,43 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
return DL.str();
}
+ // AMDGPU data layout upgrades.
std::string Res = DL.str();
- // AMDGCN data layout upgrades.
- if (T.isAMDGCN()) {
+ if (T.isAMDGPU()) {
// Define address spaces for constants.
if (!DL.contains("-G") && !DL.starts_with("G"))
Res.append(Res.empty() ? "G1" : "-G1");
- // Add missing non-integral declarations.
- // This goes before adding new address spaces to prevent incoherent string
- // values.
- if (!DL.contains("-ni") && !DL.starts_with("ni"))
- Res.append("-ni:7:8:9");
- // Update ni:7 to ni:7:8:9.
- if (DL.ends_with("ni:7"))
- Res.append(":8:9");
- if (DL.ends_with("ni:7:8"))
- Res.append(":9");
-
- // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
- // resources) An empty data layout has already been upgraded to G1 by now.
- if (!DL.contains("-p7") && !DL.starts_with("p7"))
- Res.append("-p7:160:256:256:32");
- if (!DL.contains("-p8") && !DL.starts_with("p8"))
- Res.append("-p8:128:128:128:48");
- constexpr StringRef OldP8("-p8:128:128-");
- if (DL.contains(OldP8))
- Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
- if (!DL.contains("-p9") && !DL.starts_with("p9"))
- Res.append("-p9:192:256:256:32");
+ // AMDGCN data layout upgrades.
+ if (T.isAMDGCN()) {
+
+ // Add missing non-integral declarations.
+ // This goes before adding new address spaces to prevent incoherent string
+ // values.
+ if (!DL.contains("-ni") && !DL.starts_with("ni"))
+ Res.append("-ni:7:8:9");
+ // Update ni:7 to ni:7:8:9.
+ if (DL.ends_with("ni:7"))
+ Res.append(":8:9");
+ if (DL.ends_with("ni:7:8"))
+ Res.append(":9");
+
+ // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
+ // resources) An empty data layout has already been upgraded to G1 by now.
+ if (!DL.contains("-p7") && !DL.starts_with("p7"))
+ Res.append("-p7:160:256:256:32");
+ if (!DL.contains("-p8") && !DL.starts_with("p8"))
+ Res.append("-p8:128:128:128:48");
+ constexpr StringRef OldP8("-p8:128:128-");
+ if (DL.contains(OldP8))
+ Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
+ if (!DL.contains("-p9") && !DL.starts_with("p9"))
+ Res.append("-p9:192:256:256:32");
+ }
+
+ // Upgrade the ELF mangling mode.
+ if (!DL.contains("m:e"))
+ Res = Res.empty() ? "m:e" : "m:e-" + Res;
return Res;
}
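
Two behavioural points in the hunk above: the -G1 default now applies to all AMDGPU triples rather than only the pre-GCN ones, and AMDGPU layouts additionally gain an ELF mangling component prepended as "m:e". A small illustration of the entry point shown in this hunk (the expected shape of the output is inferred from the code above, so treat it as indicative rather than exact):

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // An old-style amdgcn layout string: after upgrading it should start with
  // "m:e-" and contain -G1, -ni:7:8:9 and the p7/p8/p9 size entries.
  std::string Upgraded =
      llvm::UpgradeDataLayoutString("e-p:64:64", "amdgcn-amd-amdhsa");
  llvm::outs() << Upgraded << "\n";
  return 0;
}
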
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 9601a8a..5883606 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -294,9 +294,9 @@ void DebugInfoFinder::processSubprogram(DISubprogram *SP) {
// just DISubprogram's, referenced from anywhere within the Function being
// cloned prior to calling MapMetadata / RemapInstruction to avoid their
// duplication later as DICompileUnit's are also directly referenced by
- // llvm.dbg.cu list. Thefore we need to collect DICompileUnit's here as well.
- // Also, DICompileUnit's may reference DISubprogram's too and therefore need
- // to be at least looked through.
+ // llvm.dbg.cu list. Therefore we need to collect DICompileUnit's here as
+ // well. Also, DICompileUnit's may reference DISubprogram's too and therefore
+ // need to be at least looked through.
processCompileUnit(SP->getUnit());
processType(SP->getType());
for (auto *Element : SP->getTemplateParams()) {
@@ -377,7 +377,7 @@ bool DebugInfoFinder::addScope(DIScope *Scope) {
/// Recursively handle DILocations in followup metadata etc.
///
-/// TODO: If for example a followup loop metadata would refence itself this
+/// TODO: If for example a followup loop metadata would reference itself this
/// function would go into infinite recursion. We do not expect such cycles in
/// the loop metadata (except for the self-referencing first element
/// "LoopID"). However, we could at least handle such situations more gracefully
@@ -679,7 +679,7 @@ private:
auto Variables = nullptr;
auto TemplateParams = nullptr;
- // Make a distinct DISubprogram, for situations that warrent it.
+ // Make a distinct DISubprogram, for situations that warrant it.
auto distinctMDSubprogram = [&]() {
return DISubprogram::getDistinct(
MDS->getContext(), FileAndScope, MDS->getName(), LinkageName,
@@ -1095,6 +1095,35 @@ LLVMDIBuilderCreateFile(LLVMDIBuilderRef Builder, const char *Filename,
StringRef(Directory, DirectoryLen)));
}
+static llvm::DIFile::ChecksumKind
+map_from_llvmChecksumKind(LLVMChecksumKind CSKind) {
+ switch (CSKind) {
+ case LLVMChecksumKind::CSK_MD5:
+ return llvm::DIFile::CSK_MD5;
+ case LLVMChecksumKind::CSK_SHA1:
+ return llvm::DIFile::CSK_SHA1;
+ case LLVMChecksumKind::CSK_SHA256:
+ return llvm::DIFile::CSK_SHA256;
+ }
+ llvm_unreachable("Unhandled Checksum Kind");
+}
+
+LLVMMetadataRef LLVMDIBuilderCreateFileWithChecksum(
+ LLVMDIBuilderRef Builder, const char *Filename, size_t FilenameLen,
+ const char *Directory, size_t DirectoryLen, LLVMChecksumKind ChecksumKind,
+ const char *Checksum, size_t ChecksumLen, const char *Source,
+ size_t SourceLen) {
+ StringRef ChkSum = StringRef(Checksum, ChecksumLen);
+ auto CSK = map_from_llvmChecksumKind(ChecksumKind);
+ llvm::DIFile::ChecksumInfo<StringRef> CSInfo(CSK, ChkSum);
+ std::optional<StringRef> Src;
+ if (SourceLen > 0)
+ Src = StringRef(Source, SourceLen);
+ return wrap(unwrap(Builder)->createFile(StringRef(Filename, FilenameLen),
+ StringRef(Directory, DirectoryLen),
+ CSInfo, Src));
+}
+
LLVMMetadataRef
LLVMDIBuilderCreateModule(LLVMDIBuilderRef Builder, LLVMMetadataRef ParentScope,
const char *Name, size_t NameLen,
@@ -2014,7 +2043,7 @@ void at::remapAssignID(DenseMap<DIAssignID *, DIAssignID *> &Map,
I.setMetadata(LLVMContext::MD_DIAssignID, GetNewID(ID));
}
-/// Collect constant properies (base, size, offset) of \p StoreDest.
+/// Collect constant properties (base, size, offset) of \p StoreDest.
/// Return std::nullopt if any properties are not constants or the
/// offset from the base pointer is negative.
static std::optional<AssignmentInfo>
@@ -2300,7 +2329,7 @@ PreservedAnalyses AssignmentTrackingPass::run(Function &F,
return PreservedAnalyses::all();
// Record that this module uses assignment tracking. It doesn't matter that
- // some functons in the module may not use it - the debug info in those
+ // some functions in the module may not use it - the debug info in those
// functions will still be handled properly.
setAssignmentTrackingModuleFlag(*F.getParent());
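
The new C-API entry point takes the checksum kind plus explicit string/length pairs and only records a source string when SourceLen is non-zero. A hedged call sketch in C++: the declaring header is assumed to be llvm-c/DebugInfo.h (alongside the existing LLVMDIBuilderCreateFile), the enumerator spelling follows the switch above, and the digest is just an illustrative 32-character value:

#include "llvm-c/DebugInfo.h" // assumed location of the declaration

static LLVMMetadataRef makeFileWithMD5(LLVMDIBuilderRef DIB) {
  return LLVMDIBuilderCreateFileWithChecksum(
      DIB, "main.c", 6, "/src", 4, LLVMChecksumKind::CSK_MD5,
      "d41d8cd98f00b204e9800998ecf8427e", 32,
      /*Source=*/"", /*SourceLen=*/0); // SourceLen == 0: no source embedded
}
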
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 9db48e8..0e9535d 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -1034,6 +1034,10 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
}
// DirectX resources
+ if (Name == "dx.Padding")
+ return TargetTypeInfo(
+ ArrayType::get(Type::getInt8Ty(C), Ty->getIntParameter(0)),
+ TargetExtType::CanBeGlobal);
if (Name.starts_with("dx."))
return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::CanBeGlobal,
TargetExtType::CanBeLocal,
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index c79a950..3572852 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6479,9 +6479,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
NumRows->getZExtValue() * NumColumns->getZExtValue(),
"Result of a matrix operation does not fit in the returned vector!");
- if (Stride)
+ if (Stride) {
+ Check(Stride->getBitWidth() <= 64, "Stride bitwidth cannot exceed 64!",
+ IF);
Check(Stride->getZExtValue() >= NumRows->getZExtValue(),
"Stride must be greater or equal than the number of rows!", IF);
+ }
break;
}
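
The added check guards the getZExtValue() call that follows it: APInt::getZExtValue() asserts when the value needs more than 64 bits, so an over-wide stride constant has to surface as a verifier failure rather than trip that assertion. Minimal sketch of the guard (illustrative helper, not Verifier API):

#include "llvm/ADT/APInt.h"

// Only query a stride constant whose width guarantees it fits in uint64_t.
static bool strideIsQueryable(const llvm::APInt &Stride) {
  return Stride.getBitWidth() <= 64;
}
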
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index e6544f3..aec8891 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1257,38 +1257,6 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
return Result;
}
-void lto::updateMemProfAttributes(Module &Mod,
- const ModuleSummaryIndex &Index) {
- llvm::TimeTraceScope timeScope("LTO update memprof attributes");
- if (Index.withSupportsHotColdNew())
- return;
-
- // The profile matcher applies hotness attributes directly for allocations,
- // and those will cause us to generate calls to the hot/cold interfaces
- // unconditionally. If supports-hot-cold-new was not enabled in the LTO
- // link then assume we don't want these calls (e.g. not linking with
- // the appropriate library, or otherwise trying to disable this behavior).
- for (auto &F : Mod) {
- for (auto &BB : F) {
- for (auto &I : BB) {
- auto *CI = dyn_cast<CallBase>(&I);
- if (!CI)
- continue;
- if (CI->hasFnAttr("memprof"))
- CI->removeFnAttr("memprof");
- // Strip off all memprof metadata as it is no longer needed.
- // Importantly, this avoids the addition of new memprof attributes
- // after inlining propagation.
- // TODO: If we support additional types of MemProf metadata beyond hot
- // and cold, we will need to update the metadata based on the allocator
- // APIs supported instead of completely stripping all.
- CI->setMetadata(LLVMContext::MD_memprof, nullptr);
- CI->setMetadata(LLVMContext::MD_callsite, nullptr);
- }
- }
- }
-}
-
Error LTO::runRegularLTO(AddStreamFn AddStream) {
llvm::TimeTraceScope timeScope("Run regular LTO");
LLVMContext &CombinedCtx = RegularLTO.CombinedModule->getContext();
@@ -1346,8 +1314,6 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
}
}
- updateMemProfAttributes(*RegularLTO.CombinedModule, ThinLTO.CombinedIndex);
-
bool WholeProgramVisibilityEnabledInLTO =
Conf.HasWholeProgramVisibility &&
// If validation is enabled, upgrade visibility only when all vtables
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 11a7b32..280c3d1 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -726,7 +726,6 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
}
// Do this after any importing so that imported code is updated.
- updateMemProfAttributes(Mod, CombinedIndex);
updatePublicTypeTestCalls(Mod, CombinedIndex.withWholeProgramVisibility());
if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index a755c22..aee3c3b 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -553,7 +553,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
SFrameSection =
Ctx->getELFSection(".sframe", ELF::SHT_GNU_SFRAME, ELF::SHF_ALLOC);
- CallGraphSection = Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, 0);
+ CallGraphSection =
+ Ctx->getELFSection(".llvm.callgraph", ELF::SHT_PROGBITS, 0);
StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
@@ -1171,8 +1172,8 @@ MCObjectFileInfo::getCallGraphSection(const MCSection &TextSec) const {
}
return Ctx->getELFSection(
- ".callgraph", ELF::SHT_PROGBITS, Flags, 0, GroupName, true,
- ElfSec.getUniqueID(),
+ ".llvm.callgraph", ELF::SHT_PROGBITS, Flags, 0, GroupName,
+ /*IsComdat=*/true, ElfSec.getUniqueID(),
static_cast<const MCSymbolELF *>(TextSec.getBeginSymbol()));
}
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 421d6603..c3a27c9 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -488,6 +488,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_HEXAGON_MACH_V5, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V55, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V60, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V61, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V62, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V65, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V66, EF_HEXAGON_MACH);
@@ -499,12 +500,21 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_HEXAGON_MACH_V71T, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V73, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V75, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V77, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V79, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V81, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V83, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V85, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V87, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V89, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V91, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_ISA_V2, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V3, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V4, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V5, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V55, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V60, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V61, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V62, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V65, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V66, EF_HEXAGON_ISA);
@@ -514,6 +524,14 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_HEXAGON_ISA_V71, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V73, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V75, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V77, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V79, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V81, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V83, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V85, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V87, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V89, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V91, EF_HEXAGON_ISA);
break;
case ELF::EM_AVR:
BCaseMask(EF_AVR_ARCH_AVR1, EF_AVR_ARCH_MASK);
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 53cf004..e45cac8 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -2027,13 +2027,13 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
#define LOOPNEST_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor( \
- createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
return Error::success(); \
}
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor( \
- createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
@@ -2041,9 +2041,8 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
- MPM.addPass( \
- createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
- CREATE_PASS(Params.get()), false, false))); \
+ MPM.addPass(createModuleToFunctionPassAdaptor( \
+ createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false))); \
return Error::success(); \
}
#include "PassRegistry.def"
@@ -2142,13 +2141,13 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
#define LOOPNEST_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
- createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
return Error::success(); \
}
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
- createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
@@ -2156,9 +2155,8 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
- CGPM.addPass( \
- createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
- CREATE_PASS(Params.get()), false, false))); \
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
+ createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false))); \
return Error::success(); \
}
#include "PassRegistry.def"
@@ -2191,11 +2189,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
return Err;
// Add the nested pass manager with the appropriate adaptor.
bool UseMemorySSA = (Name == "loop-mssa");
- bool UseBFI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
- return Pipeline.Name.contains("simple-loop-unswitch");
- });
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
- UseBFI));
+ FPM.addPass(
+ createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA));
return Error::success();
}
if (Name == "machine-function") {
@@ -2248,12 +2243,12 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
// The risk is that it may become obsolete if we're not careful.
#define LOOPNEST_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
- FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
+ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false)); \
return Error::success(); \
}
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
- FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
+ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false)); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
@@ -2261,8 +2256,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
- FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), \
- false, false)); \
+ FPM.addPass( \
+ createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false)); \
return Error::success(); \
}
#include "PassRegistry.def"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index fea0d25..bd03ac0 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -519,16 +519,14 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
- /*UseMemorySSA=*/true,
- /*UseBlockFrequencyInfo=*/true));
+ /*UseMemorySSA=*/true));
FPM.addPass(
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
FPM.addPass(InstCombinePass());
// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
+ /*UseMemorySSA=*/false));
// Delete small array after loop unroll.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -710,8 +708,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
- /*UseMemorySSA=*/true,
- /*UseBlockFrequencyInfo=*/true));
+ /*UseMemorySSA=*/true));
FPM.addPass(
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
FPM.addPass(InstCombinePass());
@@ -719,8 +716,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
+ /*UseMemorySSA=*/false));
// Delete small array after loop unroll.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -773,7 +769,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
- /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ /*UseMemorySSA=*/true));
FPM.addPass(CoroElidePass());
@@ -842,8 +838,7 @@ void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
createFunctionToLoopPassAdaptor(
LoopRotatePass(EnableLoopHeaderDuplication ||
Level != OptimizationLevel::Oz),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false),
+ /*UseMemorySSA=*/false),
PTO.EagerlyInvalidateAnalyses));
}
}
@@ -1358,8 +1353,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
OptimizationLevel::O3));
ExtraPasses.addPass(
- createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
- /*UseBlockFrequencyInfo=*/true));
+ createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
ExtraPasses.addPass(
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
ExtraPasses.addPass(InstCombinePass());
@@ -1438,7 +1432,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
- /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ /*UseMemorySSA=*/true));
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
@@ -1520,7 +1514,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
- /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ /*USeMemorySSA=*/true));
}
OptimizePM.addPass(Float2IntPass());
@@ -1560,8 +1554,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
if (PTO.LoopInterchange)
LPM.addPass(LoopInterchangePass());
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
+ OptimizePM.addPass(
+ createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
// FIXME: This may not be the right place in the pipeline.
// We need to have the data to support the right place.
@@ -1658,6 +1652,16 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
ModulePassManager MPM;
+ // Currently this pipeline is only invoked in an LTO pre link pass or when we
+ // are not running LTO. If that changes the below checks may need updating.
+ assert(isLTOPreLink(Phase) || Phase == ThinOrFullLTOPhase::None);
+
+ // If we are invoking this in non-LTO mode, remove any MemProf related
+ // attributes and metadata, as we don't know whether we are linking with
+ // a library containing the necessary interfaces.
+ if (Phase == ThinOrFullLTOPhase::None)
+ MPM.addPass(MemProfRemoveInfo());
+
// Convert @llvm.global.annotations to !annotation metadata.
MPM.addPass(Annotation2MetadataPass());
@@ -1803,6 +1807,12 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
ModulePassManager MPM;
+ // If we are invoking this without a summary index noting that we are linking
+ // with a library containing the necessary APIs, remove any MemProf related
+ // attributes and metadata.
+ if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
+ MPM.addPass(MemProfRemoveInfo());
+
if (ImportSummary) {
// For ThinLTO we must apply the context disambiguation decisions early, to
// ensure we can correctly match the callsites to summary data.
@@ -1874,6 +1884,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
+ // If we are invoking this without a summary index noting that we are linking
+ // with a library containing the necessary APIs, remove any MemProf related
+ // attributes and metadata.
+ if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
+ MPM.addPass(MemProfRemoveInfo());
+
// Create a function that performs CFI checks for cross-DSO calls with targets
// in the current module.
MPM.addPass(CrossDSOCFIPass());
@@ -2111,7 +2127,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MainFPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
- /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ /*USeMemorySSA=*/true));
if (RunNewGVN)
MainFPM.addPass(NewGVNPass());
@@ -2141,8 +2157,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
PTO.ForgetAllSCEVInLoopUnroll));
// The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
- MainFPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
+ MainFPM.addPass(
+ createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
MainFPM.addPass(LoopDistributePass());
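
Throughout these pipeline files the trailing UseBlockFrequencyInfo argument to createFunctionToLoopPassAdaptor is dropped; only the MemorySSA flag stays explicit. A minimal sketch of the resulting call shape (headers assumed; whether the adaptor now derives its BFI requirement internally is not shown in this diff):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopRotation.h"
#include <utility>

static void addRotation(llvm::FunctionPassManager &FPM) {
  llvm::LoopPassManager LPM;
  LPM.addPass(llvm::LoopRotatePass());
  FPM.addPass(llvm::createFunctionToLoopPassAdaptor(std::move(LPM),
                                                    /*UseMemorySSA=*/false));
}
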
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 1b16525..884d8da 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -113,6 +113,7 @@ MODULE_PASS("pgo-force-function-attrs",
? PGOOpt->ColdOptType
: PGOOptions::ColdFuncOpt::Default))
MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
+MODULE_PASS("memprof-remove-attributes", MemProfRemoveInfo())
MODULE_PASS("memprof-module", ModuleMemProfilerPass())
MODULE_PASS("mergefunc", MergeFunctionsPass())
MODULE_PASS("metarenamer", MetaRenamerPass())
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 3c8e44a..0208735 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -302,7 +302,7 @@ void ProfOStream::patch(ArrayRef<PatchItem> P) {
std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage,
StringRef FileName,
- uint64_t Version LLVM_ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] uint64_t Version) {
// Value names may be prefixed with a binary '1' to indicate
// that the backend should not modify the symbols due to any platform
// naming convention. Do not include that '1' in the PGO profile name.
diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.h b/llvm/lib/Remarks/BitstreamRemarkParser.h
index 4f66c47..914edd8 100644
--- a/llvm/lib/Remarks/BitstreamRemarkParser.h
+++ b/llvm/lib/Remarks/BitstreamRemarkParser.h
@@ -112,7 +112,7 @@ public:
/// Helper to parse a META_BLOCK for a bitstream remark container.
class BitstreamMetaParserHelper
: public BitstreamBlockParserHelper<BitstreamMetaParserHelper> {
- friend class BitstreamBlockParserHelper;
+ friend class BitstreamBlockParserHelper<BitstreamMetaParserHelper>;
public:
struct ContainerInfo {
@@ -137,7 +137,7 @@ protected:
/// Helper to parse a REMARK_BLOCK for a bitstream remark container.
class BitstreamRemarkParserHelper
: public BitstreamBlockParserHelper<BitstreamRemarkParserHelper> {
- friend class BitstreamBlockParserHelper;
+ friend class BitstreamBlockParserHelper<BitstreamRemarkParserHelper>;
protected:
SmallVector<uint64_t, 5> Record;
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 8623c06..b4de79a 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -130,44 +130,46 @@ struct fltSemantics {
bool hasSignBitInMSB = true;
};
-static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
-static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
-static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
-static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
-static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
-static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
-static constexpr fltSemantics semFloat8E5M2FNUZ = {
+constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
+constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
+constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
+constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
+constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
+constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
+constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
-static constexpr fltSemantics semFloat8E4M3FN = {
+constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
+constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
-static constexpr fltSemantics semFloat8E4M3FNUZ = {
+constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
+constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
-static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
-static constexpr fltSemantics semFloat8E8M0FNU = {127,
- -127,
- 1,
- 8,
- fltNonfiniteBehavior::NanOnly,
- fltNanEncoding::AllOnes,
- false,
- false,
- false};
-
-static constexpr fltSemantics semFloat6E3M2FN = {
+constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
+constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
+constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
+ 127,
+ -127,
+ 1,
+ 8,
+ fltNonfiniteBehavior::NanOnly,
+ fltNanEncoding::AllOnes,
+ false,
+ false,
+ false};
+
+constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
-static constexpr fltSemantics semFloat6E2M3FN = {
+constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
-static constexpr fltSemantics semFloat4E2M1FN = {
+constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
-static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
-static constexpr fltSemantics semBogus = {0, 0, 0, 0};
-static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
-static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
- 53 + 53, 128};
+constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
+ 80};
+constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
+constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
+constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
+ 1023, -1022 + 53, 53 + 53, 128};
const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
switch (S) {
@@ -261,36 +263,6 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
llvm_unreachable("Unknown floating semantics");
}
-const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
-const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
-const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
-const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
-const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
-const fltSemantics &APFloatBase::PPCDoubleDouble() {
- return semPPCDoubleDouble;
-}
-const fltSemantics &APFloatBase::PPCDoubleDoubleLegacy() {
- return semPPCDoubleDoubleLegacy;
-}
-const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
-const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
-const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }
-const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
-const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
-const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
- return semFloat8E4M3B11FNUZ;
-}
-const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; }
-const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
-const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; }
-const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
-const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
-const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; }
-const fltSemantics &APFloatBase::x87DoubleExtended() {
- return semX87DoubleExtended;
-}
-const fltSemantics &APFloatBase::Bogus() { return semBogus; }
-
bool APFloatBase::isRepresentableBy(const fltSemantics &A,
const fltSemantics &B) {
return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
@@ -1029,7 +1001,7 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
// For x87 extended precision, we want to make a NaN, not a
// pseudo-NaN. Maybe we should expose the ability to make
// pseudo-NaNs?
- if (semantics == &semX87DoubleExtended)
+ if (semantics == &APFloatBase::semX87DoubleExtended)
APInt::tcSetBit(significand, QNaNBit + 1);
}
@@ -1054,7 +1026,7 @@ IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
category = rhs.category;
sign = rhs.sign;
- rhs.semantics = &semBogus;
+ rhs.semantics = &APFloatBase::semBogus;
return *this;
}
@@ -1247,7 +1219,7 @@ IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
assign(rhs);
}
-IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
+IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
*this = std::move(rhs);
}
@@ -2607,8 +2579,8 @@ APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
shift = toSemantics.precision - fromSemantics.precision;
bool X86SpecialNan = false;
- if (&fromSemantics == &semX87DoubleExtended &&
- &toSemantics != &semX87DoubleExtended && category == fcNaN &&
+ if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
+ &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
(!(*significandParts() & 0x8000000000000000ULL) ||
!(*significandParts() & 0x4000000000000000ULL))) {
// x86 has some unusual NaNs which cannot be represented in any other
@@ -2694,7 +2666,7 @@ APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
// For x87 extended precision, we want to make a NaN, not a special NaN if
// the input wasn't special either.
- if (!X86SpecialNan && semantics == &semX87DoubleExtended)
+ if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
APInt::tcSetBit(significandParts(), semantics->precision - 1);
// Convert of sNaN creates qNaN and raises an exception (invalid op).
@@ -3530,7 +3502,8 @@ hash_code hash_value(const IEEEFloat &Arg) {
// the actual IEEE representations. We compensate for that here.
APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
+ assert(semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
assert(partCount()==2);
uint64_t myexponent, mysignificand;
@@ -3560,7 +3533,8 @@ APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
}
APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
+ assert(semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
assert(partCount()==2);
uint64_t words[2];
@@ -3574,14 +3548,14 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
// Declare fltSemantics before APFloat that uses it (and
// saves pointer to it) to ensure correct destruction order.
fltSemantics extendedSemantics = *semantics;
- extendedSemantics.minExponent = semIEEEdouble.minExponent;
+ extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
IEEEFloat extended(*this);
fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
IEEEFloat u(extended);
- fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
+ fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK || fs == opInexact);
(void)fs;
words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
@@ -3597,7 +3571,7 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
IEEEFloat v(extended);
v.subtract(u, rmNearestTiesToEven);
- fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
+ fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
@@ -3611,8 +3585,9 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
template <const fltSemantics &S>
APInt IEEEFloat::convertIEEEFloatToAPInt() const {
assert(semantics == &S);
- const int bias =
- (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1);
+ const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
+ ? -S.minExponent
+ : -(S.minExponent - 1);
constexpr unsigned int trailing_significand_bits = S.precision - 1;
constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
constexpr integerPart integer_bit =
@@ -3677,87 +3652,87 @@ APInt IEEEFloat::convertIEEEFloatToAPInt() const {
APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
assert(partCount() == 2);
- return convertIEEEFloatToAPInt<semIEEEquad>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
}
APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
assert(partCount()==1);
- return convertIEEEFloatToAPInt<semIEEEdouble>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
}
APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
assert(partCount()==1);
- return convertIEEEFloatToAPInt<semIEEEsingle>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
}
APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semBFloat>();
+ return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
}
APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
assert(partCount()==1);
- return convertIEEEFloatToAPInt<semIEEEhalf>();
+  return convertIEEEFloatToAPInt<APFloatBase::semIEEEhalf>();
}
APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E5M2>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
}
APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
}
APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
}
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
}
APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
}
APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
}
APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E3M4>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
}
APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloatTF32>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
}
APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
}
APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
}
APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
}
APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
}
// This function creates an APInt that is just a bit map of the floating
@@ -3765,74 +3740,77 @@ APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
// and treating the result as a normal integer is unlikely to be useful.
APInt IEEEFloat::bitcastToAPInt() const {
- if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
return convertHalfAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semBFloat)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
return convertBFloatAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
return convertFloatAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
return convertDoubleAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
return convertQuadrupleAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
+ if (semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
return convertFloat8E5M2APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
return convertFloat8E5M2FNUZAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
return convertFloat8E4M3APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
return convertFloat8E4M3FNAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
return convertFloat8E4M3FNUZAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
+ if (semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
return convertFloat8E4M3B11FNUZAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
return convertFloat8E3M4APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
return convertFloatTF32APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
return convertFloat8E8M0FNUAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
return convertFloat6E3M2FNAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
return convertFloat6E2M3FNAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
return convertFloat4E2M1FNAPFloatToAPInt();
- assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
+ assert(semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
"unknown format!");
return convertF80LongDoubleAPFloatToAPInt();
}
float IEEEFloat::convertToFloat() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
+ assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
"Float semantics are not IEEEsingle");
APInt api = bitcastToAPInt();
return api.bitsToFloat();
}
double IEEEFloat::convertToDouble() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
+ assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
"Float semantics are not IEEEdouble");
APInt api = bitcastToAPInt();
return api.bitsToDouble();
@@ -3840,7 +3818,7 @@ double IEEEFloat::convertToDouble() const {
#ifdef HAS_IEE754_FLOAT128
float128 IEEEFloat::convertToQuad() const {
- assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
+ assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
"Float semantics are not IEEEquads");
APInt api = bitcastToAPInt();
return api.bitsToQuad();
@@ -3861,7 +3839,7 @@ void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
uint64_t mysignificand = i1;
uint8_t myintegerbit = mysignificand >> 63;
- initialize(&semX87DoubleExtended);
+ initialize(&APFloatBase::semX87DoubleExtended);
assert(partCount()==2);
sign = static_cast<unsigned int>(i2>>15);
@@ -3893,14 +3871,16 @@ void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
// Get the first double and convert to our format.
initFromDoubleAPInt(APInt(64, i1));
- fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
+ fs = convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
+ &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
// Unless we have a special case, add in second double.
if (isFiniteNonZero()) {
- IEEEFloat v(semIEEEdouble, APInt(64, i2));
- fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
+ IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
+ fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
+ &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
@@ -3918,7 +3898,7 @@ void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
uint64_t val = api.getRawData()[0];
uint64_t myexponent = (val & exponent_mask);
- initialize(&semFloat8E8M0FNU);
+ initialize(&APFloatBase::semFloat8E8M0FNU);
assert(partCount() == 1);
// This format has unsigned representation only
@@ -4025,109 +4005,109 @@ void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
}
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEquad>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
}
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEdouble>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
}
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEsingle>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
}
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
- initFromIEEEAPInt<semBFloat>(api);
+ initFromIEEEAPInt<APFloatBase::semBFloat>(api);
}
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEhalf>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
}
void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E5M2>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
}
void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
}
void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
}
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
}
void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
}
void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
}
void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E3M4>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
}
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
- initFromIEEEAPInt<semFloatTF32>(api);
+ initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
}
void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat6E3M2FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
}
void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat6E2M3FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
}
void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat4E2M1FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
}
/// Treat api as containing the bits of a floating point number.
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
assert(api.getBitWidth() == Sem->sizeInBits);
- if (Sem == &semIEEEhalf)
+ if (Sem == &APFloatBase::semIEEEhalf)
return initFromHalfAPInt(api);
- if (Sem == &semBFloat)
+ if (Sem == &APFloatBase::semBFloat)
return initFromBFloatAPInt(api);
- if (Sem == &semIEEEsingle)
+ if (Sem == &APFloatBase::semIEEEsingle)
return initFromFloatAPInt(api);
- if (Sem == &semIEEEdouble)
+ if (Sem == &APFloatBase::semIEEEdouble)
return initFromDoubleAPInt(api);
- if (Sem == &semX87DoubleExtended)
+ if (Sem == &APFloatBase::semX87DoubleExtended)
return initFromF80LongDoubleAPInt(api);
- if (Sem == &semIEEEquad)
+ if (Sem == &APFloatBase::semIEEEquad)
return initFromQuadrupleAPInt(api);
- if (Sem == &semPPCDoubleDoubleLegacy)
+ if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
return initFromPPCDoubleDoubleLegacyAPInt(api);
- if (Sem == &semFloat8E5M2)
+ if (Sem == &APFloatBase::semFloat8E5M2)
return initFromFloat8E5M2APInt(api);
- if (Sem == &semFloat8E5M2FNUZ)
+ if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
return initFromFloat8E5M2FNUZAPInt(api);
- if (Sem == &semFloat8E4M3)
+ if (Sem == &APFloatBase::semFloat8E4M3)
return initFromFloat8E4M3APInt(api);
- if (Sem == &semFloat8E4M3FN)
+ if (Sem == &APFloatBase::semFloat8E4M3FN)
return initFromFloat8E4M3FNAPInt(api);
- if (Sem == &semFloat8E4M3FNUZ)
+ if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
return initFromFloat8E4M3FNUZAPInt(api);
- if (Sem == &semFloat8E4M3B11FNUZ)
+ if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
return initFromFloat8E4M3B11FNUZAPInt(api);
- if (Sem == &semFloat8E3M4)
+ if (Sem == &APFloatBase::semFloat8E3M4)
return initFromFloat8E3M4APInt(api);
- if (Sem == &semFloatTF32)
+ if (Sem == &APFloatBase::semFloatTF32)
return initFromFloatTF32APInt(api);
- if (Sem == &semFloat8E8M0FNU)
+ if (Sem == &APFloatBase::semFloat8E8M0FNU)
return initFromFloat8E8M0FNUAPInt(api);
- if (Sem == &semFloat6E3M2FN)
+ if (Sem == &APFloatBase::semFloat6E3M2FN)
return initFromFloat6E3M2FNAPInt(api);
- if (Sem == &semFloat6E2M3FN)
+ if (Sem == &APFloatBase::semFloat6E2M3FN)
return initFromFloat6E2M3FNAPInt(api);
- if (Sem == &semFloat4E2M1FN)
+ if (Sem == &APFloatBase::semFloat4E2M1FN)
return initFromFloat4E2M1FNAPInt(api);
llvm_unreachable("unsupported semantics");
@@ -4202,11 +4182,11 @@ IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
}
IEEEFloat::IEEEFloat(float f) {
- initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
+ initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
}
IEEEFloat::IEEEFloat(double d) {
- initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
+ initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
}
namespace {
@@ -4815,38 +4795,40 @@ IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
: Semantics(&S),
- Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
- assert(Semantics == &semPPCDoubleDouble);
+ Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
+ APFloat(APFloatBase::semIEEEdouble)}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
- : Semantics(&S),
- Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
- APFloat(semIEEEdouble, uninitialized)}) {
- assert(Semantics == &semPPCDoubleDouble);
+ : Semantics(&S), Floats(new APFloat[2]{
+ APFloat(APFloatBase::semIEEEdouble, uninitialized),
+ APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
- : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
- APFloat(semIEEEdouble)}) {
- assert(Semantics == &semPPCDoubleDouble);
+ : Semantics(&S),
+ Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
+ APFloat(APFloatBase::semIEEEdouble)}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
: Semantics(&S),
Floats(new APFloat[2]{
- APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
- APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
- assert(Semantics == &semPPCDoubleDouble);
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
APFloat &&Second)
: Semantics(&S),
Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
- assert(Semantics == &semPPCDoubleDouble);
- assert(&Floats[0].getSemantics() == &semIEEEdouble);
- assert(&Floats[1].getSemantics() == &semIEEEdouble);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
+ assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
}
DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
@@ -4854,14 +4836,14 @@ DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
APFloat(RHS.Floats[1])}
: nullptr) {
- assert(Semantics == &semPPCDoubleDouble);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
: Semantics(RHS.Semantics), Floats(RHS.Floats) {
- RHS.Semantics = &semBogus;
+ RHS.Semantics = &APFloatBase::semBogus;
RHS.Floats = nullptr;
- assert(Semantics == &semPPCDoubleDouble);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
@@ -5009,12 +4991,12 @@ APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
CC(RHS.Floats[1]);
- assert(&A.getSemantics() == &semIEEEdouble);
- assert(&AA.getSemantics() == &semIEEEdouble);
- assert(&C.getSemantics() == &semIEEEdouble);
- assert(&CC.getSemantics() == &semIEEEdouble);
- assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
- assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
+ assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
return Out.addImpl(A, AA, C, CC, RM);
}
@@ -5119,28 +5101,32 @@ APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
APFloat::roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
- auto Ret =
- Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.divide(
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
- auto Ret =
- Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.remainder(
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
- auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.mod(
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
@@ -5148,17 +5134,21 @@ APFloat::opStatus
DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
const DoubleAPFloat &Addend,
APFloat::roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
auto Ret = Tmp.fusedMultiplyAdd(
- APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
- APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
+ Multiplicand.bitcastToAPInt()),
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
+ RM);
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
const APFloat &Hi = getFirst();
const APFloat &Lo = getSecond();
@@ -5309,22 +5299,28 @@ void DoubleAPFloat::makeZero(bool Neg) {
}
void DoubleAPFloat::makeLargest(bool Neg) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
- Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ Floats[0] =
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
+ Floats[1] =
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
if (Neg)
changeSign();
}
void DoubleAPFloat::makeSmallest(bool Neg) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
Floats[0].makeSmallest(Neg);
Floats[1].makeZero(/* Neg = */ false);
}
void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ Floats[0] =
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
if (Neg)
Floats[0].changeSign();
Floats[1].makeZero(/* Neg = */ false);
@@ -5355,7 +5351,8 @@ hash_code hash_value(const DoubleAPFloat &Arg) {
}
APInt DoubleAPFloat::bitcastToAPInt() const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
uint64_t Data[] = {
Floats[0].bitcastToAPInt().getRawData()[0],
Floats[1].bitcastToAPInt().getRawData()[0],
@@ -5365,10 +5362,11 @@ APInt DoubleAPFloat::bitcastToAPInt() const {
Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
auto Ret = Tmp.convertFromString(S, RM);
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
@@ -5379,7 +5377,8 @@ Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
// nextUp must choose the smallest output > input that follows these rules.
// nextDown must choose the largest output < input that follows these rules.
APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
// nextDown(x) = -nextUp(-x)
if (nextDown) {
changeSign();
@@ -5481,7 +5480,8 @@ APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
roundingMode RM, bool *IsExact) const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
// If Hi is not finite, or Lo is zero, the value is entirely represented
// by Hi. Delegate to the simpler single-APFloat conversion.
@@ -5761,8 +5761,9 @@ unsigned int DoubleAPFloat::convertToHexString(char *DST,
unsigned int HexDigits,
bool UpperCase,
roundingMode RM) const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
.convertToHexString(DST, HexDigits, UpperCase, RM);
}
@@ -5799,7 +5800,8 @@ bool DoubleAPFloat::isLargest() const {
}
bool DoubleAPFloat::isInteger() const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
return Floats[0].isInteger() && Floats[1].isInteger();
}
@@ -5807,8 +5809,9 @@ void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
unsigned FormatPrecision,
unsigned FormatMaxPadding,
bool TruncateZero) const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
.toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
}
@@ -5840,14 +5843,17 @@ int ilogb(const DoubleAPFloat &Arg) {
DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
APFloat::roundingMode RM) {
- assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
+ assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
+ "Unexpected Semantics");
+ return DoubleAPFloat(APFloatBase::PPCDoubleDouble(),
+ scalbn(Arg.Floats[0], Exp, RM),
scalbn(Arg.Floats[1], Exp, RM));
}
DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
APFloat::roundingMode RM) {
- assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
+ "Unexpected Semantics");
// Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
// [1.0, 2.0).
@@ -5943,7 +5949,8 @@ DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
}
APFloat First = scalbn(Hi, -Exp, RM);
- return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
+ return DoubleAPFloat(APFloatBase::PPCDoubleDouble(), std::move(First),
+ std::move(Second));
}
} // namespace detail
@@ -5955,9 +5962,8 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
}
if (usesLayout<DoubleAPFloat>(Semantics)) {
const fltSemantics& S = F.getSemantics();
- new (&Double)
- DoubleAPFloat(Semantics, APFloat(std::move(F), S),
- APFloat(semIEEEdouble));
+ new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
+ APFloat(APFloatBase::IEEEdouble()));
return;
}
llvm_unreachable("Unexpected semantics");
@@ -6065,8 +6071,9 @@ APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
return U.IEEE.convert(ToSemantics, RM, losesInfo);
if (usesLayout<IEEEFloat>(getSemantics()) &&
usesLayout<DoubleAPFloat>(ToSemantics)) {
- assert(&ToSemantics == &semPPCDoubleDouble);
- auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
+ assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
+ auto Ret =
+ U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
*this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
return Ret;
}
@@ -6113,13 +6120,15 @@ APFloat::opStatus APFloat::convertToInteger(APSInt &result,
}
double APFloat::convertToDouble() const {
- if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
+ if (&getSemantics() ==
+ (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
return getIEEE().convertToDouble();
assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
"Float semantics is not representable by IEEEdouble");
APFloat Temp = *this;
bool LosesInfo;
- opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
+ opStatus St =
+ Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
(void)St;
return Temp.getIEEE().convertToDouble();
@@ -6127,13 +6136,14 @@ double APFloat::convertToDouble() const {
#ifdef HAS_IEE754_FLOAT128
float128 APFloat::convertToQuad() const {
- if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad)
+ if (&getSemantics() == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
return getIEEE().convertToQuad();
assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
"Float semantics is not representable by IEEEquad");
APFloat Temp = *this;
bool LosesInfo;
- opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
+ opStatus St =
+ Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
(void)St;
return Temp.getIEEE().convertToQuad();
@@ -6141,18 +6151,84 @@ float128 APFloat::convertToQuad() const {
#endif
float APFloat::convertToFloat() const {
- if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
+ if (&getSemantics() ==
+ (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
return getIEEE().convertToFloat();
assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
"Float semantics is not representable by IEEEsingle");
APFloat Temp = *this;
bool LosesInfo;
- opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
+ opStatus St =
+ Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
(void)St;
return Temp.getIEEE().convertToFloat();
}
+APFloat::Storage::~Storage() {
+ if (usesLayout<IEEEFloat>(*semantics)) {
+ IEEE.~IEEEFloat();
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*semantics)) {
+ Double.~DoubleAPFloat();
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
+
+APFloat::Storage::Storage(const APFloat::Storage &RHS) {
+ if (usesLayout<IEEEFloat>(*RHS.semantics)) {
+ new (this) IEEEFloat(RHS.IEEE);
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ new (this) DoubleAPFloat(RHS.Double);
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
+
+APFloat::Storage::Storage(APFloat::Storage &&RHS) {
+ if (usesLayout<IEEEFloat>(*RHS.semantics)) {
+ new (this) IEEEFloat(std::move(RHS.IEEE));
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ new (this) DoubleAPFloat(std::move(RHS.Double));
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
+
+APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
+ if (usesLayout<IEEEFloat>(*semantics) &&
+ usesLayout<IEEEFloat>(*RHS.semantics)) {
+ IEEE = RHS.IEEE;
+ } else if (usesLayout<DoubleAPFloat>(*semantics) &&
+ usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ Double = RHS.Double;
+ } else if (this != &RHS) {
+ this->~Storage();
+ new (this) Storage(RHS);
+ }
+ return *this;
+}
+
+APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
+ if (usesLayout<IEEEFloat>(*semantics) &&
+ usesLayout<IEEEFloat>(*RHS.semantics)) {
+ IEEE = std::move(RHS.IEEE);
+ } else if (usesLayout<DoubleAPFloat>(*semantics) &&
+ usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ Double = std::move(RHS.Double);
+ } else if (this != &RHS) {
+ this->~Storage();
+ new (this) Storage(std::move(RHS));
+ }
+ return *this;
+}
+
} // namespace llvm
#undef APFLOAT_DISPATCH_ON_SEMANTICS
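A minimal usage sketch for the APFloat.cpp refactor above, assuming the public accessor declarations in APFloat.h are otherwise unchanged: the semantics objects now live as static members of APFloatBase, but callers still reach them through the same accessors, so existing code compiles as before. Illustration only, not part of the patch.

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include <cstdint>
using namespace llvm;

// Build an IEEE half from raw bits; APFloat::IEEEhalf() resolves to the same
// fltSemantics object the implementation now stores as a static member.
APFloat makeHalfFromBits(uint16_t Bits) {
  return APFloat(APFloat::IEEEhalf(), APInt(16, Bits));
}

// isRepresentableBy() is the same static helper defined in the hunk above.
bool fitsInDouble(const APFloat &X) {
  return APFloat::isRepresentableBy(X.getSemantics(), APFloat::IEEEdouble());
}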
diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp
index 82b0e6a..eff9947 100644
--- a/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/llvm/lib/Support/PrettyStackTrace.cpp
@@ -141,7 +141,7 @@ extern "C" const char *__crashreporter_info__
asm(".desc ___crashreporter_info__, 0x10");
#endif
-static void setCrashLogMessage(const char *msg) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] static void setCrashLogMessage(const char *msg);
static void setCrashLogMessage(const char *msg) {
#ifdef HAVE_CRASHREPORTERCLIENT_H
(void)CRSetCrashLogMessage(msg);
diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp
index a43cf37a..299615a 100644
--- a/llvm/lib/Support/SourceMgr.cpp
+++ b/llvm/lib/Support/SourceMgr.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -38,6 +39,22 @@ using namespace llvm;
static const size_t TabStop = 8;
+// Out of line to avoid needing definition of vfs::FileSystem in header.
+SourceMgr::SourceMgr() = default;
+SourceMgr::SourceMgr(IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : FS(std::move(FS)) {}
+SourceMgr::SourceMgr(SourceMgr &&) = default;
+SourceMgr &SourceMgr::operator=(SourceMgr &&) = default;
+SourceMgr::~SourceMgr() = default;
+
+IntrusiveRefCntPtr<vfs::FileSystem> SourceMgr::getVirtualFileSystem() const {
+ return FS;
+}
+
+void SourceMgr::setVirtualFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+ this->FS = std::move(FS);
+}
+
unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
SMLoc IncludeLoc,
std::string &IncludedFile) {
@@ -52,8 +69,11 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
ErrorOr<std::unique_ptr<MemoryBuffer>>
SourceMgr::OpenIncludeFile(const std::string &Filename,
std::string &IncludedFile) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
- MemoryBuffer::getFile(Filename);
+ auto GetFile = [this](StringRef Path) {
+ return FS ? FS->getBufferForFile(Path) : MemoryBuffer::getFile(Path);
+ };
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr = GetFile(Filename);
SmallString<64> Buffer(Filename);
// If the file didn't exist directly, see if it's in an include path.
@@ -61,7 +81,7 @@ SourceMgr::OpenIncludeFile(const std::string &Filename,
++i) {
Buffer = IncludeDirectories[i];
sys::path::append(Buffer, Filename);
- NewBufOrErr = MemoryBuffer::getFile(Buffer);
+ NewBufOrErr = GetFile(Buffer);
}
if (NewBufOrErr)
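A usage sketch of the new SourceMgr virtual-file-system hooks added above (setVirtualFileSystem / getVirtualFileSystem); the paths and buffer contents here are invented for illustration only.

#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/VirtualFileSystem.h"
using namespace llvm;

void parseFromInMemoryTree() {
  // Populate an in-memory file system with a single include file.
  auto FS = makeIntrusiveRefCnt<vfs::InMemoryFileSystem>();
  FS->addFile("/virtual/inc/common.td", /*ModificationTime=*/0,
              MemoryBuffer::getMemBuffer("def Foo;"));

  SourceMgr SrcMgr;
  SrcMgr.setVirtualFileSystem(std::move(FS));
  SrcMgr.setIncludeDirs({"/virtual/inc"});

  // OpenIncludeFile/AddIncludeFile now resolve through the VFS (when one is
  // set) instead of always hitting the real file system.
  std::string Resolved;
  unsigned BufID = SrcMgr.AddIncludeFile("common.td", SMLoc(), Resolved);
  (void)BufID; // 0 would mean the include could not be found.
}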
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index dad0fa3..648d6a5 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -354,8 +354,8 @@ namespace llvm {
/// Emulates hitting "retry" from an "abort, retry, ignore" CRT debug report
/// dialog. "retry" raises an exception which ultimately triggers our stack
/// dumper.
-static LLVM_ATTRIBUTE_UNUSED int
-AvoidMessageBoxHook(int ReportType, char *Message, int *Return) {
+[[maybe_unused]] static int AvoidMessageBoxHook(int ReportType, char *Message,
+ int *Return) {
// Set *Return to the retry code for the return value of _CrtDbgReport:
// http://msdn.microsoft.com/en-us/library/8hyw4sy7(v=vs.71).aspx
// This may also trigger just-in-time debugging via DebugBreak().
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 42043f7..b1024a8 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
@@ -128,6 +129,7 @@ int llvm::TableGenMain(const char *argv0,
// Record the location of the include directory so that the lexer can find
// it later.
SrcMgr.setIncludeDirs(IncludeDirs);
+ SrcMgr.setVirtualFileSystem(vfs::getRealFileSystem());
TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs);
diff --git a/llvm/lib/TableGen/Parser.cpp b/llvm/lib/TableGen/Parser.cpp
index 2c3726a..db45054 100644
--- a/llvm/lib/TableGen/Parser.cpp
+++ b/llvm/lib/TableGen/Parser.cpp
@@ -9,6 +9,7 @@
#include "llvm/TableGen/Parser.h"
#include "TGParser.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TableGen/Record.h"
using namespace llvm;
@@ -21,6 +22,7 @@ bool llvm::TableGenParseFile(SourceMgr &InputSrcMgr, RecordKeeper &Records) {
SrcMgr = SourceMgr();
SrcMgr.takeSourceBuffersFrom(InputSrcMgr);
SrcMgr.setIncludeDirs(InputSrcMgr.getIncludeDirs());
+ SrcMgr.setVirtualFileSystem(InputSrcMgr.getVirtualFileSystem());
SrcMgr.setDiagHandler(InputSrcMgr.getDiagHandler(),
InputSrcMgr.getDiagContext());
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index be2f2e4..662d84b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1561,6 +1561,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
@@ -1717,6 +1718,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -7775,6 +7777,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
return LowerVECREDUCE(Op, DAG);
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_FMUL:
+ return LowerVECREDUCE_MUL(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
@@ -16794,6 +16799,33 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
}
}
+SDValue AArch64TargetLowering::LowerVECREDUCE_MUL(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ assert(SrcVT.isScalableVector() && "Unexpected operand type!");
+
+ SDVTList SrcVTs = DAG.getVTList(SrcVT, SrcVT);
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
+ SDValue Identity = DAG.getNeutralElement(BaseOpc, DL, SrcVT, Op->getFlags());
+
+  // While we don't know the exact size of the vector, we do know its maximum
+  // size, so we can perform a tree reduction with an identity vector. This
+  // means that once we arrive at the result, the remaining stages (run when
+  // the vector is smaller than the maximum) have no effect.
+
+ unsigned Segments = AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
+ unsigned Stages = llvm::Log2_32(Segments * SrcVT.getVectorMinNumElements());
+
+ for (unsigned I = 0; I < Stages; ++I) {
+ Src = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, SrcVTs, Src, Identity);
+ Src = DAG.getNode(BaseOpc, DL, SrcVT, Src.getValue(0), Src.getValue(1));
+ }
+
+ return DAG.getExtractVectorElt(DL, Op.getValueType(), Src, 0);
+}
+
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
@@ -18144,8 +18176,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
- if (Factor != 2 && Factor != 4) {
- LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
+ if (Factor != 2 && Factor != 3 && Factor != 4) {
+ LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
return false;
}
auto *LI = dyn_cast<LoadInst>(Load);
@@ -18223,8 +18255,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
Instruction *Store, Value *Mask,
ArrayRef<Value *> InterleavedValues) const {
unsigned Factor = InterleavedValues.size();
- if (Factor != 2 && Factor != 4) {
- LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
+ if (Factor != 2 && Factor != 3 && Factor != 4) {
+ LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
return false;
}
StoreInst *SI = dyn_cast<StoreInst>(Store);
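The comment in LowerVECREDUCE_MUL above relies on a simple invariant: padding with the neutral element makes the extra reduction stages harmless. A scalar sketch of that idea follows; it models the tree reduction only, not the actual VECTOR_DEINTERLEAVE-based SelectionDAG lowering, and the helper name is made up.

#include <cassert>
#include <cstdint>
#include <vector>

// Scalar model of a tree reduction padded with the identity: after
// log2(MaxElts) pairwise stages, element 0 holds the product, and any stage
// that runs past the real element count only multiplies by 1, so it cannot
// change the result. MaxElts is assumed to be a nonzero power of two.
uint64_t treeReduceMul(std::vector<uint64_t> Vals, unsigned MaxElts) {
  assert(MaxElts != 0 && (MaxElts & (MaxElts - 1)) == 0 &&
         Vals.size() <= MaxElts);
  const uint64_t Identity = 1; // neutral element for multiplication
  Vals.resize(MaxElts, Identity);
  for (unsigned Width = MaxElts; Width > 1; Width /= 2)
    for (unsigned I = 0; I < Width / 2; ++I)
      Vals[I] = Vals[2 * I] * Vals[2 * I + 1]; // combine adjacent pairs
  return Vals[0];
}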
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 00956fd..9495c9f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -752,6 +752,7 @@ private:
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECREDUCE_MUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7322212..fe84193 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -233,6 +233,12 @@ def G_SDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_USDOT : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -278,6 +284,7 @@ def : GINodeEquiv<G_UADDLV, AArch64uaddlv>;
def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
+def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 30dfcf2b..12c600f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -10600,6 +10600,9 @@ describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
Register DestReg = DestSrc->Destination->getReg();
Register SrcReg = DestSrc->Source->getReg();
+ if (!DestReg.isValid() || !SrcReg.isValid())
+ return std::nullopt;
+
auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
// If the described register is the destination, just return the source.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9e2d698..05a4313 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1855,6 +1855,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerTriOp(AArch64::G_UDOT);
case Intrinsic::aarch64_neon_sdot:
return LowerTriOp(AArch64::G_SDOT);
+ case Intrinsic::aarch64_neon_usdot:
+ return LowerTriOp(AArch64::G_USDOT);
case Intrinsic::aarch64_neon_sqxtn:
return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
case Intrinsic::aarch64_neon_sqxtun:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1b559a6..8ed4062 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -514,8 +514,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
- setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32,
- Legal);
+ setOperationAction({ISD::ABS, ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX},
+ MVT::i32, Legal);
setOperationAction(
{ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index bfe2c80..a67b12a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -901,6 +901,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
+ addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}});
+
bool hasSALUFloat = ST->hasSALUFloatInsts();
addRulesForGOpcs({G_FADD}, Standard)
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index d0ad120..b841171 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1488,6 +1488,12 @@ let AssemblerPredicate = isGFX12Plus in {
def : MnemonicAlias<"ds_load_tr_b64", "ds_load_tr8_b64">, Requires<[isGFX1250Plus]>;
def : MnemonicAlias<"ds_load_tr_b128", "ds_load_tr16_b128">, Requires<[isGFX1250Plus]>;
+// Additional aliases for ds load transpose instructions.
+def : MnemonicAlias<"ds_load_b64_tr_b8", "ds_load_tr8_b64">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"ds_load_b128_tr_b16", "ds_load_tr16_b128">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"ds_load_b64_tr_b4", "ds_load_tr4_b64">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"ds_load_b96_tr_b6", "ds_load_tr6_b96">, Requires<[isGFX125xOnly]>;
+
//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index be62395..e3f3aba 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -513,8 +513,7 @@ void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
}
if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
- Op = decodeLiteralConstant(
- Desc, OpDesc, OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64);
+ Op = decodeLiteralConstant(Desc, OpDesc);
continue;
}
@@ -893,6 +892,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// have EXEC as implicit destination. Issue a warning if encoding for
// vdst is not EXEC.
if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
+ MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
if (Bytes_[0] != ExecEncoding)
@@ -1545,21 +1545,21 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
if (HasLiteral) {
- if (Literal64 != Val)
+ if (Literal != Val)
return errOperand(Val, "More than one unique literal is illegal");
}
HasLiteral = true;
- Literal = Literal64 = Val;
+ Literal = Val;
- bool UseLit64 = Hi_32(Literal64) == 0;
+ bool UseLit64 = Hi_32(Literal) == 0;
return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
- LitModifier::Lit64, Literal64, getContext()))
- : MCOperand::createImm(Literal64);
+ LitModifier::Lit64, Literal, getContext()))
+ : MCOperand::createImm(Literal);
}
-MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
- const MCOperandInfo &OpDesc,
- bool ExtendFP64) const {
+MCOperand
+AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
+ const MCOperandInfo &OpDesc) const {
// For now all literal constants are supposed to be unsigned integer
// ToDo: deal with signed/unsigned 64-bit integer constants
// ToDo: deal with float/double constants
@@ -1569,35 +1569,79 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
Twine(Bytes.size()));
}
HasLiteral = true;
- Literal = Literal64 = eatBytes<uint32_t>(Bytes);
- if (ExtendFP64)
- Literal64 <<= 32;
+ Literal = eatBytes<uint32_t>(Bytes);
}
- int64_t Val = ExtendFP64 ? Literal64 : Literal;
+  // For disassembling, always assume all inline constants are available.
+ bool HasInv2Pi = true;
- bool CanUse64BitLiterals =
- STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
- !(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
-
- bool UseLit64 = false;
- if (CanUse64BitLiterals) {
- if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64)
- UseLit64 = false;
- else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64)
- UseLit64 = Hi_32(Literal64) == 0;
+ // Invalid instruction codes may contain literals for inline-only
+ // operands, so we support them here as well.
+ int64_t Val = Literal;
+ bool UseLit = false;
+ switch (OpDesc.OperandType) {
+ default:
+ llvm_unreachable("Unexpected operand type!");
+ case AMDGPU::OPERAND_REG_IMM_BF16:
+ case AMDGPU::OPERAND_REG_INLINE_C_BF16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
+ UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2BF16:
+ UseLit = AMDGPU::isInlinableLiteralV2BF16(Val);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
+ break;
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_IMM_V2FP32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT32:
+ case AMDGPU::OPERAND_KIMM32:
+ UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
+ Val <<= 32;
+ break;
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
+ break;
+ case MCOI::OPERAND_REGISTER:
+ // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
+ // decoding a literal in a position of a register operand. Give
+ // it special handling in the caller, decodeImmOperands(), instead
+ // of quietly allowing it here.
+ break;
}
- return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
- LitModifier::Lit64, Val, getContext()))
- : MCOperand::createImm(Val);
+ return UseLit ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
+ LitModifier::Lit, Val, getContext()))
+ : MCOperand::createImm(Val);
}
-MCOperand
-AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
+MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
if (!HasLiteral) {
@@ -1606,25 +1650,13 @@ AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
Twine(Bytes.size()));
}
HasLiteral = true;
- Literal64 = eatBytes<uint64_t>(Bytes);
- }
-
- bool UseLit64 = false;
- const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
- const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()];
- if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) {
- UseLit64 = false;
- } else {
- assert(OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64);
- UseLit64 = Hi_32(Literal64) == 0;
+ Literal = eatBytes<uint64_t>(Bytes);
}
+ bool UseLit64 = Hi_32(Literal) == 0;
return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
- LitModifier::Lit64, Literal64, getContext()))
- : MCOperand::createImm(Literal64);
+ LitModifier::Lit64, Literal, getContext()))
+ : MCOperand::createImm(Literal);
}
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
@@ -1913,7 +1945,7 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
return MCOperand::createImm(Val);
if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
- return decodeLiteral64Constant(Inst);
+ return decodeLiteral64Constant();
}
switch (Width) {
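
(Editorial note, not part of the patch.) In the consolidated switch above, the FP64 cases deliberately leave UseLit unset and shift the 32-bit encoding into the upper half of the value: a single-dword literal used as a 64-bit floating-point operand supplies the most-significant word, with the low word zero. A minimal standalone sketch of that bit pattern, using only the C++ standard library (no LLVM API):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint32_t Encoded = 0x3FF00000;                // high word of the double 1.0
      uint64_t Val = uint64_t(Encoded) << 32;       // mirrors the 'Val <<= 32' case above
      double D;
      std::memcpy(&D, &Val, sizeof(D));             // reinterpret the bits as a double
      std::printf("decoded fp64 literal: %g\n", D); // prints 1
    }
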
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 2751857..d103d79 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -44,8 +44,7 @@ private:
const unsigned HwModeRegClass;
const unsigned TargetMaxInstBytes;
mutable ArrayRef<uint8_t> Bytes;
- mutable uint32_t Literal;
- mutable uint64_t Literal64;
+ mutable uint64_t Literal;
mutable bool HasLiteral;
mutable std::optional<bool> EnableWavefrontSize32;
unsigned CodeObjectVersion;
@@ -144,9 +143,8 @@ public:
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const;
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc,
- const MCOperandInfo &OpDesc,
- bool ExtendFP64) const;
- MCOperand decodeLiteral64Constant(const MCInst &Inst) const;
+ const MCOperandInfo &OpDesc) const;
+ MCOperand decodeLiteral64Constant() const;
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 6de59be..8ea64d1 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -3711,6 +3711,12 @@ defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "globa
defm GLOBAL_LOAD_TR4_B64 : VFLAT_Real_AllAddr_gfx1250<0x073>;
defm GLOBAL_LOAD_TR6_B96 : VFLAT_Real_AllAddr_gfx1250<0x074>;
+// Additional aliases for global load transpose instructions.
+def : MnemonicAlias<"global_load_b128_tr_b16", "global_load_tr16_b128">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"global_load_b64_tr_b8", "global_load_tr8_b64">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"global_load_b64_tr_b4", "global_load_tr4_b64">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"global_load_b96_tr_b6", "global_load_tr6_b96">, Requires<[isGFX125xOnly]>;
+
defm FLAT_ATOMIC_ADD_F64 : VFLAT_Real_Atomics_gfx1250<0x055>;
defm FLAT_ATOMIC_MIN_F64 : VFLAT_Real_Atomics_gfx1250<0x05b, "flat_atomic_min_num_f64">;
defm FLAT_ATOMIC_MAX_F64 : VFLAT_Real_Atomics_gfx1250<0x05c, "flat_atomic_max_num_f64">;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 979a8b0..4b22c68 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include <algorithm>
+#include <array>
namespace llvm {
@@ -45,7 +46,7 @@ struct GCNRegPressure {
return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR];
}
- void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }
+ void clear() { Value.fill(0); }
unsigned getNumRegs(RegKind Kind) const {
assert(Kind < TOTAL_KINDS);
@@ -127,9 +128,7 @@ struct GCNRegPressure {
bool less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
- bool operator==(const GCNRegPressure &O) const {
- return std::equal(&Value[0], &Value[ValueArraySize], O.Value);
- }
+ bool operator==(const GCNRegPressure &O) const { return Value == O.Value; }
bool operator!=(const GCNRegPressure &O) const {
return !(*this == O);
@@ -160,7 +159,7 @@ private:
/// Pressure for all register kinds (first all regular registers kinds, then
/// all tuple register kinds).
- unsigned Value[ValueArraySize];
+ std::array<unsigned, ValueArraySize> Value;
static unsigned getRegKind(const TargetRegisterClass *RC,
const SIRegisterInfo *STI);
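
(Editorial note, not part of the patch.) Replacing the raw C array with std::array is what lets clear() and operator== above shrink to one-liners: std::array knows its own size and compares element-wise. A tiny self-contained illustration, standard library only:

    #include <array>
    #include <cassert>

    int main() {
      std::array<unsigned, 4> A{}, B{};  // value-initialized to zero
      A.fill(0);                         // analogue of Value.fill(0)
      assert(A == B);                    // element-wise compare replaces std::equal
      A[2] = 7;
      assert(A != B);
      return 0;
    }
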
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 58482ea..9fbf9e5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -69,6 +69,12 @@ static cl::opt<bool> GCNTrackers(
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
cl::init(false));
+static cl::opt<unsigned> PendingQueueLimit(
+ "amdgpu-scheduler-pending-queue-limit", cl::Hidden,
+ cl::desc(
+ "Max (Available+Pending) size to inspect pending queue (0 disables)"),
+ cl::init(256));
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#define DUMP_MAX_REG_PRESSURE
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
@@ -335,17 +341,52 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
}
}
+static bool shouldCheckPending(SchedBoundary &Zone,
+ const TargetSchedModel *SchedModel) {
+ bool HasBufferedModel =
+ SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize();
+ unsigned Combined = Zone.Available.size() + Zone.Pending.size();
+ return Combined <= PendingQueueLimit && HasBufferedModel;
+}
+
+static SUnit *pickOnlyChoice(SchedBoundary &Zone,
+ const TargetSchedModel *SchedModel) {
+ // pickOnlyChoice() releases pending instructions and checks for new hazards.
+ SUnit *OnlyChoice = Zone.pickOnlyChoice();
+ if (!shouldCheckPending(Zone, SchedModel) || Zone.Pending.empty())
+ return OnlyChoice;
+
+ return nullptr;
+}
+
+void GCNSchedStrategy::printCandidateDecision(const SchedCandidate &Current,
+ const SchedCandidate &Preferred) {
+ LLVM_DEBUG({
+ dbgs() << "Prefer:\t\t";
+ DAG->dumpNode(*Preferred.SU);
+
+ if (Current.SU) {
+ dbgs() << "Not:\t";
+ DAG->dumpNode(*Current.SU);
+ }
+
+ dbgs() << "Reason:\t\t";
+ traceCandidate(Preferred);
+ });
+}
+
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
- SchedCandidate &Cand,
+ SchedCandidate &Cand, bool &IsPending,
bool IsBottomUp) {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
+ IsPending = false;
if (DAG->isTrackingPressure()) {
if (!GCNTrackers) {
SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
@@ -358,8 +399,9 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
VGPRPressure = T->getPressure().getArchVGPRNum();
}
}
- ReadyQueue &Q = Zone.Available;
- for (SUnit *SU : Q) {
+ LLVM_DEBUG(dbgs() << "Available Q:\n");
+ ReadyQueue &AQ = Zone.Available;
+ for (SUnit *SU : AQ) {
SchedCandidate TryCand(ZonePolicy);
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
@@ -371,27 +413,55 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
// Initialize resource delta if needed in case future heuristics query it.
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(Zone.DAG, SchedModel);
+ LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
Cand.setBest(TryCand);
- LLVM_DEBUG(traceCandidate(Cand));
+ } else {
+ printCandidateDecision(TryCand, Cand);
+ }
+ }
+
+ if (!shouldCheckPending(Zone, SchedModel))
+ return;
+
+ LLVM_DEBUG(dbgs() << "Pending Q:\n");
+ ReadyQueue &PQ = Zone.Pending;
+ for (SUnit *SU : PQ) {
+
+ SchedCandidate TryCand(ZonePolicy);
+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
+ VGPRPressure, IsBottomUp);
+ // Pass SchedBoundary only when comparing nodes from the same boundary.
+ SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+ tryPendingCandidate(Cand, TryCand, ZoneArg);
+ if (TryCand.Reason != NoCand) {
+ // Initialize resource delta if needed in case future heuristics query it.
+ if (TryCand.ResDelta == SchedResourceDelta())
+ TryCand.initResourceDelta(Zone.DAG, SchedModel);
+ LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
+ IsPending = true;
+ Cand.setBest(TryCand);
+ } else {
+ printCandidateDecision(TryCand, Cand);
}
}
}
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
-SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
+SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode,
+ bool &PickedPending) {
// Schedule as far as possible in the direction of no choice. This is most
// efficient, but also provides the best heuristics for CriticalPSets.
- if (SUnit *SU = Bot.pickOnlyChoice()) {
+ if (SUnit *SU = pickOnlyChoice(Bot, SchedModel)) {
IsTopNode = false;
return SU;
}
- if (SUnit *SU = Top.pickOnlyChoice()) {
+ if (SUnit *SU = pickOnlyChoice(Top, SchedModel)) {
IsTopNode = true;
return SU;
}
- // Set the bottom-up policy based on the state of the current bottom zone and
- // the instructions outside the zone, including the top zone.
+ // Set the bottom-up policy based on the state of the current bottom zone
+ // and the instructions outside the zone, including the top zone.
CandPolicy BotPolicy;
setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
// Set the top-down policy based on the state of the current top zone and
@@ -399,12 +469,14 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
CandPolicy TopPolicy;
setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
+ bool BotPending = false;
// See if BotCand is still valid (because we previously scheduled from Top).
LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
if (!BotCand.isValid() || BotCand.SU->isScheduled ||
BotCand.Policy != BotPolicy) {
BotCand.reset(CandPolicy());
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
+ BotPending,
/*IsBottomUp=*/true);
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
} else {
@@ -414,6 +486,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
+ BotPending,
/*IsBottomUp=*/true);
assert(TCand.SU == BotCand.SU &&
"Last pick result should correspond to re-picking right now");
@@ -421,12 +494,14 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
#endif
}
+ bool TopPending = false;
// Check if the top Q has a better candidate.
LLVM_DEBUG(dbgs() << "Picking from Top:\n");
if (!TopCand.isValid() || TopCand.SU->isScheduled ||
TopCand.Policy != TopPolicy) {
TopCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
+ TopPending,
/*IsBottomUp=*/false);
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
} else {
@@ -436,6 +511,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
+ TopPending,
/*IsBottomUp=*/false);
assert(TCand.SU == TopCand.SU &&
"Last pick result should correspond to re-picking right now");
@@ -446,12 +522,21 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
// Pick best from BotCand and TopCand.
LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
dbgs() << "Bot Cand: "; traceCandidate(BotCand););
- SchedCandidate Cand = BotCand;
- TopCand.Reason = NoCand;
- tryCandidate(Cand, TopCand, nullptr);
- if (TopCand.Reason != NoCand) {
- Cand.setBest(TopCand);
+ SchedCandidate Cand = BotPending ? TopCand : BotCand;
+ SchedCandidate TryCand = BotPending ? BotCand : TopCand;
+ PickedPending = BotPending && TopPending;
+
+ TryCand.Reason = NoCand;
+ if (BotPending || TopPending) {
+    PickedPending |= tryPendingCandidate(Cand, TryCand, nullptr);
+ } else {
+ tryCandidate(Cand, TryCand, nullptr);
+ }
+
+ if (TryCand.Reason != NoCand) {
+ Cand.setBest(TryCand);
}
+
LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
IsTopNode = Cand.AtTop;
@@ -466,35 +551,55 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
return nullptr;
}
+ bool PickedPending;
SUnit *SU;
do {
+ PickedPending = false;
if (RegionPolicy.OnlyTopDown) {
- SU = Top.pickOnlyChoice();
+ SU = pickOnlyChoice(Top, SchedModel);
if (!SU) {
CandPolicy NoPolicy;
TopCand.reset(NoPolicy);
pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
+ PickedPending,
/*IsBottomUp=*/false);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
SU = TopCand.SU;
}
IsTopNode = true;
} else if (RegionPolicy.OnlyBottomUp) {
- SU = Bot.pickOnlyChoice();
+ SU = pickOnlyChoice(Bot, SchedModel);
if (!SU) {
CandPolicy NoPolicy;
BotCand.reset(NoPolicy);
pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
+ PickedPending,
/*IsBottomUp=*/true);
assert(BotCand.Reason != NoCand && "failed to find a candidate");
SU = BotCand.SU;
}
IsTopNode = false;
} else {
- SU = pickNodeBidirectional(IsTopNode);
+ SU = pickNodeBidirectional(IsTopNode, PickedPending);
}
} while (SU->isScheduled);
+ if (PickedPending) {
+ unsigned ReadyCycle = IsTopNode ? SU->TopReadyCycle : SU->BotReadyCycle;
+ SchedBoundary &Zone = IsTopNode ? Top : Bot;
+ unsigned CurrentCycle = Zone.getCurrCycle();
+ if (ReadyCycle > CurrentCycle)
+ Zone.bumpCycle(ReadyCycle);
+
+ // FIXME: checkHazard() doesn't give information about which cycle the
+ // hazard will resolve so just keep bumping the cycle by 1. This could be
+ // made more efficient if checkHazard() returned more details.
+ while (Zone.checkHazard(SU))
+ Zone.bumpCycle(Zone.getCurrCycle() + 1);
+
+ Zone.releasePending();
+ }
+
if (SU->isTopReady())
Top.removeReady(SU);
if (SU->isBottomReady())
@@ -540,6 +645,47 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const {
return *std::next(CurrentStage);
}
+bool GCNSchedStrategy::tryPendingCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) const {
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+  // Bias PhysReg defs and copies toward their uses and defs, respectively.
+ if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+ biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
+ return TryCand.Reason != NoCand;
+
+ // Avoid exceeding the target's limit.
+ if (DAG->isTrackingPressure() &&
+ tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
+ RegExcess, TRI, DAG->MF))
+ return TryCand.Reason != NoCand;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (DAG->isTrackingPressure() &&
+ tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
+ TryCand, Cand, RegCritical, TRI, DAG->MF))
+ return TryCand.Reason != NoCand;
+
+ bool SameBoundary = Zone != nullptr;
+ if (SameBoundary) {
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return TryCand.Reason != NoCand;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources, TryCand, Cand,
+ ResourceDemand))
+ return TryCand.Reason != NoCand;
+ }
+
+ return false;
+}
+
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
const MachineSchedContext *C, bool IsLegacyScheduler)
: GCNSchedStrategy(C) {
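
(Editorial note, not part of the patch.) The scheduler changes above only inspect the pending queue when the combined Available+Pending size stays within the new amdgpu-scheduler-pending-queue-limit option (default 256) and the subtarget has a buffered instruction scheduling model; otherwise behaviour is unchanged. A simplified sketch of that gate, with plain stand-in parameters instead of the real SchedBoundary and TargetSchedModel types:

    #include <cstddef>

    // Stand-in for shouldCheckPending(): all parameters are illustrative.
    static bool shouldInspectPending(std::size_t AvailableSize, std::size_t PendingSize,
                                     bool HasInstrSchedModel, unsigned MicroOpBufferSize,
                                     unsigned Limit /* 0 effectively disables */) {
      bool HasBufferedModel = HasInstrSchedModel && MicroOpBufferSize != 0;
      return (AvailableSize + PendingSize) <= Limit && HasBufferedModel;
    }

When a pending node does win, pickNode() above then advances the zone's cycle to the node's ready cycle and re-releases pending instructions before scheduling it.
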
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 8ea4267..975781f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -44,17 +44,32 @@ raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
- SUnit *pickNodeBidirectional(bool &IsTopNode);
+ SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
- SchedCandidate &Cand, bool IsBottomUp);
+ SchedCandidate &Cand, bool &IsPending,
+ bool IsBottomUp);
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI, unsigned SGPRPressure,
unsigned VGPRPressure, bool IsBottomUp);
+ /// Evaluates instructions in the pending queue using a subset of scheduling
+ /// heuristics.
+ ///
+ /// Instructions that cannot be issued due to hardware constraints are placed
+  /// in the pending queue rather than the available queue, which normally makes
+  /// them invisible to scheduling heuristics. However, in certain scenarios (such as
+ /// avoiding register spilling), it may be beneficial to consider scheduling
+ /// these not-yet-ready instructions.
+ bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+ SchedBoundary *Zone) const;
+
+ void printCandidateDecision(const SchedCandidate &Current,
+ const SchedCandidate &Preferred);
+
std::vector<unsigned> Pressure;
std::vector<unsigned> MaxPressure;
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 2aa54c9..09ef6ac 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -45,6 +45,9 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
+  // 32-bit ABS is legal on AMDGPU except for R600, so expand it here.
+ setOperationAction(ISD::ABS, MVT::i32, Expand);
+
// EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
// spaces, so it is custom lowered to handle those where it isn't.
for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d516330..50447f4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -9072,6 +9072,67 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
MachineOperand &Src1 = Inst.getOperand(2);
const DebugLoc &DL = Inst.getDebugLoc();
+ if (ST.useRealTrue16Insts()) {
+ Register SrcReg0, SrcReg1;
+ if (!Src0.isReg() || !RI.isVGPR(MRI, Src0.getReg())) {
+ SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg0).add(Src0);
+ } else {
+ SrcReg0 = Src0.getReg();
+ }
+
+ if (!Src1.isReg() || !RI.isVGPR(MRI, Src1.getReg())) {
+ SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg1).add(Src1);
+ } else {
+ SrcReg1 = Src1.getReg();
+ }
+
+ bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
+ bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
+
+ auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
+ switch (Inst.getOpcode()) {
+ case AMDGPU::S_PACK_LL_B32_B16:
+ NewMI
+ .addReg(SrcReg0, 0,
+ isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
+ .addImm(AMDGPU::lo16)
+ .addReg(SrcReg1, 0,
+ isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
+ .addImm(AMDGPU::hi16);
+ break;
+ case AMDGPU::S_PACK_LH_B32_B16:
+ NewMI
+ .addReg(SrcReg0, 0,
+ isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
+ .addImm(AMDGPU::lo16)
+ .addReg(SrcReg1, 0, AMDGPU::hi16)
+ .addImm(AMDGPU::hi16);
+ break;
+ case AMDGPU::S_PACK_HL_B32_B16:
+ NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
+ .addImm(AMDGPU::lo16)
+ .addReg(SrcReg1, 0,
+ isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
+ .addImm(AMDGPU::hi16);
+ break;
+ case AMDGPU::S_PACK_HH_B32_B16:
+ NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
+ .addImm(AMDGPU::lo16)
+ .addReg(SrcReg1, 0, AMDGPU::hi16)
+ .addImm(AMDGPU::hi16);
+ break;
+ default:
+ llvm_unreachable("unhandled s_pack_* instruction");
+ }
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+ return;
+ }
+
switch (Inst.getOpcode()) {
case AMDGPU::S_PACK_LL_B32_B16: {
Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e979eeb..df27ec1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -879,6 +879,11 @@ public:
MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
}
+ bool isMFMA(uint16_t Opcode) const {
+ return isMAI(Opcode) && Opcode != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+ Opcode != AMDGPU::V_ACCVGPR_READ_B32_e64;
+ }
+
static bool isDOT(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
}
@@ -895,6 +900,10 @@ public:
return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
}
+ bool isMFMAorWMMA(uint16_t Opcode) const {
+ return isMFMA(Opcode) || isWMMA(Opcode) || isSWMMAC(Opcode);
+ }
+
static bool isSWMMAC(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
}
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index c684f9e..7431e11 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -47,9 +47,6 @@ private:
const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
- // Check if the machine instruction being processed is a supported packed
- // instruction.
- bool isUnpackingSupportedInstr(MachineInstr &MI) const;
// Creates a list of packed instructions following an MFMA that are suitable
// for unpacking.
void collectUnpackingCandidates(MachineInstr &BeginMI,
@@ -454,23 +451,6 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
return true;
}
-// If support is extended to new operations, add tests in
-// llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir.
-bool SIPreEmitPeephole::isUnpackingSupportedInstr(MachineInstr &MI) const {
- if (!TII->isNeverCoissue(MI))
- return false;
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- case AMDGPU::V_PK_ADD_F32:
- case AMDGPU::V_PK_MUL_F32:
- case AMDGPU::V_PK_FMA_F32:
- return true;
- default:
- return false;
- }
- llvm_unreachable("Fully covered switch");
-}
-
bool SIPreEmitPeephole::canUnpackingClobberRegister(const MachineInstr &MI) {
unsigned OpCode = MI.getOpcode();
Register DstReg = MI.getOperand(0).getReg();
@@ -612,10 +592,13 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
for (auto I = std::next(BeginMI.getIterator()); I != E; ++I) {
MachineInstr &Instr = *I;
+ uint16_t UnpackedOpCode = mapToUnpackedOpcode(Instr);
+    bool IsUnpackable =
+        UnpackedOpCode != std::numeric_limits<uint16_t>::max();
if (Instr.isMetaInstruction())
continue;
if ((Instr.isTerminator()) ||
- (TII->isNeverCoissue(Instr) && !isUnpackingSupportedInstr(Instr)) ||
+ (TII->isNeverCoissue(Instr) && !IsUnpackable) ||
(SIInstrInfo::modifiesModeRegister(Instr) &&
Instr.modifiesRegister(AMDGPU::EXEC, TRI)))
return;
@@ -639,7 +622,7 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
if (TRI->regsOverlap(MFMADef, InstrMO.getReg()))
return;
}
- if (!isUnpackingSupportedInstr(Instr))
+ if (!IsUnpackable)
continue;
if (canUnpackingClobberRegister(Instr))
@@ -654,7 +637,6 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
if (TotalCyclesBetweenCandidates < NumMFMACycles - 1)
InstrsToUnpack.insert(&Instr);
}
- return;
}
void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
@@ -681,7 +663,6 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
HiDstOp.setIsRenamable(DstOp.isRenamable());
I.eraseFromParent();
- return;
}
MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
@@ -689,8 +670,8 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
bool IsHiBits) {
MachineBasicBlock &MBB = *I.getParent();
const DebugLoc &DL = I.getDebugLoc();
- const MachineOperand *SrcMO1 = TII->getNamedOperand(I, AMDGPU::OpName::src0);
- const MachineOperand *SrcMO2 = TII->getNamedOperand(I, AMDGPU::OpName::src1);
+ const MachineOperand *SrcMO0 = TII->getNamedOperand(I, AMDGPU::OpName::src0);
+ const MachineOperand *SrcMO1 = TII->getNamedOperand(I, AMDGPU::OpName::src1);
Register DstReg = I.getOperand(0).getReg();
unsigned OpCode = I.getOpcode();
Register UnpackedDstReg = IsHiBits ? TRI->getSubReg(DstReg, AMDGPU::sub1)
@@ -704,15 +685,15 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
MachineInstrBuilder NewMI = BuildMI(MBB, I, DL, TII->get(UnpackedOpcode));
NewMI.addDef(UnpackedDstReg); // vdst
- addOperandAndMods(NewMI, Src0Mods, IsHiBits, *SrcMO1);
- addOperandAndMods(NewMI, Src1Mods, IsHiBits, *SrcMO2);
+ addOperandAndMods(NewMI, Src0Mods, IsHiBits, *SrcMO0);
+ addOperandAndMods(NewMI, Src1Mods, IsHiBits, *SrcMO1);
if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {
- const MachineOperand *SrcMO3 =
+ const MachineOperand *SrcMO2 =
TII->getNamedOperand(I, AMDGPU::OpName::src2);
unsigned Src2Mods =
TII->getNamedOperand(I, AMDGPU::OpName::src2_modifiers)->getImm();
- addOperandAndMods(NewMI, Src2Mods, IsHiBits, *SrcMO3);
+ addOperandAndMods(NewMI, Src2Mods, IsHiBits, *SrcMO2);
}
NewMI.addImm(ClampVal); // clamp
// Packed instructions do not support output modifiers. safe to assign them 0
@@ -789,9 +770,13 @@ bool SIPreEmitPeephole::run(MachineFunction &MF) {
// TODO: Fold this into previous block, if possible. Evaluate and handle any
// side effects.
+
+ // Perform the extra MF scans only for supported archs
+ if (!ST.hasGFX940Insts())
+ return Changed;
for (MachineBasicBlock &MBB : MF) {
- // Unpack packed instructions overlapped by MFMAs. This allows the compiler
- // to co-issue unpacked instructions with MFMA
+ // Unpack packed instructions overlapped by MFMAs. This allows the
+ // compiler to co-issue unpacked instructions with MFMA
auto SchedModel = TII->getSchedModel();
SetVector<MachineInstr *> InstrsToUnpack;
for (auto &MI : make_early_inc_range(MBB.instrs())) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index a01a5fd..5e3195b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1697,9 +1697,6 @@ LLVM_READNONE
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
LLVM_READNONE
-bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
-
-LLVM_READNONE
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
LLVM_READNONE
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 9945ecc..0d7b6d1 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -161,8 +161,8 @@ namespace {
friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
return TE.PseudoOpc < PseudoOpc;
}
- friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
- const NEONLdStTableEntry &TE) {
+ [[maybe_unused]] friend bool operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
return PseudoOpc < TE.PseudoOpc;
}
};
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 67ea2dd..35e1127 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -21287,21 +21287,28 @@ bool ARMTargetLowering::useLoadStackGuardNode(const Module &M) const {
}
void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
+  // The MSVC CRT provides functionality for stack protection.
RTLIB::LibcallImpl SecurityCheckCookieLibcall =
getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
- if (SecurityCheckCookieLibcall == RTLIB::Unsupported)
- return TargetLowering::insertSSPDeclarations(M);
- // MSVC CRT has a global variable holding security cookie.
- M.getOrInsertGlobal("__security_cookie",
- PointerType::getUnqual(M.getContext()));
+ RTLIB::LibcallImpl SecurityCookieVar =
+ getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
+ if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
+ SecurityCookieVar != RTLIB::Unsupported) {
+ // MSVC CRT has a global variable holding security cookie.
+ M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar),
+ PointerType::getUnqual(M.getContext()));
- // MSVC CRT has a function to validate security cookie.
- FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
- getLibcallImplName(SecurityCheckCookieLibcall),
- Type::getVoidTy(M.getContext()), PointerType::getUnqual(M.getContext()));
- if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
- F->addParamAttr(0, Attribute::AttrKind::InReg);
+ // MSVC CRT has a function to validate security cookie.
+ FunctionCallee SecurityCheckCookie =
+ M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(M.getContext()));
+ if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
+ F->addParamAttr(0, Attribute::AttrKind::InReg);
+ }
+
+ TargetLowering::insertSSPDeclarations(M);
}
Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 228114c..44c4830 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -57,6 +57,7 @@ def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
def BinaryWithCarryTy : DXILOpParamType;
+def DimensionsTy : DXILOpParamType;
class DXILOpClass;
@@ -901,6 +902,13 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
}
+def GetDimensions : DXILOp<72, getDimensions> {
+ let Doc = "gets the dimensions of a buffer or texture";
+ let arguments = [HandleTy, Int32Ty];
+ let result = DimensionsTy;
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
def Barrier : DXILOp<80, barrier> {
let Doc = "inserts a memory barrier in the shader";
let intrinsics = [
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 1aed8f9..944b2e6 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -261,6 +261,12 @@ static StructType *getBinaryWithCarryType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
}
+static StructType *getDimensionsType(LLVMContext &Ctx) {
+ Type *Int32Ty = Type::getInt32Ty(Ctx);
+ return getOrCreateStructType("dx.types.Dimensions",
+ {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, Ctx);
+}
+
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
Type *OverloadTy) {
switch (Kind) {
@@ -318,6 +324,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getSplitDoubleType(Ctx);
case OpParamType::BinaryWithCarryTy:
return getBinaryWithCarryType(Ctx);
+ case OpParamType::DimensionsTy:
+ return getDimensionsType(Ctx);
}
llvm_unreachable("Invalid parameter kind");
return nullptr;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 610d8b6..e46a393 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -627,6 +627,28 @@ public:
});
}
+ [[nodiscard]] bool lowerGetDimensionsX(Function &F) {
+ IRBuilder<> &IRB = OpBuilder.getIRB();
+ Type *Int32Ty = IRB.getInt32Ty();
+
+ return replaceFunction(F, [&](CallInst *CI) -> Error {
+ IRB.SetInsertPoint(CI);
+ Value *Handle =
+ createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+ Value *Undef = UndefValue::get(Int32Ty);
+
+ Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+ OpCode::GetDimensions, {Handle, Undef}, CI->getName(), Int32Ty);
+ if (Error E = OpCall.takeError())
+ return E;
+ Value *Dim = IRB.CreateExtractValue(*OpCall, 0);
+
+ CI->replaceAllUsesWith(Dim);
+ CI->eraseFromParent();
+ return Error::success();
+ });
+ }
+
[[nodiscard]] bool lowerGetPointer(Function &F) {
// These should have already been handled in DXILResourceAccess, so we can
// just clean up the dead prototype.
@@ -934,6 +956,9 @@ public:
case Intrinsic::dx_resource_updatecounter:
HasErrors |= lowerUpdateCounter(F);
break;
+ case Intrinsic::dx_resource_getdimensions_x:
+ HasErrors |= lowerGetDimensionsX(F);
+ break;
case Intrinsic::ctpop:
HasErrors |= lowerCtpopToCountBits(F);
break;
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 82c43ff..26a8728 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -1165,12 +1165,15 @@ void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {}
/// Returns the bit offset to backpatch with the location of the real VST.
void DXILBitcodeWriter::writeModuleInfo() {
// Emit various pieces of data attached to a module.
- if (!M.getTargetTriple().empty())
- writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE,
- M.getTargetTriple().str(), 0 /*TODO*/);
- const std::string &DL = M.getDataLayoutStr();
- if (!DL.empty())
- writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+
+ // We need to hardcode a triple and datalayout that's compatible with the
+ // historical DXIL triple and datalayout from DXC.
+ StringRef Triple = "dxil-ms-dx";
+ StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-"
+ "f16:32-f32:32-f64:64-n8:16:32:64";
+ writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/);
+ writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+
if (!M.getModuleInlineAsm().empty())
writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
0 /*TODO*/);
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
index 1eb03bf..725f2b1 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
@@ -149,11 +149,6 @@ public:
std::string Data;
llvm::raw_string_ostream OS(Data);
- Triple OriginalTriple = M.getTargetTriple();
- // Set to DXIL triple when write to bitcode.
- // Only the output bitcode need to be DXIL triple.
- M.setTargetTriple(Triple("dxil-ms-dx"));
-
// Perform late legalization of lifetime intrinsics that would otherwise
// fail the Module Verifier if performed in an earlier pass
legalizeLifetimeIntrinsics(M);
@@ -165,9 +160,6 @@ public:
// not-so-legal legalizations
removeLifetimeIntrinsics(M);
- // Recover triple.
- M.setTargetTriple(OriginalTriple);
-
Constant *ModuleConstant =
ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data));
auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true,
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 5f180d6..3bd6ed4 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -66,6 +66,10 @@ public:
void remapInstruction(MCInst &Instr) const;
+ Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const override;
+
private:
bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
uint64_t &BytesToSkip, raw_ostream &CS) const;
@@ -567,6 +571,18 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
return Result;
}
+Expected<bool> HexagonDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
+ uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const {
+ // At the start of a symbol, force a fresh packet by resetting any
+ // in-progress bundle state. This prevents packets from straddling label
+ // boundaries when data (e.g. jump tables) appears in between.
+ Size = 0;
+ resetBundle();
+ return true;
+}
+
static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
ArrayRef<MCPhysReg> Table) {
if (RegNo < Table.size()) {
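
(Editorial note, not part of the patch.) onSymbolStart is the MCDisassembler hook that llvm-objdump invokes at each symbol; returning true with Size = 0 consumes no bytes and, in the Hexagon override above, simply clears the in-progress bundle. A toy, self-contained model of that "reset decoder state at a symbol boundary" idea (PacketState and reset() are made-up stand-ins, not the Hexagon API):

    #include <cstdint>
    #include <vector>

    struct PacketState {
      std::vector<uint32_t> Words;     // words gathered for the current packet
      void reset() { Words.clear(); }  // analogue of resetBundle()
    };

    int main() {
      PacketState S;
      S.Words.push_back(0xdeadbeefu);  // half-built packet before a label
      S.reset();                       // symbol start: force a fresh packet
      return S.Words.empty() ? 0 : 1;
    }
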
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 52e6b0b..68f5312 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -174,8 +174,8 @@ namespace {
const TargetRegisterInfo *TRI;
};
- raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
- LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const PrintRegSet &P);
raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
OS << '{';
for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 14b6bb3..9087f9d 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -272,15 +272,14 @@ namespace {
OS << *I << ' ' << **I << '\n';
}
- raw_ostream &operator<< (raw_ostream &OS,
- const NodeVect &S) LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const NodeVect &S);
raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) {
dump_node_container(OS, S);
return OS;
}
- raw_ostream &operator<< (raw_ostream &OS,
- const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const NodeToUsesMap &M);
raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){
for (const auto &I : M) {
const UseSet &Us = I.second;
@@ -914,9 +913,8 @@ namespace {
const NodeToValueMap &Map;
};
- raw_ostream &operator<< (raw_ostream &OS,
- const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ;
- raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) {
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const LocationAsBlock &Loc) {
for (const auto &I : Loc.Map) {
OS << I.first << " -> ";
if (BasicBlock *B = cast_or_null<BasicBlock>(I.second))
diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 14a7ae7..3900aac 100644
--- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -132,8 +132,7 @@ namespace {
const TargetRegisterInfo &TRI;
friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
};
- raw_ostream &operator<<(raw_ostream &OS,
- const PrintFP &P) LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P);
raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
<< ", PredR:" << printReg(P.FP.PredR, &P.TRI)
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index f9fdab4..9c81e96 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -51,11 +51,11 @@ private:
const TargetRegisterInfo &TRI;
};
- raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR)
- LLVM_ATTRIBUTE_UNUSED;
- raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) {
- return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
- }
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const PrintRegister &PR);
+raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) {
+ return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
+}
class HexagonGenPredicate : public MachineFunctionPass {
public:
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index bfea50e..6b48a21 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -422,12 +422,12 @@ static MCTargetStreamer *createHexagonNullTargetStreamer(MCStreamer &S) {
return new HexagonTargetStreamer(S);
}
-static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) {
+[[maybe_unused]] static void clearFeature(MCSubtargetInfo *STI, uint64_t F) {
if (STI->hasFeature(F))
STI->ToggleFeature(F);
}
-static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) {
+[[maybe_unused]] static bool checkFeature(MCSubtargetInfo *STI, uint64_t F) {
return STI->hasFeature(F);
}
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index 1ce8d7e3..df0c8c1 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -264,9 +264,10 @@ public:
} // end anonymous namespace
-static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- Type *OrigTy, CCState &State) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
+ CCState &State);
static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 7f1ff45..2fd7327 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -3176,9 +3176,10 @@ static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT,
F64Regs);
}
-static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- Type *OrigTy, CCState &State) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
+ CCState &State);
#include "MipsGenCallingConv.inc"
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 272c21f..2f1a7ad 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -749,7 +749,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setTruncStoreAction(VT, MVT::i1, Expand);
}
- // Disable generations of extload/truncstore for v2i16/v2i8. The generic
+  // Disable generation of extload/truncstore for v2i32/v2i16/v2i8. The generic
// expansion for these nodes when they are unaligned is incorrect if the
// type is a vector.
//
@@ -757,7 +757,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// TargetLowering::expandUnalignedLoad/Store.
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
MVT::v2i8, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32,
+ {MVT::v2i8, MVT::v2i16}, Expand);
setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i8, Expand);
// Register custom handling for illegal type loads/stores. We'll try to custom
// lower almost all illegal types and logic in the lowering will discard cases
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 8851a0f..e857b2d 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3356,10 +3356,10 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
bool isValidInsnFormat(StringRef Format, const MCSubtargetInfo &STI) {
return StringSwitch<bool>(Format)
- .Cases("r", "r4", "i", "b", "sb", "u", "j", "uj", "s", true)
- .Cases("cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj",
+ .Cases({"r", "r4", "i", "b", "sb", "u", "j", "uj", "s"}, true)
+ .Cases({"cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj"},
STI.hasFeature(RISCV::FeatureStdExtZca))
- .Cases("qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es",
+ .Cases({"qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es"},
!STI.hasFeature(RISCV::Feature64Bit))
.Default(false);
}
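
(Editorial note, not part of the patch.) The rewritten calls use the braced overload of StringSwitch::Cases, which takes an initializer list of strings rather than a fixed number of positional arguments. A minimal usage sketch in the same style; isCompressedFormat is a made-up name for illustration:

    #include "llvm/ADT/StringSwitch.h"
    using namespace llvm;

    static bool isCompressedFormat(StringRef Format) {
      return StringSwitch<bool>(Format)
          .Cases({"cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj"}, true)
          .Default(false);
    }

    int main() { return isCompressedFormat("cr") ? 0 : 1; }
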
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 662d3f6..b1794b7 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -717,6 +717,18 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.clampScalar(0, sXLen, sXLen)
.lower();
+ LegalityPredicate InsertVectorEltPred = [=](const LegalityQuery &Query) {
+ LLT VecTy = Query.Types[0];
+ LLT EltTy = Query.Types[1];
+ return VecTy.getElementType() == EltTy;
+ };
+
+ getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
+ .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+ InsertVectorEltPred, typeIs(2, sXLen)))
+ .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), InsertVectorEltPred,
+ typeIs(2, sXLen)));
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 50730c6..ab93bba 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -43,7 +43,7 @@ const llvm::StringRef RISCVLMULInstrument::DESC_NAME = "RISCV-LMUL";
bool RISCVLMULInstrument::isDataValid(llvm::StringRef Data) {
// Return true if not one of the valid LMUL strings
return StringSwitch<bool>(Data)
- .Cases("M1", "M2", "M4", "M8", "MF2", "MF4", "MF8", true)
+ .Cases({"M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"}, true)
.Default(false);
}
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 5ceb477..19992e6 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -695,6 +695,9 @@ def HasStdExtZvfbfa : Predicate<"Subtarget->hasStdExtZvfbfa()">,
def FeatureStdExtZvfbfmin
: RISCVExtension<1, 0, "Vector BF16 Converts", [FeatureStdExtZve32f]>;
+def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvfbfmin),
+ "'Zvfbfmin' (Vector BF16 Converts)">;
def FeatureStdExtZvfbfwma
: RISCVExtension<1, 0, "Vector BF16 widening mul-add",
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7123a2d..169465e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1672,6 +1672,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.useRVVForFixedLengthVectors())
setTargetDAGCombine(ISD::BITCAST);
+ setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
+
// Disable strict node mutation.
IsStrictFPEnabled = true;
EnableExtLdPromotion = true;
@@ -24828,7 +24830,8 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// instruction, as it is usually smaller than the alternative sequence.
// TODO: Add vector division?
bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
- return OptSize && !VT.isVector();
+ return OptSize && !VT.isVector() &&
+ VT.getSizeInBits() <= getMaxDivRemBitWidthSupported();
}
bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 1b7cb9b..636e31c 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -699,7 +699,8 @@ public:
"Can't encode VTYPE for uninitialized or unknown");
if (TWiden != 0)
return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt);
- return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+ return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic,
+ AltFmt);
}
bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index ddb53a2..12f776b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3775,11 +3775,13 @@ std::string RISCVInstrInfo::createMIROperandComment(
#define CASE_VFMA_OPCODE_VV(OP) \
CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
+ case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VV, E16): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
#define CASE_VFMA_SPLATS(OP) \
CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
+ case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VFPR16, E16): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
// clang-format on
@@ -4003,11 +4005,13 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VV, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VFPR16, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
// clang-format on
@@ -4469,6 +4473,20 @@ bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI) const {
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \
+
+#define CASE_FP_WIDEOP_OPCODE_LMULS_ALT(OP) \
+ CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16)
+
+#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(OP) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16)
// clang-format on
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
@@ -4478,6 +4496,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
return nullptr;
+ case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWADD_ALT_WV):
+ case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWSUB_ALT_WV):
case CASE_FP_WIDEOP_OPCODE_LMULS(FWADD_WV):
case CASE_FP_WIDEOP_OPCODE_LMULS(FWSUB_WV): {
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
@@ -4494,6 +4514,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
llvm_unreachable("Unexpected opcode");
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWADD_ALT_WV)
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWSUB_ALT_WV)
}
// clang-format on
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 65865ce..eb3c9b0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -5862,20 +5862,6 @@ multiclass VPatConversionWF_VF<string intrinsic, string instruction,
}
}
-multiclass VPatConversionWF_VF_BF<string intrinsic, string instruction,
- bit isSEWAware = 0> {
- foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in
- {
- defvar fvti = fvtiToFWti.Vti;
- defvar fwti = fvtiToFWti.Wti;
- let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
- GetVTypePredicates<fwti>.Predicates) in
- defm : VPatConversion<intrinsic, instruction, "V",
- fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
- fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>;
- }
-}
-
multiclass VPatConversionVI_WF<string intrinsic, string instruction> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
@@ -5969,20 +5955,6 @@ multiclass VPatConversionVF_WF_RTZ<string intrinsic, string instruction,
}
}
-multiclass VPatConversionVF_WF_BF_RM<string intrinsic, string instruction,
- bit isSEWAware = 0> {
- foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
- defvar fvti = fvtiToFWti.Vti;
- defvar fwti = fvtiToFWti.Wti;
- let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
- GetVTypePredicates<fwti>.Predicates) in
- defm : VPatConversionRoundingMode<intrinsic, instruction, "W",
- fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, fwti.RegClass,
- isSEWAware>;
- }
-}
-
multiclass VPatCompare_VI<string intrinsic, string inst,
ImmLeaf ImmType> {
foreach vti = AllIntegerVectors in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index 0be9eab..9358486 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -36,7 +36,7 @@ defm VFWMACCBF16_V : VWMAC_FV_V_F<"vfwmaccbf16", 0b111011>;
//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZvfbfminOrZvfofp8min] in {
+let Predicates = [HasStdExtZvfbfmin] in {
defm PseudoVFWCVTBF16_F_F : VPseudoVWCVTD_V;
defm PseudoVFNCVTBF16_F_F : VPseudoVNCVTD_W_RM;
}
@@ -44,10 +44,364 @@ let Predicates = [HasStdExtZvfbfminOrZvfofp8min] in {
let mayRaiseFPException = true, Predicates = [HasStdExtZvfbfwma] in
defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM;
+defset list<VTypeInfoToWide> AllWidenableIntToBF16Vectors = {
+ def : VTypeInfoToWide<VI8MF8, VBF16MF4>;
+ def : VTypeInfoToWide<VI8MF4, VBF16MF2>;
+ def : VTypeInfoToWide<VI8MF2, VBF16M1>;
+ def : VTypeInfoToWide<VI8M1, VBF16M2>;
+ def : VTypeInfoToWide<VI8M2, VBF16M4>;
+ def : VTypeInfoToWide<VI8M4, VBF16M8>;
+}
+
+multiclass VPseudoVALU_VV_VF_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryFV_VV_RM<m, 16/*sew*/>,
+ SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVALU_VF_RM_BF16 {
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFWALU_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_VV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFWALU_WV_WF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_WV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_WF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFMUL_VV_VF_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryFV_VV_RM<m, 16/*sew*/>,
+ SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVWMUL_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_VV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVMAC_VV_VF_AAXA_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoTernaryV_VV_AAXA_RM<m, 16/*sew*/>,
+ SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV", m.MX, 16/*sew*/>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, f.SEW>,
+ SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ "ReadVFMulAddV", m.MX, f.SEW>;
+ }
+}
+
+multiclass VPseudoVWMAC_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoTernaryW_VV_RM<m, sew=16>,
+ SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV", "ReadVFWMulAddV", m.MX, 16/*sew*/>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoTernaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
+ "ReadVFWMulAddF", "ReadVFWMulAddV", m.MX, f.SEW>;
+ }
+}
+
+multiclass VPseudoVRCP_V_BF16 {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E16"
+ : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ def "_V_" # mx # "_E16_MASK"
+ : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ }
+ }
+}
+
+multiclass VPseudoVRCP_V_RM_BF16 {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E16"
+ : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ def "_V_" # mx # "_E16_MASK"
+ : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ }
+ }
+}
+
+multiclass VPseudoVMAX_VV_VF_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryV_VV<m, sew=16>,
+ SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV",
+ m.MX, 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF<m, f, f.SEW>,
+ SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF",
+ m.MX, f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVSGNJ_VV_VF_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryV_VV<m, sew=16>,
+ SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF<m, f, f.SEW>,
+ SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVWCVTF_V_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListW in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=8,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, 8/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVWCVTD_V_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=16,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVNCVTD_W_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, sew=16,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVNCVTD_W_RM_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m,
+ constraint, sew=16,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+let Predicates = [HasStdExtZvfbfa], AltFmtType = IS_ALTFMT in {
+let mayRaiseFPException = true in {
+defm PseudoVFADD_ALT : VPseudoVALU_VV_VF_RM_BF16;
+defm PseudoVFSUB_ALT : VPseudoVALU_VV_VF_RM_BF16;
+defm PseudoVFRSUB_ALT : VPseudoVALU_VF_RM_BF16;
+}
+
+let mayRaiseFPException = true in {
+defm PseudoVFWADD_ALT : VPseudoVFWALU_VV_VF_RM_BF16;
+defm PseudoVFWSUB_ALT : VPseudoVFWALU_VV_VF_RM_BF16;
+defm PseudoVFWADD_ALT : VPseudoVFWALU_WV_WF_RM_BF16;
+defm PseudoVFWSUB_ALT : VPseudoVFWALU_WV_WF_RM_BF16;
+}
+
+let mayRaiseFPException = true in
+defm PseudoVFMUL_ALT : VPseudoVFMUL_VV_VF_RM_BF16;
+
+let mayRaiseFPException = true in
+defm PseudoVFWMUL_ALT : VPseudoVWMUL_VV_VF_RM_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVFMACC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMACC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMSAC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMSAC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMADD_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMADD_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMSUB_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMSUB_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+}
+
+let mayRaiseFPException = true in {
+defm PseudoVFWMACC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWNMACC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWMSAC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWNMSAC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+}
+
+let mayRaiseFPException = true in
+defm PseudoVFRSQRT7_ALT : VPseudoVRCP_V_BF16;
+
+let mayRaiseFPException = true in
+defm PseudoVFREC7_ALT : VPseudoVRCP_V_RM_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVFMIN_ALT : VPseudoVMAX_VV_VF_BF16;
+defm PseudoVFMAX_ALT : VPseudoVMAX_VV_VF_BF16;
+}
+
+defm PseudoVFSGNJ_ALT : VPseudoVSGNJ_VV_VF_BF16;
+defm PseudoVFSGNJN_ALT : VPseudoVSGNJ_VV_VF_BF16;
+defm PseudoVFSGNJX_ALT : VPseudoVSGNJ_VV_VF_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVMFEQ_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFNE_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLT_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLE_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFGT_ALT : VPseudoVCMPM_VF;
+defm PseudoVMFGE_ALT : VPseudoVCMPM_VF;
+}
+
+defm PseudoVFCLASS_ALT : VPseudoVCLS_V;
+
+defm PseudoVFMERGE_ALT : VPseudoVMRG_FM;
+
+defm PseudoVFMV_V_ALT : VPseudoVMV_F;
+
+let mayRaiseFPException = true in {
+defm PseudoVFWCVT_F_XU_ALT : VPseudoVWCVTF_V_BF16;
+defm PseudoVFWCVT_F_X_ALT : VPseudoVWCVTF_V_BF16;
+
+defm PseudoVFWCVT_F_F_ALT : VPseudoVWCVTD_V_BF16;
+} // mayRaiseFPException = true
+
+let mayRaiseFPException = true in {
+let hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFNCVT_XU_F_ALT : VPseudoVNCVTI_W_RM;
+defm PseudoVFNCVT_X_F_ALT : VPseudoVNCVTI_W_RM;
+}
+
+defm PseudoVFNCVT_RTZ_XU_F_ALT : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_RTZ_X_F_ALT : VPseudoVNCVTI_W;
+
+defm PseudoVFNCVT_F_F_ALT : VPseudoVNCVTD_W_RM_BF16;
+
+defm PseudoVFNCVT_ROD_F_F_ALT : VPseudoVNCVTD_W_BF16;
+} // mayRaiseFPException = true
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ defvar f = SCALAR_F16;
+ let HasSEWOp = 1, BaseInstr = VFMV_F_S in
+ def "PseudoVFMV_" # f.FX # "_S_ALT" :
+ RISCVVPseudo<(outs f.fprclass:$rd), (ins VR:$rs2, sew:$sew)>,
+ Sched<[WriteVMovFS, ReadVMovFS]>;
+ let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1,
+ Constraints = "$rd = $passthru" in
+ def "PseudoVFMV_S_" # f.FX # "_ALT" :
+ RISCVVPseudo<(outs VR:$rd),
+ (ins VR:$passthru, f.fprclass:$rs1, AVL:$vl, sew:$sew)>,
+ Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>;
+}
+
+defm PseudoVFSLIDE1UP_ALT : VPseudoVSLD1_VF<"@earlyclobber $rd">;
+defm PseudoVFSLIDE1DOWN_ALT : VPseudoVSLD1_VF;
+} // Predicates = [HasStdExtZvfbfa], AltFmtType = IS_ALTFMT
+
//===----------------------------------------------------------------------===//
// Patterns
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZvfbfminOrZvfofp8min] in {
+multiclass VPatConversionWF_VF_BF<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in
+ {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>;
+ }
+}
+
+multiclass VPatConversionVF_WF_BF_RM<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ defm : VPatConversionRoundingMode<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass,
+ isSEWAware>;
+ }
+}
+
+let Predicates = [HasStdExtZvfbfmin] in {
defm : VPatConversionWF_VF_BF<"int_riscv_vfwcvtbf16_f_f_v",
"PseudoVFWCVTBF16_F_F", isSEWAware=1>;
defm : VPatConversionVF_WF_BF_RM<"int_riscv_vfncvtbf16_f_f_w",
@@ -56,7 +410,6 @@ let Predicates = [HasStdExtZvfbfminOrZvfofp8min] in {
foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- let Predicates = [HasVInstructionsBF16Minimal] in
def : Pat<(fwti.Vector (any_riscv_fpextend_vl
(fvti.Vector fvti.RegClass:$rs1),
(fvti.Mask VMV0:$vm),
@@ -66,18 +419,16 @@ let Predicates = [HasStdExtZvfbfminOrZvfofp8min] in {
(fvti.Mask VMV0:$vm),
GPR:$vl, fvti.Log2SEW, TA_MA)>;
- let Predicates = [HasVInstructionsBF16Minimal] in
- def : Pat<(fvti.Vector (any_riscv_fpround_vl
- (fwti.Vector fwti.RegClass:$rs1),
- (fwti.Mask VMV0:$vm), VLOpFrag)),
- (!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
- (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
- (fwti.Mask VMV0:$vm),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, fvti.Log2SEW, TA_MA)>;
- let Predicates = [HasVInstructionsBF16Minimal] in
+ def : Pat<(fvti.Vector (any_riscv_fpround_vl
+ (fwti.Vector fwti.RegClass:$rs1),
+ (fwti.Mask VMV0:$vm), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
+ (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+ (fwti.Mask VMV0:$vm),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW)
(fvti.Vector (IMPLICIT_DEF)),
@@ -87,6 +438,130 @@ let Predicates = [HasStdExtZvfbfminOrZvfofp8min] in {
FRM_DYN,
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
+
+ defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllBF16Vectors>;
+ defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
+ AllBF16Vectors, uimm5>;
+ defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
+ eew=16, vtilist=AllBF16Vectors>;
+ defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllBF16Vectors, uimm5>;
+ defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllBF16Vectors, uimm5>;
+
+ foreach fvti = AllBF16Vectors in {
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vmerge", "PseudoVMERGE", "VVM",
+ fvti.Vector,
+ fvti.Vector, fvti.Vector, fvti.Mask,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass,
+ fvti.RegClass, fvti.RegClass>;
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE",
+ "V"#fvti.ScalarSuffix#"M",
+ fvti.Vector,
+ fvti.Vector, fvti.Scalar, fvti.Mask,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass,
+ fvti.RegClass, fvti.ScalarRegClass>;
+ defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
+ def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$passthru),
+ (fvti.Vector fvti.RegClass:$rs2),
+ (fvti.Scalar (fpimm0)),
+ (fvti.Mask VMV0:$vm), VLOpFrag)),
+ (instr fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
+
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
+ fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector
+ (riscv_vrgather_vv_vl fvti.RegClass:$rs2,
+ (ivti.Vector fvti.RegClass:$rs1),
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VV_"# fvti.LMul.MX#"_E"# fvti.SEW#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector (riscv_vrgather_vx_vl fvti.RegClass:$rs2, GPR:$rs1,
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VX_"# fvti.LMul.MX#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$rs1,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector
+ (riscv_vrgather_vx_vl fvti.RegClass:$rs2,
+ uimm5:$imm,
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VI_"# fvti.LMul.MX#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, uimm5:$imm,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
let Predicates = [HasStdExtZvfbfwma] in {
@@ -97,3 +572,224 @@ let Predicates = [HasStdExtZvfbfwma] in {
defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACCBF16",
AllWidenableBF16ToFloatVectors>;
}
+
+multiclass VPatConversionVI_VF_BF16<string intrinsic, string instruction> {
+ foreach fvti = AllBF16Vectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW,
+ fvti.LMul, ivti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionWF_VI_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
+ vti.LMul, fwti.RegClass, vti.RegClass, isSEWAware>;
+ }
+}
+
+multiclass VPatConversionWF_VF_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypeMinimalPredicates<fvti>.Predicates,
+ GetVTypeMinimalPredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>;
+ }
+}
+
+multiclass VPatConversionVI_WF_BF16<string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVI_WF_RM_BF16<string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversionRoundingMode<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVF_WF_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass, isSEWAware>;
+ }
+}
+
+let Predicates = [HasStdExtZvfbfa] in {
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfadd", "PseudoVFADD_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfsub", "PseudoVFSUB_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryV_VX_RM<"int_riscv_vfrsub", "PseudoVFRSUB_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwadd", "PseudoVFWADD_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwsub", "PseudoVFWSUB_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwadd_w", "PseudoVFWADD_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwmul", "PseudoVFWMUL_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmacc", "PseudoVFMACC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmacc", "PseudoVFNMACC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsac", "PseudoVFMSAC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsac", "PseudoVFNMSAC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmadd", "PseudoVFMADD_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmadd", "PseudoVFNMADD_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsub", "PseudoVFMSUB_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmacc", "PseudoVFWMACC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmacc", "PseudoVFWNMACC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfeq", "PseudoVMFEQ_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfle", "PseudoVMFLE_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmflt", "PseudoVMFLT_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfne", "PseudoVMFNE_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VX<"int_riscv_vmfgt", "PseudoVMFGT_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VX<"int_riscv_vmfge", "PseudoVMFGE_ALT", AllBF16Vectors>;
+defm : VPatBinarySwappedM_VV<"int_riscv_vmfgt", "PseudoVMFLT_ALT", AllBF16Vectors>;
+defm : VPatBinarySwappedM_VV<"int_riscv_vmfge", "PseudoVMFLE_ALT", AllBF16Vectors>;
+defm : VPatConversionVI_VF_BF16<"int_riscv_vfclass", "PseudoVFCLASS_ALT">;
+foreach vti = AllBF16Vectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE_ALT",
+ "V"#vti.ScalarSuffix#"M",
+ vti.Vector,
+ vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
+}
+defm : VPatConversionWF_VI_BF16<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU_ALT",
+ isSEWAware=1>;
+defm : VPatConversionWF_VI_BF16<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X_ALT",
+ isSEWAware=1>;
+defm : VPatConversionWF_VF_BF16<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F_ALT",
+ isSEWAware=1>;
+defm : VPatConversionVI_WF_RM_BF16<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F_ALT">;
+defm : VPatConversionVI_WF_RM_BF16<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F_ALT">;
+defm : VPatConversionVI_WF_BF16<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F_ALT">;
+defm : VPatConversionVI_WF_BF16<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F_ALT">;
+defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatConversionVF_WF_BF16<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F_ALT",
+ isSEWAware=1>;
+defm : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP_ALT", AllBF16Vectors>;
+defm : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN_ALT", AllBF16Vectors>;
+
+foreach fvti = AllBF16Vectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = GetVTypePredicates<ivti>.Predicates in {
+ // 13.16. Vector Floating-Point Move Instruction
+ // If we're splatting fpimm0, use vmv.v.x vd, x0.
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
+ $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_X_"#fvti.LMul.MX)
+ $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ }
+
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_V_ALT_" # fvti.ScalarSuffix # "_" #
+ fvti.LMul.MX)
+ $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ }
+}
+
+foreach vti = NoGroupBF16Vectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ (vti.Scalar (fpimm0)),
+ VLOpFrag)),
+ (PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ (vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ VLOpFrag)),
+ (PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ vti.ScalarRegClass:$rs1,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_S_"#vti.ScalarSuffix#"_ALT")
+ vti.RegClass:$passthru,
+ (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
+ }
+
+ defvar vfmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_",
+ vti.ScalarSuffix,
+ "_S_ALT"));
+ // Only pattern-match extract-element operations where the index is 0. Any
+ // other index will have been custom-lowered to slide the vector correctly
+ // into place.
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : Pat<(vti.Scalar (extractelt (vti.Vector vti.RegClass:$rs2), 0)),
+ (vfmv_f_s_inst vti.RegClass:$rs2, vti.Log2SEW)>;
+}
+} // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index f863392a..637d61fe 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -270,7 +270,7 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
// and floating point computation.
// The V pipeline is modeled by the VCQ, VA, VL, and VS resources. There can
// be one or two VA (Vector Arithmetic).
-multiclass SiFive7ProcResources<bit extraVALU = false> {
+multiclass SiFive7ProcResources<bit dualVALU = false> {
let BufferSize = 0 in {
def PipeA : ProcResource<1>;
def PipeB : ProcResource<1>;
@@ -279,7 +279,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> {
def FDiv : ProcResource<1>; // FP Division/Sqrt
// Arithmetic sequencer(s)
- if extraVALU then {
+ if dualVALU then {
      // VA1 can handle any vector arithmetic instruction.
def VA1 : ProcResource<1>;
// VA2 generally can only handle simple vector arithmetic.
@@ -305,7 +305,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> {
def PipeAB : ProcResGroup<[!cast<ProcResource>(NAME#"PipeA"),
!cast<ProcResource>(NAME#"PipeB")]>;
- if extraVALU then
+ if dualVALU then
def VA1OrVA2 : ProcResGroup<[!cast<ProcResource>(NAME#"VA1"),
!cast<ProcResource>(NAME#"VA2")]>;
}
@@ -1550,10 +1550,10 @@ multiclass SiFive7ReadAdvance {
/// This multiclass is a "bundle" of (1) processor resources (i.e. pipes) and
/// (2) WriteRes entries. It's parameterized by config values that will
/// eventually be supplied by different SchedMachineModels.
-multiclass SiFive7SchedResources<int vlen, bit extraVALU,
+multiclass SiFive7SchedResources<int vlen, bit dualVALU,
SiFive7FPLatencies fpLatencies,
bit hasFastGather> {
- defm SiFive7 : SiFive7ProcResources<extraVALU>;
+ defm SiFive7 : SiFive7ProcResources<dualVALU>;
// Pull out defs from SiFive7ProcResources so we can refer to them by name.
defvar SiFive7PipeA = !cast<ProcResource>(NAME # SiFive7PipeA);
@@ -1562,10 +1562,10 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
defvar SiFive7IDiv = !cast<ProcResource>(NAME # SiFive7IDiv);
defvar SiFive7FDiv = !cast<ProcResource>(NAME # SiFive7FDiv);
// Pass SiFive7VA for VA1 and VA1OrVA2 if there is only 1 VALU.
- defvar SiFive7VA1 = !if (extraVALU,
+ defvar SiFive7VA1 = !if (dualVALU,
!cast<ProcResource>(NAME # SiFive7VA1),
!cast<ProcResource>(NAME # SiFive7VA));
- defvar SiFive7VA1OrVA2 = !if (extraVALU,
+ defvar SiFive7VA1OrVA2 = !if (dualVALU,
!cast<ProcResGroup>(NAME # SiFive7VA1OrVA2),
!cast<ProcResource>(NAME # SiFive7VA));
defvar SiFive7VA = !cast<ProcResource>(NAME # SiFive7VA);
@@ -1608,7 +1608,7 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
HasStdExtZkr];
int VLEN = vlen;
- bit HasExtraVALU = false;
+ bit HasDualVALU = false;
SiFive7FPLatencies FPLatencies;
bit HasFastGather = false;
@@ -1635,7 +1635,7 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> {
}
def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
- let HasExtraVALU = true;
+ let HasDualVALU = true;
let FPLatencies = SiFive7LowFPLatencies;
let HasFastGather = true;
}
@@ -1643,7 +1643,7 @@ def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
/// Binding models to their scheduling resources.
foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
let SchedModel = model in
- defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
+ defm model.Name : SiFive7SchedResources<model.VLEN, model.HasDualVALU,
model.FPLatencies,
model.HasFastGather>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 6acf799..334db4b 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -288,9 +288,12 @@ public:
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
bool hasVInstructionsF16Minimal() const { return HasStdExtZvfhmin; }
bool hasVInstructionsF16() const { return HasStdExtZvfh; }
- bool hasVInstructionsBF16Minimal() const { return HasStdExtZvfbfmin; }
+ bool hasVInstructionsBF16Minimal() const {
+ return HasStdExtZvfbfmin || HasStdExtZvfbfa;
+ }
bool hasVInstructionsF32() const { return HasStdExtZve32f; }
bool hasVInstructionsF64() const { return HasStdExtZve64d; }
+ bool hasVInstructionsBF16() const { return HasStdExtZvfbfa; }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
bool hasVInstructionsFullMultiply() const { return HasStdExtV; }
diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
index 56a6168..640b014 100644
--- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
@@ -78,6 +78,8 @@ public:
void outputExecutionModeFromNumthreadsAttribute(
const MCRegister &Reg, const Attribute &Attr,
SPIRV::ExecutionMode::ExecutionMode EM);
+ void outputExecutionModeFromEnableMaximalReconvergenceAttr(
+ const MCRegister &Reg, const SPIRVSubtarget &ST);
void outputExecutionMode(const Module &M);
void outputAnnotations(const Module &M);
void outputModuleSections();
@@ -495,6 +497,20 @@ void SPIRVAsmPrinter::outputExecutionModeFromNumthreadsAttribute(
outputMCInst(Inst);
}
+void SPIRVAsmPrinter::outputExecutionModeFromEnableMaximalReconvergenceAttr(
+ const MCRegister &Reg, const SPIRVSubtarget &ST) {
+ assert(ST.canUseExtension(SPIRV::Extension::SPV_KHR_maximal_reconvergence) &&
+ "Function called when SPV_KHR_maximal_reconvergence is not enabled.");
+
+ MCInst Inst;
+ Inst.setOpcode(SPIRV::OpExecutionMode);
+ Inst.addOperand(MCOperand::createReg(Reg));
+ unsigned EM =
+ static_cast<unsigned>(SPIRV::ExecutionMode::MaximallyReconvergesKHR);
+ Inst.addOperand(MCOperand::createImm(EM));
+ outputMCInst(Inst);
+}
+
void SPIRVAsmPrinter::outputExecutionMode(const Module &M) {
NamedMDNode *Node = M.getNamedMetadata("spirv.ExecutionMode");
if (Node) {
@@ -551,6 +567,10 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) {
if (Attribute Attr = F.getFnAttribute("hlsl.numthreads"); Attr.isValid())
outputExecutionModeFromNumthreadsAttribute(
FReg, Attr, SPIRV::ExecutionMode::LocalSize);
+ if (Attribute Attr = F.getFnAttribute("enable-maximal-reconvergence");
+ Attr.getValueAsBool()) {
+ outputExecutionModeFromEnableMaximalReconvergenceAttr(FReg, *ST);
+ }
if (MDNode *Node = F.getMetadata("work_group_size_hint"))
outputExecutionModeFromMDNode(FReg, Node,
SPIRV::ExecutionMode::LocalSizeHint, 3, 1);
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 5f3ed86..96f5dee 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -153,7 +153,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
SPIRV::Extension::Extension::
SPV_EXT_relaxed_printf_string_address_space},
{"SPV_INTEL_predicated_io",
- SPIRV::Extension::Extension::SPV_INTEL_predicated_io}};
+ SPIRV::Extension::Extension::SPV_INTEL_predicated_io},
+ {"SPV_KHR_maximal_reconvergence",
+ SPIRV::Extension::Extension::SPV_KHR_maximal_reconvergence}};
bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName,
StringRef ArgValue,
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index c6c6182..a151fd2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -1392,19 +1392,19 @@ void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) {
Constant *AggrConst = nullptr;
Type *ResTy = nullptr;
if (auto *COp = dyn_cast<ConstantVector>(Op)) {
- AggrConst = cast<Constant>(COp);
+ AggrConst = COp;
ResTy = COp->getType();
} else if (auto *COp = dyn_cast<ConstantArray>(Op)) {
- AggrConst = cast<Constant>(COp);
+ AggrConst = COp;
ResTy = B.getInt32Ty();
} else if (auto *COp = dyn_cast<ConstantStruct>(Op)) {
- AggrConst = cast<Constant>(COp);
+ AggrConst = COp;
ResTy = B.getInt32Ty();
} else if (auto *COp = dyn_cast<ConstantDataArray>(Op)) {
- AggrConst = cast<Constant>(COp);
+ AggrConst = COp;
ResTy = B.getInt32Ty();
} else if (auto *COp = dyn_cast<ConstantAggregateZero>(Op)) {
- AggrConst = cast<Constant>(COp);
+ AggrConst = COp;
ResTy = Op->getType()->isVectorTy() ? COp->getType() : B.getInt32Ty();
}
if (AggrConst) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index a466ab2..a0cff4d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3765,7 +3765,6 @@ void SPIRVInstructionSelector::decorateUsesAsNonUniform(
SPIRV::Decoration::NonUniformEXT, {});
}
}
- return;
}
bool SPIRVInstructionSelector::extractSubvector(
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 5144fb1..61a0bbe 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1200,6 +1200,23 @@ void addOpAccessChainReqs(const MachineInstr &Instr,
return;
}
+ bool IsNonUniform =
+ hasNonUniformDecoration(Instr.getOperand(0).getReg(), MRI);
+
+ auto FirstIndexReg = Instr.getOperand(3).getReg();
+ bool FirstIndexIsConstant =
+ Subtarget.getInstrInfo()->isConstantInstr(*MRI.getVRegDef(FirstIndexReg));
+
+ if (StorageClass == SPIRV::StorageClass::StorageClass::StorageBuffer) {
+ if (IsNonUniform)
+ Handler.addRequirements(
+ SPIRV::Capability::StorageBufferArrayNonUniformIndexingEXT);
+ else if (!FirstIndexIsConstant)
+ Handler.addRequirements(
+ SPIRV::Capability::StorageBufferArrayDynamicIndexing);
+ return;
+ }
+
Register PointeeTypeReg = ResTypeInst->getOperand(2).getReg();
MachineInstr *PointeeType = MRI.getUniqueVRegDef(PointeeTypeReg);
if (PointeeType->getOpcode() != SPIRV::OpTypeImage &&
@@ -1208,27 +1225,25 @@ void addOpAccessChainReqs(const MachineInstr &Instr,
return;
}
- bool IsNonUniform =
- hasNonUniformDecoration(Instr.getOperand(0).getReg(), MRI);
if (isUniformTexelBuffer(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::UniformTexelBufferArrayNonUniformIndexingEXT);
- else
+ else if (!FirstIndexIsConstant)
Handler.addRequirements(
SPIRV::Capability::UniformTexelBufferArrayDynamicIndexingEXT);
} else if (isInputAttachment(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::InputAttachmentArrayNonUniformIndexingEXT);
- else
+ else if (!FirstIndexIsConstant)
Handler.addRequirements(
SPIRV::Capability::InputAttachmentArrayDynamicIndexingEXT);
} else if (isStorageTexelBuffer(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::StorageTexelBufferArrayNonUniformIndexingEXT);
- else
+ else if (!FirstIndexIsConstant)
Handler.addRequirements(
SPIRV::Capability::StorageTexelBufferArrayDynamicIndexingEXT);
} else if (isSampledImage(PointeeType) ||
@@ -1237,14 +1252,14 @@ void addOpAccessChainReqs(const MachineInstr &Instr,
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::SampledImageArrayNonUniformIndexingEXT);
- else
+ else if (!FirstIndexIsConstant)
Handler.addRequirements(
SPIRV::Capability::SampledImageArrayDynamicIndexing);
} else if (isStorageImage(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::StorageImageArrayNonUniformIndexingEXT);
- else
+ else if (!FirstIndexIsConstant)
Handler.addRequirements(
SPIRV::Capability::StorageImageArrayDynamicIndexing);
}
@@ -2155,6 +2170,9 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI,
SPIRV::OperandCategory::ExecutionModeOperand,
SPIRV::ExecutionMode::LocalSize, ST);
}
+ if (F.getFnAttribute("enable-maximal-reconvergence").getValueAsBool()) {
+ MAI.Reqs.addExtension(SPIRV::Extension::SPV_KHR_maximal_reconvergence);
+ }
if (F.getMetadata("work_group_size_hint"))
MAI.Reqs.getAndAddRequirements(
SPIRV::OperandCategory::ExecutionModeOperand,
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 2625642..7d08b29 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -386,6 +386,7 @@ defm SPV_KHR_float_controls2 : ExtensionOperand<124, [EnvVulkan, EnvOpenCL]>;
defm SPV_INTEL_tensor_float32_conversion : ExtensionOperand<125, [EnvOpenCL]>;
defm SPV_KHR_bfloat16 : ExtensionOperand<126, [EnvVulkan, EnvOpenCL]>;
defm SPV_INTEL_predicated_io : ExtensionOperand<127, [EnvOpenCL]>;
+defm SPV_KHR_maximal_reconvergence : ExtensionOperand<128, [EnvVulkan]>;
//===----------------------------------------------------------------------===//
// Multiclass used to define Capabilities enum values and at the same time
@@ -698,7 +699,7 @@ defm IntersectionNV: ExecutionModelOperand<5314, [RayTracingNV]>;
defm AnyHitNV: ExecutionModelOperand<5315, [RayTracingNV]>;
defm ClosestHitNV: ExecutionModelOperand<5316, [RayTracingNV]>;
defm MissNV: ExecutionModelOperand<5317, [RayTracingNV]>;
-defm CallableNV: ExecutionModelOperand<5318, [RayTracingNV]>;
+defm CallableNV : ExecutionModelOperand<5318, [RayTracingNV]>;
//===----------------------------------------------------------------------===//
// Multiclass used to define MemoryModel enum values and at the same time
@@ -805,6 +806,7 @@ defm RoundingModeRTNINTEL : ExecutionModeOperand<5621, [RoundToInfinityINTEL]>;
defm FloatingPointModeALTINTEL : ExecutionModeOperand<5622, [FloatingPointModeINTEL]>;
defm FloatingPointModeIEEEINTEL : ExecutionModeOperand<5623, [FloatingPointModeINTEL]>;
defm FPFastMathDefault : ExecutionModeOperand<6028, [FloatControls2]>;
+defm MaximallyReconvergesKHR : ExecutionModeOperand<6023, [Shader]>;
//===----------------------------------------------------------------------===//
// Multiclass used to define StorageClass enum values and at the same time
diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index 2934c88..fa08d44 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -246,8 +246,7 @@ SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
}
}
-static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
-{
+[[maybe_unused]] static bool verifyLeafProcRegUse(MachineRegisterInfo *MRI) {
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
if (MRI->isPhysRegUsed(reg))
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
index d9c8e22..6e99fc3 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
@@ -23,7 +23,7 @@ std::optional<wasm::ValType> WebAssembly::parseType(StringRef Type) {
.Case("i64", wasm::ValType::I64)
.Case("f32", wasm::ValType::F32)
.Case("f64", wasm::ValType::F64)
- .Cases("v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2",
+ .Cases({"v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2"},
wasm::ValType::V128)
.Case("funcref", wasm::ValType::FUNCREF)
.Case("externref", wasm::ValType::EXTERNREF)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 0f6e1ca..ed54404d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1763,6 +1763,26 @@ defm RELAXED_DOT :
"i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs",
"i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>;
+def : Pat<
+ (v8i16 (add
+ (wasm_shuffle
+ (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
+ (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
+ (i32 0), (i32 1), (i32 4), (i32 5),
+ (i32 8), (i32 9), (i32 12), (i32 13),
+ (i32 16), (i32 17), (i32 20), (i32 21),
+ (i32 24), (i32 25), (i32 28), (i32 29)),
+ (wasm_shuffle
+ (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
+ (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
+ (i32 2), (i32 3), (i32 6), (i32 7),
+ (i32 10), (i32 11), (i32 14), (i32 15),
+ (i32 18), (i32 19), (i32 22), (i32 23),
+ (i32 26), (i32 27), (i32 30), (i32 31)))
+ ),
+ (v8i16 (RELAXED_DOT v16i8:$lhs, v16i8:$rhs))
+>;
+
defm RELAXED_DOT_ADD :
RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc),
(outs), (ins),
@@ -1771,6 +1791,18 @@ defm RELAXED_DOT_ADD :
"i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc",
"i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>;
+def : Pat<
+ (v4i32 (add
+ (v4i32 (int_wasm_extadd_pairwise_signed
+ (v8i16 (int_wasm_relaxed_dot_i8x16_i7x16_signed v16i8:$lhs, v16i8:$rhs)))),
+ (v4i32 V128:$acc))),
+ (v4i32 (RELAXED_DOT_ADD v16i8:$lhs, v16i8:$rhs, (v4i32 V128:$acc)))
+ >;
+
+def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$lhs),
+ (v16i8 V128:$rhs))),
+ (RELAXED_DOT_ADD $lhs, $rhs, $acc)>, Requires<[HasRelaxedSIMD]>;
+
//===----------------------------------------------------------------------===//
// Relaxed BFloat16 dot product
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a8908d4..ac251fd 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3514,15 +3514,16 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
// xacquire <insn> ; xacquire must be accompanied by 'lock'
bool IsPrefix =
StringSwitch<bool>(Name)
- .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
- .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
- .Cases("xacquire", "xrelease", true)
- .Cases("acquire", "release", isParsingIntelSyntax())
+ .Cases({"cs", "ds", "es", "fs", "gs", "ss"}, true)
+ .Cases({"rex64", "data32", "data16", "addr32", "addr16"}, true)
+ .Cases({"xacquire", "xrelease"}, true)
+ .Cases({"acquire", "release"}, isParsingIntelSyntax())
.Default(false);
auto isLockRepeatNtPrefix = [](StringRef N) {
return StringSwitch<bool>(N)
- .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
+ .Cases({"lock", "rep", "repe", "repz", "repne", "repnz", "notrack"},
+ true)
.Default(false);
};
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index b81641f..28fa2cd 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -414,8 +414,6 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
getActionDefinitionsBuilder(G_SEXT_INREG).lower();
- getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
-
// fp constants
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({s32, s64})
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 6db780f..8e08d16 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1338,6 +1338,10 @@ def ProcessorFeatures {
list<SubtargetFeature> PTLFeatures =
!listremove(ARLSFeatures, [FeatureWIDEKL]);
+ // Novalake
+ list<SubtargetFeature> NVLFeatures =
+ !listconcat(PTLFeatures, [FeaturePREFETCHI]);
+
// Clearwaterforest
list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
FeatureAVXVNNIINT16,
@@ -1883,6 +1887,9 @@ foreach P = ["pantherlake", "wildcatlake"] in {
def : ProcModel<P, AlderlakePModel,
ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
}
+def : ProcModel<"novalake", AlderlakePModel, ProcessorFeatures.NVLFeatures,
+ ProcessorFeatures.ADLTuning>;
+
def : ProcModel<"clearwaterforest", AlderlakePModel,
ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index e0991aa..9f88fda 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -602,8 +602,7 @@ namespace {
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
}
- friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
- const TableEntry &TE) {
+ [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) {
return V < TE.from;
}
};
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c32b1a6..b05d7c7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29755,65 +29755,30 @@ static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl,
const X86Subtarget &Subtarget,
SelectionDAG &DAG,
SDValue *Low = nullptr) {
- unsigned NumElts = VT.getVectorNumElements();
-
// For vXi8 we will unpack the low and high half of each 128 bit lane to widen
// to a vXi16 type. Do the multiplies, shift the results and pack the half
// lane results back together.
// We'll take different approaches for signed and unsigned.
- // For unsigned we'll use punpcklbw/punpckhbw to put zero extend the bytes
- // and use pmullw to calculate the full 16-bit product.
+ // For unsigned we'll use punpcklbw/punpckhbw to zero extend the bytes to
+ // words and use pmullw to calculate the full 16-bit product.
 // For signed we'll use punpcklbw/punpckhbw to extend the bytes to words and
// shift them left into the upper byte of each word. This allows us to use
// pmulhw to calculate the full 16-bit product. This trick means we don't
// need to sign extend the bytes to use pmullw.
-
- MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue ALo, AHi;
+ SDValue ALo, AHi, BLo, BHi;
if (IsSigned) {
ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, A));
- AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A));
- } else {
- ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero));
- AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero));
- }
-
- SDValue BLo, BHi;
- if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
- // If the RHS is a constant, manually unpackl/unpackh and extend.
- SmallVector<SDValue, 16> LoOps, HiOps;
- for (unsigned i = 0; i != NumElts; i += 16) {
- for (unsigned j = 0; j != 8; ++j) {
- SDValue LoOp = B.getOperand(i + j);
- SDValue HiOp = B.getOperand(i + j + 8);
-
- if (IsSigned) {
- LoOp = DAG.getAnyExtOrTrunc(LoOp, dl, MVT::i16);
- HiOp = DAG.getAnyExtOrTrunc(HiOp, dl, MVT::i16);
- LoOp = DAG.getNode(ISD::SHL, dl, MVT::i16, LoOp,
- DAG.getConstant(8, dl, MVT::i16));
- HiOp = DAG.getNode(ISD::SHL, dl, MVT::i16, HiOp,
- DAG.getConstant(8, dl, MVT::i16));
- } else {
- LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16);
- HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16);
- }
-
- LoOps.push_back(LoOp);
- HiOps.push_back(HiOp);
- }
- }
-
- BLo = DAG.getBuildVector(ExVT, dl, LoOps);
- BHi = DAG.getBuildVector(ExVT, dl, HiOps);
- } else if (IsSigned) {
BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, B));
+ AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A));
BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, B));
} else {
+ ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero));
BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Zero));
+ AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero));
BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Zero));
}
@@ -29826,7 +29791,7 @@ static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl,
if (Low)
*Low = getPack(DAG, Subtarget, dl, VT, RLo, RHi);
- return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf*/ true);
+ return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf=*/true);
}
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
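[Editor's aside on the widened-multiply comment in the hunk above: a minimal standalone C++ sketch, not part of this patch and independent of the LLVM APIs, showing why the signed path can use pmulhw without sign-extending the bytes. Unpacking with zeros in the low byte places each byte value v into a word as v * 256, and the high 16 bits of the 32-bit product of two such words are exactly the 8x8->16 signed product.]

#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a <= 127; ++a) {
    for (int b = -128; b <= 127; ++b) {
      // Byte placed in the upper half of a 16-bit word, as the unpack-with-zero does.
      int16_t wa = static_cast<int16_t>(a * 256);
      int16_t wb = static_cast<int16_t>(b * 256);
      // One pmulhw lane: the high 16 bits of the full 32-bit product.
      // (Signed >> is an arithmetic shift on the usual implementations.)
      int32_t full = static_cast<int32_t>(wa) * static_cast<int32_t>(wb);
      int16_t hi = static_cast<int16_t>(full >> 16);
      assert(hi == static_cast<int16_t>(a * b)); // exact 16-bit product a*b
    }
  }
  return 0;
}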
@@ -44848,10 +44813,16 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
case X86ISD::PCMPGT:
// icmp sgt(0, R) == ashr(R, BitWidth-1).
- // iff we only need the sign bit then we can use R directly.
- if (OriginalDemandedBits.isSignMask() &&
- ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) {
+ // iff we only need the sign bit then we can use R directly.
+ if (OriginalDemandedBits.isSignMask())
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // Otherwise we just need R's sign bit for the comparison.
+ APInt SignMask = APInt::getSignMask(BitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(1), SignMask, OriginalDemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ }
break;
case X86ISD::MOVMSK: {
SDValue Src = Op.getOperand(0);
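[Editor's aside: a quick standalone check, not part of this patch, of the identity the PCMPGT comment relies on. Per lane, pcmpgt(0, R) is all-ones exactly when R is negative, i.e. the same value as an arithmetic shift right by BitWidth-1, which is why only R's sign bit is demanded.]

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  const int32_t samples[] = {0, 1, -1, 7, -7,
                             std::numeric_limits<int32_t>::max(),
                             std::numeric_limits<int32_t>::min()};
  for (int32_t r : samples) {
    int32_t cmp = (0 > r) ? -1 : 0; // one 32-bit lane of PCMPGT(0, R)
    int32_t sra = r >> 31;          // ashr(R, BitWidth-1); signed >> is arithmetic here
    assert(cmp == sra);
  }
  return 0;
}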
@@ -47761,6 +47732,15 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
DL, DAG, Subtarget))
return V;
+ // If the sign bit is known then BLENDV can be folded away.
+ if (N->getOpcode() == X86ISD::BLENDV) {
+ KnownBits KnownCond = DAG.computeKnownBits(Cond);
+ if (KnownCond.isNegative())
+ return LHS;
+ if (KnownCond.isNonNegative())
+ return RHS;
+ }
+
if (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::BLENDV) {
SmallVector<int, 64> CondMask;
if (createShuffleMaskFromVSELECT(CondMask, Cond,
@@ -58342,11 +58322,12 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
} else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) {
SDValue Src = Op1;
SDValue Op10 = Op1.getOperand(0);
- if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1))) {
- // res, flags2 = sub 0, (and (xor X, -1), Y)
+ if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1)) &&
+ llvm::isOneConstant(Op1.getOperand(1))) {
+ // res, flags2 = sub 0, (and (xor X, -1), 1)
// cload/cstore ..., cond_ne, flag2
// ->
- // res, flags2 = sub 0, (and X, Y)
+ // res, flags2 = sub 0, (and X, 1)
// cload/cstore ..., cond_e, flag2
Src = DAG.getNode(ISD::AND, DL, Op1.getValueType(), Op10.getOperand(0),
Op1.getOperand(1));
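[Editor's aside: the cload/cstore rewrite above rests on a small bit identity: (~X) & 1 is nonzero exactly when X & 1 is zero, so testing the old operand for "not equal to zero" is the same as testing the new one for "equal to zero", which is why the condition flips from cond_ne to cond_e. A minimal sketch, not part of this patch, of that equivalence.]

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t samples[] = {0, 1, 2, 3, 0xdeadbeefULL, ~0ULL};
  for (uint64_t x : samples) {
    bool old_form = ((~x & 1) != 0); // ZF-based cond_ne on sub 0, (and (xor X, -1), 1)
    bool new_form = ((x & 1) == 0);  // ZF-based cond_e  on sub 0, (and X, 1)
    assert(old_form == new_form);
  }
  return 0;
}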
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 6dd43b2..37d7772 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -606,16 +606,24 @@ Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
- if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
- Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
+ RTLIB::LibcallImpl SecurityCheckCookieLibcall =
+ getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
+
+ RTLIB::LibcallImpl SecurityCookieVar =
+ getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
+ if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
+ SecurityCookieVar != RTLIB::Unsupported) {
+ // MSVC CRT provides functionalities for stack protection.
// MSVC CRT has a global variable holding security cookie.
- M.getOrInsertGlobal("__security_cookie",
+ M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar),
PointerType::getUnqual(M.getContext()));
// MSVC CRT has a function to validate security cookie.
- FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
- "__security_check_cookie", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(M.getContext()));
+ FunctionCallee SecurityCheckCookie =
+ M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(M.getContext()));
+
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::X86_FastCall);
F->addParamAttr(0, Attribute::AttrKind::InReg);
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 0fd44b7..ec31675 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1256,8 +1256,17 @@ def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+
+// If the globaladdr is an absolute_symbol, don't bother using the sign-extending
+// instruction since there's no benefit to using it with absolute symbols.
+def globalAddrNoAbsSym : PatLeaf<(tglobaladdr:$dst), [{
+ auto *GA = cast<GlobalAddressSDNode>(N);
+ return !GA->getGlobal()->getAbsoluteSymbolRange();
+}]>;
+def : Pat<(i64 (X86Wrapper globalAddrNoAbsSym:$dst)),
+ (MOV64ri32 tglobaladdr:$dst)>,
+ Requires<[KernelCode]>;
+
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper mcsym:$dst)),
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 1d2cd39..5c23f91 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10809,39 +10809,27 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
if (!ST.hasSSE1())
return;
- // PXOR is safe to use because it doesn't affect flags.
- BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg)
- .addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ BuildMI(MBB, Iter, DL, get(X86::V_SET0), Reg);
} else if (X86::VR256RegClass.contains(Reg)) {
// YMM#
if (!ST.hasAVX())
return;
- // VPXOR is safe to use because it doesn't affect flags.
- BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg)
- .addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ BuildMI(MBB, Iter, DL, get(X86::AVX_SET0), Reg);
} else if (X86::VR512RegClass.contains(Reg)) {
// ZMM#
if (!ST.hasAVX512())
return;
- // VPXORY is safe to use because it doesn't affect flags.
- BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg)
- .addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ BuildMI(MBB, Iter, DL, get(X86::AVX512_512_SET0), Reg);
} else if (X86::VK1RegClass.contains(Reg) || X86::VK2RegClass.contains(Reg) ||
X86::VK4RegClass.contains(Reg) || X86::VK8RegClass.contains(Reg) ||
X86::VK16RegClass.contains(Reg)) {
if (!ST.hasVLX())
return;
- // KXOR is safe to use because it doesn't affect flags.
- unsigned Op = ST.hasBWI() ? X86::KXORQkk : X86::KXORWkk;
- BuildMI(MBB, Iter, DL, get(Op), Reg)
- .addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ unsigned Op = ST.hasBWI() ? X86::KSET0Q : X86::KSET0W;
+ BuildMI(MBB, Iter, DL, get(Op), Reg);
}
}
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 481a9be..713d504 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1928,6 +1928,17 @@ static void addConstantComments(const MachineInstr *MI,
#define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \
case X86::Prefix##Instr##Suffix##rm##Postfix:
+#define CASE_AVX512_ARITH_RM(Instr) \
+ INSTR_CASE(V, Instr, Z128, ) \
+ INSTR_CASE(V, Instr, Z128, k) \
+ INSTR_CASE(V, Instr, Z128, kz) \
+ INSTR_CASE(V, Instr, Z256, ) \
+ INSTR_CASE(V, Instr, Z256, k) \
+ INSTR_CASE(V, Instr, Z256, kz) \
+ INSTR_CASE(V, Instr, Z, ) \
+ INSTR_CASE(V, Instr, Z, k) \
+ INSTR_CASE(V, Instr, Z, kz)
+
#define CASE_ARITH_RM(Instr) \
INSTR_CASE(, Instr, , ) /* SSE */ \
INSTR_CASE(V, Instr, , ) /* AVX-128 */ \
@@ -1943,40 +1954,26 @@ static void addConstantComments(const MachineInstr *MI,
INSTR_CASE(V, Instr, Z, kz)
// TODO: Add additional instructions when useful.
- CASE_ARITH_RM(PMADDUBSW) {
- unsigned SrcIdx = getSrcIdx(MI, 1);
- if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
- if (C->getType()->getScalarSizeInBits() == 8) {
- std::string Comment;
- raw_string_ostream CS(Comment);
- unsigned VectorWidth =
- X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
- CS << "[";
- printConstant(C, VectorWidth, CS);
- CS << "]";
- OutStreamer.AddComment(CS.str());
- }
- }
- break;
- }
-
+ CASE_ARITH_RM(PMADDUBSW)
CASE_ARITH_RM(PMADDWD)
+ CASE_ARITH_RM(PMULDQ)
+ CASE_ARITH_RM(PMULUDQ)
+ CASE_ARITH_RM(PMULLD)
+ CASE_AVX512_ARITH_RM(PMULLQ)
CASE_ARITH_RM(PMULLW)
CASE_ARITH_RM(PMULHW)
CASE_ARITH_RM(PMULHUW)
CASE_ARITH_RM(PMULHRSW) {
unsigned SrcIdx = getSrcIdx(MI, 1);
if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
- if (C->getType()->getScalarSizeInBits() == 16) {
- std::string Comment;
- raw_string_ostream CS(Comment);
- unsigned VectorWidth =
- X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
- CS << "[";
- printConstant(C, VectorWidth, CS);
- CS << "]";
- OutStreamer.AddComment(CS.str());
- }
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ unsigned VectorWidth =
+ X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
+ CS << "[";
+ printConstant(C, VectorWidth, CS);
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
}
break;
}
diff --git a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
index 89d5e0d..f6cea85 100644
--- a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
@@ -22,13 +22,13 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v5e", "v5te")
.Case("v6j", "v6")
.Case("v6hl", "v6k")
- .Cases("v6m", "v6sm", "v6s-m", "v6-m")
- .Cases("v6z", "v6zk", "v6kz")
- .Cases("v7", "v7a", "v7hl", "v7l", "v7-a")
+ .Cases({"v6m", "v6sm", "v6s-m"}, "v6-m")
+ .Cases({"v6z", "v6zk"}, "v6kz")
+ .Cases({"v7", "v7a", "v7hl", "v7l"}, "v7-a")
.Case("v7r", "v7-r")
.Case("v7m", "v7-m")
.Case("v7em", "v7e-m")
- .Cases("v8", "v8a", "v8l", "aarch64", "arm64", "v8-a")
+ .Cases({"v8", "v8a", "v8l", "aarch64", "arm64"}, "v8-a")
.Case("v8.1a", "v8.1-a")
.Case("v8.2a", "v8.2-a")
.Case("v8.3a", "v8.3-a")
@@ -39,7 +39,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v8.8a", "v8.8-a")
.Case("v8.9a", "v8.9-a")
.Case("v8r", "v8-r")
- .Cases("v9", "v9a", "v9-a")
+ .Cases({"v9", "v9a"}, "v9-a")
.Case("v9.1a", "v9.1-a")
.Case("v9.2a", "v9.2-a")
.Case("v9.3a", "v9.3-a")
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 928e779..6065575 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -70,8 +70,8 @@
using namespace llvm;
-static std::unique_ptr<llvm::MemoryBuffer>
- LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
+[[maybe_unused]] static std::unique_ptr<llvm::MemoryBuffer>
+getProcCpuinfoContent() {
const char *CPUInfoFile = "/proc/cpuinfo";
if (const char *CpuinfoIntercept = std::getenv("LLVM_CPUINFO"))
CPUInfoFile = CpuinfoIntercept;
@@ -1152,6 +1152,20 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family,
break;
}
break;
+ case 0x12:
+ switch (Model) {
+ // Novalake:
+ case 0x1:
+ case 0x3:
+ CPU = "novalake";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_NOVALAKE;
+ break;
+ default: // Unknown family 0x12 CPU.
+ break;
+ }
+ break;
+
default:
break; // Unknown.
}
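For context on the new case 0x12: the Family/Model values feeding this switch follow the standard Intel signature decoding of CPUID leaf 1 EAX, where a display family of 0x12 is base family 0xF plus an extended family of 0x3. A hedged sketch of that decoding, shown for orientation rather than as a copy of the detection code:

#include <cstdint>

// Standard Intel signature decoding: family 0xF pulls in the extended family
// field, and families 0x6/0xF pull in the extended model field.
static void decodeX86Signature(uint32_t EAX, unsigned &Family, unsigned &Model) {
  Family = (EAX >> 8) & 0xf;
  Model = (EAX >> 4) & 0xf;
  if (Family == 0x6 || Family == 0xf) {
    if (Family == 0xf)
      Family += (EAX >> 20) & 0xff; // e.g. 0xf + 0x3 -> 0x12
    Model += ((EAX >> 16) & 0xf) << 4;
  }
}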
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index 9268df2..31126cc 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -887,7 +887,7 @@ void RISCVISAInfo::updateImplication() {
}
static constexpr StringLiteral CombineIntoExts[] = {
- {"b"}, {"zk"}, {"zkn"}, {"zks"}, {"zvkn"},
+ {"a"}, {"b"}, {"zk"}, {"zkn"}, {"zks"}, {"zvkn"},
{"zvknc"}, {"zvkng"}, {"zvks"}, {"zvksc"}, {"zvksg"},
};
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index 950bb2b..d765d9c 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -548,8 +548,11 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {
case Triple::csky:
return computeCSKYDataLayout(*this);
case Triple::dxil:
+ // TODO: We need to align vectors on the element size generally, but for now
+ // we hard code this for 3-element 32- and 64-bit vectors as a workaround.
+ // See https://github.com/llvm/llvm-project/issues/123968
return "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-"
- "f32:32-f64:64-n8:16:32:64";
+ "f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64";
case Triple::hexagon:
return "e-m:e-p:32:32:32-a:0-n16:32-"
"i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-"
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index f021094..1068ce4 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -579,87 +579,89 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
}
static Triple::ArchType parseArch(StringRef ArchName) {
- auto AT = StringSwitch<Triple::ArchType>(ArchName)
- .Cases("i386", "i486", "i586", "i686", Triple::x86)
- // FIXME: Do we need to support these?
- .Cases("i786", "i886", "i986", Triple::x86)
- .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64)
- .Cases("powerpc", "powerpcspe", "ppc", "ppc32", Triple::ppc)
- .Cases("powerpcle", "ppcle", "ppc32le", Triple::ppcle)
- .Cases("powerpc64", "ppu", "ppc64", Triple::ppc64)
- .Cases("powerpc64le", "ppc64le", Triple::ppc64le)
- .Case("xscale", Triple::arm)
- .Case("xscaleeb", Triple::armeb)
- .Case("aarch64", Triple::aarch64)
- .Case("aarch64_be", Triple::aarch64_be)
- .Case("aarch64_32", Triple::aarch64_32)
- .Case("arc", Triple::arc)
- .Case("arm64", Triple::aarch64)
- .Case("arm64_32", Triple::aarch64_32)
- .Case("arm64e", Triple::aarch64)
- .Case("arm64ec", Triple::aarch64)
- .Case("arm", Triple::arm)
- .Case("armeb", Triple::armeb)
- .Case("thumb", Triple::thumb)
- .Case("thumbeb", Triple::thumbeb)
- .Case("avr", Triple::avr)
- .Case("m68k", Triple::m68k)
- .Case("msp430", Triple::msp430)
- .Cases("mips", "mipseb", "mipsallegrex", "mipsisa32r6",
- "mipsr6", Triple::mips)
- .Cases("mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el",
- Triple::mipsel)
- .Cases("mips64", "mips64eb", "mipsn32", "mipsisa64r6",
- "mips64r6", "mipsn32r6", Triple::mips64)
- .Cases("mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el",
- "mipsn32r6el", Triple::mips64el)
- .Case("r600", Triple::r600)
- .Case("amdgcn", Triple::amdgcn)
- .Case("riscv32", Triple::riscv32)
- .Case("riscv64", Triple::riscv64)
- .Case("riscv32be", Triple::riscv32be)
- .Case("riscv64be", Triple::riscv64be)
- .Case("hexagon", Triple::hexagon)
- .Cases("s390x", "systemz", Triple::systemz)
- .Case("sparc", Triple::sparc)
- .Case("sparcel", Triple::sparcel)
- .Cases("sparcv9", "sparc64", Triple::sparcv9)
- .Case("tce", Triple::tce)
- .Case("tcele", Triple::tcele)
- .Case("xcore", Triple::xcore)
- .Case("nvptx", Triple::nvptx)
- .Case("nvptx64", Triple::nvptx64)
- .Case("amdil", Triple::amdil)
- .Case("amdil64", Triple::amdil64)
- .Case("hsail", Triple::hsail)
- .Case("hsail64", Triple::hsail64)
- .Case("spir", Triple::spir)
- .Case("spir64", Triple::spir64)
- .Cases("spirv", "spirv1.5", "spirv1.6", Triple::spirv)
- .Cases("spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2",
- "spirv32v1.3", "spirv32v1.4", "spirv32v1.5",
- "spirv32v1.6", Triple::spirv32)
- .Cases("spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2",
- "spirv64v1.3", "spirv64v1.4", "spirv64v1.5",
- "spirv64v1.6", Triple::spirv64)
- .StartsWith("kalimba", Triple::kalimba)
- .Case("lanai", Triple::lanai)
- .Case("renderscript32", Triple::renderscript32)
- .Case("renderscript64", Triple::renderscript64)
- .Case("shave", Triple::shave)
- .Case("ve", Triple::ve)
- .Case("wasm32", Triple::wasm32)
- .Case("wasm64", Triple::wasm64)
- .Case("csky", Triple::csky)
- .Case("loongarch32", Triple::loongarch32)
- .Case("loongarch64", Triple::loongarch64)
- .Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3",
- "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7",
- "dxilv1.8", Triple::dxil)
- // Note: Cases has max limit of 10.
- .Case("dxilv1.9", Triple::dxil)
- .Case("xtensa", Triple::xtensa)
- .Default(Triple::UnknownArch);
+ auto AT =
+ StringSwitch<Triple::ArchType>(ArchName)
+ .Cases({"i386", "i486", "i586", "i686"}, Triple::x86)
+ // FIXME: Do we need to support these?
+ .Cases({"i786", "i886", "i986"}, Triple::x86)
+ .Cases({"amd64", "x86_64", "x86_64h"}, Triple::x86_64)
+ .Cases({"powerpc", "powerpcspe", "ppc", "ppc32"}, Triple::ppc)
+ .Cases({"powerpcle", "ppcle", "ppc32le"}, Triple::ppcle)
+ .Cases({"powerpc64", "ppu", "ppc64"}, Triple::ppc64)
+ .Cases({"powerpc64le", "ppc64le"}, Triple::ppc64le)
+ .Case("xscale", Triple::arm)
+ .Case("xscaleeb", Triple::armeb)
+ .Case("aarch64", Triple::aarch64)
+ .Case("aarch64_be", Triple::aarch64_be)
+ .Case("aarch64_32", Triple::aarch64_32)
+ .Case("arc", Triple::arc)
+ .Case("arm64", Triple::aarch64)
+ .Case("arm64_32", Triple::aarch64_32)
+ .Case("arm64e", Triple::aarch64)
+ .Case("arm64ec", Triple::aarch64)
+ .Case("arm", Triple::arm)
+ .Case("armeb", Triple::armeb)
+ .Case("thumb", Triple::thumb)
+ .Case("thumbeb", Triple::thumbeb)
+ .Case("avr", Triple::avr)
+ .Case("m68k", Triple::m68k)
+ .Case("msp430", Triple::msp430)
+ .Cases({"mips", "mipseb", "mipsallegrex", "mipsisa32r6", "mipsr6"},
+ Triple::mips)
+ .Cases({"mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el"},
+ Triple::mipsel)
+ .Cases({"mips64", "mips64eb", "mipsn32", "mipsisa64r6", "mips64r6",
+ "mipsn32r6"},
+ Triple::mips64)
+ .Cases({"mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el",
+ "mipsn32r6el"},
+ Triple::mips64el)
+ .Case("r600", Triple::r600)
+ .Case("amdgcn", Triple::amdgcn)
+ .Case("riscv32", Triple::riscv32)
+ .Case("riscv64", Triple::riscv64)
+ .Case("riscv32be", Triple::riscv32be)
+ .Case("riscv64be", Triple::riscv64be)
+ .Case("hexagon", Triple::hexagon)
+ .Cases({"s390x", "systemz"}, Triple::systemz)
+ .Case("sparc", Triple::sparc)
+ .Case("sparcel", Triple::sparcel)
+ .Cases({"sparcv9", "sparc64"}, Triple::sparcv9)
+ .Case("tce", Triple::tce)
+ .Case("tcele", Triple::tcele)
+ .Case("xcore", Triple::xcore)
+ .Case("nvptx", Triple::nvptx)
+ .Case("nvptx64", Triple::nvptx64)
+ .Case("amdil", Triple::amdil)
+ .Case("amdil64", Triple::amdil64)
+ .Case("hsail", Triple::hsail)
+ .Case("hsail64", Triple::hsail64)
+ .Case("spir", Triple::spir)
+ .Case("spir64", Triple::spir64)
+ .Cases({"spirv", "spirv1.5", "spirv1.6"}, Triple::spirv)
+ .Cases({"spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2",
+ "spirv32v1.3", "spirv32v1.4", "spirv32v1.5", "spirv32v1.6"},
+ Triple::spirv32)
+ .Cases({"spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2",
+ "spirv64v1.3", "spirv64v1.4", "spirv64v1.5", "spirv64v1.6"},
+ Triple::spirv64)
+ .StartsWith("kalimba", Triple::kalimba)
+ .Case("lanai", Triple::lanai)
+ .Case("renderscript32", Triple::renderscript32)
+ .Case("renderscript64", Triple::renderscript64)
+ .Case("shave", Triple::shave)
+ .Case("ve", Triple::ve)
+ .Case("wasm32", Triple::wasm32)
+ .Case("wasm64", Triple::wasm64)
+ .Case("csky", Triple::csky)
+ .Case("loongarch32", Triple::loongarch32)
+ .Case("loongarch64", Triple::loongarch64)
+ .Cases({"dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3",
+ "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7", "dxilv1.8",
+ "dxilv1.9"},
+ Triple::dxil)
+ .Case("xtensa", Triple::xtensa)
+ .Default(Triple::UnknownArch);
// Some architectures require special parsing logic just to compute the
// ArchType result.
@@ -1071,7 +1073,7 @@ Triple::Triple(std::string &&Str) : Data(std::move(Str)) {
.StartsWith("mips64", Triple::GNUABI64)
.StartsWith("mipsisa64", Triple::GNUABI64)
.StartsWith("mipsisa32", Triple::GNU)
- .Cases("mips", "mipsel", "mipsr6", "mipsr6el", Triple::GNU)
+ .Cases({"mips", "mipsel", "mipsr6", "mipsr6el"}, Triple::GNU)
.Default(UnknownEnvironment);
}
}
diff --git a/llvm/lib/TargetParser/Unix/Host.inc b/llvm/lib/TargetParser/Unix/Host.inc
index aeb2f59..38b942d 100644
--- a/llvm/lib/TargetParser/Unix/Host.inc
+++ b/llvm/lib/TargetParser/Unix/Host.inc
@@ -59,10 +59,30 @@ static std::string updateTripleOSVersion(std::string TargetTripleString) {
if (TT.getOS() == Triple::AIX && !TT.getOSMajorVersion()) {
struct utsname name;
if (uname(&name) != -1) {
+ std::string release = name.release;
+
+ if (strcmp(name.sysname, "OS400") == 0) {
+ /*
+ PASE uses a different versioning system than AIX.
+ The following table shows the currently supported PASE
+ releases and the corresponding AIX release:
+ --------------------------
+ PASE | AIX
+ --------------------------
+ V7R4 | 7.2 (TL2)
+ --------------------------
+ V7R5 | 7.2 (TL5)
+ --------------------------
+ V7R6 | 7.3 (TL1)
+ --------------------------
+ */
+ release = (release == "4" || release == "5") ? "2" : "3";
+ }
+
std::string NewOSName = std::string(Triple::getOSTypeName(Triple::AIX));
NewOSName += name.version;
NewOSName += '.';
- NewOSName += name.release;
+ NewOSName += release;
NewOSName += ".0.0";
TT.setOSName(NewOSName);
return TT.str();
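A standalone sketch of the mapping introduced above, assuming a POSIX uname(): on PASE (sysname == "OS400") the release field holds the IBM i release digit, which is translated to the matching AIX minor version before the triple is rebuilt. The helper name is hypothetical.

#include <cstring>
#include <string>
#include <sys/utsname.h>

// Returns the AIX "version.release" pair to embed in the target triple.
static std::string aixVersionFromUname() {
  struct utsname Name;
  if (uname(&Name) == -1)
    return "";
  std::string Release = Name.release;
  // PASE V7R4/V7R5 correspond to AIX 7.2, V7R6 to AIX 7.3.
  if (std::strcmp(Name.sysname, "OS400") == 0)
    Release = (Release == "4" || Release == "5") ? "2" : "3";
  return std::string(Name.version) + "." + Release;
}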
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index e382cfe..dd13ce3 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -176,6 +176,8 @@ constexpr FeatureBitset FeaturesArrowlakeS =
FeatureSM4;
constexpr FeatureBitset FeaturesPantherlake =
(FeaturesArrowlakeS ^ FeatureWIDEKL);
+constexpr FeatureBitset FeaturesNovalake =
+ FeaturesPantherlake | FeaturePREFETCHI;
constexpr FeatureBitset FeaturesClearwaterforest =
(FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 |
FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR;
@@ -379,6 +381,8 @@ constexpr ProcInfo Processors[] = {
// Pantherlake microarchitecture based processors.
{ {"pantherlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false },
{ {"wildcatlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false },
+ // Novalake microarchitecture based processors.
+ { {"novalake"}, CK_Novalake, FEATURE_AVX2, FeaturesNovalake, 'r', false },
// Sierraforest microarchitecture based processors.
{ {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Grandridge microarchitecture based processors.
diff --git a/llvm/lib/Transforms/CFGuard/CFGuard.cpp b/llvm/lib/Transforms/CFGuard/CFGuard.cpp
index b73a0ce..4645670 100644
--- a/llvm/lib/Transforms/CFGuard/CFGuard.cpp
+++ b/llvm/lib/Transforms/CFGuard/CFGuard.cpp
@@ -147,7 +147,7 @@ public:
private:
// Only add checks if the module has the cfguard=2 flag.
- int cfguard_module_flag = 0;
+ int CFGuardModuleFlag = 0;
StringRef GuardFnName;
Mechanism GuardMechanism = Mechanism::Check;
FunctionType *GuardFnType = nullptr;
@@ -162,9 +162,7 @@ public:
static char ID;
// Default constructor required for the INITIALIZE_PASS macro.
- CFGuard(CFGuardImpl::Mechanism M) : FunctionPass(ID), Impl(M) {
- initializeCFGuardPass(*PassRegistry::getPassRegistry());
- }
+ CFGuard(CFGuardImpl::Mechanism M) : FunctionPass(ID), Impl(M) {}
bool doInitialization(Module &M) override { return Impl.doInitialization(M); }
bool runOnFunction(Function &F) override { return Impl.runOnFunction(F); }
@@ -173,7 +171,6 @@ public:
} // end anonymous namespace
void CFGuardImpl::insertCFGuardCheck(CallBase *CB) {
-
assert(CB->getModule()->getTargetTriple().isOSWindows() &&
"Only applicable for Windows targets");
assert(CB->isIndirectCall() &&
@@ -202,7 +199,6 @@ void CFGuardImpl::insertCFGuardCheck(CallBase *CB) {
}
void CFGuardImpl::insertCFGuardDispatch(CallBase *CB) {
-
assert(CB->getModule()->getTargetTriple().isOSWindows() &&
"Only applicable for Windows targets");
assert(CB->isIndirectCall() &&
@@ -236,14 +232,13 @@ void CFGuardImpl::insertCFGuardDispatch(CallBase *CB) {
}
bool CFGuardImpl::doInitialization(Module &M) {
-
// Check if this module has the cfguard flag and read its value.
if (auto *MD =
mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("cfguard")))
- cfguard_module_flag = MD->getZExtValue();
+ CFGuardModuleFlag = MD->getZExtValue();
// Skip modules for which CFGuard checks have been disabled.
- if (cfguard_module_flag != 2)
+ if (CFGuardModuleFlag != 2)
return false;
// Set up prototypes for the guard check and dispatch functions.
@@ -264,9 +259,8 @@ bool CFGuardImpl::doInitialization(Module &M) {
}
bool CFGuardImpl::runOnFunction(Function &F) {
-
// Skip modules for which CFGuard checks have been disabled.
- if (cfguard_module_flag != 2)
+ if (CFGuardModuleFlag != 2)
return false;
SmallVector<CallBase *, 8> IndirectCalls;
@@ -286,19 +280,16 @@ bool CFGuardImpl::runOnFunction(Function &F) {
}
// If no checks are needed, return early.
- if (IndirectCalls.empty()) {
+ if (IndirectCalls.empty())
return false;
- }
// For each indirect call/invoke, add the appropriate dispatch or check.
if (GuardMechanism == Mechanism::Dispatch) {
- for (CallBase *CB : IndirectCalls) {
+ for (CallBase *CB : IndirectCalls)
insertCFGuardDispatch(CB);
- }
} else {
- for (CallBase *CB : IndirectCalls) {
+ for (CallBase *CB : IndirectCalls)
insertCFGuardCheck(CB);
- }
}
return true;
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
index f166fef..cf7e450 100644
--- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -153,26 +153,23 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
if (IsCallerPresplitCoroutine && HasAttr) {
- BranchProbability MinBranchProbability(
- static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution),
- MinBlockCounterExecution);
-
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
- auto Prob = BranchProbability::getBranchProbability(
- BFI.getBlockFreq(CB->getParent()).getFrequency(),
- BFI.getEntryFreq().getFrequency());
+ auto BlockFreq = BFI.getBlockFreq(CB->getParent()).getFrequency();
+ auto EntryFreq = BFI.getEntryFreq().getFrequency();
+ uint64_t MinFreq =
+ static_cast<uint64_t>(EntryFreq * CoroElideBranchRatio);
- if (Prob < MinBranchProbability) {
+ if (BlockFreq < MinFreq) {
ORE.emit([&]() {
return OptimizationRemarkMissed(
DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' not elided in '"
<< ore::NV("caller", Caller->getName())
- << "' because of low probability: "
- << ore::NV("probability", Prob) << " (threshold: "
- << ore::NV("threshold", MinBranchProbability) << ")";
+ << "' because of low frequency: "
+ << ore::NV("block_freq", BlockFreq)
+ << " (threshold: " << ore::NV("min_freq", MinFreq) << ")";
});
continue;
}
@@ -188,7 +185,8 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' elided in '" << ore::NV("caller", Caller->getName())
- << "' (probability: " << ore::NV("probability", Prob) << ")";
+ << "' (block_freq: " << ore::NV("block_freq", BlockFreq)
+ << ")";
});
FAM.invalidate(*Caller, PreservedAnalyses::none());
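The replacement check is plain integer arithmetic: the callsite's block frequency must reach EntryFreq * CoroElideBranchRatio. A tiny worked example with made-up numbers (the 0.05 ratio is only for illustration, not the option's actual default):

#include <cstdint>
#include <cstdio>

int main() {
  const double CoroElideBranchRatio = 0.05; // illustrative value only
  const uint64_t EntryFreq = 1000;          // frequency of the entry block
  const uint64_t BlockFreq = 20;            // frequency of the call's block
  const uint64_t MinFreq =
      static_cast<uint64_t>(EntryFreq * CoroElideBranchRatio); // 50
  // 20 < 50, so this callsite is reported as too cold to elide.
  std::printf("elide? %s\n", BlockFreq < MinFreq ? "no" : "yes");
  return 0;
}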
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 5066a99..894d83f 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -6150,3 +6150,42 @@ void MemProfContextDisambiguation::run(
IndexCallsiteContextGraph CCG(Index, isPrevailing);
CCG.process();
}
+
+// Strips MemProf attributes and metadata. Can be invoked by the pass pipeline
+// when we don't have an index that has recorded that we are linking with
+// allocation libraries containing the necessary APIs for downstream
+// transformations.
+PreservedAnalyses MemProfRemoveInfo::run(Module &M, ModuleAnalysisManager &AM) {
+ // The profile matcher applies hotness attributes directly for allocations,
+ // and those will cause us to generate calls to the hot/cold interfaces
+ // unconditionally. If supports-hot-cold-new was not enabled in the LTO
+ // link then assume we don't want these calls (e.g. not linking with
+ // the appropriate library, or otherwise trying to disable this behavior).
+ bool Changed = false;
+ for (auto &F : M) {
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ auto *CI = dyn_cast<CallBase>(&I);
+ if (!CI)
+ continue;
+ if (CI->hasFnAttr("memprof")) {
+ CI->removeFnAttr("memprof");
+ Changed = true;
+ }
+ if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
+ assert(!CI->hasMetadata(LLVMContext::MD_memprof));
+ continue;
+ }
+ // Strip off all memprof metadata as it is no longer needed.
+ // Importantly, this avoids the addition of new memprof attributes
+ // after inlining propagation.
+ CI->setMetadata(LLVMContext::MD_memprof, nullptr);
+ CI->setMetadata(LLVMContext::MD_callsite, nullptr);
+ Changed = true;
+ }
+ }
+ }
+ if (!Changed)
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 4c9b10a..cdc559b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -156,9 +156,9 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
Type *Ty = CI.getType();
- if (auto *SrcC = dyn_cast<Constant>(Src))
- if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
- return replaceInstUsesWith(CI, Res);
+ if (Value *Res =
+ simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI)))
+ return replaceInstUsesWith(CI, Res);
// Try to eliminate a cast of a cast.
if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 07ad65c..fba1ccf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1481,13 +1481,13 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
return new ICmpInst(Pred, Y, ConstantInt::get(SrcTy, C.logBase2()));
}
- if (Cmp.isEquality() && Trunc->hasOneUse()) {
+ if (Cmp.isEquality() && (Trunc->hasOneUse() || Trunc->hasNoUnsignedWrap())) {
// Canonicalize to a mask and wider compare if the wide type is suitable:
// (trunc X to i8) == C --> (X & 0xff) == (zext C)
if (!SrcTy->isVectorTy() && shouldChangeType(DstBits, SrcBits)) {
Constant *Mask =
ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcBits, DstBits));
- Value *And = Builder.CreateAnd(X, Mask);
+ Value *And = Trunc->hasNoUnsignedWrap() ? X : Builder.CreateAnd(X, Mask);
Constant *WideC = ConstantInt::get(SrcTy, C.zext(SrcBits));
return new ICmpInst(Pred, And, WideC);
}
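Why the mask can be dropped when the truncation carries nuw: a no-unsigned-wrap trunc guarantees the dropped high bits of X are zero, so X & 0xff is already X and the compare can use X directly against the zero-extended constant. A small APInt check of that equivalence, assuming LLVM's ADT headers; the values are arbitrary:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  // X fits in 8 bits, so "trunc nuw i32 %X to i8" would be well defined.
  APInt X(32, 0x5a);
  APInt C(8, 0x5a);
  // (trunc X) == C  is the same as  X == zext(C), with no masking needed,
  // because nuw says the bits dropped by the trunc are already zero.
  assert(X.trunc(8) == C);
  assert(X == C.zext(32));
  return 0;
}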
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 09cb225..a8eb9b9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3757,6 +3757,10 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
// (x < y) ? -1 : zext(x > y)
// (x > y) ? 1 : sext(x != y)
// (x > y) ? 1 : sext(x < y)
+// (x == y) ? 0 : (x > y ? 1 : -1)
+// (x == y) ? 0 : (x < y ? -1 : 1)
+// Special case: x == C ? 0 : (x > C - 1 ? 1 : -1)
+// Special case: x == C ? 0 : (x < C + 1 ? -1 : 1)
// Into ucmp/scmp(x, y), where signedness is determined by the signedness
// of the comparison in the original sequence.
Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) {
@@ -3849,6 +3853,44 @@ Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) {
}
}
+ // Special cases with constants: x == C ? 0 : (x > C-1 ? 1 : -1)
+ if (Pred == ICmpInst::ICMP_EQ && match(TV, m_Zero())) {
+ const APInt *C;
+ if (match(RHS, m_APInt(C))) {
+ CmpPredicate InnerPred;
+ Value *InnerRHS;
+ const APInt *InnerTV, *InnerFV;
+ if (match(FV,
+ m_Select(m_ICmp(InnerPred, m_Specific(LHS), m_Value(InnerRHS)),
+ m_APInt(InnerTV), m_APInt(InnerFV)))) {
+
+ // x == C ? 0 : (x > C-1 ? 1 : -1)
+ if (ICmpInst::isGT(InnerPred) && InnerTV->isOne() &&
+ InnerFV->isAllOnes()) {
+ IsSigned = ICmpInst::isSigned(InnerPred);
+ bool CanSubOne = IsSigned ? !C->isMinSignedValue() : !C->isMinValue();
+ if (CanSubOne) {
+ APInt Cminus1 = *C - 1;
+ if (match(InnerRHS, m_SpecificInt(Cminus1)))
+ Replace = true;
+ }
+ }
+
+ // x == C ? 0 : (x < C+1 ? -1 : 1)
+ if (ICmpInst::isLT(InnerPred) && InnerTV->isAllOnes() &&
+ InnerFV->isOne()) {
+ IsSigned = ICmpInst::isSigned(InnerPred);
+ bool CanAddOne = IsSigned ? !C->isMaxSignedValue() : !C->isMaxValue();
+ if (CanAddOne) {
+ APInt Cplus1 = *C + 1;
+ if (match(InnerRHS, m_SpecificInt(Cplus1)))
+ Replace = true;
+ }
+ }
+ }
+ }
+ }
+
Intrinsic::ID IID = IsSigned ? Intrinsic::scmp : Intrinsic::ucmp;
if (Replace)
return replaceInstUsesWith(
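A worked instance of the new special case, taking C = 5 purely for illustration: x == 5 ? 0 : (x > 4 ? 1 : -1) returns -1, 0, or 1 exactly as a signed three-way compare against 5 does, which is why the select chain can be replaced by llvm.scmp(x, 5).

#include <cassert>

// Left-hand side: the select chain that foldSelectToCmp now recognizes.
static int selectChain(int X) { return X == 5 ? 0 : (X > 4 ? 1 : -1); }
// Right-hand side: a plain signed three-way compare against 5.
static int scmp5(int X) { return X < 5 ? -1 : (X > 5 ? 1 : 0); }

int main() {
  for (int X = -10; X <= 10; ++X)
    assert(selectChain(X) == scmp5(X)); // identical on every input
  return 0;
}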
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 511bca4..2646334 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -605,17 +605,16 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
return Mapping;
}
-namespace llvm {
-void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
- bool IsKasan, uint64_t *ShadowBase,
- int *MappingScale, bool *OrShadowOffset) {
+void llvm::getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
+ bool IsKasan, uint64_t *ShadowBase,
+ int *MappingScale, bool *OrShadowOffset) {
auto Mapping = getShadowMapping(TargetTriple, LongSize, IsKasan);
*ShadowBase = Mapping.Offset;
*MappingScale = Mapping.Scale;
*OrShadowOffset = Mapping.OrShadowOffset;
}
-void removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) {
+void llvm::removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) {
// Sanitizer checks read from shadow, which invalidates memory(argmem: *).
//
// This is not only true for sanitized functions, because AttrInfer can
@@ -668,8 +667,6 @@ ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel,
AccessSizeIndex(AccessSizeIndex), IsWrite(IsWrite),
CompileKernel(CompileKernel) {}
-} // namespace llvm
-
static uint64_t getRedzoneSizeForScale(int MappingScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
@@ -677,11 +674,10 @@ static uint64_t getRedzoneSizeForScale(int MappingScale) {
}
static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
- if (TargetTriple.isOSEmscripten()) {
+ if (TargetTriple.isOSEmscripten())
return kAsanEmscriptenCtorAndDtorPriority;
- } else {
+ else
return kAsanCtorAndDtorPriority;
- }
}
static Twine genName(StringRef suffix) {
@@ -848,6 +844,7 @@ struct AddressSanitizer {
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
bool maybeInsertDynamicShadowAtFunctionEntry(Function &F);
void markEscapedLocalAllocas(Function &F);
+ void markCatchParametersAsUninteresting(Function &F);
private:
friend struct FunctionStackPoisoner;
@@ -3001,6 +2998,22 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
}
}
}
+// Mitigation for https://github.com/google/sanitizers/issues/749
+// We don't instrument Windows catch-block parameters to avoid
+// interfering with exception handling assumptions.
+void AddressSanitizer::markCatchParametersAsUninteresting(Function &F) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto *CatchPad = dyn_cast<CatchPadInst>(&I)) {
+ // Mark the parameters to a catch-block as uninteresting to avoid
+ // instrumenting them.
+ for (Value *Operand : CatchPad->arg_operands())
+ if (auto *AI = dyn_cast<AllocaInst>(Operand))
+ ProcessedAllocas[AI] = false;
+ }
+ }
+ }
+}
bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) {
bool ShouldInstrument =
@@ -3045,6 +3058,9 @@ bool AddressSanitizer::instrumentFunction(Function &F,
// can be passed to that intrinsic.
markEscapedLocalAllocas(F);
+ if (TargetTriple.isOSWindows())
+ markCatchParametersAsUninteresting(F);
+
// We want to instrument every address only once per basic block (unless there
// are calls between uses).
SmallPtrSet<Value *, 16> TempsToInstrument;
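Several hunks in this patch (here, and in DeadStoreElimination further down) switch from reopening namespace llvm around a definition to defining the already-declared function with a qualified name, so a mismatch with the declaration becomes a compile error instead of a new overload. In miniature, with a hypothetical function:

// Header: the declaration lives in namespace llvm.
namespace llvm {
void resetCounter(int &Counter);
} // namespace llvm

// .cpp file: qualified definition instead of reopening the namespace.
// A typo in the name or signature is now rejected by the compiler.
void llvm::resetCounter(int &Counter) { Counter = 0; }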
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 7c78eb3..72e8e50 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -396,9 +396,8 @@ class CHR {
} // end anonymous namespace
-static inline
-raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
- const CHRStats &Stats) {
+[[maybe_unused]] static inline raw_ostream &operator<<(raw_ostream &OS,
+ const CHRStats &Stats) {
Stats.print(OS);
return OS;
}
@@ -425,8 +424,8 @@ static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) {
return PSI.isFunctionEntryHot(&F);
}
-static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
- CHRStats *Stats) {
+[[maybe_unused]] static void dumpIR(Function &F, const char *Label,
+ CHRStats *Stats) {
StringRef FuncName = F.getName();
StringRef ModuleName = F.getParent()->getName();
(void)(FuncName); // Unused in release build.
@@ -1622,7 +1621,7 @@ static void insertTrivialPHIs(CHRScope *Scope,
}
// Assert that all the CHR regions of the scope have a biased branch or select.
-static void LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]] static void
assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
#ifndef NDEBUG
auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
@@ -1644,8 +1643,9 @@ assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
// Assert that all the condition values of the biased branches and selects have
// been hoisted to the pre-entry block or outside of the scope.
-static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
- CHRScope *Scope, BasicBlock *PreEntryBlock) {
+[[maybe_unused]] static void
+assertBranchOrSelectConditionHoisted(CHRScope *Scope,
+ BasicBlock *PreEntryBlock) {
CHR_DEBUG(dbgs() << "Biased regions condition values \n");
for (RegInfo &RI : Scope->CHRRegions) {
Region *R = RI.R;
@@ -2007,8 +2007,8 @@ void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
}
}
-static void LLVM_ATTRIBUTE_UNUSED
-dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
+[[maybe_unused]] static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes,
+ const char *Label) {
dbgs() << Label << " " << Scopes.size() << "\n";
for (CHRScope *Scope : Scopes) {
dbgs() << *Scope << "\n";
@@ -2092,8 +2092,6 @@ bool CHR::run() {
return Changed;
}
-namespace llvm {
-
ControlHeightReductionPass::ControlHeightReductionPass() {
parseCHRFilterFiles();
}
@@ -2116,5 +2114,3 @@ PreservedAnalyses ControlHeightReductionPass::run(
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
-
-} // namespace llvm
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 09db464..386e48f 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -326,8 +326,7 @@ const unsigned BBState::OverflowOccurredValue = 0xffffffff;
namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS,
- BBState &BBState) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, BBState &BBState);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/ObjCARC/PtrState.h b/llvm/lib/Transforms/ObjCARC/PtrState.h
index 232db2b..5cc4212 100644
--- a/llvm/lib/Transforms/ObjCARC/PtrState.h
+++ b/llvm/lib/Transforms/ObjCARC/PtrState.h
@@ -47,8 +47,7 @@ enum Sequence {
S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
};
-raw_ostream &operator<<(raw_ostream &OS,
- const Sequence S) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const Sequence S);
/// Unidirectional information about either a
/// retain-decrement-use-release sequence or release-use-decrement-retain
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 7ad710d..6141b6d 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -77,6 +77,7 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -805,9 +806,8 @@ tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
return nullptr;
}
-namespace {
// Returns true if \p I is an intrinsic that does not read or write memory.
-bool isNoopIntrinsic(Instruction *I) {
+static bool isNoopIntrinsic(Instruction *I) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
@@ -828,7 +828,7 @@ bool isNoopIntrinsic(Instruction *I) {
}
// Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
Instruction *DI = D->getMemoryInst();
// Calls that only access inaccessible memory cannot read or write any memory
// locations we consider for elimination.
@@ -856,6 +856,8 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
return false;
}
+namespace {
+
// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
// defined by `MemDef`.
struct MemoryLocationWrapper {
@@ -889,23 +891,25 @@ struct MemoryDefWrapper {
SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
};
-bool hasInitializesAttr(Instruction *I) {
- CallBase *CB = dyn_cast<CallBase>(I);
- return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
-}
-
struct ArgumentInitInfo {
unsigned Idx;
bool IsDeadOrInvisibleOnUnwind;
ConstantRangeList Inits;
};
+} // namespace
+
+static bool hasInitializesAttr(Instruction *I) {
+ CallBase *CB = dyn_cast<CallBase>(I);
+ return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
+}
// Return the intersected range list of the initializes attributes of "Args".
// "Args" are call arguments that alias to each other.
// If any argument in "Args" doesn't have dead_on_unwind attr and
// "CallHasNoUnwindAttr" is false, return empty.
-ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
- bool CallHasNoUnwindAttr) {
+static ConstantRangeList
+getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
+ bool CallHasNoUnwindAttr) {
if (Args.empty())
return {};
@@ -925,6 +929,8 @@ ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
return IntersectedIntervals;
}
+namespace {
+
struct DSEState {
Function &F;
AliasAnalysis &AA;
@@ -2328,10 +2334,11 @@ struct DSEState {
// change state: whether make any change.
bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
};
+} // namespace
// Return true if "Arg" is function local and isn't captured before "CB".
-bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
- EarliestEscapeAnalysis &EA) {
+static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
+ EarliestEscapeAnalysis &EA) {
const Value *UnderlyingObj = getUnderlyingObject(Arg);
return isIdentifiedFunctionLocal(UnderlyingObj) &&
capturesNothing(
@@ -2627,7 +2634,6 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
return MadeChange;
}
-} // end anonymous namespace
//===----------------------------------------------------------------------===//
// DSE Pass
@@ -2728,8 +2734,6 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
-namespace llvm {
-LLVM_ABI FunctionPass *createDeadStoreEliminationPass() {
+LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() {
return new DSELegacyPass();
}
-} // namespace llvm
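The DSE reshuffling above follows the LLVM convention that anonymous namespaces should wrap only type definitions, while file-local free functions get internal linkage via static. In miniature, with hypothetical names:

namespace {
// Types that need internal linkage stay inside the anonymous namespace.
struct State {
  int Visited;
};
} // namespace

// File-local helpers are plain static functions outside of it, which keeps
// their file-local nature visible right at the definition.
static bool isFresh(const State &S) { return S.Visited == 0; }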
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 1c88532..b9534def 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -73,24 +73,17 @@
#include <utility>
using namespace llvm;
+using namespace llvm::GVNExpression;
#define DEBUG_TYPE "gvn-sink"
STATISTIC(NumRemoved, "Number of instructions removed");
-namespace llvm {
-namespace GVNExpression {
-
LLVM_DUMP_METHOD void Expression::dump() const {
print(dbgs());
dbgs() << "\n";
}
-} // end namespace GVNExpression
-} // end namespace llvm
-
-namespace {
-
static bool isMemoryInst(const Instruction *I) {
return isa<LoadInst>(I) || isa<StoreInst>(I) ||
(isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
@@ -99,6 +92,8 @@ static bool isMemoryInst(const Instruction *I) {
//===----------------------------------------------------------------------===//
+namespace {
+
/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
@@ -125,14 +120,6 @@ struct SinkingInstructionCandidate {
}
};
-#ifndef NDEBUG
-raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
- OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
- << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
- return OS;
-}
-#endif
-
//===----------------------------------------------------------------------===//
/// Describes a PHI node that may or may not exist. These track the PHIs
@@ -256,8 +243,18 @@ public:
return Values == Other.Values && Blocks == Other.Blocks;
}
};
+} // namespace
-template <typename ModelledPHI> struct DenseMapInfo {
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS,
+ const SinkingInstructionCandidate &C) {
+ OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
+ << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
+ return OS;
+}
+#endif
+
+template <> struct llvm::DenseMapInfo<ModelledPHI> {
static inline ModelledPHI &getEmptyKey() {
static ModelledPHI Dummy = ModelledPHI::createDummy(0);
return Dummy;
@@ -275,7 +272,9 @@ template <typename ModelledPHI> struct DenseMapInfo {
}
};
-using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
+using ModelledPHISet = DenseSet<ModelledPHI>;
+
+namespace {
//===----------------------------------------------------------------------===//
// ValueTable
@@ -290,7 +289,7 @@ using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with sideeffects.
-class InstructionUseExpr : public GVNExpression::BasicExpression {
+class InstructionUseExpr : public BasicExpression {
unsigned MemoryUseOrder = -1;
bool Volatile = false;
ArrayRef<int> ShuffleMask;
@@ -298,7 +297,7 @@ class InstructionUseExpr : public GVNExpression::BasicExpression {
public:
InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
BumpPtrAllocator &A)
- : GVNExpression::BasicExpression(I->getNumUses()) {
+ : BasicExpression(I->getNumUses()) {
allocateOperands(R, A);
setOpcode(I->getOpcode());
setType(I->getType());
@@ -315,8 +314,8 @@ public:
void setVolatile(bool V) { Volatile = V; }
hash_code getHashValue() const override {
- return hash_combine(GVNExpression::BasicExpression::getHashValue(),
- MemoryUseOrder, Volatile, ShuffleMask);
+ return hash_combine(BasicExpression::getHashValue(), MemoryUseOrder,
+ Volatile, ShuffleMask);
}
template <typename Function> hash_code getHashValue(Function MapFn) {
@@ -332,7 +331,7 @@ using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
class ValueTable {
DenseMap<Value *, uint32_t> ValueNumbering;
- DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
+ DenseMap<Expression *, uint32_t> ExpressionNumbering;
DenseMap<size_t, uint32_t> HashNumbering;
BumpPtrAllocator Allocator;
ArrayRecycler<Value *> Recycler;
@@ -594,6 +593,7 @@ private:
}
}
};
+} // namespace
std::optional<SinkingInstructionCandidate>
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
@@ -851,8 +851,6 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
NumRemoved += Insts.size() - 1;
}
-} // end anonymous namespace
-
PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
GVNSink G;
if (!G.run(F))
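The GVNSink change replaces a local DenseMapInfo template that shadowed LLVM's with a proper explicit specialization of llvm::DenseMapInfo, so DenseSet<ModelledPHI> picks it up with no extra template argument. The same shape for a hypothetical key type:

#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"

struct PairKey {
  unsigned A, B;
};

// Explicit specialization of llvm::DenseMapInfo, written with a qualified
// name; DenseSet and DenseMap find it without being told which traits to use.
template <> struct llvm::DenseMapInfo<PairKey> {
  static inline PairKey getEmptyKey() { return {~0u, 0}; }
  static inline PairKey getTombstoneKey() { return {~0u, 1}; }
  static unsigned getHashValue(const PairKey &K) { return K.A * 37u ^ K.B; }
  static bool isEqual(const PairKey &L, const PairKey &R) {
    return L.A == R.A && L.B == R.B;
  }
};

int main() {
  llvm::DenseSet<PairKey> Seen; // uses the specialization above
  Seen.insert({1, 2});
  return Seen.count({1, 2}) ? 0 : 1;
}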
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index d99f1eb..ddb99a5 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -75,8 +75,6 @@ static cl::opt<bool>
"expressed as branches by widenable conditions"),
cl::init(true));
-namespace {
-
// Get the condition of \p I. It can either be a guard or a conditional branch.
static Value *getCondition(Instruction *I) {
if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
@@ -130,6 +128,8 @@ findInsertionPointForWideCondition(Instruction *WCOrGuard) {
return std::nullopt;
}
+namespace {
+
class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
@@ -328,7 +328,7 @@ public:
/// The entry point for this pass.
bool run();
};
-}
+} // namespace
static bool isSupportedGuardInstruction(const Instruction *Insn) {
if (isGuard(Insn))
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c327311..7ebcc21 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -53,6 +53,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -117,6 +118,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
+static cl::opt<bool> LoopPredicationTraps(
+ "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
+ cl::desc("Predicate conditions that trap in loops with only local writes"));
+
static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
cl::desc("Allow widening of indvars to eliminate s/zext"));
@@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
return Changed;
}
+static bool crashingBBWithoutEffect(const BasicBlock &BB) {
+ return llvm::all_of(BB, [](const Instruction &I) {
+ // TODO: for now this is overly restrictive, to make sure nothing in this
+ // BB can depend on the loop body.
+ // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
+ // load does not have a side effect, but we could have
+ // %a = load ptr, ptr %ptr
+ // %b = load i32, ptr %a
+ // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
+ // dereference by skipping over loop iterations.
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->onlyAccessesInaccessibleMemory())
+ return true;
+ }
+ return isa<UnreachableInst>(I);
+ });
+}
+
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// suggestions on how to improve this? I can obviously bail out for outer
// loops, but that seems less than ideal. MemorySSA can find memory writes,
// is that enough for *all* side effects?
+ bool HasThreadLocalSideEffects = false;
for (BasicBlock *BB : L->blocks())
for (auto &I : *BB)
// TODO:isGuaranteedToTransfer
- if (I.mayHaveSideEffects())
- return false;
+ if (I.mayHaveSideEffects()) {
+ if (!LoopPredicationTraps)
+ return false;
+ HasThreadLocalSideEffects = true;
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ // Simple stores cannot be observed by other threads.
+ // If HasThreadLocalSideEffects is set, we check
+ // crashingBBWithoutEffect to make sure that the crashing BB cannot
+ // observe them either.
+ if (!SI->isSimple())
+ return false;
+ } else {
+ return false;
+ }
+ }
bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
@@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ if (HasThreadLocalSideEffects) {
+ const BasicBlock *Unreachable = nullptr;
+ for (const BasicBlock *Succ : BI->successors()) {
+ if (isa<UnreachableInst>(Succ->getTerminator()))
+ Unreachable = Succ;
+ }
+ // Exit BBs that have one branch back into the loop and another one to
+ // a trap can still be optimized, because local side effects cannot
+ // be observed in the exit case (the trap). We could be smarter about
+ // this, but for now let's pattern-match common cases that directly trap.
+ if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
+ return Changed;
+ }
Value *NewCond;
if (ExitCount == ExactBTC) {
NewCond = L->contains(BI->getSuccessor(0)) ?
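Rough shape of the loop the new LoopPredicationTraps path is aimed at, sketched as C++ rather than taken from the patch: the only side effect in the loop is a simple, non-atomic store, and the early exit leads straight to a trap, so the exit path cannot observe whether those stores happened.

// Hypothetical example: a bounds check that traps guards a plain store.
void fill(int *Buf, int N, int Bound) {
  for (int I = 0; I < N; ++I) {
    if (I >= Bound)
      __builtin_trap(); // the exit block is a trap followed by unreachable
    Buf[I] = I;         // simple store, the loop's only side effect
  }
}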
diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
index 3c14036e..6fb8197 100644
--- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
@@ -26,8 +26,6 @@
using namespace llvm;
-namespace llvm {
-
static cl::opt<unsigned>
JumpTableSizeThreshold("jump-table-to-switch-size-threshold", cl::Hidden,
cl::desc("Only split jump tables with size less or "
@@ -43,8 +41,8 @@ static cl::opt<unsigned> FunctionSizeThreshold(
"or equal than this threshold."),
cl::init(50));
+namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
} // end namespace llvm
#define DEBUG_TYPE "jump-table-to-switch"
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 9655173..b2c526b 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -116,8 +116,6 @@ STATISTIC(NumIntAssociationsHoisted,
STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
"reassociated and hoisted out of the loop");
-namespace llvm {
-
/// Memory promotion is enabled by default.
static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
@@ -156,7 +154,7 @@ static cl::opt<unsigned> IntAssociationUpperLimit(
// which may not be precise, since optimizeUses is capped. The result is
// correct, but we may not get as "far up" as possible to get which access is
// clobbering the one queried.
-cl::opt<unsigned> SetLicmMssaOptCap(
+cl::opt<unsigned> llvm::SetLicmMssaOptCap(
"licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
"for faster compile. Caps the MemorySSA clobbering calls."));
@@ -164,15 +162,15 @@ cl::opt<unsigned> SetLicmMssaOptCap(
// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
-cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap(
+cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
"licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
"effect. When MSSA in LICM is enabled, then this is the maximum "
"number of accesses allowed to be present in a loop in order to "
"enable memory promotion."));
+namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
} // end namespace llvm
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
@@ -1120,11 +1118,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
return false;
}
-namespace {
/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality
/// concerns such as aliasing or speculation safety.
-bool isHoistableAndSinkableInst(Instruction &I) {
+static bool isHoistableAndSinkableInst(Instruction &I) {
// Only these instructions are hoistable/sinkable.
return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
@@ -1136,8 +1133,8 @@ bool isHoistableAndSinkableInst(Instruction &I) {
}
/// Return true if I is the only Instruction with a MemoryAccess in L.
-bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
- const MemorySSAUpdater &MSSAU) {
+static bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
+ const MemorySSAUpdater &MSSAU) {
for (auto *BB : L->getBlocks())
if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
int NotAPhi = 0;
@@ -1151,7 +1148,6 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
}
return true;
}
-}
static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
BatchAAResults &BAA,
diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index 73f1942..7706de8 100644
--- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -21,8 +21,7 @@
#define DEBUG_TYPE "loop-bound-split"
-namespace llvm {
-
+using namespace llvm;
using namespace PatternMatch;
namespace {
@@ -358,8 +357,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
IRBuilder<> Builder(&PostLoopPreHeader->front());
// Update phi nodes in header of post-loop.
- bool isExitingLatch =
- (L.getExitingBlock() == L.getLoopLatch()) ? true : false;
+ bool isExitingLatch = L.getExitingBlock() == L.getLoopLatch();
Value *ExitingCondLCSSAPhi = nullptr;
for (PHINode &PN : L.getHeader()->phis()) {
// Create LCSSA phi node in preheader of post-loop.
@@ -472,8 +470,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
- Function &F = *L.getHeader()->getParent();
- (void)F;
+ [[maybe_unused]] Function &F = *L.getHeader()->getParent();
LLVM_DEBUG(dbgs() << "Spliting bound of loop in " << F.getName() << ": " << L
<< "\n");
@@ -486,5 +483,3 @@ PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM,
return getLoopPassPreservedAnalyses();
}
-
-} // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 20733032..19eccb9 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -368,7 +368,7 @@ private:
Valid = false;
}
- bool reportInvalidCandidate(llvm::Statistic &Stat) const {
+ bool reportInvalidCandidate(Statistic &Stat) const {
using namespace ore;
assert(L && Preheader && "Fusion candidate not initialized properly!");
#if LLVM_ENABLE_STATS
@@ -445,6 +445,7 @@ struct FusionCandidateCompare {
"No dominance relationship between these fusion candidates!");
}
};
+} // namespace
using LoopVector = SmallVector<Loop *, 4>;
@@ -461,9 +462,15 @@ using LoopVector = SmallVector<Loop *, 4>;
using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
-#if !defined(NDEBUG)
-static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
- const FusionCandidate &FC) {
+#ifndef NDEBUG
+static void printLoopVector(const LoopVector &LV) {
+ dbgs() << "****************************\n";
+ for (const Loop *L : LV)
+ printLoop(*L, dbgs());
+ dbgs() << "****************************\n";
+}
+
+static raw_ostream &operator<<(raw_ostream &OS, const FusionCandidate &FC) {
if (FC.isValid())
OS << FC.Preheader->getName();
else
@@ -472,8 +479,8 @@ static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
return OS;
}
-static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
- const FusionCandidateSet &CandSet) {
+static raw_ostream &operator<<(raw_ostream &OS,
+ const FusionCandidateSet &CandSet) {
for (const FusionCandidate &FC : CandSet)
OS << FC << '\n';
@@ -489,7 +496,9 @@ printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
dbgs() << "****************************\n";
}
}
-#endif
+#endif // NDEBUG
+
+namespace {
/// Collect all loops in function at the same nest level, starting at the
/// outermost level.
@@ -550,15 +559,6 @@ private:
LoopsOnLevelTy LoopsOnLevel;
};
-#ifndef NDEBUG
-static void printLoopVector(const LoopVector &LV) {
- dbgs() << "****************************\n";
- for (auto *L : LV)
- printLoop(*L, dbgs());
- dbgs() << "****************************\n";
-}
-#endif
-
struct LoopFuser {
private:
// Sets of control flow equivalent fusion candidates for a given nest level.
@@ -1850,7 +1850,7 @@ private:
/// <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description>
template <typename RemarkKind>
void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
- llvm::Statistic &Stat) {
+ Statistic &Stat) {
assert(FC0.Preheader && FC1.Preheader &&
"Expecting valid fusion candidates");
using namespace ore;
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 32078b1..d827e64 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -8,7 +8,6 @@
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -16,8 +15,6 @@
using namespace llvm;
-namespace llvm {
-
/// Explicitly specialize the pass manager's run method to handle loop nest
/// structure updates.
PreservedAnalyses
@@ -185,7 +182,6 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
}
return PA;
}
-} // namespace llvm
void FunctionToLoopPassAdaptor::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
@@ -193,6 +189,7 @@ void FunctionToLoopPassAdaptor::printPipeline(
Pass->printPipeline(OS, MapClassName2PassName);
OS << ')';
}
+
PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
FunctionAnalysisManager &AM) {
// Before we even compute any loop analyses, first run a miniature function
@@ -219,9 +216,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
// Get the analysis results needed by loop passes.
MemorySSA *MSSA =
UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr;
- BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
- ? (&AM.getResult<BlockFrequencyAnalysis>(F))
- : nullptr;
LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
@@ -229,7 +223,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
AM.getResult<ScalarEvolutionAnalysis>(F),
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<TargetIRAnalysis>(F),
- BFI,
MSSA};
// Setup the loop analysis manager from its proxy. It is important that
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 448dc2b..f3e6cbf 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -540,8 +540,6 @@ bool LoopVersioningLICM::run(DominatorTree *DT) {
return Changed;
}
-namespace llvm {
-
PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &LAR,
LPMUpdater &U) {
@@ -556,4 +554,3 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 7cae94eb..3487e81 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -97,6 +97,12 @@ static cl::opt<MatrixLayoutTy> MatrixLayout(
static cl::opt<bool> PrintAfterTransposeOpt("matrix-print-after-transpose-opt",
cl::init(false));
+static cl::opt<unsigned> SplitMatmulRemainderOverThreshold(
+ "matrix-split-matmul-remainder-over-threshold", cl::Hidden,
+ cl::desc("Illegal remainder vectors over this size in bits should be split "
+ "in the inner loop of matmul"),
+ cl::init(0));
+
/// Helper function to either return Scope, if it is a subprogram or the
/// attached subprogram for a local scope.
static DISubprogram *getSubprogram(DIScope *Scope) {
@@ -115,18 +121,16 @@ static bool isSplat(Value *V) {
/// Match any mul operation (fp or integer).
template <typename LTy, typename RTy>
-auto m_AnyMul(const LTy &L, const RTy &R) {
+static auto m_AnyMul(const LTy &L, const RTy &R) {
return m_CombineOr(m_Mul(L, R), m_FMul(L, R));
}
/// Match any add operation (fp or integer).
template <typename LTy, typename RTy>
-auto m_AnyAdd(const LTy &L, const RTy &R) {
+static auto m_AnyAdd(const LTy &L, const RTy &R) {
return m_CombineOr(m_Add(L, R), m_FAdd(L, R));
}
-namespace {
-
// Given an element pointer \p BasePtr to the start of a (sub) matrix, compute
// the start address of vector \p VecIdx with type (\p EltType x \p NumElements)
// assuming \p Stride elements between the starts of two consecutive vectors.
@@ -167,9 +171,9 @@ namespace {
// v_2_0 |v_2_1 |v_2_2 |v_2_3
// v_3_0 {v_3_1 {v_3_2 v_3_3
//
-Value *computeVectorAddr(Value *BasePtr, Value *VecIdx, Value *Stride,
- unsigned NumElements, Type *EltType,
- IRBuilder<> &Builder) {
+static Value *computeVectorAddr(Value *BasePtr, Value *VecIdx, Value *Stride,
+ unsigned NumElements, Type *EltType,
+ IRBuilder<> &Builder) {
assert((!isa<ConstantInt>(Stride) ||
cast<ConstantInt>(Stride)->getZExtValue() >= NumElements) &&
@@ -338,6 +342,8 @@ computeShapeInfoForInst(Instruction *I,
return std::nullopt;
}
+namespace {
+
/// LowerMatrixIntrinsics contains the methods used to lower matrix intrinsics.
///
/// Currently, the lowering for each matrix intrinsic is done as follows:
@@ -371,7 +377,8 @@ class LowerMatrixIntrinsics {
LoopInfo *LI = nullptr;
OptimizationRemarkEmitter *ORE = nullptr;
- /// Contains estimates of the number of operations (loads, stores, compute) required to lower a matrix operation.
+ /// Contains estimates of the number of operations (loads, stores, compute)
+ /// required to lower a matrix operation.
struct OpInfoTy {
/// Number of stores emitted to generate this matrix.
unsigned NumStores = 0;
@@ -1719,6 +1726,31 @@ public:
ToRemove.push_back(MatMul);
}
+ /// Given \p Remainder iterations of the matmul inner loop,
+ /// potentially lower the \p BlockSize that is used for the underlying
+ /// vector.
+ unsigned capBlockSize(unsigned BlockSize, unsigned Remainder, Type *EltType) {
+ if (BlockSize <= Remainder)
+ return BlockSize;
+
+ // If the remainder is also a legal type just use it.
+ auto *VecTy = FixedVectorType::get(EltType, Remainder);
+ if (TTI.isTypeLegal(VecTy))
+ return Remainder;
+
+ // Similarly, use the remainder directly if the vector is small enough
+ // that we don't want to split further.
+ if (VecTy->getPrimitiveSizeInBits() <= SplitMatmulRemainderOverThreshold)
+ return Remainder;
+
+ // Gradually lower the vectorization factor to cover the
+ // remainder.
+ do {
+ BlockSize /= 2;
+ } while (BlockSize > Remainder);
+ return BlockSize;
+ }
+
/// Compute \p Result += \p A * \p B for input matrices with left-associating
/// addition.
///
@@ -1756,10 +1788,8 @@ public:
bool isSumZero = isa<ConstantAggregateZero>(Result.getColumn(J));
for (unsigned I = 0; I < R; I += BlockSize) {
- // Gradually lower the vectorization factor to cover the remainder.
- while (I + BlockSize > R)
- BlockSize /= 2;
-
+ // Lower block size to make sure we stay within bounds.
+ BlockSize = capBlockSize(BlockSize, R - I, Result.getElementType());
Value *Sum = IsTiled ? Result.extractVector(I, J, BlockSize, Builder)
: nullptr;
for (unsigned K = 0; K < M; ++K) {
@@ -1784,9 +1814,8 @@ public:
unsigned BlockSize = VF;
bool isSumZero = isa<ConstantAggregateZero>(Result.getRow(I));
for (unsigned J = 0; J < C; J += BlockSize) {
- // Gradually lower the vectorization factor to cover the remainder.
- while (J + BlockSize > C)
- BlockSize /= 2;
+ // Lower the vectorization factor to cover the remainder.
+ BlockSize = capBlockSize(BlockSize, C - J, Result.getElementType());
Value *Sum = nullptr;
for (unsigned K = 0; K < M; ++K) {
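For reference, the remainder-capping strategy added in this hunk can be summarized in a small standalone sketch; `isLegalVector` and `RemainderBitThreshold` below are illustrative stand-ins for `TTI.isTypeLegal` and the `-matrix-split-matmul-remainder-over-threshold` option, not the actual interfaces.

static unsigned capBlockSizeSketch(unsigned BlockSize, unsigned Remainder,
                                   unsigned EltBits,
                                   bool (*isLegalVector)(unsigned Bits),
                                   unsigned RemainderBitThreshold) {
  // A full block still fits in the remaining iterations: keep it.
  if (BlockSize <= Remainder)
    return BlockSize;

  // The remainder itself forms a legal vector type: use it directly.
  if (isLegalVector(Remainder * EltBits))
    return Remainder;

  // The remainder is small enough that further splitting is not worthwhile.
  if (Remainder * EltBits <= RemainderBitThreshold)
    return Remainder;

  // Otherwise gradually lower the vectorization factor until it fits.
  do {
    BlockSize /= 2;
  } while (BlockSize > Remainder);
  return BlockSize;
}

For example, with BlockSize = 8, Remainder = 3 and 32-bit elements, the sketch returns 3 when a 3 x 32-bit vector is legal or under the threshold, and otherwise halves the block size down to 2.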
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 80aa98d..5a8f18a 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -160,9 +160,6 @@ static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true),
//===----------------------------------------------------------------------===//
// Anchor methods.
-namespace llvm {
-namespace GVNExpression {
-
Expression::~Expression() = default;
BasicExpression::~BasicExpression() = default;
CallExpression::~CallExpression() = default;
@@ -171,9 +168,6 @@ StoreExpression::~StoreExpression() = default;
AggregateValueExpression::~AggregateValueExpression() = default;
PHIExpression::~PHIExpression() = default;
-} // end namespace GVNExpression
-} // end namespace llvm
-
namespace {
// Tarjan's SCC finding algorithm with Nuutila's improvements
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index ba58b8e..6d7ce36 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2623,32 +2623,32 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
namespace {
- class ReassociateLegacyPass : public FunctionPass {
- ReassociatePass Impl;
+class ReassociateLegacyPass : public FunctionPass {
+ ReassociatePass Impl;
- public:
- static char ID; // Pass identification, replacement for typeid
+public:
+ static char ID; // Pass identification, replacement for typeid
- ReassociateLegacyPass() : FunctionPass(ID) {
- initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
- }
+ ReassociateLegacyPass() : FunctionPass(ID) {
+ initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
- FunctionAnalysisManager DummyFAM;
- auto PA = Impl.run(F, DummyFAM);
- return !PA.areAllPreserved();
- }
+ FunctionAnalysisManager DummyFAM;
+ auto PA = Impl.run(F, DummyFAM);
+ return !PA.areAllPreserved();
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
- };
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
} // end anonymous namespace
diff --git a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 30b27cb..7646624 100644
--- a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -107,9 +107,7 @@ PreservedAnalyses RegToMemPass::run(Function &F, FunctionAnalysisManager &AM) {
return PA;
}
-namespace llvm {
-
-void initializeRegToMemWrapperPassPass(PassRegistry &);
+namespace {
class RegToMemWrapperPass : public FunctionPass {
public:
@@ -136,7 +134,7 @@ public:
return N != 0 || Changed;
}
};
-} // namespace llvm
+} // namespace
INITIALIZE_PASS_BEGIN(RegToMemWrapperPass, "reg2mem", "", true, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 578fec7..5c60fad 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -344,6 +344,12 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
uint64_t SliceSizeInBits, Instruction *OldInst,
Instruction *Inst, Value *Dest, Value *Value,
const DataLayout &DL) {
+ // If we want allocas to be migrated using this helper then we need to ensure
+ // that the BaseFragments map code still works. A simple solution would be
+ // to always clone alloca dbg_assigns (rather than sometimes "stealing"
+ // them).
+ assert(!isa<AllocaInst>(Inst) && "Unexpected alloca");
+
auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
// Nothing to do if OldInst has no linked dbg.assign intrinsics.
if (DVRAssignMarkerRange.empty())
@@ -429,11 +435,22 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
}
- ::Value *NewValue = Value ? Value : DbgAssign->getValue();
- DbgVariableRecord *NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
- DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
- Dest, DIExpression::get(Expr->getContext(), {}),
- DbgAssign->getDebugLoc())));
+ DbgVariableRecord *NewAssign;
+ if (IsSplit) {
+ ::Value *NewValue = Value ? Value : DbgAssign->getValue();
+ NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
+ DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
+ Dest, DIExpression::get(Expr->getContext(), {}),
+ DbgAssign->getDebugLoc())));
+ } else {
+ // The store is not split, simply steal the existing dbg_assign.
+ NewAssign = DbgAssign;
+ NewAssign->setAssignId(NewID); // FIXME: Can we avoid generating new IDs?
+ NewAssign->setAddress(Dest);
+ if (Value)
+ NewAssign->replaceVariableLocationOp(0u, Value);
+ assert(Expr == NewAssign->getExpression());
+ }
// If we've updated the value but the original dbg.assign has an arglist
// then kill it now - we can't use the requested new value.
@@ -464,9 +481,10 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
// noted as slightly offset (in code) from the store. In practice this
// should have little effect on the debugging experience due to the fact
// that all the split stores should get the same line number.
- NewAssign->moveBefore(DbgAssign->getIterator());
-
- NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
+ if (NewAssign != DbgAssign) {
+ NewAssign->moveBefore(DbgAssign->getIterator());
+ NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
+ }
LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
};
@@ -551,12 +569,10 @@ public:
}
/// Support comparison with a single offset to allow binary searches.
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
- uint64_t RHSOffset) {
+ [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
return LHS.beginOffset() < RHSOffset;
}
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const Slice &RHS) {
+ [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
return LHSOffset < RHS.beginOffset();
}
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index aae5d60..25a531c 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -50,9 +50,7 @@ using namespace llvm;
#define DEBUG_TYPE "scalarizer"
-namespace {
-
-BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
+static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
BasicBlock *BB = Itr->getParent();
if (isa<PHINode>(Itr))
Itr = BB->getFirstInsertionPt();
@@ -76,6 +74,8 @@ using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;
+namespace {
+
struct VectorSplit {
// The type of the vector.
FixedVectorType *VecTy = nullptr;
@@ -196,6 +196,7 @@ struct VectorLayout {
// The size of each (non-remainder) fragment in bytes.
uint64_t SplitSize = 0;
};
+} // namespace
static bool isStructOfMatchingFixedVectors(Type *Ty) {
if (!isa<StructType>(Ty))
@@ -268,6 +269,7 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
return Res;
}
+namespace {
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index e4ba70d..5af6c96 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -3611,8 +3610,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
AssumptionCache &AC, AAResults &AA,
TargetTransformInfo &TTI, bool Trivial,
bool NonTrivial, ScalarEvolution *SE,
- MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
+ MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
@@ -3652,35 +3650,6 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (F->hasOptSize())
return false;
- // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
- // of the loops L is nested in, and of the loops nested in L are all cold.
- auto IsLoopNestCold = [&](const Loop *L) {
- // Check L and all of its parent loops.
- auto *Parent = L;
- while (Parent) {
- if (!PSI->isColdBlock(Parent->getHeader(), BFI))
- return false;
- Parent = Parent->getParentLoop();
- }
- // Next check all loops nested within L.
- SmallVector<const Loop *, 4> Worklist;
- llvm::append_range(Worklist, L->getSubLoops());
- while (!Worklist.empty()) {
- auto *CurLoop = Worklist.pop_back_val();
- if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
- return false;
- llvm::append_range(Worklist, CurLoop->getSubLoops());
- }
- return true;
- };
-
- // Skip cold loops in cold loop nests, as unswitching them brings little
- // benefit but increases the code size
- if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
- LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
- return false;
- }
-
// Perform legality checks.
if (!isSafeForNoNTrivialUnswitching(L, LI))
return false;
@@ -3705,11 +3674,6 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &U) {
Function &F = *L.getHeader()->getParent();
(void)F;
- ProfileSummaryInfo *PSI = nullptr;
- if (auto OuterProxy =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
- .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
- PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
<< "\n");
@@ -3720,7 +3684,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
AR.MSSA->verifyMemorySSA();
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
- &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
+ &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
return PreservedAnalyses::all();
if (AR.MSSA && VerifyMemorySSA)
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index ebcbd2b..fa66a03 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -149,8 +149,6 @@ bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) {
return Impl.runImpl(F, TTI);
}
-namespace llvm {
-
bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) {
if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence(&F)) {
LLVM_DEBUG(dbgs() << "Not running SpeculativeExecution because "
@@ -328,11 +326,11 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
return true;
}
-FunctionPass *createSpeculativeExecutionPass() {
+FunctionPass *llvm::createSpeculativeExecutionPass() {
return new SpeculativeExecutionLegacyPass();
}
-FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
+FunctionPass *llvm::createSpeculativeExecutionIfHasBranchDivergencePass() {
return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true);
}
@@ -362,4 +360,3 @@ void SpeculativeExecutionPass::printPipeline(
OS << "only-if-divergent-target";
OS << '>';
}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 7d01709..e94ad19 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -716,8 +716,6 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
return Ret;
}
-namespace llvm {
-
PreservedAnalyses
StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
const DataLayout *DL = &F.getDataLayout();
@@ -735,5 +733,3 @@ StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<TargetIRAnalysis>();
return PA;
}
-
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 1d83ddc..89d41f3e 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -192,7 +192,7 @@ struct AllocaDerivedValueTracker {
SmallPtrSet<Instruction *, 32> AllocaUsers;
SmallPtrSet<Instruction *, 32> EscapePoints;
};
-}
+} // namespace
static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
if (F.callsFunctionThatReturnsTwice())
@@ -967,7 +967,7 @@ struct TailCallElim : public FunctionPass {
/*BFI=*/nullptr);
}
};
-}
+} // namespace
char TailCallElim::ID = 0;
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index b18acea..4fe736a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -1106,7 +1106,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
Phi.replaceAllUsesWith(RdxResult);
- continue;
}
}
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 9693ae6..4947d03 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -634,18 +635,10 @@ private:
/// Merge \p MergeWithV into \p IV and push \p V to the worklist, if \p IV
/// changes.
bool mergeInValue(ValueLatticeElement &IV, Value *V,
- ValueLatticeElement MergeWithV,
+ const ValueLatticeElement &MergeWithV,
ValueLatticeElement::MergeOptions Opts = {
/*MayIncludeUndef=*/false, /*CheckWiden=*/false});
- bool mergeInValue(Value *V, ValueLatticeElement MergeWithV,
- ValueLatticeElement::MergeOptions Opts = {
- /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) {
- assert(!V->getType()->isStructTy() &&
- "non-structs should use markConstant");
- return mergeInValue(ValueState[V], V, MergeWithV, Opts);
- }
-
/// getValueState - Return the ValueLatticeElement object that corresponds to
/// the value. This function handles the case when the value hasn't been seen
/// yet by properly seeding constants etc.
@@ -768,6 +761,7 @@ private:
void handleCallArguments(CallBase &CB);
void handleExtractOfWithOverflow(ExtractValueInst &EVI,
const WithOverflowInst *WO, unsigned Idx);
+ bool isInstFullyOverDefined(Instruction &Inst);
private:
friend class InstVisitor<SCCPInstVisitor>;
@@ -987,7 +981,7 @@ public:
void trackValueOfArgument(Argument *A) {
if (A->getType()->isStructTy())
return (void)markOverdefined(A);
- mergeInValue(A, getArgAttributeVL(A));
+ mergeInValue(ValueState[A], A, getArgAttributeVL(A));
}
bool isStructLatticeConstant(Function *F, StructType *STy);
@@ -1128,8 +1122,7 @@ bool SCCPInstVisitor::isStructLatticeConstant(Function *F, StructType *STy) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
const auto &It = TrackedMultipleRetVals.find(std::make_pair(F, i));
assert(It != TrackedMultipleRetVals.end());
- ValueLatticeElement LV = It->second;
- if (!SCCPSolver::isConstant(LV))
+ if (!SCCPSolver::isConstant(It->second))
return false;
}
return true;
@@ -1160,7 +1153,7 @@ Constant *SCCPInstVisitor::getConstantOrNull(Value *V) const {
std::vector<Constant *> ConstVals;
auto *ST = cast<StructType>(V->getType());
for (unsigned I = 0, E = ST->getNumElements(); I != E; ++I) {
- ValueLatticeElement LV = LVs[I];
+ const ValueLatticeElement &LV = LVs[I];
ConstVals.push_back(SCCPSolver::isConstant(LV)
? getConstant(LV, ST->getElementType(I))
: UndefValue::get(ST->getElementType(I)));
@@ -1225,7 +1218,7 @@ void SCCPInstVisitor::visitInstruction(Instruction &I) {
}
bool SCCPInstVisitor::mergeInValue(ValueLatticeElement &IV, Value *V,
- ValueLatticeElement MergeWithV,
+ const ValueLatticeElement &MergeWithV,
ValueLatticeElement::MergeOptions Opts) {
if (IV.mergeIn(MergeWithV, Opts)) {
pushUsersToWorkList(V);
@@ -1264,7 +1257,7 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
return;
}
- ValueLatticeElement BCValue = getValueState(BI->getCondition());
+ const ValueLatticeElement &BCValue = getValueState(BI->getCondition());
ConstantInt *CI = getConstantInt(BCValue, BI->getCondition()->getType());
if (!CI) {
// Overdefined condition variables, and branches on unfoldable constant
@@ -1326,7 +1319,7 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
// the target as executable.
if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) {
// Casts are folded by visitCastInst.
- ValueLatticeElement IBRValue = getValueState(IBR->getAddress());
+ const ValueLatticeElement &IBRValue = getValueState(IBR->getAddress());
BlockAddress *Addr = dyn_cast_or_null<BlockAddress>(
getConstant(IBRValue, IBR->getAddress()->getType()));
if (!Addr) { // Overdefined or unknown condition?
@@ -1383,49 +1376,66 @@ bool SCCPInstVisitor::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
// 7. If a conditional branch has a value that is overdefined, make all
// successors executable.
void SCCPInstVisitor::visitPHINode(PHINode &PN) {
- // If this PN returns a struct, just mark the result overdefined.
- // TODO: We could do a lot better than this if code actually uses this.
- if (PN.getType()->isStructTy())
- return (void)markOverdefined(&PN);
-
- if (getValueState(&PN).isOverdefined())
- return; // Quick exit
-
// Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
// and slow us down a lot. Just mark them overdefined.
if (PN.getNumIncomingValues() > 64)
return (void)markOverdefined(&PN);
- unsigned NumActiveIncoming = 0;
+ if (isInstFullyOverDefined(PN))
+ return;
+ SmallVector<unsigned> FeasibleIncomingIndices;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+ FeasibleIncomingIndices.push_back(i);
+ }
// Look at all of the executable operands of the PHI node. If any of them
// are overdefined, the PHI becomes overdefined as well. If they are all
// constant, and they agree with each other, the PHI becomes the identical
// constant. If they are constant and don't agree, the PHI is a constant
// range. If there are no executable operands, the PHI remains unknown.
- ValueLatticeElement PhiState = getValueState(&PN);
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
- continue;
-
- ValueLatticeElement IV = getValueState(PN.getIncomingValue(i));
- PhiState.mergeIn(IV);
- NumActiveIncoming++;
- if (PhiState.isOverdefined())
- break;
+ if (StructType *STy = dyn_cast<StructType>(PN.getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ ValueLatticeElement PhiState = getStructValueState(&PN, i);
+ if (PhiState.isOverdefined())
+ continue;
+ for (unsigned j : FeasibleIncomingIndices) {
+ const ValueLatticeElement &IV =
+ getStructValueState(PN.getIncomingValue(j), i);
+ PhiState.mergeIn(IV);
+ if (PhiState.isOverdefined())
+ break;
+ }
+ ValueLatticeElement &PhiStateRef = getStructValueState(&PN, i);
+ mergeInValue(PhiStateRef, &PN, PhiState,
+ ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ FeasibleIncomingIndices.size() + 1));
+ PhiStateRef.setNumRangeExtensions(
+ std::max((unsigned)FeasibleIncomingIndices.size(),
+ PhiStateRef.getNumRangeExtensions()));
+ }
+ } else {
+ ValueLatticeElement PhiState = getValueState(&PN);
+ for (unsigned i : FeasibleIncomingIndices) {
+ const ValueLatticeElement &IV = getValueState(PN.getIncomingValue(i));
+ PhiState.mergeIn(IV);
+ if (PhiState.isOverdefined())
+ break;
+ }
+ // We allow up to 1 range extension per active incoming value and one
+ // additional extension. Note that we manually adjust the number of range
+ // extensions to match the number of active incoming values. This helps to
+ // limit multiple extensions caused by the same incoming value, if other
+ // incoming values are equal.
+ ValueLatticeElement &PhiStateRef = ValueState[&PN];
+ mergeInValue(PhiStateRef, &PN, PhiState,
+ ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ FeasibleIncomingIndices.size() + 1));
+ PhiStateRef.setNumRangeExtensions(
+ std::max((unsigned)FeasibleIncomingIndices.size(),
+ PhiStateRef.getNumRangeExtensions()));
}
-
- // We allow up to 1 range extension per active incoming value and one
- // additional extension. Note that we manually adjust the number of range
- // extensions to match the number of active incoming values. This helps to
- // limit multiple extensions caused by the same incoming value, if other
- // incoming values are equal.
- mergeInValue(&PN, PhiState,
- ValueLatticeElement::MergeOptions().setMaxWidenSteps(
- NumActiveIncoming + 1));
- ValueLatticeElement &PhiStateRef = getValueState(&PN);
- PhiStateRef.setNumRangeExtensions(
- std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions()));
}
void SCCPInstVisitor::visitReturnInst(ReturnInst &I) {
@@ -1481,7 +1491,7 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
}
}
- ValueLatticeElement OpSt = getValueState(I.getOperand(0));
+ const ValueLatticeElement &OpSt = getValueState(I.getOperand(0));
if (OpSt.isUnknownOrUndef())
return;
@@ -1496,9 +1506,9 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
if (I.getDestTy()->isIntOrIntVectorTy() &&
I.getSrcTy()->isIntOrIntVectorTy() &&
I.getOpcode() != Instruction::BitCast) {
- auto &LV = getValueState(&I);
ConstantRange OpRange =
OpSt.asConstantRange(I.getSrcTy(), /*UndefAllowed=*/false);
+ auto &LV = getValueState(&I);
Type *DestTy = I.getDestTy();
ConstantRange Res = ConstantRange::getEmpty(DestTy->getScalarSizeInBits());
@@ -1516,19 +1526,24 @@ void SCCPInstVisitor::handleExtractOfWithOverflow(ExtractValueInst &EVI,
const WithOverflowInst *WO,
unsigned Idx) {
Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
- ValueLatticeElement L = getValueState(LHS);
- ValueLatticeElement R = getValueState(RHS);
+ Type *Ty = LHS->getType();
+
addAdditionalUser(LHS, &EVI);
addAdditionalUser(RHS, &EVI);
- if (L.isUnknownOrUndef() || R.isUnknownOrUndef())
- return; // Wait to resolve.
- Type *Ty = LHS->getType();
+ const ValueLatticeElement &L = getValueState(LHS);
+ if (L.isUnknownOrUndef())
+ return; // Wait to resolve.
ConstantRange LR = L.asConstantRange(Ty, /*UndefAllowed=*/false);
+
+ const ValueLatticeElement &R = getValueState(RHS);
+ if (R.isUnknownOrUndef())
+ return; // Wait to resolve.
+
ConstantRange RR = R.asConstantRange(Ty, /*UndefAllowed=*/false);
if (Idx == 0) {
ConstantRange Res = LR.binaryOp(WO->getBinaryOp(), RR);
- mergeInValue(&EVI, ValueLatticeElement::getRange(Res));
+ mergeInValue(ValueState[&EVI], &EVI, ValueLatticeElement::getRange(Res));
} else {
assert(Idx == 1 && "Index can only be 0 or 1");
ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
@@ -1560,7 +1575,7 @@ void SCCPInstVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
if (auto *WO = dyn_cast<WithOverflowInst>(AggVal))
return handleExtractOfWithOverflow(EVI, WO, i);
ValueLatticeElement EltVal = getStructValueState(AggVal, i);
- mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ mergeInValue(ValueState[&EVI], &EVI, EltVal);
} else {
// Otherwise, must be extracting from an array.
return (void)markOverdefined(&EVI);
@@ -1616,14 +1631,18 @@ void SCCPInstVisitor::visitSelectInst(SelectInst &I) {
if (ValueState[&I].isOverdefined())
return (void)markOverdefined(&I);
- ValueLatticeElement CondValue = getValueState(I.getCondition());
+ const ValueLatticeElement &CondValue = getValueState(I.getCondition());
if (CondValue.isUnknownOrUndef())
return;
if (ConstantInt *CondCB =
getConstantInt(CondValue, I.getCondition()->getType())) {
Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
- mergeInValue(&I, getValueState(OpVal));
+ const ValueLatticeElement &OpValState = getValueState(OpVal);
+ // Safety: accessing ValueState[&I] cannot invalidate OpValState, since &I
+ // is already in the map (no rehashing occurs).
+ assert(ValueState.contains(&I) && "&I is not in ValueState map.");
+ mergeInValue(ValueState[&I], &I, OpValState);
return;
}
@@ -1721,7 +1740,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
// being a special floating value.
ValueLatticeElement NewV;
NewV.markConstant(C, /*MayIncludeUndef=*/true);
- return (void)mergeInValue(&I, NewV);
+ return (void)mergeInValue(ValueState[&I], &I, NewV);
}
}
@@ -1741,7 +1760,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
R = A.overflowingBinaryOp(BO->getOpcode(), B, OBO->getNoWrapKind());
else
R = A.binaryOp(BO->getOpcode(), B);
- mergeInValue(&I, ValueLatticeElement::getRange(R));
+ mergeInValue(ValueState[&I], &I, ValueLatticeElement::getRange(R));
// TODO: Currently we do not exploit special values that produce something
// better than overdefined with an overdefined operand for vector or floating
@@ -1767,7 +1786,7 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) {
if (C) {
ValueLatticeElement CV;
CV.markConstant(C);
- mergeInValue(&I, CV);
+ mergeInValue(ValueState[&I], &I, CV);
return;
}
@@ -1802,7 +1821,7 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
Operands.reserve(I.getNumOperands());
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- ValueLatticeElement State = getValueState(I.getOperand(i));
+ const ValueLatticeElement &State = getValueState(I.getOperand(i));
if (State.isUnknownOrUndef())
return; // Operands are not resolved yet.
@@ -1881,14 +1900,13 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
if (ValueState[&I].isOverdefined())
return (void)markOverdefined(&I);
- ValueLatticeElement PtrVal = getValueState(I.getOperand(0));
+ const ValueLatticeElement &PtrVal = getValueState(I.getOperand(0));
if (PtrVal.isUnknownOrUndef())
return; // The pointer is not resolved yet!
- ValueLatticeElement &IV = ValueState[&I];
-
if (SCCPSolver::isConstant(PtrVal)) {
Constant *Ptr = getConstant(PtrVal, I.getOperand(0)->getType());
+ ValueLatticeElement &IV = ValueState[&I];
// load null is undefined.
if (isa<ConstantPointerNull>(Ptr)) {
@@ -1916,7 +1934,7 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
}
// Fall back to metadata.
- mergeInValue(&I, getValueFromMetadata(&I));
+ mergeInValue(ValueState[&I], &I, getValueFromMetadata(&I));
}
void SCCPInstVisitor::visitCallBase(CallBase &CB) {
@@ -1944,7 +1962,7 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
return markOverdefined(&CB); // Can't handle struct args.
if (A.get()->getType()->isMetadataTy())
continue; // Carried in CB, not allowed in Operands.
- ValueLatticeElement State = getValueState(A);
+ const ValueLatticeElement &State = getValueState(A);
if (State.isUnknownOrUndef())
return; // Operands are not resolved yet.
@@ -1964,7 +1982,7 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
}
// Fall back to metadata.
- mergeInValue(&CB, getValueFromMetadata(&CB));
+ mergeInValue(ValueState[&CB], &CB, getValueFromMetadata(&CB));
}
void SCCPInstVisitor::handleCallArguments(CallBase &CB) {
@@ -1992,10 +2010,11 @@ void SCCPInstVisitor::handleCallArguments(CallBase &CB) {
mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg,
getMaxWidenStepsOpts());
}
- } else
- mergeInValue(&*AI,
- getValueState(*CAI).intersect(getArgAttributeVL(&*AI)),
- getMaxWidenStepsOpts());
+ } else {
+ ValueLatticeElement CallArg =
+ getValueState(*CAI).intersect(getArgAttributeVL(&*AI));
+ mergeInValue(ValueState[&*AI], &*AI, CallArg, getMaxWidenStepsOpts());
+ }
}
}
}
@@ -2076,7 +2095,8 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
if (II->getIntrinsicID() == Intrinsic::vscale) {
unsigned BitWidth = CB.getType()->getScalarSizeInBits();
const ConstantRange Result = getVScaleRange(II->getFunction(), BitWidth);
- return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+ return (void)mergeInValue(ValueState[II], II,
+ ValueLatticeElement::getRange(Result));
}
if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
@@ -2094,7 +2114,8 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
ConstantRange Result =
ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges);
- return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+ return (void)mergeInValue(ValueState[II], II,
+ ValueLatticeElement::getRange(Result));
}
}
@@ -2121,10 +2142,25 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
return handleCallOverdefined(CB); // Not tracking this callee.
// If so, propagate the return value of the callee into this call result.
- mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts());
+ mergeInValue(ValueState[&CB], &CB, TFRVI->second, getMaxWidenStepsOpts());
}
}
+bool SCCPInstVisitor::isInstFullyOverDefined(Instruction &Inst) {
+ // For struct types, we handle each member separately.
+ // A struct value is not considered overdefined as long as at least
+ // one of its members is not overdefined.
+ if (StructType *STy = dyn_cast<StructType>(Inst.getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) {
+ if (!getStructValueState(&Inst, i).isOverdefined())
+ return false;
+ }
+ return true;
+ }
+
+ return getValueState(&Inst).isOverdefined();
+}
+
void SCCPInstVisitor::solve() {
// Process the work lists until they are empty!
while (!BBWorkList.empty() || !InstWorkList.empty()) {
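As a rough illustration of the merge behaviour exercised by the reworked PHI handling above, the toy lattice below models only the unknown → constant → overdefined transitions; it is not the real ValueLatticeElement and deliberately omits constant ranges and widening limits.

#include <cstddef>
#include <vector>

// Toy three-level lattice: values start unknown, may become a single
// constant, and collapse to overdefined on conflicting merges.
struct ToyLattice {
  enum Kind { Unknown, Constant, Overdefined } K = Unknown;
  int Val = 0;

  bool mergeIn(const ToyLattice &Other) {
    if (Other.K == Unknown || K == Overdefined)
      return false;                       // nothing new to learn
    if (K == Unknown) { *this = Other; return true; }
    if (Other.K == Overdefined || Other.Val != Val) {
      K = Overdefined;                    // conflicting constants widen
      return true;
    }
    return false;                         // same constant, no change
  }
  bool isOverdefined() const { return K == Overdefined; }
};

// Merge the states of the incoming values whose edges are feasible, stopping
// early once the result is overdefined -- the same shape as the PHI loop above.
ToyLattice mergePhi(const std::vector<ToyLattice> &Incoming,
                    const std::vector<bool> &EdgeFeasible) {
  ToyLattice State;
  for (std::size_t I = 0; I < Incoming.size(); ++I) {
    if (!EdgeFeasible[I])
      continue;                           // skip dead predecessors
    State.mergeIn(Incoming[I]);
    if (State.isOverdefined())
      break;                              // no further refinement possible
  }
  return State;
}

Feeding it constant 1 (feasible), constant 1 (infeasible edge) and constant 2 (feasible) yields overdefined, while dropping the last entry keeps the state at constant 1.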
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b96d29e..280eb20 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7957,9 +7957,9 @@ bool VPRecipeBuilder::getScaledReductions(
auto CollectExtInfo = [this, &Exts, &ExtOpTypes,
&ExtKinds](SmallVectorImpl<Value *> &Ops) -> bool {
for (const auto &[I, OpI] : enumerate(Ops)) {
- auto *CI = dyn_cast<ConstantInt>(OpI);
- if (I > 0 && CI &&
- canConstantBeExtended(CI, ExtOpTypes[0], ExtKinds[0])) {
+ const APInt *C;
+ if (I > 0 && match(OpI, m_APInt(C)) &&
+ canConstantBeExtended(C, ExtOpTypes[0], ExtKinds[0])) {
ExtOpTypes[I] = ExtOpTypes[0];
ExtKinds[I] = ExtKinds[0];
continue;
@@ -8240,14 +8240,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// the vector loop or when not folding the tail. In the latter case, we know
// that the canonical induction increment will not overflow as the vector trip
// count is >= increment and a multiple of the increment.
+ VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
if (!HasNUW) {
- auto *IVInc = Plan->getVectorLoopRegion()
- ->getExitingBasicBlock()
- ->getTerminator()
- ->getOperand(0);
- assert(match(IVInc, m_VPInstruction<Instruction::Add>(
- m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
+ auto *IVInc =
+ LoopRegion->getExitingBasicBlock()->getTerminator()->getOperand(0);
+ assert(match(IVInc,
+ m_VPInstruction<Instruction::Add>(
+ m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) &&
"Did not find the canonical IV increment");
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
}
@@ -8293,7 +8293,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
- VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
HeaderVPBB);
@@ -8377,8 +8376,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
for (VPValue *Old : Old2New.keys())
Old->getDefiningRecipe()->eraseFromParent();
- assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
- !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
+ assert(isa<VPRegionBlock>(LoopRegion) &&
+ !LoopRegion->getEntryBasicBlock()->empty() &&
"entry block must be set to a VPRegionBlock having a non-empty entry "
"VPBasicBlock");
@@ -9326,8 +9325,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
if (ResumePhiIter == MainScalarPH->phis().end()) {
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
ResumePhi = ScalarPHBuilder.createScalarPhi(
- {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
- "vec.epilog.resume.val");
+ {VectorTC,
+ MainPlan.getVectorLoopRegion()->getCanonicalIV()->getStartValue()},
+ {}, "vec.epilog.resume.val");
} else {
ResumePhi = cast<VPPhi>(&*ResumePhiIter);
if (MainScalarPH->begin() == MainScalarPH->end())
@@ -9354,7 +9354,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
- VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+ VPCanonicalIVPHIRecipe *IV = VectorLoop->getCanonicalIV();
// When vectorizing the epilogue loop, the canonical induction needs to be
// adjusted by the value after the main vector loop. Find the resume value
// created during execution of the main VPlan. It must be the first phi in the
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 88af2cf..9cd52da 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2242,8 +2242,49 @@ public:
/// may not be necessary.
bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
- Align Alignment, const int64_t Diff, Value *Ptr0,
- Value *PtrN, StridedPtrInfo &SPtrInfo) const;
+ Align Alignment, const int64_t Diff,
+ const size_t Sz) const;
+
+ /// Return true if an array of scalar loads can be replaced with a strided
+ /// load (with constant stride).
+ ///
+ /// TODO:
+ /// It is possible that the load gets "widened". Suppose that originally each
+ /// load loads `k` bytes and `PointerOps` can be arranged as follows (`%s` is
+ /// constant): %b + 0 * %s + 0 %b + 0 * %s + 1 %b + 0 * %s + 2
+ /// ...
+ /// %b + 0 * %s + (w - 1)
+ ///
+ /// %b + 1 * %s + 0
+ /// %b + 1 * %s + 1
+ /// %b + 1 * %s + 2
+ /// ...
+ /// %b + 1 * %s + (w - 1)
+ /// ...
+ ///
+ /// %b + (n - 1) * %s + 0
+ /// %b + (n - 1) * %s + 1
+ /// %b + (n - 1) * %s + 2
+ /// ...
+ /// %b + (n - 1) * %s + (w - 1)
+ ///
+ /// In this case we will generate a strided load of type `<n x (k * w)>`.
+ ///
+ /// \param PointerOps list of pointer arguments of loads.
+ /// \param ElemTy original scalar type of loads.
+ /// \param Alignment alignment of the first load.
+ /// \param SortedIndices is the order of PointerOps as returned by
+ /// `sortPtrAccesses`
+ /// \param Diff Pointer difference between the lowest and the highest pointer
+ /// in `PointerOps` as returned by `getPointersDiff`.
+ /// \param Ptr0 first pointer in `PointerOps`.
+ /// \param PtrN last pointer in `PointerOps`.
+ /// \param SPtrInfo If the function returns `true`, it also sets all the fields
+ /// of `SPtrInfo` necessary to generate the strided load later.
+ bool analyzeConstantStrideCandidate(
+ const ArrayRef<Value *> PointerOps, Type *ElemTy, Align Alignment,
+ const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff,
+ Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const;
/// Return true if an array of scalar loads can be replaced with a strided
/// load (with run-time stride).
@@ -6849,9 +6890,8 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
/// current graph (for masked gathers extra extractelement instructions
/// might be required).
bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
- Align Alignment, const int64_t Diff, Value *Ptr0,
- Value *PtrN, StridedPtrInfo &SPtrInfo) const {
- const size_t Sz = PointerOps.size();
+ Align Alignment, const int64_t Diff,
+ const size_t Sz) const {
if (Diff % (Sz - 1) != 0)
return false;
@@ -6875,27 +6915,40 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
return false;
if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
return false;
+ return true;
+ }
+ return false;
+}
- // Iterate through all pointers and check if all distances are
- // unique multiple of Dist.
- SmallSet<int64_t, 4> Dists;
- for (Value *Ptr : PointerOps) {
- int64_t Dist = 0;
- if (Ptr == PtrN)
- Dist = Diff;
- else if (Ptr != Ptr0)
- Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
- // If the strides are not the same or repeated, we can't
- // vectorize.
- if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
- break;
- }
- if (Dists.size() == Sz) {
- Type *StrideTy = DL->getIndexType(Ptr0->getType());
- SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
- SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
- return true;
- }
+bool BoUpSLP::analyzeConstantStrideCandidate(
+ const ArrayRef<Value *> PointerOps, Type *ScalarTy, Align Alignment,
+ const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff,
+ Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+ const size_t Sz = PointerOps.size();
+ if (!isStridedLoad(PointerOps, ScalarTy, Alignment, Diff, Sz))
+ return false;
+
+ int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
+
+ // Iterate through all pointers and check that all distances are
+ // unique multiples of the stride.
+ SmallSet<int64_t, 4> Dists;
+ for (Value *Ptr : PointerOps) {
+ int64_t Dist = 0;
+ if (Ptr == PtrN)
+ Dist = Diff;
+ else if (Ptr != Ptr0)
+ Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
+ // If the strides are not the same or repeated, we can't
+ // vectorize.
+ if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
+ break;
+ }
+ if (Dists.size() == Sz) {
+ Type *StrideTy = DL->getIndexType(Ptr0->getType());
+ SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
+ SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
+ return true;
}
return false;
}
@@ -6995,8 +7048,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
Align Alignment =
cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
->getAlign();
- if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN,
- SPtrInfo))
+ if (analyzeConstantStrideCandidate(PointerOps, ScalarTy, Alignment, Order,
+ *Diff, Ptr0, PtrN, SPtrInfo))
return LoadsState::StridedVectorize;
}
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
@@ -17632,7 +17685,9 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
}
if (IsPHI ||
(!E->isGather() && E->State != TreeEntry::SplitVectorize &&
- E->doesNotNeedToSchedule()) ||
+ (E->doesNotNeedToSchedule() ||
+ (E->hasCopyableElements() && !E->isCopyableElement(LastInst) &&
+ isUsedOutsideBlock(LastInst)))) ||
(GatheredLoadsEntriesFirst.has_value() &&
E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
E->getOpcode() == Instruction::Load)) {
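The constant-stride check factored out above boils down to an arithmetic-progression test on pointer distances; a simplified standalone version, operating on plain integer offsets instead of `getPointersDiff` results, might look like this:

#include <cstddef>
#include <cstdint>
#include <set>
#include <vector>

// Returns true if the distances of each pointer from the lowest one form a
// set of unique multiples of a common stride, i.e. the accesses describe a
// strided pattern. TotalDiff is the distance between the lowest and the
// highest pointer.
static bool hasConstantStride(const std::vector<int64_t> &Dists,
                              int64_t TotalDiff) {
  const std::size_t Sz = Dists.size();
  if (Sz < 2 || TotalDiff % static_cast<int64_t>(Sz - 1) != 0)
    return false;
  const int64_t Stride = TotalDiff / static_cast<int64_t>(Sz - 1);
  if (Stride == 0)
    return false;
  std::set<int64_t> Seen;
  for (int64_t Dist : Dists) {
    // Reject offsets that are not multiples of the stride, and duplicates.
    if (Dist % Stride != 0 || !Seen.insert(Dist).second)
      return false;
  }
  return Seen.size() == Sz;
}

Distances {0, 4, 8, 12} with TotalDiff = 12 pass (stride 4), while {0, 4, 8, 8} fail the uniqueness check, mirroring the role of the Dists set in analyzeConstantStrideCandidate.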
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0101942..d167009 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1753,14 +1753,14 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
}
#endif
-bool llvm::canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
TTI::PartialReductionExtendKind ExtKind) {
- APInt TruncatedVal = CI->getValue().trunc(NarrowType->getScalarSizeInBits());
- unsigned WideSize = CI->getType()->getScalarSizeInBits();
+ APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
+ unsigned WideSize = C->getBitWidth();
APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
? TruncatedVal.sext(WideSize)
: TruncatedVal.zext(WideSize);
- return ExtendedVal == CI->getValue();
+ return ExtendedVal == *C;
}
TargetTransformInfo::OperandValueInfo
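The APInt-based form of canConstantBeExtended above is essentially a truncate-and-re-extend round trip; a minimal sketch of that check, with a plain bool standing in for TTI::PartialReductionExtendKind, is:

#include "llvm/ADT/APInt.h"

// Returns true if the wide constant C can be treated as having been sign- or
// zero-extended from a NarrowBits-wide value: truncate it, extend it back to
// the original width, and compare against the original.
static bool fitsInNarrowType(const llvm::APInt &C, unsigned NarrowBits,
                             bool IsSigned) {
  llvm::APInt Truncated = C.trunc(NarrowBits);
  llvm::APInt Extended = IsSigned ? Truncated.sext(C.getBitWidth())
                                  : Truncated.zext(C.getBitWidth());
  return Extended == C;
}

For instance, a 32-bit APInt holding 255 passes the zero-extend check against an 8-bit narrow type, while 256 does not.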
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 23f5623..84d2ea6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -407,6 +407,10 @@ public:
VPBasicBlock *getParent() { return Parent; }
const VPBasicBlock *getParent() const { return Parent; }
+ /// \return the VPRegionBlock which the recipe belongs to.
+ VPRegionBlock *getRegion();
+ const VPRegionBlock *getRegion() const;
+
/// The method which generates the output IR instructions that correspond to
/// this VPRecipe, thereby "executing" the VPlan.
virtual void execute(VPTransformState &State) = 0;
@@ -1012,6 +1016,8 @@ public:
// part if scalar. In the latter case, the recipe will be removed during
// unrolling.
ExtractLastElement,
+ // Extracts the last lane for each part from its operand.
+ ExtractLastLanePerPart,
// Extracts the second-to-last lane from its operand or the second-to-last
// part if it is scalar. In the latter case, the recipe will be removed
// during unrolling.
@@ -4058,8 +4064,29 @@ public:
/// Remove the current region from its VPlan, connecting its predecessor to
/// its entry, and its exiting block to its successor.
void dissolveToCFGLoop();
+
+ /// Returns the canonical induction recipe of the region.
+ VPCanonicalIVPHIRecipe *getCanonicalIV() {
+ VPBasicBlock *EntryVPBB = getEntryBasicBlock();
+ if (EntryVPBB->empty()) {
+ // VPlan native path. TODO: Unify both code paths.
+ EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
+ }
+ return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
+ }
+ const VPCanonicalIVPHIRecipe *getCanonicalIV() const {
+ return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
+ }
};
+inline VPRegionBlock *VPRecipeBase::getRegion() {
+ return getParent()->getParent();
+}
+
+inline const VPRegionBlock *VPRecipeBase::getRegion() const {
+ return getParent()->getParent();
+}
+
/// VPlan models a candidate for vectorization, encoding various decisions take
/// to produce efficient output IR, including which branches, basic-blocks and
/// output IR instructions to generate, and their cost. VPlan holds a
@@ -4252,12 +4279,14 @@ public:
BackedgeTakenCount = new VPValue();
return BackedgeTakenCount;
}
+ VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
/// The vector trip count.
VPValue &getVectorTripCount() { return VectorTripCount; }
/// Returns the VF of the vector loop region.
VPValue &getVF() { return VF; };
+ const VPValue &getVF() const { return VF; };
/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }
@@ -4369,16 +4398,6 @@ public:
LLVM_DUMP_METHOD void dump() const;
#endif
- /// Returns the canonical induction recipe of the vector loop.
- VPCanonicalIVPHIRecipe *getCanonicalIV() {
- VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
- if (EntryVPBB->empty()) {
- // VPlan native path.
- EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
- }
- return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
- }
-
VPValue *getSCEVExpansion(const SCEV *S) const {
return SCEVToExpansion.lookup(S);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 07bfe7a..7e074c1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -116,6 +116,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::FirstActiveLane:
return Type::getIntNTy(Ctx, 64);
case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement: {
Type *BaseTy = inferScalarType(R->getOperand(0));
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
@@ -376,7 +377,7 @@ bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
#ifndef NDEBUG
auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
- auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
+ VPRegionBlock *Region = R->getRegion();
if (Region && Region->isReplicator()) {
assert(Region->getNumSuccessors() == 1 &&
Region->getNumPredecessors() == 1 && "Expected SESE region!");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index c0147ce..332791a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -658,9 +658,11 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
}
VPIRMetadata VPBranchWeights;
- auto *Term = VPBuilder(CheckBlockVPBB)
- .createNaryOp(VPInstruction::BranchOnCond, {CondVPV},
- Plan.getCanonicalIV()->getDebugLoc());
+ auto *Term =
+ VPBuilder(CheckBlockVPBB)
+ .createNaryOp(
+ VPInstruction::BranchOnCond, {CondVPV},
+ Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc());
if (AddBranchWeights) {
MDBuilder MDB(Plan.getContext());
MDNode *BranchWeights =
@@ -921,8 +923,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
if (DerivedIV->getNumUsers() == 1 &&
DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
- auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
- &Plan.getVectorTripCount());
+ auto *NewSel = Builder.createSelect(
+ AnyNaN, LoopRegion->getCanonicalIV(), &Plan.getVectorTripCount());
DerivedIV->moveAfter(&*Builder.getInsertPoint());
DerivedIV->setOperand(1, NewSel);
continue;
@@ -935,7 +937,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
"FMaxNum/FMinNum reduction.\n");
return false;
}
- auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
+ auto *NewSel =
+ Builder.createSelect(AnyNaN, LoopRegion->getCanonicalIV(), VecV);
ResumeR->setOperand(0, NewSel);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 1580a3b..2aaabd9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -474,7 +474,7 @@ public:
-/// Check if a constant \p CI can be safely treated as having been extended
+/// Check if a constant \p C can be safely treated as having been extended
/// from a narrower type with the given extension kind.
-bool canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+bool canConstantBeExtended(const APInt *C, Type *NarrowType,
TTI::PartialReductionExtendKind ExtKind);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b42b049..d8203e2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -173,10 +173,10 @@ inline int_pred_ty<is_zero_int> m_ZeroInt() {
/// For vectors, this includes constants with undefined elements.
inline int_pred_ty<is_one> m_One() { return int_pred_ty<is_one>(); }
-struct bind_const_int {
- uint64_t &Res;
+struct bind_apint {
+ const APInt *&Res;
- bind_const_int(uint64_t &Res) : Res(Res) {}
+ bind_apint(const APInt *&Res) : Res(Res) {}
bool match(VPValue *VPV) const {
if (!VPV->isLiveIn())
@@ -188,7 +188,23 @@ struct bind_const_int {
const auto *CI = dyn_cast<ConstantInt>(V);
if (!CI)
return false;
- if (auto C = CI->getValue().tryZExtValue()) {
+ Res = &CI->getValue();
+ return true;
+ }
+};
+
+inline bind_apint m_APInt(const APInt *&C) { return C; }
+
+struct bind_const_int {
+ uint64_t &Res;
+
+ bind_const_int(uint64_t &Res) : Res(Res) {}
+
+ bool match(VPValue *VPV) const {
+ const APInt *APConst;
+ if (!bind_apint(APConst).match(VPV))
+ return false;
+ if (auto C = APConst->tryZExtValue()) {
Res = *C;
return true;
}
@@ -372,6 +388,12 @@ m_ExtractLastElement(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
}
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t>
+m_ExtractLastLanePerPart(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::ExtractLastLanePerPart>(Op0);
+}
+
template <typename Op0_t, typename Op1_t, typename Op2_t>
inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t>
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
@@ -394,6 +416,12 @@ m_AnyOf(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::AnyOf>(Op0);
}
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::FirstActiveLane, Op0_t>
+m_FirstActiveLane(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::FirstActiveLane>(Op0);
+}
+
template <unsigned Opcode, typename Op0_t>
inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) {
return AllRecipe_match<Opcode, Op0_t>(Op0);
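
The two new VPInstruction matchers compose like the existing m_ExtractLastElement; a short sketch modeled on their uses later in this patch (as above, assumes compilation next to the VPlan sources; the wrapper is illustrative):

    #include "VPlanPatternMatch.h"
    using namespace llvm;
    using namespace llvm::VPlanPatternMatch;

    // True if \p R is one of the two newly matched vector-to-scalar forms.
    static bool isPerPartOrFirstLaneExtract(VPRecipeBase &R) {
      VPValue *Src, *Mask;
      return match(&R, m_ExtractLastLanePerPart(m_VPValue(Src))) ||
             match(&R, m_FirstActiveLane(m_VPValue(Mask)));
    }
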
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 0c27d53..fb17d5d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -168,7 +168,8 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
// non-phi instructions.
auto &Plan = *HeaderVPBB->getPlan();
- auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
+ auto *IV =
+ new VPWidenCanonicalIVRecipe(HeaderVPBB->getParent()->getCanonicalIV());
Builder.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
Builder.insert(IV);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2368d18..d1e67e6b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -341,12 +341,12 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
ExtAType = GetExtendKind(ExtAR);
ExtBType = GetExtendKind(ExtBR);
- if (!ExtBR && Widen->getOperand(1)->isLiveIn()) {
- auto *CI = cast<ConstantInt>(Widen->getOperand(1)->getLiveInIRValue());
- if (canConstantBeExtended(CI, InputTypeA, ExtAType)) {
- InputTypeB = InputTypeA;
- ExtBType = ExtAType;
- }
+ using namespace VPlanPatternMatch;
+ const APInt *C;
+ if (!ExtBR && match(Widen->getOperand(1), m_APInt(C)) &&
+ canConstantBeExtended(C, InputTypeA, ExtAType)) {
+ InputTypeB = InputTypeA;
+ ExtBType = ExtAType;
}
};
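
Switching the helper to APInt makes the underlying question easy to state: can the constant be reproduced by truncating it to the narrow type and re-applying the extension? A self-contained illustration of that check (my own helpers, not the in-tree canConstantBeExtended; assumes NarrowBits <= C.getBitWidth()):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // True iff C is losslessly representable as a zero-extension of a
    // NarrowBits-wide value.
    static bool losslessZExtFrom(const APInt &C, unsigned NarrowBits) {
      return C.trunc(NarrowBits).zext(C.getBitWidth()) == C;
    }

    // Same, but for sign-extension.
    static bool losslessSExtFrom(const APInt &C, unsigned NarrowBits) {
      return C.trunc(NarrowBits).sext(C.getBitWidth()) == C;
    }
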
@@ -511,6 +511,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExplicitVectorLength:
case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
case VPInstruction::Not:
@@ -878,9 +879,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
return ReducedPartRdx;
}
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractLastElement:
case VPInstruction::ExtractPenultimateElement: {
- unsigned Offset = getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
+ unsigned Offset =
+ getOpcode() == VPInstruction::ExtractPenultimateElement ? 2 : 1;
Value *Res;
if (State.VF.isVector()) {
assert(Offset <= State.VF.getKnownMinValue() &&
@@ -1166,6 +1169,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractLastElement ||
+ getOpcode() == VPInstruction::ExtractLastLanePerPart ||
getOpcode() == VPInstruction::ExtractPenultimateElement ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::ExtractLane ||
@@ -1229,6 +1233,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExtractLane:
case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::ActiveLaneMask:
case VPInstruction::FirstActiveLane:
@@ -1376,6 +1381,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractLastElement:
O << "extract-last-element";
break;
+ case VPInstruction::ExtractLastLanePerPart:
+ O << "extract-last-lane-per-part";
+ break;
case VPInstruction::ExtractPenultimateElement:
O << "extract-penultimate-element";
break;
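
My reading of the new opcode, based on the generate() change above and the UF == 1 simplification later in this patch: extract-last-element produces one scalar (the last lane of the final part), while extract-last-lane-per-part produces the last lane of every unrolled part, and the two coincide when UF == 1. A small LLVM-free illustration:

    #include <cassert>
    #include <vector>

    int main() {
      const unsigned VF = 4, UF = 2;
      // Two unrolled parts of VF lanes each.
      std::vector<std::vector<int>> Parts = {{0, 1, 2, 3}, {4, 5, 6, 7}};

      int LastElement = Parts[UF - 1][VF - 1]; // one scalar: 7
      std::vector<int> LastPerPart;            // one scalar per part: {3, 7}
      for (unsigned P = 0; P != UF; ++P)
        LastPerPart.push_back(Parts[P][VF - 1]);

      assert(LastElement == 7 && LastPerPart[0] == 3 && LastPerPart[1] == 7);
      return 0;
    }
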
@@ -2344,7 +2352,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
return false;
auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
- auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
+ auto *CanIV = getRegion()->getCanonicalIV();
return StartC && StartC->isZero() && StepC && StepC->isOne() &&
getScalarType() == CanIV->getScalarType();
}
@@ -3068,7 +3076,7 @@ static void scalarizeInstruction(const Instruction *Instr,
State.AC->registerAssumption(II);
assert(
- (RepRecipe->getParent()->getParent() ||
+ (RepRecipe->getRegion() ||
!RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
all_of(RepRecipe->operands(),
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
@@ -3260,7 +3268,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
to_vector(operands()), VF);
// If the recipe is not predicated (i.e. not in a replicate region), return
// the scalar cost. Otherwise handle predicated cost.
- if (!getParent()->getParent()->isReplicator())
+ if (!getRegion()->isReplicator())
return ScalarCost;
// Account for the phi nodes that we will create.
@@ -3276,7 +3284,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
case Instruction::Store: {
// TODO: See getMemInstScalarizationCost for how to handle replicating and
// predicated cases.
- const VPRegionBlock *ParentRegion = getParent()->getParent();
+ const VPRegionBlock *ParentRegion = getRegion();
if (ParentRegion && ParentRegion->isReplicator())
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 40b7e8d..f5f616f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -501,7 +501,8 @@ static void removeRedundantInductionCasts(VPlan &Plan) {
/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
/// recipe, if it exists.
static void removeRedundantCanonicalIVs(VPlan &Plan) {
- VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
for (VPUser *U : CanonicalIV->users()) {
WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
@@ -512,7 +513,7 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) {
if (!WidenNewIV)
return;
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
@@ -582,8 +583,9 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
FPMathOperator *FPBinOp, Instruction *TruncI,
VPValue *StartV, VPValue *Step, DebugLoc DL,
VPBuilder &Builder) {
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
- VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
+ VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
@@ -786,9 +788,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
ScalarEvolution &SE) {
VPValue *Incoming, *Mask;
if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>(
- m_VPInstruction<VPInstruction::FirstActiveLane>(
- m_VPValue(Mask)),
- m_VPValue(Incoming))))
+ m_FirstActiveLane(m_VPValue(Mask)), m_VPValue(Incoming))))
return nullptr;
auto *WideIV = getOptimizableIVOf(Incoming, SE);
@@ -800,8 +800,9 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
return nullptr;
// Calculate the final index.
- VPValue *EndValue = Plan.getCanonicalIV();
- auto CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ auto *CanonicalIV = LoopRegion->getCanonicalIV();
+ Type *CanonicalIVType = CanonicalIV->getScalarType();
VPBuilder B(cast<VPBasicBlock>(PredVPBB));
DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc();
@@ -810,7 +811,8 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
Type *FirstActiveLaneType = TypeInfo.inferScalarType(FirstActiveLane);
FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
FirstActiveLaneType, DL);
- EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL);
+ VPValue *EndValue =
+ B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
// `getOptimizableIVOf()` always returns the pre-incremented IV, so if it
// changed it means the exit is using the incremented value, so we need to
@@ -1205,7 +1207,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}
// Look through ExtractLastElement (BuildVector ....).
- if (match(&R, m_ExtractLastElement(m_BuildVector()))) {
+ if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
+ m_ExtractLastLanePerPart(m_BuildVector())))) {
auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 1));
@@ -1271,13 +1274,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
- if (match(Def, m_ExtractLastElement(m_Broadcast(m_VPValue(A))))) {
+ if (match(Def,
+ m_CombineOr(m_ExtractLastElement(m_Broadcast(m_VPValue(A))),
+ m_ExtractLastLanePerPart(m_Broadcast(m_VPValue(A)))))) {
Def->replaceAllUsesWith(A);
return;
}
- if (match(Def,
- m_VPInstruction<VPInstruction::ExtractLastElement>(m_VPValue(A))) &&
+ if (match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)),
+ m_ExtractLastLanePerPart(m_VPValue(A)))) &&
((isa<VPInstruction>(A) && vputils::isSingleScalar(A)) ||
(isa<VPReplicateRecipe>(A) &&
cast<VPReplicateRecipe>(A)->isSingleScalar())) &&
@@ -1285,6 +1290,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
[Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
return Def->replaceAllUsesWith(A);
}
+
+ if (Plan->getUF() == 1 &&
+ match(Def, m_ExtractLastLanePerPart(m_VPValue(A)))) {
+ return Def->replaceAllUsesWith(
+ Builder.createNaryOp(VPInstruction::ExtractLastElement, {A}));
+ }
}
void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
@@ -1322,8 +1333,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
Clone->insertBefore(RepOrWidenR);
- auto *Ext = new VPInstruction(VPInstruction::ExtractLastElement,
- {Clone->getOperand(0)});
+ unsigned ExtractOpc =
+ vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))
+ ? VPInstruction::ExtractLastElement
+ : VPInstruction::ExtractLastLanePerPart;
+ auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
Ext->insertBefore(Clone);
Clone->setOperand(0, Ext);
RepR->eraseFromParent();
@@ -1337,7 +1351,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
return U->usesScalars(RepOrWidenR) ||
match(cast<VPRecipeBase>(U),
- m_ExtractLastElement(m_VPValue()));
+ m_CombineOr(m_ExtractLastElement(m_VPValue()),
+ m_ExtractLastLanePerPart(m_VPValue())));
}))
continue;
@@ -1530,7 +1545,7 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
});
- auto *CanIV = Plan.getCanonicalIV();
+ auto *CanIV = Plan.getVectorLoopRegion()->getCanonicalIV();
if (!match(Cond, m_SpecificICmp(CmpInst::ICMP_EQ,
m_Specific(CanIV->getBackedgeValue()),
m_Specific(&Plan.getVectorTripCount()))))
@@ -1843,8 +1858,8 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
return nullptr;
VPRegionBlock *EnclosingLoopRegion =
HoistCandidate->getParent()->getEnclosingLoopRegion();
- assert((!HoistCandidate->getParent()->getParent() ||
- HoistCandidate->getParent()->getParent() == EnclosingLoopRegion) &&
+ assert((!HoistCandidate->getRegion() ||
+ HoistCandidate->getRegion() == EnclosingLoopRegion) &&
"CFG in VPlan should still be flat, without replicate regions");
// Hoist candidate was already visited, no need to hoist.
if (!Visited.insert(HoistCandidate).second)
@@ -2107,9 +2122,18 @@ static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
// Return true if we do not know how to (mechanically) hoist a given recipe
- // out of a loop region. Does not address legality concerns such as aliasing
- // or speculation safety.
+ // out of a loop region.
auto CannotHoistRecipe = [](VPRecipeBase &R) {
+ // Assumes don't alias anything or throw; as long as they're guaranteed to
+ // execute, they're safe to hoist.
+ if (match(&R, m_Intrinsic<Intrinsic::assume>()))
+ return false;
+
+ // TODO: Relax checks in the future, e.g. we could also hoist reads, if
+ // their memory location is not modified in the vector loop.
+ if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
+ return true;
+
// Allocas cannot be hoisted.
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
return RepR && RepR->getOpcode() == Instruction::Alloca;
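
Because this hunk interleaves removed and added lines, the resulting filter is easier to read consolidated. Reconstructed from the hunk above (a fragment in its original context inside licm(), not standalone code):

    auto CannotHoistRecipe = [](VPRecipeBase &R) {
      // Assumes don't alias anything or throw; as long as they're guaranteed to
      // execute, they're safe to hoist.
      if (match(&R, m_Intrinsic<Intrinsic::assume>()))
        return false;

      // TODO: Relax checks in the future, e.g. we could also hoist reads, if
      // their memory location is not modified in the vector loop.
      if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
        return true;

      // Allocas cannot be hoisted.
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      return RepR && RepR->getOpcode() == Instruction::Alloca;
    };
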
@@ -2117,17 +2141,18 @@ static void licm(VPlan &Plan) {
// Hoist any loop invariant recipes from the vector loop region to the
   // preheader. Perform a shallow traversal of the vector loop region, to
- // exclude recipes in replicate regions.
+ // exclude recipes in replicate regions. Since the top-level blocks in the
+ // vector loop region are guaranteed to execute if the vector pre-header is,
+ // we don't need to check speculation safety.
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ assert(Preheader->getSingleSuccessor() == LoopRegion &&
+         "Expected vector preheader's successor to be the vector loop region");
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
if (CannotHoistRecipe(R))
continue;
- // TODO: Relax checks in the future, e.g. we could also hoist reads, if
- // their memory location is not modified in the vector loop.
- if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
- any_of(R.operands(), [](VPValue *Op) {
+ if (any_of(R.operands(), [](VPValue *Op) {
return !Op->isDefinedOutsideLoopRegions();
}))
continue;
@@ -2319,7 +2344,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
- auto *CanonicalIVPHI = Plan.getCanonicalIV();
+ auto *CanonicalIVPHI = TopRegion->getCanonicalIV();
VPValue *StartV = CanonicalIVPHI->getStartValue();
auto *CanonicalIVIncrement =
@@ -2358,7 +2383,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
// Create the active lane mask instruction in the VPlan preheader.
VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
- ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+ ConstantInt::get(TopRegion->getCanonicalIV()->getScalarType(), 1));
auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
{EntryIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.entry");
@@ -2394,13 +2419,15 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
/// TODO: Introduce explicit recipe for header-mask instead of searching
/// for the header-mask pattern manually.
static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
SmallVector<VPValue *> WideCanonicalIVs;
- auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(),
- IsaPred<VPWidenCanonicalIVRecipe>);
- assert(count_if(Plan.getCanonicalIV()->users(),
+ auto *FoundWidenCanonicalIVUser = find_if(
+ LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
+ assert(count_if(LoopRegion->getCanonicalIV()->users(),
IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
"Must have at most one VPWideCanonicalIVRecipe");
- if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
+ if (FoundWidenCanonicalIVUser !=
+ LoopRegion->getCanonicalIV()->users().end()) {
auto *WideCanonicalIV =
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
WideCanonicalIVs.push_back(WideCanonicalIV);
@@ -2408,7 +2435,7 @@ static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
// Also include VPWidenIntOrFpInductionRecipes that represent a widened
// version of the canonical induction.
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
@@ -2441,8 +2468,9 @@ void VPlanTransforms::addActiveLaneMask(
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");
- auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(),
- IsaPred<VPWidenCanonicalIVRecipe>);
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ auto *FoundWidenCanonicalIVUser = find_if(
+ LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
assert(FoundWidenCanonicalIVUser &&
"Must have widened canonical IV when tail folding!");
VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan);
@@ -2455,7 +2483,7 @@ void VPlanTransforms::addActiveLaneMask(
} else {
VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
- ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+ ConstantInt::get(LoopRegion->getCanonicalIV()->getScalarType(), 1));
LaneMask =
B.createNaryOp(VPInstruction::ActiveLaneMask,
{WideCanonicalIV, Plan.getTripCount(), ALMMultiplier},
@@ -2565,9 +2593,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
});
assert(all_of(Plan.getVFxUF().users(),
- [&Plan](VPUser *U) {
- return match(U, m_c_Add(m_Specific(Plan.getCanonicalIV()),
- m_Specific(&Plan.getVFxUF()))) ||
+ [&LoopRegion, &Plan](VPUser *U) {
+ return match(U,
+ m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
+ m_Specific(&Plan.getVFxUF()))) ||
isa<VPWidenPointerInductionRecipe>(U);
}) &&
"Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
@@ -2722,9 +2751,10 @@ void VPlanTransforms::addExplicitVectorLength(
VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
if (Plan.hasScalarVFOnly())
return;
- VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
- auto *CanonicalIVPHI = Plan.getCanonicalIV();
+ auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();
auto *CanIVTy = CanonicalIVPHI->getScalarType();
VPValue *StartV = CanonicalIVPHI->getStartValue();
@@ -2868,7 +2898,7 @@ void VPlanTransforms::replaceSymbolicStrides(
// evolution.
auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {
auto *R = cast<VPRecipeBase>(&U);
- return R->getParent()->getParent() ||
+ return R->getRegion() ||
R->getParent() == Plan.getVectorLoopRegion()->getSinglePredecessor();
};
ValueToSCEVMapTy RewriteMap;
@@ -3773,8 +3803,7 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
continue;
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
- VPRegionBlock *ParentRegion =
- cast<VPRecipeBase>(U)->getParent()->getParent();
+ VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion();
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
};
if ((isa<VPReplicateRecipe>(DefR) &&
@@ -4164,7 +4193,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
// Adjust induction to reflect that the transformed plan only processes one
// original iteration.
- auto *CanIV = Plan.getCanonicalIV();
+ auto *CanIV = VectorLoop->getCanonicalIV();
auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
VPBuilder PHBuilder(Plan.getVectorPreheader());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 1c4adfc..5aeda3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -69,7 +69,8 @@ class UnrollState {
VPBasicBlock::iterator InsertPtForPhi);
VPValue *getConstantVPV(unsigned Part) {
- Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+ Type *CanIVIntTy =
+ Plan.getVectorLoopRegion()->getCanonicalIV()->getScalarType();
return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
}
@@ -351,8 +352,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// Compute*Result which combine all parts to compute the final value.
VPValue *Op1;
if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
- match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
- m_VPValue(Op1))) ||
+ match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 66748c5..8b1b0e5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -53,7 +53,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
return Expanded;
}
-bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
+bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
if (isa<VPActiveLaneMaskPHIRecipe>(V))
return true;
@@ -67,12 +67,14 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
return B == Plan.getTripCount() &&
- (match(A, m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()), m_One(),
- m_Specific(&Plan.getVF()))) ||
+ (match(A,
+ m_ScalarIVSteps(
+ m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
+ m_One(), m_Specific(&Plan.getVF()))) ||
IsWideCanonicalIV(A));
return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
- B == Plan.getOrCreateBackedgeTakenCount();
+ B == Plan.getBackedgeTakenCount();
}
const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
@@ -102,7 +104,8 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
return all_of(R->operands(), isUniformAcrossVFsAndUFs);
}
- auto *CanonicalIV = R->getParent()->getPlan()->getCanonicalIV();
+ auto *CanonicalIV =
+ R->getParent()->getEnclosingLoopRegion()->getCanonicalIV();
// Canonical IV chain is uniform.
if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue())
return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 0222b0a..9a2497e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -64,7 +64,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
return true;
if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
- const VPRegionBlock *RegionOfR = Rep->getParent()->getParent();
+ const VPRegionBlock *RegionOfR = Rep->getRegion();
// Don't consider recipes in replicate regions as uniform yet; their first
// lane cannot be accessed when executing the replicate region for other
// lanes.
@@ -90,7 +90,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
}
/// Return true if \p V is a header mask in \p Plan.
-bool isHeaderMask(const VPValue *V, VPlan &Plan);
+bool isHeaderMask(const VPValue *V, const VPlan &Plan);
/// Checks if \p V is uniform across all VF lanes and UF parts. It is considered
/// as such if it is either loop invariant (defined outside the vector region)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 5262af6..91734a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -298,11 +298,16 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
return false;
}
}
- if (const auto *EVL = dyn_cast<VPInstruction>(&R)) {
- if (EVL->getOpcode() == VPInstruction::ExplicitVectorLength &&
- !verifyEVLRecipe(*EVL)) {
- errs() << "EVL VPValue is not used correctly\n";
- return false;
+ if (const auto *VPI = dyn_cast<VPInstruction>(&R)) {
+ switch (VPI->getOpcode()) {
+ case VPInstruction::ExplicitVectorLength:
+ if (!verifyEVLRecipe(*VPI)) {
+ errs() << "EVL VPValue is not used correctly\n";
+ return false;
+ }
+ break;
+ default:
+ break;
}
}
}