Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/DXILResource.cpp  47
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp  27
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp  6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp  3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  10
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/CMakeLists.txt  1
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp  22
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp  18
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp  10
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp  6
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp  104
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp  367
-rw-r--r--  llvm/lib/IR/Type.cpp  4
-rw-r--r--  llvm/lib/ObjectYAML/ELFYAML.cpp  18
-rw-r--r--  llvm/lib/Support/Windows/Signals.inc  4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  40
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h  1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp  2
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp  129
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h  8
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp  180
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSchedStrategy.h  19
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h  3
-rw-r--r--  llvm/lib/Target/DirectX/DXIL.td  8
-rw-r--r--  llvm/lib/Target/DirectX/DXILOpBuilder.cpp  8
-rw-r--r--  llvm/lib/Target/DirectX/DXILOpLowering.cpp  25
-rw-r--r--  llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp  16
-rw-r--r--  llvm/lib/Target/Mips/MipsFastISel.cpp  7
-rw-r--r--  llvm/lib/Target/Mips/MipsISelLowering.cpp  7
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp  12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp  2
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td  32
-rw-r--r--  llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp  2
-rw-r--r--  llvm/lib/Target/X86/X86.td  7
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp  36
-rw-r--r--  llvm/lib/TargetParser/Host.cpp  14
-rw-r--r--  llvm/lib/TargetParser/Unix/Host.inc  22
-rw-r--r--  llvm/lib/TargetParser/X86TargetParser.cpp  4
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp  22
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp  6
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp  16
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp  4
-rw-r--r--  llvm/lib/Transforms/Scalar/GuardWidening.cpp  6
-rw-r--r--  llvm/lib/Transforms/Scalar/IndVarSimplify.cpp  54
-rw-r--r--  llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp  4
-rw-r--r--  llvm/lib/Transforms/Scalar/LICM.cpp  16
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp  11
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopPassManager.cpp  4
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp  3
-rw-r--r--  llvm/lib/Transforms/Scalar/NewGVN.cpp  6
-rw-r--r--  llvm/lib/Transforms/Scalar/Reassociate.cpp  42
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalarizer.cpp  8
-rw-r--r--  llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp  7
-rw-r--r--  llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp  4
-rw-r--r--  llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp  4
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp  6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp  8
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanHelpers.h  2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h  24
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp  12
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp  24
62 files changed, 1096 insertions, 431 deletions
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index b78cc03e..f9bf092 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -281,6 +281,38 @@ static StructType *getOrCreateElementStruct(Type *ElemType, StringRef Name) {
return StructType::create(ElemType, Name);
}
+static Type *getTypeWithoutPadding(Type *Ty) {
+ // Recursively remove padding from structures.
+ if (auto *ST = dyn_cast<StructType>(Ty)) {
+ LLVMContext &Ctx = Ty->getContext();
+ SmallVector<Type *> ElementTypes;
+ ElementTypes.reserve(ST->getNumElements());
+ for (Type *ElTy : ST->elements()) {
+ if (isa<PaddingExtType>(ElTy))
+ continue;
+ ElementTypes.push_back(getTypeWithoutPadding(ElTy));
+ }
+
+ // Handle explicitly padded cbuffer arrays like { [ n x paddedty ], ty }
+ if (ElementTypes.size() == 2)
+ if (auto *AT = dyn_cast<ArrayType>(ElementTypes[0]))
+ if (ElementTypes[1] == AT->getElementType())
+ return ArrayType::get(ElementTypes[1], AT->getNumElements() + 1);
+
+ // If we only have a single element, don't wrap it in a struct.
+ if (ElementTypes.size() == 1)
+ return ElementTypes[0];
+
+ return StructType::get(Ctx, ElementTypes, /*IsPacked=*/false);
+ }
+ // Arrays just need to have their element type adjusted.
+ if (auto *AT = dyn_cast<ArrayType>(Ty))
+ return ArrayType::get(getTypeWithoutPadding(AT->getElementType()),
+ AT->getNumElements());
+ // Anything else should be good as is.
+ return Ty;
+}
+
StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) {
SmallString<64> TypeName;
@@ -334,14 +366,21 @@ StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) {
}
case ResourceKind::CBuffer: {
auto *RTy = cast<CBufferExtType>(HandleTy);
- LayoutExtType *LayoutType = cast<LayoutExtType>(RTy->getResourceType());
- StructType *Ty = cast<StructType>(LayoutType->getWrappedType());
SmallString<64> Name = getResourceKindName(Kind);
if (!CBufferName.empty()) {
Name.append(".");
Name.append(CBufferName);
}
- return StructType::create(Ty->elements(), Name);
+
+ // TODO: Remove this when we update the frontend to use explicit padding.
+ if (LayoutExtType *LayoutType =
+ dyn_cast<LayoutExtType>(RTy->getResourceType())) {
+ StructType *Ty = cast<StructType>(LayoutType->getWrappedType());
+ return StructType::create(Ty->elements(), Name);
+ }
+
+ return getOrCreateElementStruct(
+ getTypeWithoutPadding(RTy->getResourceType()), Name);
}
case ResourceKind::Sampler: {
auto *RTy = cast<SamplerExtType>(HandleTy);
@@ -454,10 +493,10 @@ uint32_t ResourceTypeInfo::getCBufferSize(const DataLayout &DL) const {
Type *ElTy = cast<CBufferExtType>(HandleTy)->getResourceType();
+ // TODO: Remove this when we update the frontend to use explicit padding.
if (auto *LayoutTy = dyn_cast<LayoutExtType>(ElTy))
return LayoutTy->getSize();
- // TODO: What should we do with unannotated arrays?
return DL.getTypeAllocSize(ElTy);
}
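
[Note] For illustration, a minimal standalone sketch of the padding-stripping recursion in getTypeWithoutPadding above, using a toy type representation rather than LLVM's Type hierarchy (the cbuffer-array special case is omitted, and all names here are hypothetical):

#include <cassert>
#include <vector>

struct Ty {
  enum Kind { Scalar, Padding, Array, Struct } K;
  std::vector<Ty> Elems; // Struct members, or the single array element type.
  unsigned Count = 0;    // Array element count.
};

static Ty withoutPadding(const Ty &T) {
  if (T.K == Ty::Struct) {
    Ty R{Ty::Struct, {}, 0};
    for (const Ty &E : T.Elems)
      if (E.K != Ty::Padding) // Drop padding members entirely.
        R.Elems.push_back(withoutPadding(E));
    if (R.Elems.size() == 1) // Don't wrap a single element in a struct.
      return R.Elems.front();
    return R;
  }
  if (T.K == Ty::Array) // Arrays just need their element type adjusted.
    return Ty{Ty::Array, {withoutPadding(T.Elems.front())}, T.Count};
  return T; // Anything else is good as-is.
}

int main() {
  // { scalar, padding, { scalar } }  ->  { scalar, scalar }
  Ty S{Ty::Struct,
       {{Ty::Scalar}, {Ty::Padding}, {Ty::Struct, {{Ty::Scalar}}, 0}},
       0};
  Ty R = withoutPadding(S);
  assert(R.K == Ty::Struct && R.Elems.size() == 2);
}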
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index a64b93d..442b9d1 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4623,17 +4623,11 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
/// If Expr computes ~A, return A else return nullptr
static const SCEV *MatchNotExpr(const SCEV *Expr) {
- const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
- if (!Add || Add->getNumOperands() != 2 ||
- !Add->getOperand(0)->isAllOnesValue())
- return nullptr;
-
- const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
- if (!AddRHS || AddRHS->getNumOperands() != 2 ||
- !AddRHS->getOperand(0)->isAllOnesValue())
- return nullptr;
-
- return AddRHS->getOperand(1);
+ const SCEV *MulOp;
+ if (match(Expr, m_scev_Add(m_scev_AllOnes(),
+ m_scev_Mul(m_scev_AllOnes(), m_SCEV(MulOp)))))
+ return MulOp;
+ return nullptr;
}
/// Return a SCEV corresponding to ~V = -1-V
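
[Note] The new matcher relies on the two's-complement identity ~A == -1 - A == (-1) + (-1)*A, which is exactly the add-of-mul shape m_scev_Add/m_scev_Mul match above. A quick standalone check of the identity:

#include <cassert>
#include <cstdint>

int main() {
  // Two's-complement identity behind MatchNotExpr:
  //   ~A == -1 - A == (-1) + (-1) * A
  for (int64_t A : {0, 1, -7, 123456789}) {
    assert(~A == -1 - A);
    assert(~A == (-1) + (-1) * A);
  }
}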
@@ -12220,12 +12214,11 @@ ScalarEvolution::computeConstantDifference(const SCEV *More, const SCEV *Less) {
// Try to match a common constant multiply.
auto MatchConstMul =
[](const SCEV *S) -> std::optional<std::pair<const SCEV *, APInt>> {
- auto *M = dyn_cast<SCEVMulExpr>(S);
- if (!M || M->getNumOperands() != 2 ||
- !isa<SCEVConstant>(M->getOperand(0)))
- return std::nullopt;
- return {
- {M->getOperand(1), cast<SCEVConstant>(M->getOperand(0))->getAPInt()}};
+ const APInt *C;
+ const SCEV *Op;
+ if (match(S, m_scev_Mul(m_scev_APInt(C), m_SCEV(Op))))
+ return {{Op, *C}};
+ return std::nullopt;
};
if (auto MatchedMore = MatchConstMul(More)) {
if (auto MatchedLess = MatchConstMul(Less)) {
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index 25c1db9..ded4df4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -55,12 +55,10 @@ LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
}
LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
- LLT NewEltTy) {
+ ElementCount EC) {
return [=](const LegalityQuery &Query) {
const LLT OldTy = Query.Types[TypeIdx];
- ElementCount NewEltCount = NewEltTy.isVector() ? NewEltTy.getElementCount()
- : ElementCount::getFixed(1);
- return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+ return std::make_pair(TypeIdx, OldTy.changeElementCount(EC));
};
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 055fdc6..ca82857 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -818,8 +818,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (!DefMI)
return false;
- const TargetMachine& TM = DefMI->getMF()->getTarget();
- if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
+ if (DefMI->getFlag(MachineInstr::FmNoNans))
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0f2b518..cb0038c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3971,8 +3971,14 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) {
}
void SelectionDAGBuilder::visitPtrToAddr(const User &I) {
- // FIXME: this is not correct for pointers with addr width != pointer width
- visitPtrToInt(I);
+ SDValue N = getValue(I.getOperand(0));
+ // By definition the type of the ptrtoaddr must be equal to the address type.
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ EVT AddrVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ // The address width must be smaller or equal to the pointer representation
+ // width, so we lower ptrtoaddr as a truncate (possibly folded to a no-op).
+ N = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), AddrVT, N);
+ setValue(&I, N);
}
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
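
[Note] The lowering above rests on the invariant that the address width never exceeds the pointer representation width, so extracting the address is a truncate (folded to a no-op when the widths match). A toy scalar model, with purely illustrative widths:

#include <cassert>
#include <cstdint>

// Toy model of ptrtoaddr on a target with a 64-bit pointer representation
// but a 32-bit address: the lowering keeps only the low address bits.
static uint32_t ptrtoaddr32(uint64_t PtrReprBits) {
  return static_cast<uint32_t>(PtrReprBits); // the ISD::TRUNCATE step
}

int main() {
  assert(ptrtoaddr32(0xAABBCCDD11223344ull) == 0x11223344u);
}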
diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
index 0ffe3ae..f343925 100644
--- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
@@ -56,6 +56,7 @@ add_llvm_component_library(LLVMOrcJIT
SectCreate.cpp
SelfExecutorProcessControl.cpp
SimpleRemoteEPC.cpp
+ SimpleRemoteMemoryMapper.cpp
Speculation.cpp
SpeculateAnalyses.cpp
ExecutorProcessControl.cpp
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
index 50e6b25..0833af7 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
@@ -57,16 +57,17 @@ public:
std::swap(FR.Actions, G.allocActions());
Parent.EPC.callSPSWrapperAsync<
- rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
- Parent.SAs.Finalize,
+ rt::SPSSimpleExecutorMemoryManagerInitializeSignature>(
+ Parent.SAs.Initialize,
[OnFinalize = std::move(OnFinalize), AllocAddr = this->AllocAddr](
- Error SerializationErr, Error FinalizeErr) mutable {
+ Error SerializationErr,
+ Expected<ExecutorAddr> InitializeKey) mutable {
// FIXME: Release abandoned alloc.
if (SerializationErr) {
- cantFail(std::move(FinalizeErr));
+ cantFail(InitializeKey.takeError());
OnFinalize(std::move(SerializationErr));
- } else if (FinalizeErr)
- OnFinalize(std::move(FinalizeErr));
+ } else if (!InitializeKey)
+ OnFinalize(InitializeKey.takeError());
else
OnFinalize(FinalizedAlloc(AllocAddr));
},
@@ -76,8 +77,8 @@ public:
void abandon(OnAbandonedFunction OnAbandoned) override {
// FIXME: Return memory to pool instead.
Parent.EPC.callSPSWrapperAsync<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
- Parent.SAs.Deallocate,
+ rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
+ Parent.SAs.Release,
[OnAbandoned = std::move(OnAbandoned)](Error SerializationErr,
Error DeallocateErr) mutable {
if (SerializationErr) {
@@ -123,9 +124,8 @@ void EPCGenericJITLinkMemoryManager::allocate(const JITLinkDylib *JD,
void EPCGenericJITLinkMemoryManager::deallocate(
std::vector<FinalizedAlloc> Allocs, OnDeallocatedFunction OnDeallocated) {
- EPC.callSPSWrapperAsync<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
- SAs.Deallocate,
+ EPC.callSPSWrapperAsync<rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
+ SAs.Release,
[OnDeallocated = std::move(OnDeallocated)](Error SerErr,
Error DeallocErr) mutable {
if (SerErr) {
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
index fec7062..cc72488 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
@@ -25,9 +25,9 @@ EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(
if (auto Err = EPC.getBootstrapSymbols(
{{SAs.Instance, rt::SimpleExecutorMemoryManagerInstanceName},
{SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
- {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
- {SAs.Deallocate,
- rt::SimpleExecutorMemoryManagerDeallocateWrapperName},
+ {SAs.Initialize,
+ rt::SimpleExecutorMemoryManagerInitializeWrapperName},
+ {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName},
{SAs.RegisterEHFrame, rt::RegisterEHFrameSectionAllocActionName},
{SAs.DeregisterEHFrame,
rt::DeregisterEHFrameSectionAllocActionName}}))
@@ -48,7 +48,7 @@ EPCGenericRTDyldMemoryManager::~EPCGenericRTDyldMemoryManager() {
Error Err = Error::success();
if (auto Err2 = EPC.callSPSWrapper<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
+ rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
SAs.Reserve, Err, SAs.Instance, FinalizedAllocs)) {
// FIXME: Report errors through EPC once that functionality is available.
logAllUnhandledErrors(std::move(Err2), errs(), "");
@@ -267,10 +267,10 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
// We'll also need to make an extra allocation for the eh-frame wrapper call
// arguments.
- Error FinalizeErr = Error::success();
+ Expected<ExecutorAddr> InitializeKey((ExecutorAddr()));
if (auto Err = EPC.callSPSWrapper<
- rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
- SAs.Finalize, FinalizeErr, SAs.Instance, std::move(FR))) {
+ rt::SPSSimpleExecutorMemoryManagerInitializeSignature>(
+ SAs.Initialize, InitializeKey, SAs.Instance, std::move(FR))) {
std::lock_guard<std::mutex> Lock(M);
this->ErrMsg = toString(std::move(Err));
dbgs() << "Serialization error: " << this->ErrMsg << "\n";
@@ -278,9 +278,9 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
*ErrMsg = this->ErrMsg;
return true;
}
- if (FinalizeErr) {
+ if (!InitializeKey) {
std::lock_guard<std::mutex> Lock(M);
- this->ErrMsg = toString(std::move(FinalizeErr));
+ this->ErrMsg = toString(InitializeKey.takeError());
dbgs() << "Finalization error: " << this->ErrMsg << "\n";
if (ErrMsg)
*ErrMsg = this->ErrMsg;
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
index 26e8f53..cc99d3c 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
@@ -23,10 +23,12 @@ const char *SimpleExecutorMemoryManagerInstanceName =
"__llvm_orc_SimpleExecutorMemoryManager_Instance";
const char *SimpleExecutorMemoryManagerReserveWrapperName =
"__llvm_orc_SimpleExecutorMemoryManager_reserve_wrapper";
-const char *SimpleExecutorMemoryManagerFinalizeWrapperName =
- "__llvm_orc_SimpleExecutorMemoryManager_finalize_wrapper";
-const char *SimpleExecutorMemoryManagerDeallocateWrapperName =
- "__llvm_orc_SimpleExecutorMemoryManager_deallocate_wrapper";
+const char *SimpleExecutorMemoryManagerInitializeWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_initialize_wrapper";
+const char *SimpleExecutorMemoryManagerDeinitializeWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_deinitialize_wrapper";
+const char *SimpleExecutorMemoryManagerReleaseWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_release_wrapper";
const char *ExecutorSharedMemoryMapperServiceInstanceName =
"__llvm_orc_ExecutorSharedMemoryMapperService_Instance";
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
index 87d7578..dec1df7 100644
--- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
@@ -216,9 +216,9 @@ SimpleRemoteEPC::createDefaultMemoryManager(SimpleRemoteEPC &SREPC) {
if (auto Err = SREPC.getBootstrapSymbols(
{{SAs.Allocator, rt::SimpleExecutorMemoryManagerInstanceName},
{SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
- {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
- {SAs.Deallocate,
- rt::SimpleExecutorMemoryManagerDeallocateWrapperName}}))
+ {SAs.Initialize,
+ rt::SimpleExecutorMemoryManagerInitializeWrapperName},
+ {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName}}))
return std::move(Err);
return std::make_unique<EPCGenericJITLinkMemoryManager>(SREPC, SAs);
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp
new file mode 100644
index 0000000..b82de3f
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp
@@ -0,0 +1,104 @@
+//===---- SimpleRemoteMemoryMapper.cpp - Remote memory mapper ----*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.h"
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+
+namespace llvm::orc {
+
+SimpleRemoteMemoryMapper::SimpleRemoteMemoryMapper(ExecutorProcessControl &EPC,
+ SymbolAddrs SAs)
+ : EPC(EPC), SAs(SAs) {}
+
+void SimpleRemoteMemoryMapper::reserve(size_t NumBytes,
+ OnReservedFunction OnReserved) {
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapReserveSignature>(
+ SAs.Reserve,
+ [NumBytes, OnReserved = std::move(OnReserved)](
+ Error SerializationErr, Expected<ExecutorAddr> Result) mutable {
+ if (SerializationErr) {
+ cantFail(Result.takeError());
+ return OnReserved(std::move(SerializationErr));
+ }
+
+ if (Result)
+ OnReserved(ExecutorAddrRange(*Result, NumBytes));
+ else
+ OnReserved(Result.takeError());
+ },
+ SAs.Instance, static_cast<uint64_t>(NumBytes));
+}
+
+char *SimpleRemoteMemoryMapper::prepare(jitlink::LinkGraph &G,
+ ExecutorAddr Addr, size_t ContentSize) {
+ return G.allocateBuffer(ContentSize).data();
+}
+
+void SimpleRemoteMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
+ OnInitializedFunction OnInitialized) {
+
+ tpctypes::FinalizeRequest FR;
+
+ std::swap(FR.Actions, AI.Actions);
+ FR.Segments.reserve(AI.Segments.size());
+
+ for (auto Seg : AI.Segments)
+ FR.Segments.push_back({Seg.AG, AI.MappingBase + Seg.Offset,
+ Seg.ContentSize + Seg.ZeroFillSize,
+ ArrayRef<char>(Seg.WorkingMem, Seg.ContentSize)});
+
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapInitializeSignature>(
+ SAs.Initialize,
+ [OnInitialized = std::move(OnInitialized)](
+ Error SerializationErr, Expected<ExecutorAddr> Result) mutable {
+ if (SerializationErr) {
+ cantFail(Result.takeError());
+ return OnInitialized(std::move(SerializationErr));
+ }
+
+ OnInitialized(std::move(Result));
+ },
+ SAs.Instance, std::move(FR));
+}
+
+void SimpleRemoteMemoryMapper::deinitialize(
+ ArrayRef<ExecutorAddr> Allocations,
+ MemoryMapper::OnDeinitializedFunction OnDeinitialized) {
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapDeinitializeSignature>(
+ SAs.Deinitialize,
+ [OnDeinitialized = std::move(OnDeinitialized)](Error SerializationErr,
+ Error Result) mutable {
+ if (SerializationErr) {
+ cantFail(std::move(Result));
+ return OnDeinitialized(std::move(SerializationErr));
+ }
+
+ OnDeinitialized(std::move(Result));
+ },
+ SAs.Instance, Allocations);
+}
+
+void SimpleRemoteMemoryMapper::release(ArrayRef<ExecutorAddr> Bases,
+ OnReleasedFunction OnReleased) {
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapReleaseSignature>(
+ SAs.Release,
+ [OnReleased = std::move(OnReleased)](Error SerializationErr,
+ Error Result) mutable {
+ if (SerializationErr) {
+ cantFail(std::move(Result));
+ return OnReleased(std::move(SerializationErr));
+ }
+
+ return OnReleased(std::move(Result));
+ },
+ SAs.Instance, Bases);
+}
+
+} // namespace llvm::orc
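
[Note] The mapper drives a four-phase remote protocol: reserve address space, prepare content locally, initialize (copy, protect, and run finalize actions, keyed by the returned address), then deinitialize and release in reverse. A bookkeeping-only sketch of that lifecycle, with hypothetical names and no real memory operations:

#include <cassert>
#include <cstdint>
#include <map>

struct ToyExecutor {
  std::map<uint64_t, uint64_t> Reserved; // base -> size
  std::map<uint64_t, bool> Initialized;  // init key (== region base) -> live
  uint64_t NextBase = 0x10000;

  uint64_t reserve(uint64_t Size) {
    uint64_t Base = NextBase;
    NextBase += Size;
    Reserved[Base] = Size;
    return Base;
  }
  // Returns the key later passed to deinitialize().
  uint64_t initialize(uint64_t Base) {
    Initialized[Base] = true; // copy/protect/finalize actions would run here
    return Base;
  }
  void deinitialize(uint64_t Key) { Initialized.erase(Key); }
  void release(uint64_t Base) { Reserved.erase(Base); }
};

int main() {
  ToyExecutor E;
  uint64_t Base = E.reserve(0x4000);
  uint64_t Key = E.initialize(Base);
  E.deinitialize(Key); // dealloc actions run here
  E.release(Base);     // whole reservation unmapped
  assert(E.Reserved.empty() && E.Initialized.empty());
}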
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
index 3cdffb8..fe881a1 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
@@ -8,6 +8,7 @@
#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
#include "llvm/Support/FormatVariadic.h"
@@ -18,166 +19,167 @@ namespace orc {
namespace rt_bootstrap {
SimpleExecutorMemoryManager::~SimpleExecutorMemoryManager() {
- assert(Allocations.empty() && "shutdown not called?");
+ assert(Slabs.empty() && "shutdown not called?");
}
-Expected<ExecutorAddr> SimpleExecutorMemoryManager::allocate(uint64_t Size) {
+Expected<ExecutorAddr> SimpleExecutorMemoryManager::reserve(uint64_t Size) {
std::error_code EC;
auto MB = sys::Memory::allocateMappedMemory(
Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
if (EC)
return errorCodeToError(EC);
std::lock_guard<std::mutex> Lock(M);
- assert(!Allocations.count(MB.base()) && "Duplicate allocation addr");
- Allocations[MB.base()].Size = Size;
+ assert(!Slabs.count(MB.base()) && "Duplicate allocation addr");
+ Slabs[MB.base()].Size = Size;
return ExecutorAddr::fromPtr(MB.base());
}
-Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) {
- ExecutorAddr Base(~0ULL);
+Expected<ExecutorAddr>
+SimpleExecutorMemoryManager::initialize(tpctypes::FinalizeRequest &FR) {
std::vector<shared::WrapperFunctionCall> DeallocationActions;
- size_t SuccessfulFinalizationActions = 0;
if (FR.Segments.empty()) {
- // NOTE: Finalizing nothing is currently a no-op. Should it be an error?
if (FR.Actions.empty())
- return Error::success();
+ return make_error<StringError>("Finalization request is empty",
+ inconvertibleErrorCode());
else
return make_error<StringError>("Finalization actions attached to empty "
"finalization request",
inconvertibleErrorCode());
}
- for (auto &Seg : FR.Segments)
- Base = std::min(Base, Seg.Addr);
-
- for (auto &ActPair : FR.Actions)
- if (ActPair.Dealloc)
- DeallocationActions.push_back(ActPair.Dealloc);
-
- // Get the Allocation for this finalization.
- size_t AllocSize = 0;
- {
- std::lock_guard<std::mutex> Lock(M);
- auto I = Allocations.find(Base.toPtr<void *>());
- if (I == Allocations.end())
- return make_error<StringError>("Attempt to finalize unrecognized "
- "allocation " +
- formatv("{0:x}", Base.getValue()),
- inconvertibleErrorCode());
- AllocSize = I->second.Size;
- I->second.DeallocationActions = std::move(DeallocationActions);
- }
- ExecutorAddr AllocEnd = Base + ExecutorAddrDiff(AllocSize);
-
- // Bail-out function: this will run deallocation actions corresponding to any
- // completed finalization actions, then deallocate memory.
- auto BailOut = [&](Error Err) {
- std::pair<void *, Allocation> AllocToDestroy;
-
- // Get allocation to destroy.
- {
- std::lock_guard<std::mutex> Lock(M);
- auto I = Allocations.find(Base.toPtr<void *>());
-
- // Check for missing allocation (effective a double free).
- if (I == Allocations.end())
- return joinErrors(
- std::move(Err),
- make_error<StringError>("No allocation entry found "
- "for " +
- formatv("{0:x}", Base.getValue()),
- inconvertibleErrorCode()));
- AllocToDestroy = std::move(*I);
- Allocations.erase(I);
- }
+ ExecutorAddrRange RR(FR.Segments.front().Addr, FR.Segments.front().Addr);
- // Run deallocation actions for all completed finalization actions.
- while (SuccessfulFinalizationActions)
- Err =
- joinErrors(std::move(Err), FR.Actions[--SuccessfulFinalizationActions]
- .Dealloc.runWithSPSRetErrorMerged());
-
- // Deallocate memory.
- sys::MemoryBlock MB(AllocToDestroy.first, AllocToDestroy.second.Size);
- if (auto EC = sys::Memory::releaseMappedMemory(MB))
- Err = joinErrors(std::move(Err), errorCodeToError(EC));
-
- return Err;
- };
+ std::vector<sys::MemoryBlock> MBsToReset;
+ auto ResetMBs = make_scope_exit([&]() {
+ for (auto &MB : MBsToReset)
+ sys::Memory::protectMappedMemory(MB, sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE);
+ sys::Memory::InvalidateInstructionCache(RR.Start.toPtr<void *>(),
+ RR.size());
+ });
// Copy content and apply permissions.
for (auto &Seg : FR.Segments) {
+ RR.Start = std::min(RR.Start, Seg.Addr);
+ RR.End = std::max(RR.End, Seg.Addr + Seg.Size);
// Check segment ranges.
if (LLVM_UNLIKELY(Seg.Size < Seg.Content.size()))
- return BailOut(make_error<StringError>(
+ return make_error<StringError>(
formatv("Segment {0:x} content size ({1:x} bytes) "
"exceeds segment size ({2:x} bytes)",
Seg.Addr.getValue(), Seg.Content.size(), Seg.Size),
- inconvertibleErrorCode()));
+ inconvertibleErrorCode());
ExecutorAddr SegEnd = Seg.Addr + ExecutorAddrDiff(Seg.Size);
- if (LLVM_UNLIKELY(Seg.Addr < Base || SegEnd > AllocEnd))
- return BailOut(make_error<StringError>(
+ if (LLVM_UNLIKELY(Seg.Addr < RR.Start || SegEnd > RR.End))
+ return make_error<StringError>(
formatv("Segment {0:x} -- {1:x} crosses boundary of "
"allocation {2:x} -- {3:x}",
- Seg.Addr.getValue(), SegEnd.getValue(), Base.getValue(),
- AllocEnd.getValue()),
- inconvertibleErrorCode()));
+ Seg.Addr, SegEnd, RR.Start, RR.End),
+ inconvertibleErrorCode());
char *Mem = Seg.Addr.toPtr<char *>();
if (!Seg.Content.empty())
memcpy(Mem, Seg.Content.data(), Seg.Content.size());
memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size());
assert(Seg.Size <= std::numeric_limits<size_t>::max());
+
+ sys::MemoryBlock MB(Mem, Seg.Size);
if (auto EC = sys::Memory::protectMappedMemory(
- {Mem, static_cast<size_t>(Seg.Size)},
- toSysMemoryProtectionFlags(Seg.RAG.Prot)))
- return BailOut(errorCodeToError(EC));
+ MB, toSysMemoryProtectionFlags(Seg.RAG.Prot)))
+ return errorCodeToError(EC);
+
+ MBsToReset.push_back(MB);
+
if ((Seg.RAG.Prot & MemProt::Exec) == MemProt::Exec)
sys::Memory::InvalidateInstructionCache(Mem, Seg.Size);
}
- // Run finalization actions.
- for (auto &ActPair : FR.Actions) {
- if (auto Err = ActPair.Finalize.runWithSPSRetErrorMerged())
- return BailOut(std::move(Err));
- ++SuccessfulFinalizationActions;
+ auto DeallocActions = runFinalizeActions(FR.Actions);
+ if (!DeallocActions)
+ return DeallocActions.takeError();
+
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ auto Region = createRegionInfo(RR, "In initialize");
+ if (!Region)
+ return Region.takeError();
+ Region->DeallocActions = std::move(*DeallocActions);
}
- return Error::success();
+ // Successful initialization.
+ ResetMBs.release();
+
+ return RR.Start;
}
-Error SimpleExecutorMemoryManager::deallocate(
- const std::vector<ExecutorAddr> &Bases) {
- std::vector<std::pair<void *, Allocation>> AllocPairs;
- AllocPairs.reserve(Bases.size());
+Error SimpleExecutorMemoryManager::deinitialize(
+ const std::vector<ExecutorAddr> &InitKeys) {
+ Error Err = Error::success();
- // Get allocation to destroy.
+ for (auto &KeyAddr : llvm::reverse(InitKeys)) {
+ std::vector<shared::WrapperFunctionCall> DeallocActions;
+ {
+ std::scoped_lock<std::mutex> Lock(M);
+ auto Slab = getSlabInfo(KeyAddr, "In deinitialize");
+ if (!Slab) {
+ Err = joinErrors(std::move(Err), Slab.takeError());
+ continue;
+ }
+
+ auto RI = getRegionInfo(*Slab, KeyAddr, "In deinitialize");
+ if (!RI) {
+ Err = joinErrors(std::move(Err), RI.takeError());
+ continue;
+ }
+
+ DeallocActions = std::move(RI->DeallocActions);
+ }
+
+ Err = joinErrors(std::move(Err),
+ runDeallocActions(std::move(DeallocActions)));
+ }
+
+ return Err;
+}
+
+Error SimpleExecutorMemoryManager::release(
+ const std::vector<ExecutorAddr> &Bases) {
Error Err = Error::success();
- {
- std::lock_guard<std::mutex> Lock(M);
- for (auto &Base : Bases) {
- auto I = Allocations.find(Base.toPtr<void *>());
-
- // Check for missing allocation (effective a double free).
- if (I != Allocations.end()) {
- AllocPairs.push_back(std::move(*I));
- Allocations.erase(I);
- } else
+
+ // TODO: Prohibit new initializations within the slabs being removed?
+ for (auto &Base : llvm::reverse(Bases)) {
+ std::vector<shared::WrapperFunctionCall> DeallocActions;
+ sys::MemoryBlock MB;
+
+ {
+ std::scoped_lock<std::mutex> Lock(M);
+
+ auto SlabI = Slabs.find(Base.toPtr<void *>());
+ if (SlabI == Slabs.end()) {
Err = joinErrors(
std::move(Err),
- make_error<StringError>("No allocation entry found "
- "for " +
- formatv("{0:x}", Base.getValue()),
+ make_error<StringError>("In release, " + formatv("{0:x}", Base) +
+ " is not part of any reserved "
+ "address range",
inconvertibleErrorCode()));
+ continue;
+ }
+
+ auto &Slab = SlabI->second;
+
+ for (auto &[Addr, Region] : Slab.Regions)
+ llvm::copy(Region.DeallocActions, back_inserter(DeallocActions));
+
+ MB = {Base.toPtr<void *>(), Slab.Size};
+
+ Slabs.erase(SlabI);
}
- }
- while (!AllocPairs.empty()) {
- auto &P = AllocPairs.back();
- Err = joinErrors(std::move(Err), deallocateImpl(P.first, P.second));
- AllocPairs.pop_back();
+ Err = joinErrors(std::move(Err), runDeallocActions(DeallocActions));
+ if (auto EC = sys::Memory::releaseMappedMemory(MB))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
}
return Err;
@@ -185,16 +187,15 @@ Error SimpleExecutorMemoryManager::deallocate(
Error SimpleExecutorMemoryManager::shutdown() {
- AllocationsMap AM;
+ // TODO: Prevent new allocations during shutdown.
+ std::vector<ExecutorAddr> Bases;
{
- std::lock_guard<std::mutex> Lock(M);
- AM = std::move(Allocations);
+ std::scoped_lock<std::mutex> Lock(M);
+ for (auto &[Base, Slab] : Slabs)
+ Bases.push_back(ExecutorAddr::fromPtr(Base));
}
- Error Err = Error::success();
- for (auto &KV : AM)
- Err = joinErrors(std::move(Err), deallocateImpl(KV.first, KV.second));
- return Err;
+ return release(Bases);
}
void SimpleExecutorMemoryManager::addBootstrapSymbols(
@@ -202,58 +203,150 @@ void SimpleExecutorMemoryManager::addBootstrapSymbols(
M[rt::SimpleExecutorMemoryManagerInstanceName] = ExecutorAddr::fromPtr(this);
M[rt::SimpleExecutorMemoryManagerReserveWrapperName] =
ExecutorAddr::fromPtr(&reserveWrapper);
- M[rt::SimpleExecutorMemoryManagerFinalizeWrapperName] =
- ExecutorAddr::fromPtr(&finalizeWrapper);
- M[rt::SimpleExecutorMemoryManagerDeallocateWrapperName] =
- ExecutorAddr::fromPtr(&deallocateWrapper);
+ M[rt::SimpleExecutorMemoryManagerInitializeWrapperName] =
+ ExecutorAddr::fromPtr(&initializeWrapper);
+ M[rt::SimpleExecutorMemoryManagerDeinitializeWrapperName] =
+ ExecutorAddr::fromPtr(&deinitializeWrapper);
+ M[rt::SimpleExecutorMemoryManagerReleaseWrapperName] =
+ ExecutorAddr::fromPtr(&releaseWrapper);
}
-Error SimpleExecutorMemoryManager::deallocateImpl(void *Base, Allocation &A) {
- Error Err = Error::success();
+Expected<SimpleExecutorMemoryManager::SlabInfo &>
+SimpleExecutorMemoryManager::getSlabInfo(ExecutorAddr A, StringRef Context) {
+ auto MakeBadSlabError = [&]() {
+ return make_error<StringError>(
+ Context + ", address " + formatv("{0:x}", A) +
+ " is not part of any reserved address range",
+ inconvertibleErrorCode());
+ };
- while (!A.DeallocationActions.empty()) {
- Err = joinErrors(std::move(Err),
- A.DeallocationActions.back().runWithSPSRetErrorMerged());
- A.DeallocationActions.pop_back();
+ auto I = Slabs.upper_bound(A.toPtr<void *>());
+ if (I == Slabs.begin())
+ return MakeBadSlabError();
+ --I;
+ if (!ExecutorAddrRange(ExecutorAddr::fromPtr(I->first), I->second.Size)
+ .contains(A))
+ return MakeBadSlabError();
+
+ return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::SlabInfo &>
+SimpleExecutorMemoryManager::getSlabInfo(ExecutorAddrRange R,
+ StringRef Context) {
+ auto MakeBadSlabError = [&]() {
+ return make_error<StringError>(
+ Context + ", range " + formatv("{0:x}", R) +
+ " is not part of any reserved address range",
+ inconvertibleErrorCode());
+ };
+
+ auto I = Slabs.upper_bound(R.Start.toPtr<void *>());
+ if (I == Slabs.begin())
+ return MakeBadSlabError();
+ --I;
+ if (!ExecutorAddrRange(ExecutorAddr::fromPtr(I->first), I->second.Size)
+ .contains(R))
+ return MakeBadSlabError();
+
+ return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::createRegionInfo(ExecutorAddrRange R,
+ StringRef Context) {
+
+ auto Slab = getSlabInfo(R, Context);
+ if (!Slab)
+ return Slab.takeError();
+
+ auto MakeBadRegionError = [&](ExecutorAddrRange Other, bool Prev) {
+ return make_error<StringError>(Context + ", region " + formatv("{0:x}", R) +
+ " overlaps " +
+ (Prev ? "previous" : "following") +
+ " region " + formatv("{0:x}", Other),
+ inconvertibleErrorCode());
+ };
+
+ auto I = Slab->Regions.upper_bound(R.Start);
+ if (I != Slab->Regions.begin()) {
+ auto J = std::prev(I);
+ ExecutorAddrRange PrevRange(J->first, J->second.Size);
+ if (PrevRange.overlaps(R))
+ return MakeBadRegionError(PrevRange, true);
+ }
+ if (I != Slab->Regions.end()) {
+ ExecutorAddrRange NextRange(I->first, I->second.Size);
+ if (NextRange.overlaps(R))
+ return MakeBadRegionError(NextRange, false);
}
- sys::MemoryBlock MB(Base, A.Size);
- if (auto EC = sys::Memory::releaseMappedMemory(MB))
- Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ auto &RInfo = Slab->Regions[R.Start];
+ RInfo.Size = R.size();
+ return RInfo;
+}
- return Err;
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::getRegionInfo(SlabInfo &Slab, ExecutorAddr A,
+ StringRef Context) {
+ auto I = Slab.Regions.find(A);
+ if (I == Slab.Regions.end())
+ return make_error<StringError>(
+ Context + ", address " + formatv("{0:x}", A) +
+ " does not correspond to the start of any initialized region",
+ inconvertibleErrorCode());
+
+ return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::getRegionInfo(ExecutorAddr A, StringRef Context) {
+ auto Slab = getSlabInfo(A, Context);
+ if (!Slab)
+ return Slab.takeError();
+
+ return getRegionInfo(*Slab, A, Context);
}
llvm::orc::shared::CWrapperFunctionResult
SimpleExecutorMemoryManager::reserveWrapper(const char *ArgData,
size_t ArgSize) {
- return shared::WrapperFunction<
- rt::SPSSimpleExecutorMemoryManagerReserveSignature>::
+ return shared::WrapperFunction<rt::SPSSimpleRemoteMemoryMapReserveSignature>::
handle(ArgData, ArgSize,
shared::makeMethodWrapperHandler(
- &SimpleExecutorMemoryManager::allocate))
+ &SimpleExecutorMemoryManager::reserve))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorMemoryManager::initializeWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::
+ WrapperFunction<rt::SPSSimpleRemoteMemoryMapInitializeSignature>::handle(
+ ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorMemoryManager::initialize))
.release();
}
llvm::orc::shared::CWrapperFunctionResult
-SimpleExecutorMemoryManager::finalizeWrapper(const char *ArgData,
- size_t ArgSize) {
+SimpleExecutorMemoryManager::deinitializeWrapper(const char *ArgData,
+ size_t ArgSize) {
return shared::WrapperFunction<
- rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>::
+ rt::SPSSimpleRemoteMemoryMapDeinitializeSignature>::
handle(ArgData, ArgSize,
shared::makeMethodWrapperHandler(
- &SimpleExecutorMemoryManager::finalize))
+ &SimpleExecutorMemoryManager::deinitialize))
.release();
}
llvm::orc::shared::CWrapperFunctionResult
-SimpleExecutorMemoryManager::deallocateWrapper(const char *ArgData,
- size_t ArgSize) {
- return shared::WrapperFunction<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>::
+SimpleExecutorMemoryManager::releaseWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<rt::SPSSimpleRemoteMemoryMapReleaseSignature>::
handle(ArgData, ArgSize,
shared::makeMethodWrapperHandler(
- &SimpleExecutorMemoryManager::deallocate))
+ &SimpleExecutorMemoryManager::release))
.release();
}
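
[Note] getSlabInfo above finds the slab containing an address via the standard map::upper_bound idiom: the candidate is the entry just before upper_bound, and it matches only if the address falls inside [base, base + size). A self-contained sketch of that lookup:

#include <cassert>
#include <cstdint>
#include <map>

struct Slab { uint64_t Size; };

static Slab *findSlab(std::map<uint64_t, Slab> &Slabs, uint64_t A) {
  auto I = Slabs.upper_bound(A); // first slab with base > A
  if (I == Slabs.begin())
    return nullptr;
  --I; // candidate slab with base <= A
  if (A >= I->first + I->second.Size)
    return nullptr; // A lies past the candidate's end
  return &I->second;
}

int main() {
  std::map<uint64_t, Slab> Slabs{{0x1000, {0x100}}, {0x4000, {0x1000}}};
  assert(findSlab(Slabs, 0x1080));  // inside the first slab
  assert(!findSlab(Slabs, 0x2000)); // in the gap between slabs
  assert(findSlab(Slabs, 0x4FFF));  // last byte of the second slab
  assert(!findSlab(Slabs, 0x5000)); // one past the end
}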
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 9db48e8..0e9535d 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -1034,6 +1034,10 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
}
// DirectX resources
+ if (Name == "dx.Padding")
+ return TargetTypeInfo(
+ ArrayType::get(Type::getInt8Ty(C), Ty->getIntParameter(0)),
+ TargetExtType::CanBeGlobal);
if (Name.starts_with("dx."))
return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::CanBeGlobal,
TargetExtType::CanBeLocal,
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 421d6603..c3a27c9 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -488,6 +488,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_HEXAGON_MACH_V5, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V55, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V60, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V61, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V62, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V65, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V66, EF_HEXAGON_MACH);
@@ -499,12 +500,21 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_HEXAGON_MACH_V71T, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V73, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_MACH_V75, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V77, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V79, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V81, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V83, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V85, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V87, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V89, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V91, EF_HEXAGON_MACH);
BCaseMask(EF_HEXAGON_ISA_V2, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V3, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V4, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V5, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V55, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V60, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V61, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V62, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V65, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V66, EF_HEXAGON_ISA);
@@ -514,6 +524,14 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_HEXAGON_ISA_V71, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V73, EF_HEXAGON_ISA);
BCaseMask(EF_HEXAGON_ISA_V75, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V77, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V79, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V81, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V83, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V85, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V87, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V89, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V91, EF_HEXAGON_ISA);
break;
case ELF::EM_AVR:
BCaseMask(EF_AVR_ARCH_AVR1, EF_AVR_ARCH_MASK);
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index dad0fa3..648d6a5 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -354,8 +354,8 @@ namespace llvm {
/// Emulates hitting "retry" from an "abort, retry, ignore" CRT debug report
/// dialog. "retry" raises an exception which ultimately triggers our stack
/// dumper.
-static LLVM_ATTRIBUTE_UNUSED int
-AvoidMessageBoxHook(int ReportType, char *Message, int *Return) {
+[[maybe_unused]] static int AvoidMessageBoxHook(int ReportType, char *Message,
+ int *Return) {
// Set *Return to the retry code for the return value of _CrtDbgReport:
// http://msdn.microsoft.com/en-us/library/8hyw4sy7(v=vs.71).aspx
// This may also trigger just-in-time debugging via DebugBreak().
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index be2f2e4..662d84b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1561,6 +1561,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
@@ -1717,6 +1718,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -7775,6 +7777,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
return LowerVECREDUCE(Op, DAG);
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_FMUL:
+ return LowerVECREDUCE_MUL(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
@@ -16794,6 +16799,33 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
}
}
+SDValue AArch64TargetLowering::LowerVECREDUCE_MUL(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ assert(SrcVT.isScalableVector() && "Unexpected operand type!");
+
+ SDVTList SrcVTs = DAG.getVTList(SrcVT, SrcVT);
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
+ SDValue Identity = DAG.getNeutralElement(BaseOpc, DL, SrcVT, Op->getFlags());
+
+  // While we don't know the size of the vector, we do know its maximum size,
+  // so we can perform a tree reduction with an identity vector: once we
+  // arrive at the result, the remaining stages (when the vector is smaller
+  // than the maximum) have no effect.
+
+ unsigned Segments = AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
+ unsigned Stages = llvm::Log2_32(Segments * SrcVT.getVectorMinNumElements());
+
+ for (unsigned I = 0; I < Stages; ++I) {
+ Src = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, SrcVTs, Src, Identity);
+ Src = DAG.getNode(BaseOpc, DL, SrcVT, Src.getValue(0), Src.getValue(1));
+ }
+
+ return DAG.getExtractVectorElt(DL, Op.getValueType(), Src, 0);
+}
+
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
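
[Note] A scalar simulation of the deinterleave-based tree reduction in LowerVECREDUCE_MUL, illustrating why running the worst-case number of stages is harmless: once the live data is fully reduced, further stages only multiply by the identity. Names and lane counts here are illustrative:

#include <cassert>
#include <cstddef>
#include <vector>

// Each stage deinterleaves concat(Src, Identity) into even/odd lanes and
// multiplies the halves, halving the live data while keeping vector length.
static double reduceMul(std::vector<double> Src, unsigned Stages) {
  std::vector<double> Id(Src.size(), 1.0); // neutral element for fmul
  for (unsigned S = 0; S < Stages; ++S) {
    std::size_t N = Src.size();
    std::vector<double> Even(N), Odd(N);
    for (std::size_t I = 0; I < N; ++I) {
      Even[I] = 2 * I < N ? Src[2 * I] : Id[2 * I - N];
      Odd[I] = 2 * I + 1 < N ? Src[2 * I + 1] : Id[2 * I + 1 - N];
    }
    for (std::size_t I = 0; I < N; ++I)
      Src[I] = Even[I] * Odd[I];
  }
  return Src[0]; // extract lane 0
}

int main() {
  // 4 lanes of data, but run the 3 stages an 8-lane maximum would need:
  // the extra stage multiplies by 1.0 and leaves the result unchanged.
  assert(reduceMul({2, 3, 4, 5}, 3) == 120.0);
}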
@@ -18144,8 +18176,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
- if (Factor != 2 && Factor != 4) {
- LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
+ if (Factor != 2 && Factor != 3 && Factor != 4) {
+ LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
return false;
}
auto *LI = dyn_cast<LoadInst>(Load);
@@ -18223,8 +18255,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
Instruction *Store, Value *Mask,
ArrayRef<Value *> InterleavedValues) const {
unsigned Factor = InterleavedValues.size();
- if (Factor != 2 && Factor != 4) {
- LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
+ if (Factor != 2 && Factor != 3 && Factor != 4) {
+ LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
return false;
}
StoreInst *SI = dyn_cast<StoreInst>(Store);
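
[Note] The two hunks above extend the ld2/ld4 and st2/st4 paths to factor 3 (ld3/st3). A factor-3 interleaved store writes three vectors element-interleaved in memory as a0 b0 c0 a1 b1 c1 ...; a small sketch of that layout:

#include <array>
#include <cassert>
#include <cstddef>

// Memory layout produced by an st3-style store: lane I of each of the
// three source vectors lands at consecutive addresses before lane I+1.
template <std::size_t N>
std::array<int, 3 * N> interleave3(const std::array<int, N> &A,
                                   const std::array<int, N> &B,
                                   const std::array<int, N> &C) {
  std::array<int, 3 * N> Out{};
  for (std::size_t I = 0; I < N; ++I) {
    Out[3 * I + 0] = A[I];
    Out[3 * I + 1] = B[I];
    Out[3 * I + 2] = C[I];
  }
  return Out;
}

int main() {
  auto Out = interleave3<4>({0, 1, 2, 3}, {10, 11, 12, 13}, {20, 21, 22, 23});
  assert(Out[0] == 0 && Out[1] == 10 && Out[2] == 20 && Out[3] == 1);
}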
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 00956fd..9495c9f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -752,6 +752,7 @@ private:
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECREDUCE_MUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 30dfcf2b..12c600f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -10600,6 +10600,9 @@ describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
Register DestReg = DestSrc->Destination->getReg();
Register SrcReg = DestSrc->Source->getReg();
+ if (!DestReg.isValid() || !SrcReg.isValid())
+ return std::nullopt;
+
auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
// If the described register is the destination, just return the source.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index bfe2c80..a67b12a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -901,6 +901,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
+ addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}});
+
bool hasSALUFloat = ST->hasSALUFloatInsts();
addRulesForGOpcs({G_FADD}, Standard)
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index be62395..e0375ea 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -513,8 +513,7 @@ void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
}
if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
- Op = decodeLiteralConstant(
- Desc, OpDesc, OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64);
+ Op = decodeLiteralConstant(Desc, OpDesc);
continue;
}
@@ -1545,21 +1544,21 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
if (HasLiteral) {
- if (Literal64 != Val)
+ if (Literal != Val)
return errOperand(Val, "More than one unique literal is illegal");
}
HasLiteral = true;
- Literal = Literal64 = Val;
+ Literal = Val;
- bool UseLit64 = Hi_32(Literal64) == 0;
+ bool UseLit64 = Hi_32(Literal) == 0;
return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
- LitModifier::Lit64, Literal64, getContext()))
- : MCOperand::createImm(Literal64);
+ LitModifier::Lit64, Literal, getContext()))
+ : MCOperand::createImm(Literal);
}
-MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
- const MCOperandInfo &OpDesc,
- bool ExtendFP64) const {
+MCOperand
+AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
+ const MCOperandInfo &OpDesc) const {
// For now all literal constants are supposed to be unsigned integer
// ToDo: deal with signed/unsigned 64-bit integer constants
// ToDo: deal with float/double constants
@@ -1569,35 +1568,79 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
Twine(Bytes.size()));
}
HasLiteral = true;
- Literal = Literal64 = eatBytes<uint32_t>(Bytes);
- if (ExtendFP64)
- Literal64 <<= 32;
+ Literal = eatBytes<uint32_t>(Bytes);
}
- int64_t Val = ExtendFP64 ? Literal64 : Literal;
+  // For disassembling, always assume all inline constants are available.
+ bool HasInv2Pi = true;
- bool CanUse64BitLiterals =
- STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
- !(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
-
- bool UseLit64 = false;
- if (CanUse64BitLiterals) {
- if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64)
- UseLit64 = false;
- else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64)
- UseLit64 = Hi_32(Literal64) == 0;
+ // Invalid instruction codes may contain literals for inline-only
+ // operands, so we support them here as well.
+ int64_t Val = Literal;
+ bool UseLit = false;
+ switch (OpDesc.OperandType) {
+ default:
+ llvm_unreachable("Unexpected operand type!");
+ case AMDGPU::OPERAND_REG_IMM_BF16:
+ case AMDGPU::OPERAND_REG_INLINE_C_BF16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
+ UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2BF16:
+ UseLit = AMDGPU::isInlinableLiteralV2BF16(Val);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
+ break;
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_IMM_V2FP32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT32:
+ case AMDGPU::OPERAND_KIMM32:
+ UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
+ Val <<= 32;
+ break;
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
+ break;
+ case MCOI::OPERAND_REGISTER:
+ // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
+ // decoding a literal in a position of a register operand. Give
+ // it special handling in the caller, decodeImmOperands(), instead
+ // of quietly allowing it here.
+ break;
}
- return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
- LitModifier::Lit64, Val, getContext()))
- : MCOperand::createImm(Val);
+ return UseLit ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
+ LitModifier::Lit, Val, getContext()))
+ : MCOperand::createImm(Val);
}
-MCOperand
-AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
+MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
if (!HasLiteral) {
@@ -1606,25 +1649,13 @@ AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
Twine(Bytes.size()));
}
HasLiteral = true;
- Literal64 = eatBytes<uint64_t>(Bytes);
- }
-
- bool UseLit64 = false;
- const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
- const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()];
- if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) {
- UseLit64 = false;
- } else {
- assert(OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64);
- UseLit64 = Hi_32(Literal64) == 0;
+ Literal = eatBytes<uint64_t>(Bytes);
}
+ bool UseLit64 = Hi_32(Literal) == 0;
return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
- LitModifier::Lit64, Literal64, getContext()))
- : MCOperand::createImm(Literal64);
+ LitModifier::Lit64, Literal, getContext()))
+ : MCOperand::createImm(Literal);
}
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
@@ -1913,7 +1944,7 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
return MCOperand::createImm(Val);
if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
- return decodeLiteral64Constant(Inst);
+ return decodeLiteral64Constant();
}
switch (Width) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 2751857..d103d79 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -44,8 +44,7 @@ private:
const unsigned HwModeRegClass;
const unsigned TargetMaxInstBytes;
mutable ArrayRef<uint8_t> Bytes;
- mutable uint32_t Literal;
- mutable uint64_t Literal64;
+ mutable uint64_t Literal;
mutable bool HasLiteral;
mutable std::optional<bool> EnableWavefrontSize32;
unsigned CodeObjectVersion;
@@ -144,9 +143,8 @@ public:
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const;
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc,
- const MCOperandInfo &OpDesc,
- bool ExtendFP64) const;
- MCOperand decodeLiteral64Constant(const MCInst &Inst) const;
+ const MCOperandInfo &OpDesc) const;
+ MCOperand decodeLiteral64Constant() const;
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 58482ea..9fbf9e5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -69,6 +69,12 @@ static cl::opt<bool> GCNTrackers(
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
cl::init(false));
+static cl::opt<unsigned> PendingQueueLimit(
+ "amdgpu-scheduler-pending-queue-limit", cl::Hidden,
+ cl::desc(
+ "Max (Available+Pending) size to inspect pending queue (0 disables)"),
+ cl::init(256));
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#define DUMP_MAX_REG_PRESSURE
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
@@ -335,17 +341,52 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
}
}
+static bool shouldCheckPending(SchedBoundary &Zone,
+ const TargetSchedModel *SchedModel) {
+ bool HasBufferedModel =
+ SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize();
+ unsigned Combined = Zone.Available.size() + Zone.Pending.size();
+ return Combined <= PendingQueueLimit && HasBufferedModel;
+}
+
+static SUnit *pickOnlyChoice(SchedBoundary &Zone,
+ const TargetSchedModel *SchedModel) {
+ // pickOnlyChoice() releases pending instructions and checks for new hazards.
+ SUnit *OnlyChoice = Zone.pickOnlyChoice();
+ if (!shouldCheckPending(Zone, SchedModel) || Zone.Pending.empty())
+ return OnlyChoice;
+
+ return nullptr;
+}
+
+void GCNSchedStrategy::printCandidateDecision(const SchedCandidate &Current,
+ const SchedCandidate &Preferred) {
+ LLVM_DEBUG({
+ dbgs() << "Prefer:\t\t";
+ DAG->dumpNode(*Preferred.SU);
+
+ if (Current.SU) {
+ dbgs() << "Not:\t";
+ DAG->dumpNode(*Current.SU);
+ }
+
+ dbgs() << "Reason:\t\t";
+ traceCandidate(Preferred);
+ });
+}
+
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
- SchedCandidate &Cand,
+ SchedCandidate &Cand, bool &IsPending,
bool IsBottomUp) {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
+ IsPending = false;
if (DAG->isTrackingPressure()) {
if (!GCNTrackers) {
SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
@@ -358,8 +399,9 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
VGPRPressure = T->getPressure().getArchVGPRNum();
}
}
- ReadyQueue &Q = Zone.Available;
- for (SUnit *SU : Q) {
+ LLVM_DEBUG(dbgs() << "Available Q:\n");
+ ReadyQueue &AQ = Zone.Available;
+ for (SUnit *SU : AQ) {
SchedCandidate TryCand(ZonePolicy);
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
@@ -371,27 +413,55 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
// Initialize resource delta if needed in case future heuristics query it.
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(Zone.DAG, SchedModel);
+ LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
Cand.setBest(TryCand);
- LLVM_DEBUG(traceCandidate(Cand));
+ } else {
+ printCandidateDecision(TryCand, Cand);
+ }
+ }
+
+ if (!shouldCheckPending(Zone, SchedModel))
+ return;
+
+ LLVM_DEBUG(dbgs() << "Pending Q:\n");
+ ReadyQueue &PQ = Zone.Pending;
+ for (SUnit *SU : PQ) {
+ SchedCandidate TryCand(ZonePolicy);
+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
+ VGPRPressure, IsBottomUp);
+ // Pass SchedBoundary only when comparing nodes from the same boundary.
+ SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+ tryPendingCandidate(Cand, TryCand, ZoneArg);
+ if (TryCand.Reason != NoCand) {
+ // Initialize resource delta if needed in case future heuristics query it.
+ if (TryCand.ResDelta == SchedResourceDelta())
+ TryCand.initResourceDelta(Zone.DAG, SchedModel);
+ LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
+ IsPending = true;
+ Cand.setBest(TryCand);
+ } else {
+ printCandidateDecision(TryCand, Cand);
}
}
}
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
-SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
+SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode,
+ bool &PickedPending) {
// Schedule as far as possible in the direction of no choice. This is most
// efficient, but also provides the best heuristics for CriticalPSets.
- if (SUnit *SU = Bot.pickOnlyChoice()) {
+ if (SUnit *SU = pickOnlyChoice(Bot, SchedModel)) {
IsTopNode = false;
return SU;
}
- if (SUnit *SU = Top.pickOnlyChoice()) {
+ if (SUnit *SU = pickOnlyChoice(Top, SchedModel)) {
IsTopNode = true;
return SU;
}
- // Set the bottom-up policy based on the state of the current bottom zone and
- // the instructions outside the zone, including the top zone.
+ // Set the bottom-up policy based on the state of the current bottom zone
+ // and the instructions outside the zone, including the top zone.
CandPolicy BotPolicy;
setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
// Set the top-down policy based on the state of the current top zone and
@@ -399,12 +469,14 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
CandPolicy TopPolicy;
setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
+ bool BotPending = false;
// See if BotCand is still valid (because we previously scheduled from Top).
LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
if (!BotCand.isValid() || BotCand.SU->isScheduled ||
BotCand.Policy != BotPolicy) {
BotCand.reset(CandPolicy());
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
+ BotPending,
/*IsBottomUp=*/true);
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
} else {
@@ -414,6 +486,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
+ BotPending,
/*IsBottomUp=*/true);
assert(TCand.SU == BotCand.SU &&
"Last pick result should correspond to re-picking right now");
@@ -421,12 +494,14 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
#endif
}
+ bool TopPending = false;
// Check if the top Q has a better candidate.
LLVM_DEBUG(dbgs() << "Picking from Top:\n");
if (!TopCand.isValid() || TopCand.SU->isScheduled ||
TopCand.Policy != TopPolicy) {
TopCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
+ TopPending,
/*IsBottomUp=*/false);
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
} else {
@@ -436,6 +511,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
+ TopPending,
/*IsBottomUp=*/false);
assert(TCand.SU == TopCand.SU &&
"Last pick result should correspond to re-picking right now");
@@ -446,12 +522,21 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
// Pick best from BotCand and TopCand.
LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
dbgs() << "Bot Cand: "; traceCandidate(BotCand););
- SchedCandidate Cand = BotCand;
- TopCand.Reason = NoCand;
- tryCandidate(Cand, TopCand, nullptr);
- if (TopCand.Reason != NoCand) {
- Cand.setBest(TopCand);
+ SchedCandidate Cand = BotPending ? TopCand : BotCand;
+ SchedCandidate TryCand = BotPending ? BotCand : TopCand;
+ PickedPending = BotPending && TopPending;
+
+ TryCand.Reason = NoCand;
+ if (BotPending || TopPending) {
+ PickedPending |= tryPendingCandidate(Cand, TryCand, nullptr);
+ } else {
+ tryCandidate(Cand, TryCand, nullptr);
+ }
+
+ if (TryCand.Reason != NoCand) {
+ Cand.setBest(TryCand);
}
+
LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
IsTopNode = Cand.AtTop;
@@ -466,35 +551,55 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
return nullptr;
}
+ bool PickedPending;
SUnit *SU;
do {
+ PickedPending = false;
if (RegionPolicy.OnlyTopDown) {
- SU = Top.pickOnlyChoice();
+ SU = pickOnlyChoice(Top, SchedModel);
if (!SU) {
CandPolicy NoPolicy;
TopCand.reset(NoPolicy);
pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
+ PickedPending,
/*IsBottomUp=*/false);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
SU = TopCand.SU;
}
IsTopNode = true;
} else if (RegionPolicy.OnlyBottomUp) {
- SU = Bot.pickOnlyChoice();
+ SU = pickOnlyChoice(Bot, SchedModel);
if (!SU) {
CandPolicy NoPolicy;
BotCand.reset(NoPolicy);
pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
+ PickedPending,
/*IsBottomUp=*/true);
assert(BotCand.Reason != NoCand && "failed to find a candidate");
SU = BotCand.SU;
}
IsTopNode = false;
} else {
- SU = pickNodeBidirectional(IsTopNode);
+ SU = pickNodeBidirectional(IsTopNode, PickedPending);
}
} while (SU->isScheduled);
+ if (PickedPending) {
+ unsigned ReadyCycle = IsTopNode ? SU->TopReadyCycle : SU->BotReadyCycle;
+ SchedBoundary &Zone = IsTopNode ? Top : Bot;
+ unsigned CurrentCycle = Zone.getCurrCycle();
+ if (ReadyCycle > CurrentCycle)
+ Zone.bumpCycle(ReadyCycle);
+
+ // FIXME: checkHazard() doesn't give information about which cycle the
+ // hazard will resolve so just keep bumping the cycle by 1. This could be
+ // made more efficient if checkHazard() returned more details.
+ while (Zone.checkHazard(SU))
+ Zone.bumpCycle(Zone.getCurrCycle() + 1);
+
+ Zone.releasePending();
+ }
+
if (SU->isTopReady())
Top.removeReady(SU);
if (SU->isBottomReady())
@@ -540,6 +645,47 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const {
return *std::next(CurrentStage);
}
+bool GCNSchedStrategy::tryPendingCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) const {
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+ // Bias PhysReg defs and copies to their uses and defs, respectively.
+ if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+ biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
+ return TryCand.Reason != NoCand;
+
+ // Avoid exceeding the target's limit.
+ if (DAG->isTrackingPressure() &&
+ tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
+ RegExcess, TRI, DAG->MF))
+ return TryCand.Reason != NoCand;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (DAG->isTrackingPressure() &&
+ tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
+ TryCand, Cand, RegCritical, TRI, DAG->MF))
+ return TryCand.Reason != NoCand;
+
+ bool SameBoundary = Zone != nullptr;
+ if (SameBoundary) {
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return TryCand.Reason != NoCand;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources, TryCand, Cand,
+ ResourceDemand))
+ return TryCand.Reason != NoCand;
+ }
+
+ return false;
+}
+
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
const MachineSchedContext *C, bool IsLegacyScheduler)
: GCNSchedStrategy(C) {
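Taken together, the scheduler change has two moving parts: pickNodeFromQueue only walks Zone.Pending when the combined Available+Pending size is within the amdgpu-scheduler-pending-queue-limit cap (default 256) and the target has a buffered machine model, and pickNode, after a pending candidate wins, advances the zone's cycle until that SU is ready and hazard-free before releasing the pending queue. A minimal standalone sketch of that control flow follows; FakeZone and its members are illustrative stand-ins, not the real SchedBoundary API.

#include <cstdio>
#include <deque>

// Stand-ins for scheduler state; names are illustrative only.
struct FakeZone {
  std::deque<int> Available, Pending;
  unsigned CurrCycle = 0;
  unsigned readyCycleOf(int SU) const { return (unsigned)SU; } // toy mapping
  bool hasHazard(int SU) const { return SU % 7 == 0 && CurrCycle < 14; }
};

static const unsigned PendingQueueLimit = 256; // mirrors the new cl::opt

static bool shouldCheckPending(const FakeZone &Z, bool HasBufferedModel) {
  return Z.Available.size() + Z.Pending.size() <= PendingQueueLimit &&
         HasBufferedModel;
}

// After a pending node wins, bump the cycle until it is ready and hazard
// free, one cycle at a time, as pickNode() now does before releasePending().
static void advanceToIssue(FakeZone &Z, int SU) {
  unsigned Ready = Z.readyCycleOf(SU);
  if (Ready > Z.CurrCycle)
    Z.CurrCycle = Ready;
  while (Z.hasHazard(SU))
    ++Z.CurrCycle;
}

int main() {
  FakeZone Z;
  Z.Pending = {7, 9};
  if (shouldCheckPending(Z, /*HasBufferedModel=*/true)) {
    advanceToIssue(Z, Z.Pending.front());
    std::printf("issued at cycle %u\n", Z.CurrCycle); // issued at cycle 14
  }
}
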
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 8ea4267..975781f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -44,17 +44,32 @@ raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
- SUnit *pickNodeBidirectional(bool &IsTopNode);
+ SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
- SchedCandidate &Cand, bool IsBottomUp);
+ SchedCandidate &Cand, bool &IsPending,
+ bool IsBottomUp);
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI, unsigned SGPRPressure,
unsigned VGPRPressure, bool IsBottomUp);
+ /// Evaluates instructions in the pending queue using a subset of scheduling
+ /// heuristics.
+ ///
+ /// Instructions that cannot be issued due to hardware constraints are placed
+ /// in the pending queue rather than the available queue, making them normally
+ /// invisible to scheduling heuristics. However, in certain scenarios (such as
+ /// avoiding register spilling), it may be beneficial to consider scheduling
+ /// these not-yet-ready instructions.
+ bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+ SchedBoundary *Zone) const;
+
+ void printCandidateDecision(const SchedCandidate &Current,
+ const SchedCandidate &Preferred);
+
std::vector<unsigned> Pressure;
std::vector<unsigned> MaxPressure;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index a01a5fd..5e3195b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1697,9 +1697,6 @@ LLVM_READNONE
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
LLVM_READNONE
-bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
-
-LLVM_READNONE
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
LLVM_READNONE
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 228114c..44c4830 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -57,6 +57,7 @@ def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
def BinaryWithCarryTy : DXILOpParamType;
+def DimensionsTy : DXILOpParamType;
class DXILOpClass;
@@ -901,6 +902,13 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
}
+def GetDimensions : DXILOp<72, getDimensions> {
+ let Doc = "gets the dimensions of a buffer or texture";
+ let arguments = [HandleTy, Int32Ty];
+ let result = DimensionsTy;
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
def Barrier : DXILOp<80, barrier> {
let Doc = "inserts a memory barrier in the shader";
let intrinsics = [
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 1aed8f9..944b2e6 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -261,6 +261,12 @@ static StructType *getBinaryWithCarryType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
}
+static StructType *getDimensionsType(LLVMContext &Ctx) {
+ Type *Int32Ty = Type::getInt32Ty(Ctx);
+ return getOrCreateStructType("dx.types.Dimensions",
+ {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, Ctx);
+}
+
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
Type *OverloadTy) {
switch (Kind) {
@@ -318,6 +324,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getSplitDoubleType(Ctx);
case OpParamType::BinaryWithCarryTy:
return getBinaryWithCarryType(Ctx);
+ case OpParamType::DimensionsTy:
+ return getDimensionsType(Ctx);
}
llvm_unreachable("Invalid parameter kind");
return nullptr;
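The new DimensionsTy parameter type lowers to a named struct of four i32 fields. A small sketch of constructing an equivalent type with the LLVM C++ API; it uses a bare StructType::create in place of the getOrCreateStructType helper above, so unlike the helper it would mint a fresh suffixed type if the name already exists in the context.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Build a struct matching dx.types.Dimensions: { i32, i32, i32, i32 }.
static StructType *makeDimensionsType(LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  return StructType::create(Ctx, {I32, I32, I32, I32}, "dx.types.Dimensions");
}

int main() {
  LLVMContext Ctx;
  StructType *Ty = makeDimensionsType(Ctx);
  return Ty->getNumElements() == 4 ? 0 : 1;
}
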
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 610d8b6..e46a393 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -627,6 +627,28 @@ public:
});
}
+ [[nodiscard]] bool lowerGetDimensionsX(Function &F) {
+ IRBuilder<> &IRB = OpBuilder.getIRB();
+ Type *Int32Ty = IRB.getInt32Ty();
+
+ return replaceFunction(F, [&](CallInst *CI) -> Error {
+ IRB.SetInsertPoint(CI);
+ Value *Handle =
+ createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+ Value *Undef = UndefValue::get(Int32Ty);
+
+ Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+ OpCode::GetDimensions, {Handle, Undef}, CI->getName(), Int32Ty);
+ if (Error E = OpCall.takeError())
+ return E;
+ Value *Dim = IRB.CreateExtractValue(*OpCall, 0);
+
+ CI->replaceAllUsesWith(Dim);
+ CI->eraseFromParent();
+ return Error::success();
+ });
+ }
+
[[nodiscard]] bool lowerGetPointer(Function &F) {
// These should have already been handled in DXILResourceAccess, so we can
// just clean up the dead prototype.
@@ -934,6 +956,9 @@ public:
case Intrinsic::dx_resource_updatecounter:
HasErrors |= lowerUpdateCounter(F);
break;
+ case Intrinsic::dx_resource_getdimensions_x:
+ HasErrors |= lowerGetDimensionsX(F);
+ break;
case Intrinsic::ctpop:
HasErrors |= lowerCtpopToCountBits(F);
break;
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 5f180d6..3bd6ed4 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -66,6 +66,10 @@ public:
void remapInstruction(MCInst &Instr) const;
+ Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const override;
+
private:
bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
uint64_t &BytesToSkip, raw_ostream &CS) const;
@@ -567,6 +571,18 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
return Result;
}
+Expected<bool> HexagonDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
+ uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const {
+ // At the start of a symbol, force a fresh packet by resetting any
+ // in-progress bundle state. This prevents packets from straddling label
+ // boundaries when data (e.g. jump tables) appears in between.
+ Size = 0;
+ resetBundle();
+ return true;
+}
+
static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
ArrayRef<MCPhysReg> Table) {
if (RegNo < Table.size()) {
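The effect of the new onSymbolStart hook: Size = 0 consumes no bytes, and clearing the in-progress bundle guarantees the first word after a symbol starts a fresh packet. A toy model of the state being reset; ToyBundler is illustrative, not the MC disassembler API.

#include <cassert>
#include <vector>

// Toy bundler: Hexagon packets accumulate words until an end-of-packet mark.
struct ToyBundler {
  std::vector<unsigned> InProgress;
  void reset() { InProgress.clear(); } // what resetBundle() models
  void decodeWord(unsigned W) { InProgress.push_back(W); }
};

int main() {
  ToyBundler B;
  B.decodeWord(0x1234); // half-finished packet before some inline data
  // A new symbol starts here (e.g. after a jump table); without a reset the
  // stale word would be glued onto the first instruction of the next packet.
  B.reset();
  assert(B.InProgress.empty());
  B.decodeWord(0x5678); // begins a fresh packet
  return 0;
}
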
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index 1ce8d7e3..df0c8c1 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -264,9 +264,10 @@ public:
} // end anonymous namespace
-static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- Type *OrigTy, CCState &State) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
+ CCState &State);
static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 7f1ff45..2fd7327 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -3176,9 +3176,10 @@ static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT,
F64Regs);
}
-static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- Type *OrigTy, CCState &State) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
+ CCState &State);
#include "MipsGenCallingConv.inc"
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 662d3f6..b1794b7 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -717,6 +717,18 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.clampScalar(0, sXLen, sXLen)
.lower();
+ LegalityPredicate InsertVectorEltPred = [=](const LegalityQuery &Query) {
+ LLT VecTy = Query.Types[0];
+ LLT EltTy = Query.Types[1];
+ return VecTy.getElementType() == EltTy;
+ };
+
+ getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
+ .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+ InsertVectorEltPred, typeIs(2, sXLen)))
+ .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), InsertVectorEltPred,
+ typeIs(2, sXLen)));
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
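InsertVectorEltPred simply requires that the element type of the destination vector (type index 0) equals the scalar being inserted (type index 1), with the index operand (type index 2) pinned to sXLen by the surrounding rule. The same shape in a self-contained sketch, with minimal stand-ins for LLT and LegalityQuery:

#include <cstdio>

// Minimal stand-ins for LLT and LegalityQuery; illustrative names only.
struct ToyLLT {
  int EltBits = 0, NumElts = 1;
  ToyLLT elementType() const { return {EltBits, 1}; }
  bool operator==(const ToyLLT &O) const {
    return EltBits == O.EltBits && NumElts == O.NumElts;
  }
};
struct ToyQuery { ToyLLT Types[3]; };

int main() {
  // Mirrors InsertVectorEltPred: Types[0]'s element type must equal Types[1].
  auto Pred = [](const ToyQuery &Q) {
    return Q.Types[0].elementType() == Q.Types[1];
  };
  ToyQuery Q{{{32, 4}, {32, 1}, {64, 1}}}; // <4 x s32>, s32, sXLen = s64
  std::printf("legal: %d\n", Pred(Q));     // legal: 1
}
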
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7123a2d..eb87558 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1672,6 +1672,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.useRVVForFixedLengthVectors())
setTargetDAGCombine(ISD::BITCAST);
+ setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
+
// Disable strict node mutation.
IsStrictFPEnabled = true;
EnableExtLdPromotion = true;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 0f6e1ca..ed54404d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1763,6 +1763,26 @@ defm RELAXED_DOT :
"i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs",
"i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>;
+def : Pat<
+ (v8i16 (add
+ (wasm_shuffle
+ (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
+ (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
+ (i32 0), (i32 1), (i32 4), (i32 5),
+ (i32 8), (i32 9), (i32 12), (i32 13),
+ (i32 16), (i32 17), (i32 20), (i32 21),
+ (i32 24), (i32 25), (i32 28), (i32 29)),
+ (wasm_shuffle
+ (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
+ (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
+ (i32 2), (i32 3), (i32 6), (i32 7),
+ (i32 10), (i32 11), (i32 14), (i32 15),
+ (i32 18), (i32 19), (i32 22), (i32 23),
+ (i32 26), (i32 27), (i32 30), (i32 31)))
+ ),
+ (v8i16 (RELAXED_DOT v16i8:$lhs, v16i8:$rhs))
+>;
+
defm RELAXED_DOT_ADD :
RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc),
(outs), (ins),
@@ -1771,6 +1791,18 @@ defm RELAXED_DOT_ADD :
"i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc",
"i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>;
+def : Pat<
+ (v4i32 (add
+ (v4i32 (int_wasm_extadd_pairwise_signed
+ (v8i16 (int_wasm_relaxed_dot_i8x16_i7x16_signed v16i8:$lhs, v16i8:$rhs)))),
+ (v4i32 V128:$acc))),
+ (v4i32 (RELAXED_DOT_ADD v16i8:$lhs, v16i8:$rhs, (v4i32 V128:$acc)))
+ >;
+
+def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$lhs),
+ (v16i8 V128:$rhs))),
+ (RELAXED_DOT_ADD $lhs, $rhs, $acc)>, Requires<[HasRelaxedSIMD]>;
+
//===----------------------------------------------------------------------===//
// Relaxed BFloat16 dot product
//===----------------------------------------------------------------------===//
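For reference, the shuffle-of-extmuls expression matched by the first pattern above computes exactly the pairwise product sums that i16x8.relaxed_dot_i8x16_i7x16_s produces. A scalar C++ model of that semantics, assuming the fully signed interpretation (the relaxed op leaves rhs lanes with the high bit set implementation-defined):

#include <cstdint>
#include <cstdio>

// i16x8.relaxed_dot_i8x16_i7x16_s, scalar reference: lane i of the result is
// lhs[2i]*rhs[2i] + lhs[2i+1]*rhs[2i+1], with i8 inputs widened to i16.
void relaxedDotI8x16(const int8_t lhs[16], const int8_t rhs[16],
                     int16_t out[8]) {
  for (int i = 0; i < 8; ++i)
    out[i] = int16_t(int16_t(lhs[2 * i]) * rhs[2 * i] +
                     int16_t(lhs[2 * i + 1]) * rhs[2 * i + 1]);
}

int main() {
  int8_t a[16], b[16];
  for (int i = 0; i < 16; ++i) { a[i] = int8_t(i); b[i] = 2; }
  int16_t d[8];
  relaxedDotI8x16(a, b, d);
  std::printf("%d %d\n", d[0], d[7]); // 2 58
}
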
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index b81641f..28fa2cd 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -414,8 +414,6 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
getActionDefinitionsBuilder(G_SEXT_INREG).lower();
- getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
-
// fp constants
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({s32, s64})
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 6db780f..8e08d16 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1338,6 +1338,10 @@ def ProcessorFeatures {
list<SubtargetFeature> PTLFeatures =
!listremove(ARLSFeatures, [FeatureWIDEKL]);
+ // Novalake
+ list<SubtargetFeature> NVLFeatures =
+ !listconcat(PTLFeatures, [FeaturePREFETCHI]);
+
// Clearwaterforest
list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
FeatureAVXVNNIINT16,
@@ -1883,6 +1887,9 @@ foreach P = ["pantherlake", "wildcatlake"] in {
def : ProcModel<P, AlderlakePModel,
ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
}
+def : ProcModel<"novalake", AlderlakePModel, ProcessorFeatures.NVLFeatures,
+ ProcessorFeatures.ADLTuning>;
+
def : ProcModel<"clearwaterforest", AlderlakePModel,
ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 481a9be..1fca466f 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1946,16 +1946,14 @@ static void addConstantComments(const MachineInstr *MI,
CASE_ARITH_RM(PMADDUBSW) {
unsigned SrcIdx = getSrcIdx(MI, 1);
if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
- if (C->getType()->getScalarSizeInBits() == 8) {
- std::string Comment;
- raw_string_ostream CS(Comment);
- unsigned VectorWidth =
- X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
- CS << "[";
- printConstant(C, VectorWidth, CS);
- CS << "]";
- OutStreamer.AddComment(CS.str());
- }
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ unsigned VectorWidth =
+ X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
+ CS << "[";
+ printConstant(C, VectorWidth, CS);
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
}
break;
}
@@ -1967,16 +1965,14 @@ static void addConstantComments(const MachineInstr *MI,
CASE_ARITH_RM(PMULHRSW) {
unsigned SrcIdx = getSrcIdx(MI, 1);
if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
- if (C->getType()->getScalarSizeInBits() == 16) {
- std::string Comment;
- raw_string_ostream CS(Comment);
- unsigned VectorWidth =
- X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
- CS << "[";
- printConstant(C, VectorWidth, CS);
- CS << "]";
- OutStreamer.AddComment(CS.str());
- }
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ unsigned VectorWidth =
+ X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
+ CS << "[";
+ printConstant(C, VectorWidth, CS);
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
}
break;
}
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 3479106..6065575 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1152,6 +1152,20 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family,
break;
}
break;
+ case 0x12:
+ switch (Model) {
+ // Novalake:
+ case 0x1:
+ case 0x3:
+ CPU = "novalake";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_NOVALAKE;
+ break;
+ default: // Unknown family 0x12 CPU.
+ break;
+ }
+ break;
+
default:
break; // Unknown.
}
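The new family 0x12 case follows the same shape as the rest of getIntelProcessorTypeAndSubtype: dispatch on the CPUID (family, model) pair to a CPU name. A trimmed-down sketch of just this mapping; the helper name and structure are illustrative, not the real table:

#include <cstdio>

// Map (family, model) to a CPU name; only the new 0x12 entries are shown.
static const char *intelCPUFromSignature(unsigned Family, unsigned Model) {
  if (Family == 0x12) {
    switch (Model) {
    case 0x1:
    case 0x3:
      return "novalake";
    default:
      return nullptr; // unknown family 0x12 model
    }
  }
  return nullptr; // other families handled elsewhere
}

int main() {
  std::printf("%s\n", intelCPUFromSignature(0x12, 0x3)); // novalake
}
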
diff --git a/llvm/lib/TargetParser/Unix/Host.inc b/llvm/lib/TargetParser/Unix/Host.inc
index aeb2f59..38b942d 100644
--- a/llvm/lib/TargetParser/Unix/Host.inc
+++ b/llvm/lib/TargetParser/Unix/Host.inc
@@ -59,10 +59,30 @@ static std::string updateTripleOSVersion(std::string TargetTripleString) {
if (TT.getOS() == Triple::AIX && !TT.getOSMajorVersion()) {
struct utsname name;
if (uname(&name) != -1) {
+ std::string release = name.release;
+
+ if (strcmp(name.sysname, "OS400") == 0) {
+ /*
+ PASE uses a different versioning scheme than AIX.
+ The following table shows the currently supported PASE
+ releases and the corresponding AIX releases:
+ --------------------------
+ PASE | AIX
+ --------------------------
+ V7R4 | 7.2 (TL2)
+ --------------------------
+ V7R5 | 7.2 (TL5)
+ --------------------------
+ V7R6 | 7.3 (TL1)
+ --------------------------
+ */
+ release = (release == "4" || release == "5") ? "2" : "3";
+ }
+
std::string NewOSName = std::string(Triple::getOSTypeName(Triple::AIX));
NewOSName += name.version;
NewOSName += '.';
- NewOSName += name.release;
+ NewOSName += release;
NewOSName += ".0.0";
TT.setOSName(NewOSName);
return TT.str();
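The PASE branch rewrites uname's release field into the matching AIX release before the triple's OS version is assembled. The mapping on its own, as a tiny helper mirroring the table in the comment above:

#include <cassert>
#include <string>

// PASE V7R4/V7R5 correspond to AIX 7.2 and V7R6 to AIX 7.3; uname on PASE
// reports the "R" digit in release, so "4"/"5" -> "2" and otherwise "3".
std::string paseToAIXRelease(const std::string &Release) {
  return (Release == "4" || Release == "5") ? "2" : "3";
}

int main() {
  assert(paseToAIXRelease("4") == "2");
  assert(paseToAIXRelease("5") == "2");
  assert(paseToAIXRelease("6") == "3");
  return 0;
}
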
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index e382cfe..dd13ce3 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -176,6 +176,8 @@ constexpr FeatureBitset FeaturesArrowlakeS =
FeatureSM4;
constexpr FeatureBitset FeaturesPantherlake =
(FeaturesArrowlakeS ^ FeatureWIDEKL);
+constexpr FeatureBitset FeaturesNovalake =
+ FeaturesPantherlake | FeaturePREFETCHI;
constexpr FeatureBitset FeaturesClearwaterforest =
(FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 |
FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR;
@@ -379,6 +381,8 @@ constexpr ProcInfo Processors[] = {
// Pantherlake microarchitecture based processors.
{ {"pantherlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false },
{ {"wildcatlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false },
+ // Novalake microarchitecture based processors.
+ { {"novalake"}, CK_Novalake, FEATURE_AVX2, FeaturesNovalake, 'r', false },
// Sierraforest microarchitecture based processors.
{ {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Grandridge microarchitecture based processors.
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
index f166fef..cf7e450 100644
--- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -153,26 +153,23 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
if (IsCallerPresplitCoroutine && HasAttr) {
- BranchProbability MinBranchProbability(
- static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution),
- MinBlockCounterExecution);
-
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
- auto Prob = BranchProbability::getBranchProbability(
- BFI.getBlockFreq(CB->getParent()).getFrequency(),
- BFI.getEntryFreq().getFrequency());
+ auto BlockFreq = BFI.getBlockFreq(CB->getParent()).getFrequency();
+ auto EntryFreq = BFI.getEntryFreq().getFrequency();
+ uint64_t MinFreq =
+ static_cast<uint64_t>(EntryFreq * CoroElideBranchRatio);
- if (Prob < MinBranchProbability) {
+ if (BlockFreq < MinFreq) {
ORE.emit([&]() {
return OptimizationRemarkMissed(
DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' not elided in '"
<< ore::NV("caller", Caller->getName())
- << "' because of low probability: "
- << ore::NV("probability", Prob) << " (threshold: "
- << ore::NV("threshold", MinBranchProbability) << ")";
+ << "' because of low frequency: "
+ << ore::NV("block_freq", BlockFreq)
+ << " (threshold: " << ore::NV("min_freq", MinFreq) << ")";
});
continue;
}
@@ -188,7 +185,8 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' elided in '" << ore::NV("caller", Caller->getName())
- << "' (probability: " << ore::NV("probability", Prob) << ")";
+ << "' (block_freq: " << ore::NV("block_freq", BlockFreq)
+ << ")";
});
FAM.invalidate(*Caller, PreservedAnalyses::none());
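The rewritten check sidesteps BranchProbability (whose 32-bit numerator can overflow for large entry frequencies) by comparing raw block frequencies against the entry frequency scaled by the ratio. The arithmetic in isolation; the 0.05 ratio here is an assumed placeholder, since the real value comes from the CoroElideBranchRatio option:

#include <cstdint>
#include <cstdio>

// Assumed ratio for illustration; the real value comes from a cl::opt.
static const double CoroElideBranchRatio = 0.05;

static bool coldForElide(uint64_t BlockFreq, uint64_t EntryFreq) {
  uint64_t MinFreq = static_cast<uint64_t>(EntryFreq * CoroElideBranchRatio);
  return BlockFreq < MinFreq; // too cold: skip elision, emit the remark
}

int main() {
  std::printf("%d %d\n", coldForElide(3, 100), coldForElide(10, 100)); // 1 0
}
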
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 4c9b10a..cdc559b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -156,9 +156,9 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
Type *Ty = CI.getType();
- if (auto *SrcC = dyn_cast<Constant>(Src))
- if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
- return replaceInstUsesWith(CI, Res);
+ if (Value *Res =
+ simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI)))
+ return replaceInstUsesWith(CI, Res);
// Try to eliminate a cast of a cast.
if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 511bca4..6e17801 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -605,17 +605,16 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
return Mapping;
}
-namespace llvm {
-void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
- bool IsKasan, uint64_t *ShadowBase,
- int *MappingScale, bool *OrShadowOffset) {
+void llvm::getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
+ bool IsKasan, uint64_t *ShadowBase,
+ int *MappingScale, bool *OrShadowOffset) {
auto Mapping = getShadowMapping(TargetTriple, LongSize, IsKasan);
*ShadowBase = Mapping.Offset;
*MappingScale = Mapping.Scale;
*OrShadowOffset = Mapping.OrShadowOffset;
}
-void removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) {
+void llvm::removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) {
// Sanitizer checks read from shadow, which invalidates memory(argmem: *).
//
// This is not only true for sanitized functions, because AttrInfer can
@@ -668,8 +667,6 @@ ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel,
AccessSizeIndex(AccessSizeIndex), IsWrite(IsWrite),
CompileKernel(CompileKernel) {}
-} // namespace llvm
-
static uint64_t getRedzoneSizeForScale(int MappingScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
@@ -677,11 +674,10 @@ static uint64_t getRedzoneSizeForScale(int MappingScale) {
}
static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
- if (TargetTriple.isOSEmscripten()) {
+ if (TargetTriple.isOSEmscripten())
return kAsanEmscriptenCtorAndDtorPriority;
- } else {
+ else
return kAsanCtorAndDtorPriority;
- }
}
static Twine genName(StringRef suffix) {
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 444b390..72e8e50 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -2092,8 +2092,6 @@ bool CHR::run() {
return Changed;
}
-namespace llvm {
-
ControlHeightReductionPass::ControlHeightReductionPass() {
parseCHRFilterFiles();
}
@@ -2116,5 +2114,3 @@ PreservedAnalyses ControlHeightReductionPass::run(
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
-
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index d99f1eb..ddb99a5 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -75,8 +75,6 @@ static cl::opt<bool>
"expressed as branches by widenable conditions"),
cl::init(true));
-namespace {
-
// Get the condition of \p I. It can either be a guard or a conditional branch.
static Value *getCondition(Instruction *I) {
if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
@@ -130,6 +128,8 @@ findInsertionPointForWideCondition(Instruction *WCOrGuard) {
return std::nullopt;
}
+namespace {
+
class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
@@ -328,7 +328,7 @@ public:
/// The entry point for this pass.
bool run();
};
-}
+} // namespace
static bool isSupportedGuardInstruction(const Instruction *Insn) {
if (isGuard(Insn))
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c327311..7ebcc21 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -53,6 +53,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -117,6 +118,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
+static cl::opt<bool> LoopPredicationTraps(
+ "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
+ cl::desc("Predicate conditions that trap in loops with only local writes"));
+
static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
cl::desc("Allow widening of indvars to eliminate s/zext"));
@@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
return Changed;
}
+static bool crashingBBWithoutEffect(const BasicBlock &BB) {
+ return llvm::all_of(BB, [](const Instruction &I) {
+ // TODO: for now this is overly restrictive, to make sure nothing in this
+ // BB can depend on the loop body.
+ // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
+ // load does not have a side effect, but we could have
+ // %a = load ptr, ptr %ptr
+ // %b = load i32, ptr %a
+ // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
+ // dereference by skipping over loop iterations.
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->onlyAccessesInaccessibleMemory())
+ return true;
+ }
+ return isa<UnreachableInst>(I);
+ });
+}
+
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// suggestions on how to improve this? I can obviously bail out for outer
// loops, but that seems less than ideal. MemorySSA can find memory writes,
// is that enough for *all* side effects?
+ bool HasThreadLocalSideEffects = false;
for (BasicBlock *BB : L->blocks())
for (auto &I : *BB)
// TODO:isGuaranteedToTransfer
- if (I.mayHaveSideEffects())
- return false;
+ if (I.mayHaveSideEffects()) {
+ if (!LoopPredicationTraps)
+ return false;
+ HasThreadLocalSideEffects = true;
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ // Simple stores cannot be observed by other threads.
+ // If HasThreadLocalSideEffects is set, we check
+ // crashingBBWithoutEffect to make sure that the crashing BB cannot
+ // observe them either.
+ if (!SI->isSimple())
+ return false;
+ } else {
+ return false;
+ }
+ }
bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
@@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ if (HasThreadLocalSideEffects) {
+ const BasicBlock *Unreachable = nullptr;
+ for (const BasicBlock *Succ : BI->successors()) {
+ if (isa<UnreachableInst>(Succ->getTerminator()))
+ Unreachable = Succ;
+ }
+ // Exit BBs which have one branch back into the loop and another one to
+ // a trap can still be optimized, because local side effects cannot
+ // be observed in the exit case (the trap). We could be smarter about
+ // this, but for now let's pattern-match common cases that directly trap.
+ if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
+ return Changed;
+ }
Value *NewCond;
if (ExitCount == ExactBTC) {
NewCond = L->contains(BI->getSuccessor(0)) ?
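The new gate tolerates simple, thread-local stores in the loop only when the exiting branch's other successor traps without being able to observe those stores. A compact model of the per-instruction test in crashingBBWithoutEffect, over a toy instruction enum; the real code checks isa<UnreachableInst> and CallBase::onlyAccessesInaccessibleMemory():

#include <vector>

// Toy instruction kinds standing in for real IR instructions.
enum class ToyInst { Unreachable, TrapCall, Load, Store };

static bool instHasNoObservableEffect(ToyInst I) {
  switch (I) {
  case ToyInst::Unreachable:
  case ToyInst::TrapCall: // models onlyAccessesInaccessibleMemory() calls
    return true;
  default:
    return false; // a load could read memory the loop body wrote
  }
}

static bool crashingBBWithoutEffect(const std::vector<ToyInst> &BB) {
  for (ToyInst I : BB)
    if (!instHasNoObservableEffect(I))
      return false;
  return true;
}

int main() {
  std::vector<ToyInst> TrapBB = {ToyInst::TrapCall, ToyInst::Unreachable};
  std::vector<ToyInst> LoadBB = {ToyInst::Load, ToyInst::Unreachable};
  return crashingBBWithoutEffect(TrapBB) && !crashingBBWithoutEffect(LoadBB)
             ? 0 : 1;
}
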
diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
index 3c14036e..6fb8197 100644
--- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
@@ -26,8 +26,6 @@
using namespace llvm;
-namespace llvm {
-
static cl::opt<unsigned>
JumpTableSizeThreshold("jump-table-to-switch-size-threshold", cl::Hidden,
cl::desc("Only split jump tables with size less or "
@@ -43,8 +41,8 @@ static cl::opt<unsigned> FunctionSizeThreshold(
"or equal than this threshold."),
cl::init(50));
+namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
} // end namespace llvm
#define DEBUG_TYPE "jump-table-to-switch"
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 9655173..b2c526b 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -116,8 +116,6 @@ STATISTIC(NumIntAssociationsHoisted,
STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
"reassociated and hoisted out of the loop");
-namespace llvm {
-
/// Memory promotion is enabled by default.
static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
@@ -156,7 +154,7 @@ static cl::opt<unsigned> IntAssociationUpperLimit(
// which may not be precise, since optimizeUses is capped. The result is
// correct, but we may not get as "far up" as possible to get which access is
// clobbering the one queried.
-cl::opt<unsigned> SetLicmMssaOptCap(
+cl::opt<unsigned> llvm::SetLicmMssaOptCap(
"licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
"for faster compile. Caps the MemorySSA clobbering calls."));
@@ -164,15 +162,15 @@ cl::opt<unsigned> SetLicmMssaOptCap(
// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
-cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap(
+cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
"licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
"effect. When MSSA in LICM is enabled, then this is the maximum "
"number of accesses allowed to be present in a loop in order to "
"enable memory promotion."));
+namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
} // end namespace llvm
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
@@ -1120,11 +1118,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
return false;
}
-namespace {
/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality
/// concerns such as aliasing or speculation safety.
-bool isHoistableAndSinkableInst(Instruction &I) {
+static bool isHoistableAndSinkableInst(Instruction &I) {
// Only these instructions are hoistable/sinkable.
return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
@@ -1136,8 +1133,8 @@ bool isHoistableAndSinkableInst(Instruction &I) {
}
/// Return true if I is the only Instruction with a MemoryAccess in L.
-bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
- const MemorySSAUpdater &MSSAU) {
+static bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
+ const MemorySSAUpdater &MSSAU) {
for (auto *BB : L->getBlocks())
if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
int NotAPhi = 0;
@@ -1151,7 +1148,6 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
}
return true;
}
-}
static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
BatchAAResults &BAA,
diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index 73f1942..7706de8 100644
--- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -21,8 +21,7 @@
#define DEBUG_TYPE "loop-bound-split"
-namespace llvm {
-
+using namespace llvm;
using namespace PatternMatch;
namespace {
@@ -358,8 +357,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
IRBuilder<> Builder(&PostLoopPreHeader->front());
// Update phi nodes in header of post-loop.
- bool isExitingLatch =
- (L.getExitingBlock() == L.getLoopLatch()) ? true : false;
+ bool isExitingLatch = L.getExitingBlock() == L.getLoopLatch();
Value *ExitingCondLCSSAPhi = nullptr;
for (PHINode &PN : L.getHeader()->phis()) {
// Create LCSSA phi node in preheader of post-loop.
@@ -472,8 +470,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
- Function &F = *L.getHeader()->getParent();
- (void)F;
+ [[maybe_unused]] Function &F = *L.getHeader()->getParent();
LLVM_DEBUG(dbgs() << "Splitting bound of loop in " << F.getName() << ": " << L
<< "\n");
@@ -486,5 +483,3 @@ PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM,
return getLoopPassPreservedAnalyses();
}
-
-} // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 32078b1..7da8586 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -16,8 +16,6 @@
using namespace llvm;
-namespace llvm {
-
/// Explicitly specialize the pass manager's run method to handle loop nest
/// structure updates.
PreservedAnalyses
@@ -185,7 +183,6 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
}
return PA;
}
-} // namespace llvm
void FunctionToLoopPassAdaptor::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
@@ -193,6 +190,7 @@ void FunctionToLoopPassAdaptor::printPipeline(
Pass->printPipeline(OS, MapClassName2PassName);
OS << ')';
}
+
PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
FunctionAnalysisManager &AM) {
// Before we even compute any loop analyses, first run a miniature function
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 448dc2b..f3e6cbf 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -540,8 +540,6 @@ bool LoopVersioningLICM::run(DominatorTree *DT) {
return Changed;
}
-namespace llvm {
-
PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &LAR,
LPMUpdater &U) {
@@ -556,4 +554,3 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 80aa98d..5a8f18a 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -160,9 +160,6 @@ static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true),
//===----------------------------------------------------------------------===//
// Anchor methods.
-namespace llvm {
-namespace GVNExpression {
-
Expression::~Expression() = default;
BasicExpression::~BasicExpression() = default;
CallExpression::~CallExpression() = default;
@@ -171,9 +168,6 @@ StoreExpression::~StoreExpression() = default;
AggregateValueExpression::~AggregateValueExpression() = default;
PHIExpression::~PHIExpression() = default;
-} // end namespace GVNExpression
-} // end namespace llvm
-
namespace {
// Tarjan's SCC finding algorithm with Nuutila's improvements
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index ba58b8e..6d7ce36 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2623,32 +2623,32 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
namespace {
- class ReassociateLegacyPass : public FunctionPass {
- ReassociatePass Impl;
+class ReassociateLegacyPass : public FunctionPass {
+ ReassociatePass Impl;
- public:
- static char ID; // Pass identification, replacement for typeid
+public:
+ static char ID; // Pass identification, replacement for typeid
- ReassociateLegacyPass() : FunctionPass(ID) {
- initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
- }
+ ReassociateLegacyPass() : FunctionPass(ID) {
+ initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
- FunctionAnalysisManager DummyFAM;
- auto PA = Impl.run(F, DummyFAM);
- return !PA.areAllPreserved();
- }
+ FunctionAnalysisManager DummyFAM;
+ auto PA = Impl.run(F, DummyFAM);
+ return !PA.areAllPreserved();
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
- };
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
} // end anonymous namespace
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index aae5d60..25a531c 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -50,9 +50,7 @@ using namespace llvm;
#define DEBUG_TYPE "scalarizer"
-namespace {
-
-BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
+static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
BasicBlock *BB = Itr->getParent();
if (isa<PHINode>(Itr))
Itr = BB->getFirstInsertionPt();
@@ -76,6 +74,8 @@ using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;
+namespace {
+
struct VectorSplit {
// The type of the vector.
FixedVectorType *VecTy = nullptr;
@@ -196,6 +196,7 @@ struct VectorLayout {
// The size of each (non-remainder) fragment in bytes.
uint64_t SplitSize = 0;
};
+} // namespace
static bool isStructOfMatchingFixedVectors(Type *Ty) {
if (!isa<StructType>(Ty))
@@ -268,6 +269,7 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
return Res;
}
+namespace {
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index ebcbd2b..fa66a03 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -149,8 +149,6 @@ bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) {
return Impl.runImpl(F, TTI);
}
-namespace llvm {
-
bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) {
if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence(&F)) {
LLVM_DEBUG(dbgs() << "Not running SpeculativeExecution because "
@@ -328,11 +326,11 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
return true;
}
-FunctionPass *createSpeculativeExecutionPass() {
+FunctionPass *llvm::createSpeculativeExecutionPass() {
return new SpeculativeExecutionLegacyPass();
}
-FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
+FunctionPass *llvm::createSpeculativeExecutionIfHasBranchDivergencePass() {
return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true);
}
@@ -362,4 +360,3 @@ void SpeculativeExecutionPass::printPipeline(
OS << "only-if-divergent-target";
OS << '>';
}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 7d01709..e94ad19 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -716,8 +716,6 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
return Ret;
}
-namespace llvm {
-
PreservedAnalyses
StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
const DataLayout *DL = &F.getDataLayout();
@@ -735,5 +733,3 @@ StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<TargetIRAnalysis>();
return PA;
}
-
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 1d83ddc..89d41f3e 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -192,7 +192,7 @@ struct AllocaDerivedValueTracker {
SmallPtrSet<Instruction *, 32> AllocaUsers;
SmallPtrSet<Instruction *, 32> EscapePoints;
};
-}
+} // namespace
static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
if (F.callsFunctionThatReturnsTwice())
@@ -967,7 +967,7 @@ struct TailCallElim : public FunctionPass {
/*BFI=*/nullptr);
}
};
-}
+} // namespace
char TailCallElim::ID = 0;
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 62a81ba..280eb20 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7957,9 +7957,9 @@ bool VPRecipeBuilder::getScaledReductions(
auto CollectExtInfo = [this, &Exts, &ExtOpTypes,
&ExtKinds](SmallVectorImpl<Value *> &Ops) -> bool {
for (const auto &[I, OpI] : enumerate(Ops)) {
- auto *CI = dyn_cast<ConstantInt>(OpI);
- if (I > 0 && CI &&
- canConstantBeExtended(CI, ExtOpTypes[0], ExtKinds[0])) {
+ const APInt *C;
+ if (I > 0 && match(OpI, m_APInt(C)) &&
+ canConstantBeExtended(C, ExtOpTypes[0], ExtKinds[0])) {
ExtOpTypes[I] = ExtOpTypes[0];
ExtKinds[I] = ExtKinds[0];
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0101942..d167009 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1753,14 +1753,14 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
}
#endif
-bool llvm::canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
TTI::PartialReductionExtendKind ExtKind) {
- APInt TruncatedVal = CI->getValue().trunc(NarrowType->getScalarSizeInBits());
- unsigned WideSize = CI->getType()->getScalarSizeInBits();
+ APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
+ unsigned WideSize = C->getBitWidth();
APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
? TruncatedVal.sext(WideSize)
: TruncatedVal.zext(WideSize);
- return ExtendedVal == CI->getValue();
+ return ExtendedVal == *C;
}
TargetTransformInfo::OperandValueInfo
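canConstantBeExtended now reduces to a pure APInt round-trip: truncate to the narrow width, re-extend with the requested kind, and compare with the original. The check in isolation, using the real llvm::APInt API (the helper name is made up for the sketch):

#include "llvm/ADT/APInt.h"

using llvm::APInt;

// Returns true if C (at its original width) is exactly representable as a
// NarrowBits-wide value under the chosen extension.
bool fitsViaExtension(const APInt &C, unsigned NarrowBits, bool SignExtend) {
  APInt Trunc = C.trunc(NarrowBits);
  APInt Ext = SignExtend ? Trunc.sext(C.getBitWidth())
                         : Trunc.zext(C.getBitWidth());
  return Ext == C;
}

int main() {
  APInt V(32, 200);                       // i32 200
  bool Z = fitsViaExtension(V, 8, false); // true: zext of 0xC8 round-trips
  bool S = fitsViaExtension(V, 8, true);  // false: sext of 0xC8 gives -56
  return (Z && !S) ? 0 : 1;
}
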
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 1580a3b..2aaabd9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -474,7 +474,7 @@ public:
/// Check if a constant \p C can be safely treated as having been extended
/// from a narrower type with the given extension kind.
-bool canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+bool canConstantBeExtended(const APInt *C, Type *NarrowType,
TTI::PartialReductionExtendKind ExtKind);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index ff286f7..d8203e2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -173,10 +173,10 @@ inline int_pred_ty<is_zero_int> m_ZeroInt() {
/// For vectors, this includes constants with undefined elements.
inline int_pred_ty<is_one> m_One() { return int_pred_ty<is_one>(); }
-struct bind_const_int {
- uint64_t &Res;
+struct bind_apint {
+ const APInt *&Res;
- bind_const_int(uint64_t &Res) : Res(Res) {}
+ bind_apint(const APInt *&Res) : Res(Res) {}
bool match(VPValue *VPV) const {
if (!VPV->isLiveIn())
@@ -188,7 +188,23 @@ struct bind_const_int {
const auto *CI = dyn_cast<ConstantInt>(V);
if (!CI)
return false;
- if (auto C = CI->getValue().tryZExtValue()) {
+ Res = &CI->getValue();
+ return true;
+ }
+};
+
+inline bind_apint m_APInt(const APInt *&C) { return C; }
+
+struct bind_const_int {
+ uint64_t &Res;
+
+ bind_const_int(uint64_t &Res) : Res(Res) {}
+
+ bool match(VPValue *VPV) const {
+ const APInt *APConst;
+ if (!bind_apint(APConst).match(VPV))
+ return false;
+ if (auto C = APConst->tryZExtValue()) {
Res = *C;
return true;
}
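bind_apint follows the usual PatternMatch binder shape: a struct holding a reference slot whose match() validates the operand and, on success, stores a pointer to the matched payload. The idiom in miniature, independent of VPlan; all types here are illustrative:

#include <cstdio>
#include <optional>

// A value that may carry a constant payload, standing in for VPValue.
struct ToyValue { std::optional<long> Const; };

// Binder in the PatternMatch style: holds a reference slot, fills it on match.
struct bind_const {
  const long *&Res;
  bind_const(const long *&R) : Res(R) {}
  bool match(const ToyValue &V) const {
    if (!V.Const)
      return false;
    Res = &*V.Const;
    return true;
  }
};

inline bind_const m_Const(const long *&R) { return R; }

int main() {
  ToyValue V{42};
  const long *C = nullptr;
  if (m_Const(C).match(V))
    std::printf("matched %ld\n", *C); // matched 42
}
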
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 775837f..7a98c75 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -341,12 +341,12 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
ExtAType = GetExtendKind(ExtAR);
ExtBType = GetExtendKind(ExtBR);
- if (!ExtBR && Widen->getOperand(1)->isLiveIn()) {
- auto *CI = cast<ConstantInt>(Widen->getOperand(1)->getLiveInIRValue());
- if (canConstantBeExtended(CI, InputTypeA, ExtAType)) {
- InputTypeB = InputTypeA;
- ExtBType = ExtAType;
- }
+ using namespace VPlanPatternMatch;
+ const APInt *C;
+ if (!ExtBR && match(Widen->getOperand(1), m_APInt(C)) &&
+ canConstantBeExtended(C, InputTypeA, ExtAType)) {
+ InputTypeB = InputTypeA;
+ ExtBType = ExtAType;
}
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8d76b2d8..cae9aee8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2122,9 +2122,18 @@ static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
// Return true if we do not know how to (mechanically) hoist a given recipe
- // out of a loop region. Does not address legality concerns such as aliasing
- // or speculation safety.
+ // out of a loop region.
auto CannotHoistRecipe = [](VPRecipeBase &R) {
+ // Assume intrinsics don't alias anything or throw; as long as they're
+ // guaranteed to execute, they're safe to hoist.
+ if (match(&R, m_Intrinsic<Intrinsic::assume>()))
+ return false;
+
+ // TODO: Relax checks in the future, e.g. we could also hoist reads, if
+ // their memory location is not modified in the vector loop.
+ if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
+ return true;
+
// Allocas cannot be hoisted.
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
return RepR && RepR->getOpcode() == Instruction::Alloca;
@@ -2132,17 +2141,18 @@ static void licm(VPlan &Plan) {
// Hoist any loop invariant recipes from the vector loop region to the
// preheader. Perform a shallow traversal of the vector loop region, to
- // exclude recipes in replicate regions.
+ // exclude recipes in replicate regions. Since the top-level blocks in the
+ // vector loop region are guaranteed to execute if the vector pre-header is,
+ // we don't need to check speculation safety.
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ assert(Preheader->getSingleSuccessor() == LoopRegion &&
+ "Expected vector prehader's successor to be the vector loop region");
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
if (CannotHoistRecipe(R))
continue;
- // TODO: Relax checks in the future, e.g. we could also hoist reads, if
- // their memory location is not modified in the vector loop.
- if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
- any_of(R.operands(), [](VPValue *Op) {
+ if (any_of(R.operands(), [](VPValue *Op) {
return !Op->isDefinedOutsideLoopRegions();
}))
continue;