diff options
Diffstat (limited to 'llvm/lib')
62 files changed, 1096 insertions, 431 deletions
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp index b78cc03e..f9bf092 100644 --- a/llvm/lib/Analysis/DXILResource.cpp +++ b/llvm/lib/Analysis/DXILResource.cpp @@ -281,6 +281,38 @@ static StructType *getOrCreateElementStruct(Type *ElemType, StringRef Name) { return StructType::create(ElemType, Name); } +static Type *getTypeWithoutPadding(Type *Ty) { + // Recursively remove padding from structures. + if (auto *ST = dyn_cast<StructType>(Ty)) { + LLVMContext &Ctx = Ty->getContext(); + SmallVector<Type *> ElementTypes; + ElementTypes.reserve(ST->getNumElements()); + for (Type *ElTy : ST->elements()) { + if (isa<PaddingExtType>(ElTy)) + continue; + ElementTypes.push_back(getTypeWithoutPadding(ElTy)); + } + + // Handle explicitly padded cbuffer arrays like { [ n x paddedty ], ty } + if (ElementTypes.size() == 2) + if (auto *AT = dyn_cast<ArrayType>(ElementTypes[0])) + if (ElementTypes[1] == AT->getElementType()) + return ArrayType::get(ElementTypes[1], AT->getNumElements() + 1); + + // If we only have a single element, don't wrap it in a struct. + if (ElementTypes.size() == 1) + return ElementTypes[0]; + + return StructType::get(Ctx, ElementTypes, /*IsPacked=*/false); + } + // Arrays just need to have their element type adjusted. + if (auto *AT = dyn_cast<ArrayType>(Ty)) + return ArrayType::get(getTypeWithoutPadding(AT->getElementType()), + AT->getNumElements()); + // Anything else should be good as is. + return Ty; +} + StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) { SmallString<64> TypeName; @@ -334,14 +366,21 @@ StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) { } case ResourceKind::CBuffer: { auto *RTy = cast<CBufferExtType>(HandleTy); - LayoutExtType *LayoutType = cast<LayoutExtType>(RTy->getResourceType()); - StructType *Ty = cast<StructType>(LayoutType->getWrappedType()); SmallString<64> Name = getResourceKindName(Kind); if (!CBufferName.empty()) { Name.append("."); Name.append(CBufferName); } - return StructType::create(Ty->elements(), Name); + + // TODO: Remove this when we update the frontend to use explicit padding. + if (LayoutExtType *LayoutType = + dyn_cast<LayoutExtType>(RTy->getResourceType())) { + StructType *Ty = cast<StructType>(LayoutType->getWrappedType()); + return StructType::create(Ty->elements(), Name); + } + + return getOrCreateElementStruct( + getTypeWithoutPadding(RTy->getResourceType()), Name); } case ResourceKind::Sampler: { auto *RTy = cast<SamplerExtType>(HandleTy); @@ -454,10 +493,10 @@ uint32_t ResourceTypeInfo::getCBufferSize(const DataLayout &DL) const { Type *ElTy = cast<CBufferExtType>(HandleTy)->getResourceType(); + // TODO: Remove this when we update the frontend to use explicit padding. if (auto *LayoutTy = dyn_cast<LayoutExtType>(ElTy)) return LayoutTy->getSize(); - // TODO: What should we do with unannotated arrays? return DL.getTypeAllocSize(ElTy); } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index a64b93d..442b9d1 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4623,17 +4623,11 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, /// If Expr computes ~A, return A else return nullptr static const SCEV *MatchNotExpr(const SCEV *Expr) { - const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr); - if (!Add || Add->getNumOperands() != 2 || - !Add->getOperand(0)->isAllOnesValue()) - return nullptr; - - const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1)); - if (!AddRHS || AddRHS->getNumOperands() != 2 || - !AddRHS->getOperand(0)->isAllOnesValue()) - return nullptr; - - return AddRHS->getOperand(1); + const SCEV *MulOp; + if (match(Expr, m_scev_Add(m_scev_AllOnes(), + m_scev_Mul(m_scev_AllOnes(), m_SCEV(MulOp))))) + return MulOp; + return nullptr; } /// Return a SCEV corresponding to ~V = -1-V @@ -12220,12 +12214,11 @@ ScalarEvolution::computeConstantDifference(const SCEV *More, const SCEV *Less) { // Try to match a common constant multiply. auto MatchConstMul = [](const SCEV *S) -> std::optional<std::pair<const SCEV *, APInt>> { - auto *M = dyn_cast<SCEVMulExpr>(S); - if (!M || M->getNumOperands() != 2 || - !isa<SCEVConstant>(M->getOperand(0))) - return std::nullopt; - return { - {M->getOperand(1), cast<SCEVConstant>(M->getOperand(0))->getAPInt()}}; + const APInt *C; + const SCEV *Op; + if (match(S, m_scev_Mul(m_scev_APInt(C), m_SCEV(Op)))) + return {{Op, *C}}; + return std::nullopt; }; if (auto MatchedMore = MatchConstMul(More)) { if (auto MatchedLess = MatchConstMul(Less)) { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index 25c1db9..ded4df4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -55,12 +55,10 @@ LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx, } LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx, - LLT NewEltTy) { + ElementCount EC) { return [=](const LegalityQuery &Query) { const LLT OldTy = Query.Types[TypeIdx]; - ElementCount NewEltCount = NewEltTy.isVector() ? NewEltTy.getElementCount() - : ElementCount::getFixed(1); - return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount)); + return std::make_pair(TypeIdx, OldTy.changeElementCount(EC)); }; } diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 055fdc6..ca82857 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -818,8 +818,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, if (!DefMI) return false; - const TargetMachine& TM = DefMI->getMF()->getTarget(); - if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) + if (DefMI->getFlag(MachineInstr::FmNoNans)) return true; // If the value is a constant, we can obviously see if it is a NaN or not. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 0f2b518..cb0038c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3971,8 +3971,14 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) { } void SelectionDAGBuilder::visitPtrToAddr(const User &I) { - // FIXME: this is not correct for pointers with addr width != pointer width - visitPtrToInt(I); + SDValue N = getValue(I.getOperand(0)); + // By definition the type of the ptrtoaddr must be equal to the address type. + const auto &TLI = DAG.getTargetLoweringInfo(); + EVT AddrVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + // The address width must be smaller or equal to the pointer representation + // width, so we lower ptrtoaddr as a truncate (possibly folded to a no-op). + N = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), AddrVT, N); + setValue(&I, N); } void SelectionDAGBuilder::visitPtrToInt(const User &I) { diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt index 0ffe3ae..f343925 100644 --- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -56,6 +56,7 @@ add_llvm_component_library(LLVMOrcJIT SectCreate.cpp SelfExecutorProcessControl.cpp SimpleRemoteEPC.cpp + SimpleRemoteMemoryMapper.cpp Speculation.cpp SpeculateAnalyses.cpp ExecutorProcessControl.cpp diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp index 50e6b25..0833af7 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp @@ -57,16 +57,17 @@ public: std::swap(FR.Actions, G.allocActions()); Parent.EPC.callSPSWrapperAsync< - rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>( - Parent.SAs.Finalize, + rt::SPSSimpleExecutorMemoryManagerInitializeSignature>( + Parent.SAs.Initialize, [OnFinalize = std::move(OnFinalize), AllocAddr = this->AllocAddr]( - Error SerializationErr, Error FinalizeErr) mutable { + Error SerializationErr, + Expected<ExecutorAddr> InitializeKey) mutable { // FIXME: Release abandoned alloc. if (SerializationErr) { - cantFail(std::move(FinalizeErr)); + cantFail(InitializeKey.takeError()); OnFinalize(std::move(SerializationErr)); - } else if (FinalizeErr) - OnFinalize(std::move(FinalizeErr)); + } else if (!InitializeKey) + OnFinalize(InitializeKey.takeError()); else OnFinalize(FinalizedAlloc(AllocAddr)); }, @@ -76,8 +77,8 @@ public: void abandon(OnAbandonedFunction OnAbandoned) override { // FIXME: Return memory to pool instead. Parent.EPC.callSPSWrapperAsync< - rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>( - Parent.SAs.Deallocate, + rt::SPSSimpleExecutorMemoryManagerReleaseSignature>( + Parent.SAs.Release, [OnAbandoned = std::move(OnAbandoned)](Error SerializationErr, Error DeallocateErr) mutable { if (SerializationErr) { @@ -123,9 +124,8 @@ void EPCGenericJITLinkMemoryManager::allocate(const JITLinkDylib *JD, void EPCGenericJITLinkMemoryManager::deallocate( std::vector<FinalizedAlloc> Allocs, OnDeallocatedFunction OnDeallocated) { - EPC.callSPSWrapperAsync< - rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>( - SAs.Deallocate, + EPC.callSPSWrapperAsync<rt::SPSSimpleExecutorMemoryManagerReleaseSignature>( + SAs.Release, [OnDeallocated = std::move(OnDeallocated)](Error SerErr, Error DeallocErr) mutable { if (SerErr) { diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp index fec7062..cc72488 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp @@ -25,9 +25,9 @@ EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols( if (auto Err = EPC.getBootstrapSymbols( {{SAs.Instance, rt::SimpleExecutorMemoryManagerInstanceName}, {SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName}, - {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName}, - {SAs.Deallocate, - rt::SimpleExecutorMemoryManagerDeallocateWrapperName}, + {SAs.Initialize, + rt::SimpleExecutorMemoryManagerInitializeWrapperName}, + {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName}, {SAs.RegisterEHFrame, rt::RegisterEHFrameSectionAllocActionName}, {SAs.DeregisterEHFrame, rt::DeregisterEHFrameSectionAllocActionName}})) @@ -48,7 +48,7 @@ EPCGenericRTDyldMemoryManager::~EPCGenericRTDyldMemoryManager() { Error Err = Error::success(); if (auto Err2 = EPC.callSPSWrapper< - rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>( + rt::SPSSimpleExecutorMemoryManagerReleaseSignature>( SAs.Reserve, Err, SAs.Instance, FinalizedAllocs)) { // FIXME: Report errors through EPC once that functionality is available. logAllUnhandledErrors(std::move(Err2), errs(), ""); @@ -267,10 +267,10 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) { // We'll also need to make an extra allocation for the eh-frame wrapper call // arguments. - Error FinalizeErr = Error::success(); + Expected<ExecutorAddr> InitializeKey((ExecutorAddr())); if (auto Err = EPC.callSPSWrapper< - rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>( - SAs.Finalize, FinalizeErr, SAs.Instance, std::move(FR))) { + rt::SPSSimpleExecutorMemoryManagerInitializeSignature>( + SAs.Initialize, InitializeKey, SAs.Instance, std::move(FR))) { std::lock_guard<std::mutex> Lock(M); this->ErrMsg = toString(std::move(Err)); dbgs() << "Serialization error: " << this->ErrMsg << "\n"; @@ -278,9 +278,9 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) { *ErrMsg = this->ErrMsg; return true; } - if (FinalizeErr) { + if (!InitializeKey) { std::lock_guard<std::mutex> Lock(M); - this->ErrMsg = toString(std::move(FinalizeErr)); + this->ErrMsg = toString(InitializeKey.takeError()); dbgs() << "Finalization error: " << this->ErrMsg << "\n"; if (ErrMsg) *ErrMsg = this->ErrMsg; diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp index 26e8f53..cc99d3c 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp @@ -23,10 +23,12 @@ const char *SimpleExecutorMemoryManagerInstanceName = "__llvm_orc_SimpleExecutorMemoryManager_Instance"; const char *SimpleExecutorMemoryManagerReserveWrapperName = "__llvm_orc_SimpleExecutorMemoryManager_reserve_wrapper"; -const char *SimpleExecutorMemoryManagerFinalizeWrapperName = - "__llvm_orc_SimpleExecutorMemoryManager_finalize_wrapper"; -const char *SimpleExecutorMemoryManagerDeallocateWrapperName = - "__llvm_orc_SimpleExecutorMemoryManager_deallocate_wrapper"; +const char *SimpleExecutorMemoryManagerInitializeWrapperName = + "__llvm_orc_SimpleExecutorMemoryManager_initialize_wrapper"; +const char *SimpleExecutorMemoryManagerDeinitializeWrapperName = + "__llvm_orc_SimpleExecutorMemoryManager_deinitialize_wrapper"; +const char *SimpleExecutorMemoryManagerReleaseWrapperName = + "__llvm_orc_SimpleExecutorMemoryManager_release_wrapper"; const char *ExecutorSharedMemoryMapperServiceInstanceName = "__llvm_orc_ExecutorSharedMemoryMapperService_Instance"; diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp index 87d7578..dec1df7 100644 --- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp +++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp @@ -216,9 +216,9 @@ SimpleRemoteEPC::createDefaultMemoryManager(SimpleRemoteEPC &SREPC) { if (auto Err = SREPC.getBootstrapSymbols( {{SAs.Allocator, rt::SimpleExecutorMemoryManagerInstanceName}, {SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName}, - {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName}, - {SAs.Deallocate, - rt::SimpleExecutorMemoryManagerDeallocateWrapperName}})) + {SAs.Initialize, + rt::SimpleExecutorMemoryManagerInitializeWrapperName}, + {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName}})) return std::move(Err); return std::make_unique<EPCGenericJITLinkMemoryManager>(SREPC, SAs); diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp new file mode 100644 index 0000000..b82de3f --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp @@ -0,0 +1,104 @@ +//===---- SimpleRemoteMemoryMapper.cpp - Remote memory mapper ----*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.h" + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" + +namespace llvm::orc { + +SimpleRemoteMemoryMapper::SimpleRemoteMemoryMapper(ExecutorProcessControl &EPC, + SymbolAddrs SAs) + : EPC(EPC), SAs(SAs) {} + +void SimpleRemoteMemoryMapper::reserve(size_t NumBytes, + OnReservedFunction OnReserved) { + EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapReserveSignature>( + SAs.Reserve, + [NumBytes, OnReserved = std::move(OnReserved)]( + Error SerializationErr, Expected<ExecutorAddr> Result) mutable { + if (SerializationErr) { + cantFail(Result.takeError()); + return OnReserved(std::move(SerializationErr)); + } + + if (Result) + OnReserved(ExecutorAddrRange(*Result, NumBytes)); + else + OnReserved(Result.takeError()); + }, + SAs.Instance, static_cast<uint64_t>(NumBytes)); +} + +char *SimpleRemoteMemoryMapper::prepare(jitlink::LinkGraph &G, + ExecutorAddr Addr, size_t ContentSize) { + return G.allocateBuffer(ContentSize).data(); +} + +void SimpleRemoteMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, + OnInitializedFunction OnInitialized) { + + tpctypes::FinalizeRequest FR; + + std::swap(FR.Actions, AI.Actions); + FR.Segments.reserve(AI.Segments.size()); + + for (auto Seg : AI.Segments) + FR.Segments.push_back({Seg.AG, AI.MappingBase + Seg.Offset, + Seg.ContentSize + Seg.ZeroFillSize, + ArrayRef<char>(Seg.WorkingMem, Seg.ContentSize)}); + + EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapInitializeSignature>( + SAs.Initialize, + [OnInitialized = std::move(OnInitialized)]( + Error SerializationErr, Expected<ExecutorAddr> Result) mutable { + if (SerializationErr) { + cantFail(Result.takeError()); + return OnInitialized(std::move(SerializationErr)); + } + + OnInitialized(std::move(Result)); + }, + SAs.Instance, std::move(FR)); +} + +void SimpleRemoteMemoryMapper::deinitialize( + ArrayRef<ExecutorAddr> Allocations, + MemoryMapper::OnDeinitializedFunction OnDeinitialized) { + EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapDeinitializeSignature>( + SAs.Deinitialize, + [OnDeinitialized = std::move(OnDeinitialized)](Error SerializationErr, + Error Result) mutable { + if (SerializationErr) { + cantFail(std::move(Result)); + return OnDeinitialized(std::move(SerializationErr)); + } + + OnDeinitialized(std::move(Result)); + }, + SAs.Instance, Allocations); +} + +void SimpleRemoteMemoryMapper::release(ArrayRef<ExecutorAddr> Bases, + OnReleasedFunction OnReleased) { + EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapReleaseSignature>( + SAs.Release, + [OnReleased = std::move(OnReleased)](Error SerializationErr, + Error Result) mutable { + if (SerializationErr) { + cantFail(std::move(Result)); + return OnReleased(std::move(SerializationErr)); + } + + return OnReleased(std::move(Result)); + }, + SAs.Instance, Bases); +} + +} // namespace llvm::orc diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp index 3cdffb8..fe881a1 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp @@ -8,6 +8,7 @@ #include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" #include "llvm/Support/FormatVariadic.h" @@ -18,166 +19,167 @@ namespace orc { namespace rt_bootstrap { SimpleExecutorMemoryManager::~SimpleExecutorMemoryManager() { - assert(Allocations.empty() && "shutdown not called?"); + assert(Slabs.empty() && "shutdown not called?"); } -Expected<ExecutorAddr> SimpleExecutorMemoryManager::allocate(uint64_t Size) { +Expected<ExecutorAddr> SimpleExecutorMemoryManager::reserve(uint64_t Size) { std::error_code EC; auto MB = sys::Memory::allocateMappedMemory( Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); if (EC) return errorCodeToError(EC); std::lock_guard<std::mutex> Lock(M); - assert(!Allocations.count(MB.base()) && "Duplicate allocation addr"); - Allocations[MB.base()].Size = Size; + assert(!Slabs.count(MB.base()) && "Duplicate allocation addr"); + Slabs[MB.base()].Size = Size; return ExecutorAddr::fromPtr(MB.base()); } -Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) { - ExecutorAddr Base(~0ULL); +Expected<ExecutorAddr> +SimpleExecutorMemoryManager::initialize(tpctypes::FinalizeRequest &FR) { std::vector<shared::WrapperFunctionCall> DeallocationActions; - size_t SuccessfulFinalizationActions = 0; if (FR.Segments.empty()) { - // NOTE: Finalizing nothing is currently a no-op. Should it be an error? if (FR.Actions.empty()) - return Error::success(); + return make_error<StringError>("Finalization request is empty", + inconvertibleErrorCode()); else return make_error<StringError>("Finalization actions attached to empty " "finalization request", inconvertibleErrorCode()); } - for (auto &Seg : FR.Segments) - Base = std::min(Base, Seg.Addr); - - for (auto &ActPair : FR.Actions) - if (ActPair.Dealloc) - DeallocationActions.push_back(ActPair.Dealloc); - - // Get the Allocation for this finalization. - size_t AllocSize = 0; - { - std::lock_guard<std::mutex> Lock(M); - auto I = Allocations.find(Base.toPtr<void *>()); - if (I == Allocations.end()) - return make_error<StringError>("Attempt to finalize unrecognized " - "allocation " + - formatv("{0:x}", Base.getValue()), - inconvertibleErrorCode()); - AllocSize = I->second.Size; - I->second.DeallocationActions = std::move(DeallocationActions); - } - ExecutorAddr AllocEnd = Base + ExecutorAddrDiff(AllocSize); - - // Bail-out function: this will run deallocation actions corresponding to any - // completed finalization actions, then deallocate memory. - auto BailOut = [&](Error Err) { - std::pair<void *, Allocation> AllocToDestroy; - - // Get allocation to destroy. - { - std::lock_guard<std::mutex> Lock(M); - auto I = Allocations.find(Base.toPtr<void *>()); - - // Check for missing allocation (effective a double free). - if (I == Allocations.end()) - return joinErrors( - std::move(Err), - make_error<StringError>("No allocation entry found " - "for " + - formatv("{0:x}", Base.getValue()), - inconvertibleErrorCode())); - AllocToDestroy = std::move(*I); - Allocations.erase(I); - } + ExecutorAddrRange RR(FR.Segments.front().Addr, FR.Segments.front().Addr); - // Run deallocation actions for all completed finalization actions. - while (SuccessfulFinalizationActions) - Err = - joinErrors(std::move(Err), FR.Actions[--SuccessfulFinalizationActions] - .Dealloc.runWithSPSRetErrorMerged()); - - // Deallocate memory. - sys::MemoryBlock MB(AllocToDestroy.first, AllocToDestroy.second.Size); - if (auto EC = sys::Memory::releaseMappedMemory(MB)) - Err = joinErrors(std::move(Err), errorCodeToError(EC)); - - return Err; - }; + std::vector<sys::MemoryBlock> MBsToReset; + auto ResetMBs = make_scope_exit([&]() { + for (auto &MB : MBsToReset) + sys::Memory::protectMappedMemory(MB, sys::Memory::MF_READ | + sys::Memory::MF_WRITE); + sys::Memory::InvalidateInstructionCache(RR.Start.toPtr<void *>(), + RR.size()); + }); // Copy content and apply permissions. for (auto &Seg : FR.Segments) { + RR.Start = std::min(RR.Start, Seg.Addr); + RR.End = std::max(RR.End, Seg.Addr + Seg.Size); // Check segment ranges. if (LLVM_UNLIKELY(Seg.Size < Seg.Content.size())) - return BailOut(make_error<StringError>( + return make_error<StringError>( formatv("Segment {0:x} content size ({1:x} bytes) " "exceeds segment size ({2:x} bytes)", Seg.Addr.getValue(), Seg.Content.size(), Seg.Size), - inconvertibleErrorCode())); + inconvertibleErrorCode()); ExecutorAddr SegEnd = Seg.Addr + ExecutorAddrDiff(Seg.Size); - if (LLVM_UNLIKELY(Seg.Addr < Base || SegEnd > AllocEnd)) - return BailOut(make_error<StringError>( + if (LLVM_UNLIKELY(Seg.Addr < RR.Start || SegEnd > RR.End)) + return make_error<StringError>( formatv("Segment {0:x} -- {1:x} crosses boundary of " "allocation {2:x} -- {3:x}", - Seg.Addr.getValue(), SegEnd.getValue(), Base.getValue(), - AllocEnd.getValue()), - inconvertibleErrorCode())); + Seg.Addr, SegEnd, RR.Start, RR.End), + inconvertibleErrorCode()); char *Mem = Seg.Addr.toPtr<char *>(); if (!Seg.Content.empty()) memcpy(Mem, Seg.Content.data(), Seg.Content.size()); memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size()); assert(Seg.Size <= std::numeric_limits<size_t>::max()); + + sys::MemoryBlock MB(Mem, Seg.Size); if (auto EC = sys::Memory::protectMappedMemory( - {Mem, static_cast<size_t>(Seg.Size)}, - toSysMemoryProtectionFlags(Seg.RAG.Prot))) - return BailOut(errorCodeToError(EC)); + MB, toSysMemoryProtectionFlags(Seg.RAG.Prot))) + return errorCodeToError(EC); + + MBsToReset.push_back(MB); + if ((Seg.RAG.Prot & MemProt::Exec) == MemProt::Exec) sys::Memory::InvalidateInstructionCache(Mem, Seg.Size); } - // Run finalization actions. - for (auto &ActPair : FR.Actions) { - if (auto Err = ActPair.Finalize.runWithSPSRetErrorMerged()) - return BailOut(std::move(Err)); - ++SuccessfulFinalizationActions; + auto DeallocActions = runFinalizeActions(FR.Actions); + if (!DeallocActions) + return DeallocActions.takeError(); + + { + std::lock_guard<std::mutex> Lock(M); + auto Region = createRegionInfo(RR, "In initialize"); + if (!Region) + return Region.takeError(); + Region->DeallocActions = std::move(*DeallocActions); } - return Error::success(); + // Successful initialization. + ResetMBs.release(); + + return RR.Start; } -Error SimpleExecutorMemoryManager::deallocate( - const std::vector<ExecutorAddr> &Bases) { - std::vector<std::pair<void *, Allocation>> AllocPairs; - AllocPairs.reserve(Bases.size()); +Error SimpleExecutorMemoryManager::deinitialize( + const std::vector<ExecutorAddr> &InitKeys) { + Error Err = Error::success(); - // Get allocation to destroy. + for (auto &KeyAddr : llvm::reverse(InitKeys)) { + std::vector<shared::WrapperFunctionCall> DeallocActions; + { + std::scoped_lock<std::mutex> Lock(M); + auto Slab = getSlabInfo(KeyAddr, "In deinitialize"); + if (!Slab) { + Err = joinErrors(std::move(Err), Slab.takeError()); + continue; + } + + auto RI = getRegionInfo(*Slab, KeyAddr, "In deinitialize"); + if (!RI) { + Err = joinErrors(std::move(Err), RI.takeError()); + continue; + } + + DeallocActions = std::move(RI->DeallocActions); + } + + Err = joinErrors(std::move(Err), + runDeallocActions(std::move(DeallocActions))); + } + + return Err; +} + +Error SimpleExecutorMemoryManager::release( + const std::vector<ExecutorAddr> &Bases) { Error Err = Error::success(); - { - std::lock_guard<std::mutex> Lock(M); - for (auto &Base : Bases) { - auto I = Allocations.find(Base.toPtr<void *>()); - - // Check for missing allocation (effective a double free). - if (I != Allocations.end()) { - AllocPairs.push_back(std::move(*I)); - Allocations.erase(I); - } else + + // TODO: Prohibit new initializations within the slabs being removed? + for (auto &Base : llvm::reverse(Bases)) { + std::vector<shared::WrapperFunctionCall> DeallocActions; + sys::MemoryBlock MB; + + { + std::scoped_lock<std::mutex> Lock(M); + + auto SlabI = Slabs.find(Base.toPtr<void *>()); + if (SlabI == Slabs.end()) { Err = joinErrors( std::move(Err), - make_error<StringError>("No allocation entry found " - "for " + - formatv("{0:x}", Base.getValue()), + make_error<StringError>("In release, " + formatv("{0:x}", Base) + + " is not part of any reserved " + "address range", inconvertibleErrorCode())); + continue; + } + + auto &Slab = SlabI->second; + + for (auto &[Addr, Region] : Slab.Regions) + llvm::copy(Region.DeallocActions, back_inserter(DeallocActions)); + + MB = {Base.toPtr<void *>(), Slab.Size}; + + Slabs.erase(SlabI); } - } - while (!AllocPairs.empty()) { - auto &P = AllocPairs.back(); - Err = joinErrors(std::move(Err), deallocateImpl(P.first, P.second)); - AllocPairs.pop_back(); + Err = joinErrors(std::move(Err), runDeallocActions(DeallocActions)); + if (auto EC = sys::Memory::releaseMappedMemory(MB)) + Err = joinErrors(std::move(Err), errorCodeToError(EC)); } return Err; @@ -185,16 +187,15 @@ Error SimpleExecutorMemoryManager::deallocate( Error SimpleExecutorMemoryManager::shutdown() { - AllocationsMap AM; + // TODO: Prevent new allocations during shutdown. + std::vector<ExecutorAddr> Bases; { - std::lock_guard<std::mutex> Lock(M); - AM = std::move(Allocations); + std::scoped_lock<std::mutex> Lock(M); + for (auto &[Base, Slab] : Slabs) + Bases.push_back(ExecutorAddr::fromPtr(Base)); } - Error Err = Error::success(); - for (auto &KV : AM) - Err = joinErrors(std::move(Err), deallocateImpl(KV.first, KV.second)); - return Err; + return release(Bases); } void SimpleExecutorMemoryManager::addBootstrapSymbols( @@ -202,58 +203,150 @@ void SimpleExecutorMemoryManager::addBootstrapSymbols( M[rt::SimpleExecutorMemoryManagerInstanceName] = ExecutorAddr::fromPtr(this); M[rt::SimpleExecutorMemoryManagerReserveWrapperName] = ExecutorAddr::fromPtr(&reserveWrapper); - M[rt::SimpleExecutorMemoryManagerFinalizeWrapperName] = - ExecutorAddr::fromPtr(&finalizeWrapper); - M[rt::SimpleExecutorMemoryManagerDeallocateWrapperName] = - ExecutorAddr::fromPtr(&deallocateWrapper); + M[rt::SimpleExecutorMemoryManagerInitializeWrapperName] = + ExecutorAddr::fromPtr(&initializeWrapper); + M[rt::SimpleExecutorMemoryManagerDeinitializeWrapperName] = + ExecutorAddr::fromPtr(&deinitializeWrapper); + M[rt::SimpleExecutorMemoryManagerReleaseWrapperName] = + ExecutorAddr::fromPtr(&releaseWrapper); } -Error SimpleExecutorMemoryManager::deallocateImpl(void *Base, Allocation &A) { - Error Err = Error::success(); +Expected<SimpleExecutorMemoryManager::SlabInfo &> +SimpleExecutorMemoryManager::getSlabInfo(ExecutorAddr A, StringRef Context) { + auto MakeBadSlabError = [&]() { + return make_error<StringError>( + Context + ", address " + formatv("{0:x}", A) + + " is not part of any reserved address range", + inconvertibleErrorCode()); + }; - while (!A.DeallocationActions.empty()) { - Err = joinErrors(std::move(Err), - A.DeallocationActions.back().runWithSPSRetErrorMerged()); - A.DeallocationActions.pop_back(); + auto I = Slabs.upper_bound(A.toPtr<void *>()); + if (I == Slabs.begin()) + return MakeBadSlabError(); + --I; + if (!ExecutorAddrRange(ExecutorAddr::fromPtr(I->first), I->second.Size) + .contains(A)) + return MakeBadSlabError(); + + return I->second; +} + +Expected<SimpleExecutorMemoryManager::SlabInfo &> +SimpleExecutorMemoryManager::getSlabInfo(ExecutorAddrRange R, + StringRef Context) { + auto MakeBadSlabError = [&]() { + return make_error<StringError>( + Context + ", range " + formatv("{0:x}", R) + + " is not part of any reserved address range", + inconvertibleErrorCode()); + }; + + auto I = Slabs.upper_bound(R.Start.toPtr<void *>()); + if (I == Slabs.begin()) + return MakeBadSlabError(); + --I; + if (!ExecutorAddrRange(ExecutorAddr::fromPtr(I->first), I->second.Size) + .contains(R)) + return MakeBadSlabError(); + + return I->second; +} + +Expected<SimpleExecutorMemoryManager::RegionInfo &> +SimpleExecutorMemoryManager::createRegionInfo(ExecutorAddrRange R, + StringRef Context) { + + auto Slab = getSlabInfo(R, Context); + if (!Slab) + return Slab.takeError(); + + auto MakeBadRegionError = [&](ExecutorAddrRange Other, bool Prev) { + return make_error<StringError>(Context + ", region " + formatv("{0:x}", R) + + " overlaps " + + (Prev ? "previous" : "following") + + " region " + formatv("{0:x}", Other), + inconvertibleErrorCode()); + }; + + auto I = Slab->Regions.upper_bound(R.Start); + if (I != Slab->Regions.begin()) { + auto J = std::prev(I); + ExecutorAddrRange PrevRange(J->first, J->second.Size); + if (PrevRange.overlaps(R)) + return MakeBadRegionError(PrevRange, true); + } + if (I != Slab->Regions.end()) { + ExecutorAddrRange NextRange(I->first, I->second.Size); + if (NextRange.overlaps(R)) + return MakeBadRegionError(NextRange, false); } - sys::MemoryBlock MB(Base, A.Size); - if (auto EC = sys::Memory::releaseMappedMemory(MB)) - Err = joinErrors(std::move(Err), errorCodeToError(EC)); + auto &RInfo = Slab->Regions[R.Start]; + RInfo.Size = R.size(); + return RInfo; +} - return Err; +Expected<SimpleExecutorMemoryManager::RegionInfo &> +SimpleExecutorMemoryManager::getRegionInfo(SlabInfo &Slab, ExecutorAddr A, + StringRef Context) { + auto I = Slab.Regions.find(A); + if (I == Slab.Regions.end()) + return make_error<StringError>( + Context + ", address " + formatv("{0:x}", A) + + " does not correspond to the start of any initialized region", + inconvertibleErrorCode()); + + return I->second; +} + +Expected<SimpleExecutorMemoryManager::RegionInfo &> +SimpleExecutorMemoryManager::getRegionInfo(ExecutorAddr A, StringRef Context) { + auto Slab = getSlabInfo(A, Context); + if (!Slab) + return Slab.takeError(); + + return getRegionInfo(*Slab, A, Context); } llvm::orc::shared::CWrapperFunctionResult SimpleExecutorMemoryManager::reserveWrapper(const char *ArgData, size_t ArgSize) { - return shared::WrapperFunction< - rt::SPSSimpleExecutorMemoryManagerReserveSignature>:: + return shared::WrapperFunction<rt::SPSSimpleRemoteMemoryMapReserveSignature>:: handle(ArgData, ArgSize, shared::makeMethodWrapperHandler( - &SimpleExecutorMemoryManager::allocate)) + &SimpleExecutorMemoryManager::reserve)) + .release(); +} + +llvm::orc::shared::CWrapperFunctionResult +SimpleExecutorMemoryManager::initializeWrapper(const char *ArgData, + size_t ArgSize) { + return shared:: + WrapperFunction<rt::SPSSimpleRemoteMemoryMapInitializeSignature>::handle( + ArgData, ArgSize, + shared::makeMethodWrapperHandler( + &SimpleExecutorMemoryManager::initialize)) .release(); } llvm::orc::shared::CWrapperFunctionResult -SimpleExecutorMemoryManager::finalizeWrapper(const char *ArgData, - size_t ArgSize) { +SimpleExecutorMemoryManager::deinitializeWrapper(const char *ArgData, + size_t ArgSize) { return shared::WrapperFunction< - rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>:: + rt::SPSSimpleRemoteMemoryMapDeinitializeSignature>:: handle(ArgData, ArgSize, shared::makeMethodWrapperHandler( - &SimpleExecutorMemoryManager::finalize)) + &SimpleExecutorMemoryManager::deinitialize)) .release(); } llvm::orc::shared::CWrapperFunctionResult -SimpleExecutorMemoryManager::deallocateWrapper(const char *ArgData, - size_t ArgSize) { - return shared::WrapperFunction< - rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>:: +SimpleExecutorMemoryManager::releaseWrapper(const char *ArgData, + size_t ArgSize) { + return shared::WrapperFunction<rt::SPSSimpleRemoteMemoryMapReleaseSignature>:: handle(ArgData, ArgSize, shared::makeMethodWrapperHandler( - &SimpleExecutorMemoryManager::deallocate)) + &SimpleExecutorMemoryManager::release)) .release(); } diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 9db48e8..0e9535d 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -1034,6 +1034,10 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) { } // DirectX resources + if (Name == "dx.Padding") + return TargetTypeInfo( + ArrayType::get(Type::getInt8Ty(C), Ty->getIntParameter(0)), + TargetExtType::CanBeGlobal); if (Name.starts_with("dx.")) return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::CanBeGlobal, TargetExtType::CanBeLocal, diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 421d6603..c3a27c9 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -488,6 +488,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCaseMask(EF_HEXAGON_MACH_V5, EF_HEXAGON_MACH); BCaseMask(EF_HEXAGON_MACH_V55, EF_HEXAGON_MACH); BCaseMask(EF_HEXAGON_MACH_V60, EF_HEXAGON_MACH); + BCaseMask(EF_HEXAGON_MACH_V61, EF_HEXAGON_MACH); BCaseMask(EF_HEXAGON_MACH_V62, EF_HEXAGON_MACH); BCaseMask(EF_HEXAGON_MACH_V65, EF_HEXAGON_MACH); BCaseMask(EF_HEXAGON_MACH_V66, EF_HEXAGON_MACH); @@ -499,12 +500,21 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCaseMask(EF_HEXAGON_MACH_V71T, EF_HEXAGON_MACH); BCaseMask(EF_HEXAGON_MACH_V73, EF_HEXAGON_MACH); BCaseMask(EF_HEXAGON_MACH_V75, EF_HEXAGON_MACH); + BCaseMask(EF_HEXAGON_MACH_V77, EF_HEXAGON_MACH); + BCaseMask(EF_HEXAGON_MACH_V79, EF_HEXAGON_MACH); + BCaseMask(EF_HEXAGON_MACH_V81, EF_HEXAGON_MACH); + BCaseMask(EF_HEXAGON_MACH_V83, EF_HEXAGON_MACH); + BCaseMask(EF_HEXAGON_MACH_V85, EF_HEXAGON_MACH); + BCaseMask(EF_HEXAGON_MACH_V87, EF_HEXAGON_MACH); + BCaseMask(EF_HEXAGON_MACH_V89, EF_HEXAGON_MACH); + BCaseMask(EF_HEXAGON_MACH_V91, EF_HEXAGON_MACH); BCaseMask(EF_HEXAGON_ISA_V2, EF_HEXAGON_ISA); BCaseMask(EF_HEXAGON_ISA_V3, EF_HEXAGON_ISA); BCaseMask(EF_HEXAGON_ISA_V4, EF_HEXAGON_ISA); BCaseMask(EF_HEXAGON_ISA_V5, EF_HEXAGON_ISA); BCaseMask(EF_HEXAGON_ISA_V55, EF_HEXAGON_ISA); BCaseMask(EF_HEXAGON_ISA_V60, EF_HEXAGON_ISA); + BCaseMask(EF_HEXAGON_ISA_V61, EF_HEXAGON_ISA); BCaseMask(EF_HEXAGON_ISA_V62, EF_HEXAGON_ISA); BCaseMask(EF_HEXAGON_ISA_V65, EF_HEXAGON_ISA); BCaseMask(EF_HEXAGON_ISA_V66, EF_HEXAGON_ISA); @@ -514,6 +524,14 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCaseMask(EF_HEXAGON_ISA_V71, EF_HEXAGON_ISA); BCaseMask(EF_HEXAGON_ISA_V73, EF_HEXAGON_ISA); BCaseMask(EF_HEXAGON_ISA_V75, EF_HEXAGON_ISA); + BCaseMask(EF_HEXAGON_ISA_V77, EF_HEXAGON_ISA); + BCaseMask(EF_HEXAGON_ISA_V79, EF_HEXAGON_ISA); + BCaseMask(EF_HEXAGON_ISA_V81, EF_HEXAGON_ISA); + BCaseMask(EF_HEXAGON_ISA_V83, EF_HEXAGON_ISA); + BCaseMask(EF_HEXAGON_ISA_V85, EF_HEXAGON_ISA); + BCaseMask(EF_HEXAGON_ISA_V87, EF_HEXAGON_ISA); + BCaseMask(EF_HEXAGON_ISA_V89, EF_HEXAGON_ISA); + BCaseMask(EF_HEXAGON_ISA_V91, EF_HEXAGON_ISA); break; case ELF::EM_AVR: BCaseMask(EF_AVR_ARCH_AVR1, EF_AVR_ARCH_MASK); diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index dad0fa3..648d6a5 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -354,8 +354,8 @@ namespace llvm { /// Emulates hitting "retry" from an "abort, retry, ignore" CRT debug report /// dialog. "retry" raises an exception which ultimately triggers our stack /// dumper. -static LLVM_ATTRIBUTE_UNUSED int -AvoidMessageBoxHook(int ReportType, char *Message, int *Return) { +[[maybe_unused]] static int AvoidMessageBoxHook(int ReportType, char *Message, + int *Return) { // Set *Return to the retry code for the return value of _CrtDbgReport: // http://msdn.microsoft.com/en-us/library/8hyw4sy7(v=vs.71).aspx // This may also trigger just-in-time debugging via DebugBreak(). diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index be2f2e4..662d84b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1561,6 +1561,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_OR, VT, Custom); + setOperationAction(ISD::VECREDUCE_MUL, VT, Custom); setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); @@ -1717,6 +1718,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom); setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom); setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); @@ -7775,6 +7777,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::VECREDUCE_FMAXIMUM: case ISD::VECREDUCE_FMINIMUM: return LowerVECREDUCE(Op, DAG); + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_FMUL: + return LowerVECREDUCE_MUL(Op, DAG); case ISD::ATOMIC_LOAD_AND: return LowerATOMIC_LOAD_AND(Op, DAG); case ISD::DYNAMIC_STACKALLOC: @@ -16794,6 +16799,33 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, } } +SDValue AArch64TargetLowering::LowerVECREDUCE_MUL(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + assert(SrcVT.isScalableVector() && "Unexpected operand type!"); + + SDVTList SrcVTs = DAG.getVTList(SrcVT, SrcVT); + unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); + SDValue Identity = DAG.getNeutralElement(BaseOpc, DL, SrcVT, Op->getFlags()); + + // Whilst we don't know the size of the vector we do know the maximum size so + // can perform a tree reduction with an identity vector, which means once we + // arrive at the result the remaining stages (when the vector is smaller than + // the maximum) have no affect. + + unsigned Segments = AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock; + unsigned Stages = llvm::Log2_32(Segments * SrcVT.getVectorMinNumElements()); + + for (unsigned I = 0; I < Stages; ++I) { + Src = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, SrcVTs, Src, Identity); + Src = DAG.getNode(BaseOpc, DL, SrcVT, Src.getValue(0), Src.getValue(1)); + } + + return DAG.getExtractVectorElt(DL, Op.getValueType(), Src, 0); +} + SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const { auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>(); @@ -18144,8 +18176,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store, bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( Instruction *Load, Value *Mask, IntrinsicInst *DI) const { const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); - if (Factor != 2 && Factor != 4) { - LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n"); + if (Factor != 2 && Factor != 3 && Factor != 4) { + LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n"); return false; } auto *LI = dyn_cast<LoadInst>(Load); @@ -18223,8 +18255,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore( Instruction *Store, Value *Mask, ArrayRef<Value *> InterleavedValues) const { unsigned Factor = InterleavedValues.size(); - if (Factor != 2 && Factor != 4) { - LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n"); + if (Factor != 2 && Factor != 3 && Factor != 4) { + LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n"); return false; } StoreInst *SI = dyn_cast<StoreInst>(Store); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 00956fd..9495c9f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -752,6 +752,7 @@ private: SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECREDUCE_MUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 30dfcf2b..12c600f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -10600,6 +10600,9 @@ describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, Register DestReg = DestSrc->Destination->getReg(); Register SrcReg = DestSrc->Source->getReg(); + if (!DestReg.isValid() || !SrcReg.isValid()) + return std::nullopt; + auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {}); // If the described register is the destination, just return the source. diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index bfe2c80..a67b12a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -901,6 +901,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}}); + addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}}); + bool hasSALUFloat = ST->hasSALUFloatInsts(); addRulesForGOpcs({G_FADD}, Standard) diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index be62395..e0375ea 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -513,8 +513,7 @@ void AMDGPUDisassembler::decodeImmOperands(MCInst &MI, } if (Imm == AMDGPU::EncValues::LITERAL_CONST) { - Op = decodeLiteralConstant( - Desc, OpDesc, OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64); + Op = decodeLiteralConstant(Desc, OpDesc); continue; } @@ -1545,21 +1544,21 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { MCOperand AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const { if (HasLiteral) { - if (Literal64 != Val) + if (Literal != Val) return errOperand(Val, "More than one unique literal is illegal"); } HasLiteral = true; - Literal = Literal64 = Val; + Literal = Val; - bool UseLit64 = Hi_32(Literal64) == 0; + bool UseLit64 = Hi_32(Literal) == 0; return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit( - LitModifier::Lit64, Literal64, getContext())) - : MCOperand::createImm(Literal64); + LitModifier::Lit64, Literal, getContext())) + : MCOperand::createImm(Literal); } -MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc, - const MCOperandInfo &OpDesc, - bool ExtendFP64) const { +MCOperand +AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc, + const MCOperandInfo &OpDesc) const { // For now all literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants // ToDo: deal with float/double constants @@ -1569,35 +1568,79 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc, Twine(Bytes.size())); } HasLiteral = true; - Literal = Literal64 = eatBytes<uint32_t>(Bytes); - if (ExtendFP64) - Literal64 <<= 32; + Literal = eatBytes<uint32_t>(Bytes); } - int64_t Val = ExtendFP64 ? Literal64 : Literal; + // For disassembling always assume all inline constants are available. + bool HasInv2Pi = true; - bool CanUse64BitLiterals = - STI.hasFeature(AMDGPU::Feature64BitLiterals) && - !(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)); - - bool UseLit64 = false; - if (CanUse64BitLiterals) { - if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) - UseLit64 = false; - else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64) - UseLit64 = Hi_32(Literal64) == 0; + // Invalid instruction codes may contain literals for inline-only + // operands, so we support them here as well. + int64_t Val = Literal; + bool UseLit = false; + switch (OpDesc.OperandType) { + default: + llvm_unreachable("Unexpected operand type!"); + case AMDGPU::OPERAND_REG_IMM_BF16: + case AMDGPU::OPERAND_REG_INLINE_C_BF16: + case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: + UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_V2BF16: + UseLit = AMDGPU::isInlinableLiteralV2BF16(Val); + break; + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_V2FP16: + UseLit = AMDGPU::isInlinableLiteralV2F16(Val); + break; + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: + break; + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_V2INT16: + UseLit = AMDGPU::isInlinableLiteralV2I16(Val); + break; + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_IMM_V2FP32: + case AMDGPU::OPERAND_REG_IMM_V2INT32: + case AMDGPU::OPERAND_KIMM32: + UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + case AMDGPU::OPERAND_REG_INLINE_AC_FP64: + Val <<= 32; + break; + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi); + break; + case MCOI::OPERAND_REGISTER: + // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits + // decoding a literal in a position of a register operand. Give + // it special handling in the caller, decodeImmOperands(), instead + // of quietly allowing it here. + break; } - return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit( - LitModifier::Lit64, Val, getContext())) - : MCOperand::createImm(Val); + return UseLit ? MCOperand::createExpr(AMDGPUMCExpr::createLit( + LitModifier::Lit, Val, getContext())) + : MCOperand::createImm(Val); } -MCOperand -AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const { +MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const { assert(STI.hasFeature(AMDGPU::Feature64BitLiterals)); if (!HasLiteral) { @@ -1606,25 +1649,13 @@ AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const { Twine(Bytes.size())); } HasLiteral = true; - Literal64 = eatBytes<uint64_t>(Bytes); - } - - bool UseLit64 = false; - const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); - const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()]; - if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) { - UseLit64 = false; - } else { - assert(OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64); - UseLit64 = Hi_32(Literal64) == 0; + Literal = eatBytes<uint64_t>(Bytes); } + bool UseLit64 = Hi_32(Literal) == 0; return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit( - LitModifier::Lit64, Literal64, getContext())) - : MCOperand::createImm(Literal64); + LitModifier::Lit64, Literal, getContext())) + : MCOperand::createImm(Literal); } MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) { @@ -1913,7 +1944,7 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst, return MCOperand::createImm(Val); if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) { - return decodeLiteral64Constant(Inst); + return decodeLiteral64Constant(); } switch (Width) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 2751857..d103d79 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -44,8 +44,7 @@ private: const unsigned HwModeRegClass; const unsigned TargetMaxInstBytes; mutable ArrayRef<uint8_t> Bytes; - mutable uint32_t Literal; - mutable uint64_t Literal64; + mutable uint64_t Literal; mutable bool HasLiteral; mutable std::optional<bool> EnableWavefrontSize32; unsigned CodeObjectVersion; @@ -144,9 +143,8 @@ public: MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const; MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const; MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, - const MCOperandInfo &OpDesc, - bool ExtendFP64) const; - MCOperand decodeLiteral64Constant(const MCInst &Inst) const; + const MCOperandInfo &OpDesc) const; + MCOperand decodeLiteral64Constant() const; MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 58482ea..9fbf9e5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -69,6 +69,12 @@ static cl::opt<bool> GCNTrackers( cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false)); +static cl::opt<unsigned> PendingQueueLimit( + "amdgpu-scheduler-pending-queue-limit", cl::Hidden, + cl::desc( + "Max (Available+Pending) size to inspect pending queue (0 disables)"), + cl::init(256)); + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) #define DUMP_MAX_REG_PRESSURE static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler( @@ -335,17 +341,52 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, } } +static bool shouldCheckPending(SchedBoundary &Zone, + const TargetSchedModel *SchedModel) { + bool HasBufferedModel = + SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize(); + unsigned Combined = Zone.Available.size() + Zone.Pending.size(); + return Combined <= PendingQueueLimit && HasBufferedModel; +} + +static SUnit *pickOnlyChoice(SchedBoundary &Zone, + const TargetSchedModel *SchedModel) { + // pickOnlyChoice() releases pending instructions and checks for new hazards. + SUnit *OnlyChoice = Zone.pickOnlyChoice(); + if (!shouldCheckPending(Zone, SchedModel) || Zone.Pending.empty()) + return OnlyChoice; + + return nullptr; +} + +void GCNSchedStrategy::printCandidateDecision(const SchedCandidate &Current, + const SchedCandidate &Preferred) { + LLVM_DEBUG({ + dbgs() << "Prefer:\t\t"; + DAG->dumpNode(*Preferred.SU); + + if (Current.SU) { + dbgs() << "Not:\t"; + DAG->dumpNode(*Current.SU); + } + + dbgs() << "Reason:\t\t"; + traceCandidate(Preferred); + }); +} + // This function is mostly cut and pasted from // GenericScheduler::pickNodeFromQueue() void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, - SchedCandidate &Cand, + SchedCandidate &Cand, bool &IsPending, bool IsBottomUp) { const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI); ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); unsigned SGPRPressure = 0; unsigned VGPRPressure = 0; + IsPending = false; if (DAG->isTrackingPressure()) { if (!GCNTrackers) { SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; @@ -358,8 +399,9 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, VGPRPressure = T->getPressure().getArchVGPRNum(); } } - ReadyQueue &Q = Zone.Available; - for (SUnit *SU : Q) { + LLVM_DEBUG(dbgs() << "Available Q:\n"); + ReadyQueue &AQ = Zone.Available; + for (SUnit *SU : AQ) { SchedCandidate TryCand(ZonePolicy); initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure, @@ -371,27 +413,55 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, // Initialize resource delta if needed in case future heuristics query it. if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(Zone.DAG, SchedModel); + LLVM_DEBUG(printCandidateDecision(Cand, TryCand)); Cand.setBest(TryCand); - LLVM_DEBUG(traceCandidate(Cand)); + } else { + printCandidateDecision(TryCand, Cand); + } + } + + if (!shouldCheckPending(Zone, SchedModel)) + return; + + LLVM_DEBUG(dbgs() << "Pending Q:\n"); + ReadyQueue &PQ = Zone.Pending; + for (SUnit *SU : PQ) { + + SchedCandidate TryCand(ZonePolicy); + initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure, + VGPRPressure, IsBottomUp); + // Pass SchedBoundary only when comparing nodes from the same boundary. + SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; + tryPendingCandidate(Cand, TryCand, ZoneArg); + if (TryCand.Reason != NoCand) { + // Initialize resource delta if needed in case future heuristics query it. + if (TryCand.ResDelta == SchedResourceDelta()) + TryCand.initResourceDelta(Zone.DAG, SchedModel); + LLVM_DEBUG(printCandidateDecision(Cand, TryCand)); + IsPending = true; + Cand.setBest(TryCand); + } else { + printCandidateDecision(TryCand, Cand); } } } // This function is mostly cut and pasted from // GenericScheduler::pickNodeBidirectional() -SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { +SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode, + bool &PickedPending) { // Schedule as far as possible in the direction of no choice. This is most // efficient, but also provides the best heuristics for CriticalPSets. - if (SUnit *SU = Bot.pickOnlyChoice()) { + if (SUnit *SU = pickOnlyChoice(Bot, SchedModel)) { IsTopNode = false; return SU; } - if (SUnit *SU = Top.pickOnlyChoice()) { + if (SUnit *SU = pickOnlyChoice(Top, SchedModel)) { IsTopNode = true; return SU; } - // Set the bottom-up policy based on the state of the current bottom zone and - // the instructions outside the zone, including the top zone. + // Set the bottom-up policy based on the state of the current bottom zone + // and the instructions outside the zone, including the top zone. CandPolicy BotPolicy; setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); // Set the top-down policy based on the state of the current top zone and @@ -399,12 +469,14 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { CandPolicy TopPolicy; setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); + bool BotPending = false; // See if BotCand is still valid (because we previously scheduled from Top). LLVM_DEBUG(dbgs() << "Picking from Bot:\n"); if (!BotCand.isValid() || BotCand.SU->isScheduled || BotCand.Policy != BotPolicy) { BotCand.reset(CandPolicy()); pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand, + BotPending, /*IsBottomUp=*/true); assert(BotCand.Reason != NoCand && "failed to find the first candidate"); } else { @@ -414,6 +486,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { SchedCandidate TCand; TCand.reset(CandPolicy()); pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand, + BotPending, /*IsBottomUp=*/true); assert(TCand.SU == BotCand.SU && "Last pick result should correspond to re-picking right now"); @@ -421,12 +494,14 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { #endif } + bool TopPending = false; // Check if the top Q has a better candidate. LLVM_DEBUG(dbgs() << "Picking from Top:\n"); if (!TopCand.isValid() || TopCand.SU->isScheduled || TopCand.Policy != TopPolicy) { TopCand.reset(CandPolicy()); pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand, + TopPending, /*IsBottomUp=*/false); assert(TopCand.Reason != NoCand && "failed to find the first candidate"); } else { @@ -436,6 +511,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { SchedCandidate TCand; TCand.reset(CandPolicy()); pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand, + TopPending, /*IsBottomUp=*/false); assert(TCand.SU == TopCand.SU && "Last pick result should correspond to re-picking right now"); @@ -446,12 +522,21 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { // Pick best from BotCand and TopCand. LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand); dbgs() << "Bot Cand: "; traceCandidate(BotCand);); - SchedCandidate Cand = BotCand; - TopCand.Reason = NoCand; - tryCandidate(Cand, TopCand, nullptr); - if (TopCand.Reason != NoCand) { - Cand.setBest(TopCand); + SchedCandidate Cand = BotPending ? TopCand : BotCand; + SchedCandidate TryCand = BotPending ? BotCand : TopCand; + PickedPending = BotPending && TopPending; + + TryCand.Reason = NoCand; + if (BotPending || TopPending) { + PickedPending |= tryPendingCandidate(Cand, TopCand, nullptr); + } else { + tryCandidate(Cand, TryCand, nullptr); + } + + if (TryCand.Reason != NoCand) { + Cand.setBest(TryCand); } + LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand);); IsTopNode = Cand.AtTop; @@ -466,35 +551,55 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) { Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); return nullptr; } + bool PickedPending; SUnit *SU; do { + PickedPending = false; if (RegionPolicy.OnlyTopDown) { - SU = Top.pickOnlyChoice(); + SU = pickOnlyChoice(Top, SchedModel); if (!SU) { CandPolicy NoPolicy; TopCand.reset(NoPolicy); pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand, + PickedPending, /*IsBottomUp=*/false); assert(TopCand.Reason != NoCand && "failed to find a candidate"); SU = TopCand.SU; } IsTopNode = true; } else if (RegionPolicy.OnlyBottomUp) { - SU = Bot.pickOnlyChoice(); + SU = pickOnlyChoice(Bot, SchedModel); if (!SU) { CandPolicy NoPolicy; BotCand.reset(NoPolicy); pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand, + PickedPending, /*IsBottomUp=*/true); assert(BotCand.Reason != NoCand && "failed to find a candidate"); SU = BotCand.SU; } IsTopNode = false; } else { - SU = pickNodeBidirectional(IsTopNode); + SU = pickNodeBidirectional(IsTopNode, PickedPending); } } while (SU->isScheduled); + if (PickedPending) { + unsigned ReadyCycle = IsTopNode ? SU->TopReadyCycle : SU->BotReadyCycle; + SchedBoundary &Zone = IsTopNode ? Top : Bot; + unsigned CurrentCycle = Zone.getCurrCycle(); + if (ReadyCycle > CurrentCycle) + Zone.bumpCycle(ReadyCycle); + + // FIXME: checkHazard() doesn't give information about which cycle the + // hazard will resolve so just keep bumping the cycle by 1. This could be + // made more efficient if checkHazard() returned more details. + while (Zone.checkHazard(SU)) + Zone.bumpCycle(Zone.getCurrCycle() + 1); + + Zone.releasePending(); + } + if (SU->isTopReady()) Top.removeReady(SU); if (SU->isBottomReady()) @@ -540,6 +645,47 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const { return *std::next(CurrentStage); } +bool GCNSchedStrategy::tryPendingCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) const { + // Initialize the candidate if needed. + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return true; + } + + // Bias PhysReg Defs and copies to their uses and defined respectively. + if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), + biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) + return TryCand.Reason != NoCand; + + // Avoid exceeding the target's limit. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, + RegExcess, TRI, DAG->MF)) + return TryCand.Reason != NoCand; + + // Avoid increasing the max critical pressure in the scheduled region. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical, TRI, DAG->MF)) + return TryCand.Reason != NoCand; + + bool SameBoundary = Zone != nullptr; + if (SameBoundary) { + TryCand.initResourceDelta(DAG, SchedModel); + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return TryCand.Reason != NoCand; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, TryCand, Cand, + ResourceDemand)) + return TryCand.Reason != NoCand; + } + + return false; +} + GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( const MachineSchedContext *C, bool IsLegacyScheduler) : GCNSchedStrategy(C) { diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 8ea4267..975781f 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -44,17 +44,32 @@ raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID); /// heuristics to determine excess/critical pressure sets. class GCNSchedStrategy : public GenericScheduler { protected: - SUnit *pickNodeBidirectional(bool &IsTopNode); + SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending); void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, - SchedCandidate &Cand, bool IsBottomUp); + SchedCandidate &Cand, bool &IsPending, + bool IsBottomUp); void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp); + /// Evaluates instructions in the pending queue using a subset of scheduling + /// heuristics. + /// + /// Instructions that cannot be issued due to hardware constraints are placed + /// in the pending queue rather than the available queue, making them normally + /// invisible to scheduling heuristics. However, in certain scenarios (such as + /// avoiding register spilling), it may be beneficial to consider scheduling + /// these not-yet-ready instructions. + bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) const; + + void printCandidateDecision(const SchedCandidate &Current, + const SchedCandidate &Preferred); + std::vector<unsigned> Pressure; std::vector<unsigned> MaxPressure; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index a01a5fd..5e3195b 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1697,9 +1697,6 @@ LLVM_READNONE bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi); LLVM_READNONE -bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi); - -LLVM_READNONE bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi); LLVM_READNONE diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 228114c..44c4830 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -57,6 +57,7 @@ def ResBindTy : DXILOpParamType; def ResPropsTy : DXILOpParamType; def SplitDoubleTy : DXILOpParamType; def BinaryWithCarryTy : DXILOpParamType; +def DimensionsTy : DXILOpParamType; class DXILOpClass; @@ -901,6 +902,13 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> { let attributes = [Attributes<DXIL1_0, [ReadOnly]>]; } +def GetDimensions : DXILOp<72, getDimensions> { + let Doc = "gets the dimensions of a buffer or texture"; + let arguments = [HandleTy, Int32Ty]; + let result = DimensionsTy; + let stages = [Stages<DXIL1_0, [all_stages]>]; +} + def Barrier : DXILOp<80, barrier> { let Doc = "inserts a memory barrier in the shader"; let intrinsics = [ diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp index 1aed8f9..944b2e6 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp @@ -261,6 +261,12 @@ static StructType *getBinaryWithCarryType(LLVMContext &Context) { return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c"); } +static StructType *getDimensionsType(LLVMContext &Ctx) { + Type *Int32Ty = Type::getInt32Ty(Ctx); + return getOrCreateStructType("dx.types.Dimensions", + {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, Ctx); +} + static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, Type *OverloadTy) { switch (Kind) { @@ -318,6 +324,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, return getSplitDoubleType(Ctx); case OpParamType::BinaryWithCarryTy: return getBinaryWithCarryType(Ctx); + case OpParamType::DimensionsTy: + return getDimensionsType(Ctx); } llvm_unreachable("Invalid parameter kind"); return nullptr; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 610d8b6..e46a393 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -627,6 +627,28 @@ public: }); } + [[nodiscard]] bool lowerGetDimensionsX(Function &F) { + IRBuilder<> &IRB = OpBuilder.getIRB(); + Type *Int32Ty = IRB.getInt32Ty(); + + return replaceFunction(F, [&](CallInst *CI) -> Error { + IRB.SetInsertPoint(CI); + Value *Handle = + createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); + Value *Undef = UndefValue::get(Int32Ty); + + Expected<CallInst *> OpCall = OpBuilder.tryCreateOp( + OpCode::GetDimensions, {Handle, Undef}, CI->getName(), Int32Ty); + if (Error E = OpCall.takeError()) + return E; + Value *Dim = IRB.CreateExtractValue(*OpCall, 0); + + CI->replaceAllUsesWith(Dim); + CI->eraseFromParent(); + return Error::success(); + }); + } + [[nodiscard]] bool lowerGetPointer(Function &F) { // These should have already been handled in DXILResourceAccess, so we can // just clean up the dead prototype. @@ -934,6 +956,9 @@ public: case Intrinsic::dx_resource_updatecounter: HasErrors |= lowerUpdateCounter(F); break; + case Intrinsic::dx_resource_getdimensions_x: + HasErrors |= lowerGetDimensionsX(F); + break; case Intrinsic::ctpop: HasErrors |= lowerCtpopToCountBits(F); break; diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 5f180d6..3bd6ed4 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -66,6 +66,10 @@ public: void remapInstruction(MCInst &Instr) const; + Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address) const override; + private: bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address, uint64_t &BytesToSkip, raw_ostream &CS) const; @@ -567,6 +571,18 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB, return Result; } +Expected<bool> HexagonDisassembler::onSymbolStart(SymbolInfoTy &Symbol, + uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address) const { + // At the start of a symbol, force a fresh packet by resetting any + // in-progress bundle state. This prevents packets from straddling label + // boundaries when data (e.g. jump tables) appears in between. + Size = 0; + resetBundle(); + return true; +} + static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, ArrayRef<MCPhysReg> Table) { if (RegNo < Table.size()) { diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp index 1ce8d7e3..df0c8c1 100644 --- a/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -264,9 +264,10 @@ public: } // end anonymous namespace -static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - Type *OrigTy, CCState &State) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, Type *OrigTy, + CCState &State); static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 7f1ff45..2fd7327 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -3176,9 +3176,10 @@ static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT, F64Regs); } -static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - Type *OrigTy, CCState &State) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, Type *OrigTy, + CCState &State); #include "MipsGenCallingConv.inc" diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 662d3f6..b1794b7 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -717,6 +717,18 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .clampScalar(0, sXLen, sXLen) .lower(); + LegalityPredicate InsertVectorEltPred = [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[0]; + LLT EltTy = Query.Types[1]; + return VecTy.getElementType() == EltTy; + }; + + getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) + .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), + InsertVectorEltPred, typeIs(2, sXLen))) + .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), InsertVectorEltPred, + typeIs(2, sXLen))); + getLegacyLegalizerInfo().computeTables(); verify(*ST.getInstrInfo()); } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7123a2d..eb87558 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1672,6 +1672,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.useRVVForFixedLengthVectors()) setTargetDAGCombine(ISD::BITCAST); + setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64); + // Disable strict node mutation. IsStrictFPEnabled = true; EnableExtLdPromotion = true; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 0f6e1ca..ed54404d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1763,6 +1763,26 @@ defm RELAXED_DOT : "i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs", "i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>; +def : Pat< + (v8i16 (add + (wasm_shuffle + (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)), + (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)), + (i32 0), (i32 1), (i32 4), (i32 5), + (i32 8), (i32 9), (i32 12), (i32 13), + (i32 16), (i32 17), (i32 20), (i32 21), + (i32 24), (i32 25), (i32 28), (i32 29)), + (wasm_shuffle + (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)), + (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)), + (i32 2), (i32 3), (i32 6), (i32 7), + (i32 10), (i32 11), (i32 14), (i32 15), + (i32 18), (i32 19), (i32 22), (i32 23), + (i32 26), (i32 27), (i32 30), (i32 31))) + ), + (v8i16 (RELAXED_DOT v16i8:$lhs, v16i8:$rhs)) +>; + defm RELAXED_DOT_ADD : RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc), (outs), (ins), @@ -1771,6 +1791,18 @@ defm RELAXED_DOT_ADD : "i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc", "i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>; +def : Pat< + (v4i32 (add + (v4i32 (int_wasm_extadd_pairwise_signed + (v8i16 (int_wasm_relaxed_dot_i8x16_i7x16_signed v16i8:$lhs, v16i8:$rhs)))), + (v4i32 V128:$acc))), + (v4i32 (RELAXED_DOT_ADD v16i8:$lhs, v16i8:$rhs, (v4i32 V128:$acc))) + >; + +def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$lhs), + (v16i8 V128:$rhs))), + (RELAXED_DOT_ADD $lhs, $rhs, $acc)>, Requires<[HasRelaxedSIMD]>; + //===----------------------------------------------------------------------===// // Relaxed BFloat16 dot product //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index b81641f..28fa2cd 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -414,8 +414,6 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, getActionDefinitionsBuilder(G_SEXT_INREG).lower(); - getActionDefinitionsBuilder(G_IS_FPCLASS).lower(); - // fp constants getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({s32, s64}) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 6db780f..8e08d16 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1338,6 +1338,10 @@ def ProcessorFeatures { list<SubtargetFeature> PTLFeatures = !listremove(ARLSFeatures, [FeatureWIDEKL]); + // Novalake + list<SubtargetFeature> NVLFeatures = + !listconcat(PTLFeatures, [FeaturePREFETCHI]); + // Clearwaterforest list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI, FeatureAVXVNNIINT16, @@ -1883,6 +1887,9 @@ foreach P = ["pantherlake", "wildcatlake"] in { def : ProcModel<P, AlderlakePModel, ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>; } +def : ProcModel<"novalake", AlderlakePModel, ProcessorFeatures.NVLFeatures, + ProcessorFeatures.ADLTuning>; + def : ProcModel<"clearwaterforest", AlderlakePModel, ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>; def : ProcModel<"emeraldrapids", SapphireRapidsModel, diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 481a9be..1fca466f 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1946,16 +1946,14 @@ static void addConstantComments(const MachineInstr *MI, CASE_ARITH_RM(PMADDUBSW) { unsigned SrcIdx = getSrcIdx(MI, 1); if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { - if (C->getType()->getScalarSizeInBits() == 8) { - std::string Comment; - raw_string_ostream CS(Comment); - unsigned VectorWidth = - X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); - CS << "["; - printConstant(C, VectorWidth, CS); - CS << "]"; - OutStreamer.AddComment(CS.str()); - } + std::string Comment; + raw_string_ostream CS(Comment); + unsigned VectorWidth = + X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); + CS << "["; + printConstant(C, VectorWidth, CS); + CS << "]"; + OutStreamer.AddComment(CS.str()); } break; } @@ -1967,16 +1965,14 @@ static void addConstantComments(const MachineInstr *MI, CASE_ARITH_RM(PMULHRSW) { unsigned SrcIdx = getSrcIdx(MI, 1); if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { - if (C->getType()->getScalarSizeInBits() == 16) { - std::string Comment; - raw_string_ostream CS(Comment); - unsigned VectorWidth = - X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); - CS << "["; - printConstant(C, VectorWidth, CS); - CS << "]"; - OutStreamer.AddComment(CS.str()); - } + std::string Comment; + raw_string_ostream CS(Comment); + unsigned VectorWidth = + X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); + CS << "["; + printConstant(C, VectorWidth, CS); + CS << "]"; + OutStreamer.AddComment(CS.str()); } break; } diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 3479106..6065575 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1152,6 +1152,20 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, break; } break; + case 0x12: + switch (Model) { + // Novalake: + case 0x1: + case 0x3: + CPU = "novalake"; + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_NOVALAKE; + break; + default: // Unknown family 0x12 CPU. + break; + } + break; + default: break; // Unknown. } diff --git a/llvm/lib/TargetParser/Unix/Host.inc b/llvm/lib/TargetParser/Unix/Host.inc index aeb2f59..38b942d 100644 --- a/llvm/lib/TargetParser/Unix/Host.inc +++ b/llvm/lib/TargetParser/Unix/Host.inc @@ -59,10 +59,30 @@ static std::string updateTripleOSVersion(std::string TargetTripleString) { if (TT.getOS() == Triple::AIX && !TT.getOSMajorVersion()) { struct utsname name; if (uname(&name) != -1) { + std::string release = name.release; + + if (strcmp(name.sysname, "OS400") == 0) { + /* + PASE uses different versioning system than AIX. + The following table shows the currently supported PASE + releases and the corresponding AIX release: + -------------------------- + PASE | AIX + -------------------------- + V7R4 | 7.2 (TL2) + -------------------------- + V7R5 | 7.2 (TL5) + -------------------------- + V7R6 | 7.3 (TL1) + -------------------------- + */ + release = (release == "4" || release == "5") ? "2" : "3"; + } + std::string NewOSName = std::string(Triple::getOSTypeName(Triple::AIX)); NewOSName += name.version; NewOSName += '.'; - NewOSName += name.release; + NewOSName += release; NewOSName += ".0.0"; TT.setOSName(NewOSName); return TT.str(); diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index e382cfe..dd13ce3 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -176,6 +176,8 @@ constexpr FeatureBitset FeaturesArrowlakeS = FeatureSM4; constexpr FeatureBitset FeaturesPantherlake = (FeaturesArrowlakeS ^ FeatureWIDEKL); +constexpr FeatureBitset FeaturesNovalake = + FeaturesPantherlake | FeaturePREFETCHI; constexpr FeatureBitset FeaturesClearwaterforest = (FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR; @@ -379,6 +381,8 @@ constexpr ProcInfo Processors[] = { // Pantherlake microarchitecture based processors. { {"pantherlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false }, { {"wildcatlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false }, + // Novalake microarchitecture based processors. + { {"novalake"}, CK_Novalake, FEATURE_AVX2, FeaturesNovalake, 'r', false }, // Sierraforest microarchitecture based processors. { {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false }, // Grandridge microarchitecture based processors. diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp index f166fef..cf7e450 100644 --- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp @@ -153,26 +153,23 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine(); bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe); if (IsCallerPresplitCoroutine && HasAttr) { - BranchProbability MinBranchProbability( - static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution), - MinBlockCounterExecution); - auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller); - auto Prob = BranchProbability::getBranchProbability( - BFI.getBlockFreq(CB->getParent()).getFrequency(), - BFI.getEntryFreq().getFrequency()); + auto BlockFreq = BFI.getBlockFreq(CB->getParent()).getFrequency(); + auto EntryFreq = BFI.getEntryFreq().getFrequency(); + uint64_t MinFreq = + static_cast<uint64_t>(EntryFreq * CoroElideBranchRatio); - if (Prob < MinBranchProbability) { + if (BlockFreq < MinFreq) { ORE.emit([&]() { return OptimizationRemarkMissed( DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller) << "'" << ore::NV("callee", Callee->getName()) << "' not elided in '" << ore::NV("caller", Caller->getName()) - << "' because of low probability: " - << ore::NV("probability", Prob) << " (threshold: " - << ore::NV("threshold", MinBranchProbability) << ")"; + << "' because of low frequency: " + << ore::NV("block_freq", BlockFreq) + << " (threshold: " << ore::NV("min_freq", MinFreq) << ")"; }); continue; } @@ -188,7 +185,8 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller) << "'" << ore::NV("callee", Callee->getName()) << "' elided in '" << ore::NV("caller", Caller->getName()) - << "' (probability: " << ore::NV("probability", Prob) << ")"; + << "' (block_freq: " << ore::NV("block_freq", BlockFreq) + << ")"; }); FAM.invalidate(*Caller, PreservedAnalyses::none()); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 4c9b10a..cdc559b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -156,9 +156,9 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); Type *Ty = CI.getType(); - if (auto *SrcC = dyn_cast<Constant>(Src)) - if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL)) - return replaceInstUsesWith(CI, Res); + if (Value *Res = + simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI))) + return replaceInstUsesWith(CI, Res); // Try to eliminate a cast of a cast. if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 511bca4..6e17801 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -605,17 +605,16 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, return Mapping; } -namespace llvm { -void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize, - bool IsKasan, uint64_t *ShadowBase, - int *MappingScale, bool *OrShadowOffset) { +void llvm::getAddressSanitizerParams(const Triple &TargetTriple, int LongSize, + bool IsKasan, uint64_t *ShadowBase, + int *MappingScale, bool *OrShadowOffset) { auto Mapping = getShadowMapping(TargetTriple, LongSize, IsKasan); *ShadowBase = Mapping.Offset; *MappingScale = Mapping.Scale; *OrShadowOffset = Mapping.OrShadowOffset; } -void removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) { +void llvm::removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) { // Sanitizer checks read from shadow, which invalidates memory(argmem: *). // // This is not only true for sanitized functions, because AttrInfer can @@ -668,8 +667,6 @@ ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel, AccessSizeIndex(AccessSizeIndex), IsWrite(IsWrite), CompileKernel(CompileKernel) {} -} // namespace llvm - static uint64_t getRedzoneSizeForScale(int MappingScale) { // Redzone used for stack and globals is at least 32 bytes. // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. @@ -677,11 +674,10 @@ static uint64_t getRedzoneSizeForScale(int MappingScale) { } static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) { - if (TargetTriple.isOSEmscripten()) { + if (TargetTriple.isOSEmscripten()) return kAsanEmscriptenCtorAndDtorPriority; - } else { + else return kAsanCtorAndDtorPriority; - } } static Twine genName(StringRef suffix) { diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 444b390..72e8e50 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -2092,8 +2092,6 @@ bool CHR::run() { return Changed; } -namespace llvm { - ControlHeightReductionPass::ControlHeightReductionPass() { parseCHRFilterFiles(); } @@ -2116,5 +2114,3 @@ PreservedAnalyses ControlHeightReductionPass::run( return PreservedAnalyses::all(); return PreservedAnalyses::none(); } - -} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp index d99f1eb..ddb99a5 100644 --- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -75,8 +75,6 @@ static cl::opt<bool> "expressed as branches by widenable conditions"), cl::init(true)); -namespace { - // Get the condition of \p I. It can either be a guard or a conditional branch. static Value *getCondition(Instruction *I) { if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { @@ -130,6 +128,8 @@ findInsertionPointForWideCondition(Instruction *WCOrGuard) { return std::nullopt; } +namespace { + class GuardWideningImpl { DominatorTree &DT; PostDominatorTree *PDT; @@ -328,7 +328,7 @@ public: /// The entry point for this pass. bool run(); }; -} +} // namespace static bool isSupportedGuardInstruction(const Instruction *Insn) { if (isGuard(Insn)) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index c327311..7ebcc21 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -53,6 +53,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" @@ -117,6 +118,10 @@ static cl::opt<bool> LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops")); +static cl::opt<bool> LoopPredicationTraps( + "indvars-predicate-loop-traps", cl::Hidden, cl::init(true), + cl::desc("Predicate conditions that trap in loops with only local writes")); + static cl::opt<bool> AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext")); @@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { return Changed; } +static bool crashingBBWithoutEffect(const BasicBlock &BB) { + return llvm::all_of(BB, [](const Instruction &I) { + // TODO: for now this is overly restrictive, to make sure nothing in this + // BB can depend on the loop body. + // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a + // load does not have a side effect, but we could have + // %a = load ptr, ptr %ptr + // %b = load i32, ptr %a + // Now if the loop stored a non-nullptr to %a, we could cause a nullptr + // dereference by skipping over loop iterations. + if (const auto *CB = dyn_cast<CallBase>(&I)) { + if (CB->onlyAccessesInaccessibleMemory()) + return true; + } + return isa<UnreachableInst>(I); + }); +} + bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector<BasicBlock*, 16> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { // suggestions on how to improve this? I can obviously bail out for outer // loops, but that seems less than ideal. MemorySSA can find memory writes, // is that enough for *all* side effects? + bool HasThreadLocalSideEffects = false; for (BasicBlock *BB : L->blocks()) for (auto &I : *BB) // TODO:isGuaranteedToTransfer - if (I.mayHaveSideEffects()) - return false; + if (I.mayHaveSideEffects()) { + if (!LoopPredicationTraps) + return false; + HasThreadLocalSideEffects = true; + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { + // Simple stores cannot be observed by other threads. + // If HasThreadLocalSideEffects is set, we check + // crashingBBWithoutEffect to make sure that the crashing BB cannot + // observe them either. + if (!SI->isSimple()) + return false; + } else { + return false; + } + } bool Changed = false; // Finally, do the actual predication for all predicatable blocks. A couple @@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); + if (HasThreadLocalSideEffects) { + const BasicBlock *Unreachable = nullptr; + for (const BasicBlock *Succ : BI->successors()) { + if (isa<UnreachableInst>(Succ->getTerminator())) + Unreachable = Succ; + } + // Exit BB which have one branch back into the loop and another one to + // a trap can still be optimized, because local side effects cannot + // be observed in the exit case (the trap). We could be smarter about + // this, but for now lets pattern match common cases that directly trap. + if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable)) + return Changed; + } Value *NewCond; if (ExitCount == ExactBTC) { NewCond = L->contains(BI->getSuccessor(0)) ? diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp index 3c14036e..6fb8197 100644 --- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp +++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp @@ -26,8 +26,6 @@ using namespace llvm; -namespace llvm { - static cl::opt<unsigned> JumpTableSizeThreshold("jump-table-to-switch-size-threshold", cl::Hidden, cl::desc("Only split jump tables with size less or " @@ -43,8 +41,8 @@ static cl::opt<unsigned> FunctionSizeThreshold( "or equal than this threshold."), cl::init(50)); +namespace llvm { extern cl::opt<bool> ProfcheckDisableMetadataFixes; - } // end namespace llvm #define DEBUG_TYPE "jump-table-to-switch" diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 9655173..b2c526b 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -116,8 +116,6 @@ STATISTIC(NumIntAssociationsHoisted, STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions " "reassociated and hoisted out of the loop"); -namespace llvm { - /// Memory promotion is enabled by default. static cl::opt<bool> DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), @@ -156,7 +154,7 @@ static cl::opt<unsigned> IntAssociationUpperLimit( // which may not be precise, since optimizeUses is capped. The result is // correct, but we may not get as "far up" as possible to get which access is // clobbering the one queried. -cl::opt<unsigned> SetLicmMssaOptCap( +cl::opt<unsigned> llvm::SetLicmMssaOptCap( "licm-mssa-optimization-cap", cl::init(100), cl::Hidden, cl::desc("Enable imprecision in LICM in pathological cases, in exchange " "for faster compile. Caps the MemorySSA clobbering calls.")); @@ -164,15 +162,15 @@ cl::opt<unsigned> SetLicmMssaOptCap( // Experimentally, memory promotion carries less importance than sinking and // hoisting. Limit when we do promotion when using MemorySSA, in order to save // compile time. -cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap( +cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap( "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden, cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no " "effect. When MSSA in LICM is enabled, then this is the maximum " "number of accesses allowed to be present in a loop in order to " "enable memory promotion.")); +namespace llvm { extern cl::opt<bool> ProfcheckDisableMetadataFixes; - } // end namespace llvm static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); @@ -1120,11 +1118,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, return false; } -namespace { /// Return true if-and-only-if we know how to (mechanically) both hoist and /// sink a given instruction out of a loop. Does not address legality /// concerns such as aliasing or speculation safety. -bool isHoistableAndSinkableInst(Instruction &I) { +static bool isHoistableAndSinkableInst(Instruction &I) { // Only these instructions are hoistable/sinkable. return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) || isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) || @@ -1136,8 +1133,8 @@ bool isHoistableAndSinkableInst(Instruction &I) { } /// Return true if I is the only Instruction with a MemoryAccess in L. -bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, - const MemorySSAUpdater &MSSAU) { +static bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, + const MemorySSAUpdater &MSSAU) { for (auto *BB : L->getBlocks()) if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) { int NotAPhi = 0; @@ -1151,7 +1148,6 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, } return true; } -} static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA, BatchAAResults &BAA, diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp index 73f1942..7706de8 100644 --- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp +++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp @@ -21,8 +21,7 @@ #define DEBUG_TYPE "loop-bound-split" -namespace llvm { - +using namespace llvm; using namespace PatternMatch; namespace { @@ -358,8 +357,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI, IRBuilder<> Builder(&PostLoopPreHeader->front()); // Update phi nodes in header of post-loop. - bool isExitingLatch = - (L.getExitingBlock() == L.getLoopLatch()) ? true : false; + bool isExitingLatch = L.getExitingBlock() == L.getLoopLatch(); Value *ExitingCondLCSSAPhi = nullptr; for (PHINode &PN : L.getHeader()->phis()) { // Create LCSSA phi node in preheader of post-loop. @@ -472,8 +470,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI, PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { - Function &F = *L.getHeader()->getParent(); - (void)F; + [[maybe_unused]] Function &F = *L.getHeader()->getParent(); LLVM_DEBUG(dbgs() << "Spliting bound of loop in " << F.getName() << ": " << L << "\n"); @@ -486,5 +483,3 @@ PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM, return getLoopPassPreservedAnalyses(); } - -} // end namespace llvm diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp index 32078b1..7da8586 100644 --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -16,8 +16,6 @@ using namespace llvm; -namespace llvm { - /// Explicitly specialize the pass manager's run method to handle loop nest /// structure updates. PreservedAnalyses @@ -185,7 +183,6 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM, } return PA; } -} // namespace llvm void FunctionToLoopPassAdaptor::printPipeline( raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { @@ -193,6 +190,7 @@ void FunctionToLoopPassAdaptor::printPipeline( Pass->printPipeline(OS, MapClassName2PassName); OS << ')'; } + PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, FunctionAnalysisManager &AM) { // Before we even compute any loop analyses, first run a miniature function diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 448dc2b..f3e6cbf 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -540,8 +540,6 @@ bool LoopVersioningLICM::run(DominatorTree *DT) { return Changed; } -namespace llvm { - PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &LAR, LPMUpdater &U) { @@ -556,4 +554,3 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM, return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); } -} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 80aa98d..5a8f18a 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -160,9 +160,6 @@ static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true), //===----------------------------------------------------------------------===// // Anchor methods. -namespace llvm { -namespace GVNExpression { - Expression::~Expression() = default; BasicExpression::~BasicExpression() = default; CallExpression::~CallExpression() = default; @@ -171,9 +168,6 @@ StoreExpression::~StoreExpression() = default; AggregateValueExpression::~AggregateValueExpression() = default; PHIExpression::~PHIExpression() = default; -} // end namespace GVNExpression -} // end namespace llvm - namespace { // Tarjan's SCC finding algorithm with Nuutila's improvements diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index ba58b8e..6d7ce36 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -2623,32 +2623,32 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) { namespace { - class ReassociateLegacyPass : public FunctionPass { - ReassociatePass Impl; +class ReassociateLegacyPass : public FunctionPass { + ReassociatePass Impl; - public: - static char ID; // Pass identification, replacement for typeid +public: + static char ID; // Pass identification, replacement for typeid - ReassociateLegacyPass() : FunctionPass(ID) { - initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry()); - } + ReassociateLegacyPass() : FunctionPass(ID) { + initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; - FunctionAnalysisManager DummyFAM; - auto PA = Impl.run(F, DummyFAM); - return !PA.areAllPreserved(); - } + FunctionAnalysisManager DummyFAM; + auto PA = Impl.run(F, DummyFAM); + return !PA.areAllPreserved(); + } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addPreserved<AAResultsWrapperPass>(); - AU.addPreserved<BasicAAWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } - }; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; } // end anonymous namespace diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index aae5d60..25a531c 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -50,9 +50,7 @@ using namespace llvm; #define DEBUG_TYPE "scalarizer" -namespace { - -BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) { +static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) { BasicBlock *BB = Itr->getParent(); if (isa<PHINode>(Itr)) Itr = BB->getFirstInsertionPt(); @@ -76,6 +74,8 @@ using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>; // along with a pointer to their scattered forms. using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>; +namespace { + struct VectorSplit { // The type of the vector. FixedVectorType *VecTy = nullptr; @@ -196,6 +196,7 @@ struct VectorLayout { // The size of each (non-remainder) fragment in bytes. uint64_t SplitSize = 0; }; +} // namespace static bool isStructOfMatchingFixedVectors(Type *Ty) { if (!isa<StructType>(Ty)) @@ -268,6 +269,7 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments, return Res; } +namespace { class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> { public: ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI, diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp index ebcbd2b..fa66a03 100644 --- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -149,8 +149,6 @@ bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) { return Impl.runImpl(F, TTI); } -namespace llvm { - bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) { if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence(&F)) { LLVM_DEBUG(dbgs() << "Not running SpeculativeExecution because " @@ -328,11 +326,11 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( return true; } -FunctionPass *createSpeculativeExecutionPass() { +FunctionPass *llvm::createSpeculativeExecutionPass() { return new SpeculativeExecutionLegacyPass(); } -FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() { +FunctionPass *llvm::createSpeculativeExecutionIfHasBranchDivergencePass() { return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true); } @@ -362,4 +360,3 @@ void SpeculativeExecutionPass::printPipeline( OS << "only-if-divergent-target"; OS << '>'; } -} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 7d01709..e94ad19 100644 --- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -716,8 +716,6 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { return Ret; } -namespace llvm { - PreservedAnalyses StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) { const DataLayout *DL = &F.getDataLayout(); @@ -735,5 +733,3 @@ StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) { PA.preserve<TargetIRAnalysis>(); return PA; } - -} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 1d83ddc..89d41f3e 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -192,7 +192,7 @@ struct AllocaDerivedValueTracker { SmallPtrSet<Instruction *, 32> AllocaUsers; SmallPtrSet<Instruction *, 32> EscapePoints; }; -} +} // namespace static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { if (F.callsFunctionThatReturnsTwice()) @@ -967,7 +967,7 @@ struct TailCallElim : public FunctionPass { /*BFI=*/nullptr); } }; -} +} // namespace char TailCallElim::ID = 0; INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination", diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 62a81ba..280eb20 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7957,9 +7957,9 @@ bool VPRecipeBuilder::getScaledReductions( auto CollectExtInfo = [this, &Exts, &ExtOpTypes, &ExtKinds](SmallVectorImpl<Value *> &Ops) -> bool { for (const auto &[I, OpI] : enumerate(Ops)) { - auto *CI = dyn_cast<ConstantInt>(OpI); - if (I > 0 && CI && - canConstantBeExtended(CI, ExtOpTypes[0], ExtKinds[0])) { + const APInt *C; + if (I > 0 && match(OpI, m_APInt(C)) && + canConstantBeExtended(C, ExtOpTypes[0], ExtKinds[0])) { ExtOpTypes[I] = ExtOpTypes[0]; ExtKinds[I] = ExtKinds[0]; continue; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 0101942..d167009 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1753,14 +1753,14 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) { } #endif -bool llvm::canConstantBeExtended(const ConstantInt *CI, Type *NarrowType, +bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind) { - APInt TruncatedVal = CI->getValue().trunc(NarrowType->getScalarSizeInBits()); - unsigned WideSize = CI->getType()->getScalarSizeInBits(); + APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits()); + unsigned WideSize = C->getBitWidth(); APInt ExtendedVal = ExtKind == TTI::PR_SignExtend ? TruncatedVal.sext(WideSize) : TruncatedVal.zext(WideSize); - return ExtendedVal == CI->getValue(); + return ExtendedVal == *C; } TargetTransformInfo::OperandValueInfo diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h index 1580a3b..2aaabd9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h @@ -474,7 +474,7 @@ public: /// Check if a constant \p CI can be safely treated as having been extended /// from a narrower type with the given extension kind. -bool canConstantBeExtended(const ConstantInt *CI, Type *NarrowType, +bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind); } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index ff286f7..d8203e2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -173,10 +173,10 @@ inline int_pred_ty<is_zero_int> m_ZeroInt() { /// For vectors, this includes constants with undefined elements. inline int_pred_ty<is_one> m_One() { return int_pred_ty<is_one>(); } -struct bind_const_int { - uint64_t &Res; +struct bind_apint { + const APInt *&Res; - bind_const_int(uint64_t &Res) : Res(Res) {} + bind_apint(const APInt *&Res) : Res(Res) {} bool match(VPValue *VPV) const { if (!VPV->isLiveIn()) @@ -188,7 +188,23 @@ struct bind_const_int { const auto *CI = dyn_cast<ConstantInt>(V); if (!CI) return false; - if (auto C = CI->getValue().tryZExtValue()) { + Res = &CI->getValue(); + return true; + } +}; + +inline bind_apint m_APInt(const APInt *&C) { return C; } + +struct bind_const_int { + uint64_t &Res; + + bind_const_int(uint64_t &Res) : Res(Res) {} + + bool match(VPValue *VPV) const { + const APInt *APConst; + if (!bind_apint(APConst).match(VPV)) + return false; + if (auto C = APConst->tryZExtValue()) { Res = *C; return true; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 775837f..7a98c75 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -341,12 +341,12 @@ VPPartialReductionRecipe::computeCost(ElementCount VF, ExtAType = GetExtendKind(ExtAR); ExtBType = GetExtendKind(ExtBR); - if (!ExtBR && Widen->getOperand(1)->isLiveIn()) { - auto *CI = cast<ConstantInt>(Widen->getOperand(1)->getLiveInIRValue()); - if (canConstantBeExtended(CI, InputTypeA, ExtAType)) { - InputTypeB = InputTypeA; - ExtBType = ExtAType; - } + using namespace VPlanPatternMatch; + const APInt *C; + if (!ExtBR && match(Widen->getOperand(1), m_APInt(C)) && + canConstantBeExtended(C, InputTypeA, ExtAType)) { + InputTypeB = InputTypeA; + ExtBType = ExtAType; } }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 8d76b2d8..cae9aee8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2122,9 +2122,18 @@ static void licm(VPlan &Plan) { VPBasicBlock *Preheader = Plan.getVectorPreheader(); // Return true if we do not know how to (mechanically) hoist a given recipe - // out of a loop region. Does not address legality concerns such as aliasing - // or speculation safety. + // out of a loop region. auto CannotHoistRecipe = [](VPRecipeBase &R) { + // Assumes don't alias anything or throw; as long as they're guaranteed to + // execute, they're safe to hoist. + if (match(&R, m_Intrinsic<Intrinsic::assume>())) + return false; + + // TODO: Relax checks in the future, e.g. we could also hoist reads, if + // their memory location is not modified in the vector loop. + if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi()) + return true; + // Allocas cannot be hoisted. auto *RepR = dyn_cast<VPReplicateRecipe>(&R); return RepR && RepR->getOpcode() == Instruction::Alloca; @@ -2132,17 +2141,18 @@ static void licm(VPlan &Plan) { // Hoist any loop invariant recipes from the vector loop region to the // preheader. Preform a shallow traversal of the vector loop region, to - // exclude recipes in replicate regions. + // exclude recipes in replicate regions. Since the top-level blocks in the + // vector loop region are guaranteed to execute if the vector pre-header is, + // we don't need to check speculation safety. VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + assert(Preheader->getSingleSuccessor() == LoopRegion && + "Expected vector prehader's successor to be the vector loop region"); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( vp_depth_first_shallow(LoopRegion->getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (CannotHoistRecipe(R)) continue; - // TODO: Relax checks in the future, e.g. we could also hoist reads, if - // their memory location is not modified in the vector loop. - if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() || - any_of(R.operands(), [](VPValue *Op) { + if (any_of(R.operands(), [](VPValue *Op) { return !Op->isDefinedOutsideLoopRegions(); })) continue; |