Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/StaticDataProfileInfo.cpp | 70
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 10
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/CMakeLists.txt | 1
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp | 22
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp | 18
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp | 10
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp | 6
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp | 104
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp | 367
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 32
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 129
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 8
-rw-r--r--  llvm/lib/Target/DirectX/DXIL.td | 8
-rw-r--r--  llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 8
-rw-r--r--  llvm/lib/Target/DirectX/DXILOpLowering.cpp | 25
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp | 15
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp | 8
-rw-r--r--  llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 16
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 12
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 4
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td | 13
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp | 36
-rw-r--r--  llvm/lib/TargetParser/TargetDataLayout.cpp | 5
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Scalar/GuardWidening.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Scalar/LICM.cpp | 16
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp | 11
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 30
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 23
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.h | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 15
44 files changed, 767 insertions, 329 deletions
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index e7f0b2c..61d4935 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -1,10 +1,14 @@
#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
+#define DEBUG_TYPE "static-data-profile-info"
+
using namespace llvm;
namespace llvm {
@@ -79,6 +83,17 @@ StaticDataProfileInfo::getConstantHotnessUsingProfileCount(
return StaticDataHotness::LukewarmOrUnknown;
}
+StaticDataProfileInfo::StaticDataHotness
+StaticDataProfileInfo::getSectionHotnessUsingDataAccessProfile(
+ std::optional<StringRef> MaybeSectionPrefix) const {
+ if (!MaybeSectionPrefix)
+ return StaticDataHotness::LukewarmOrUnknown;
+ StringRef Prefix = *MaybeSectionPrefix;
+ assert((Prefix == "hot" || Prefix == "unlikely") &&
+ "Expect section_prefix to be one of hot or unlikely");
+ return Prefix == "hot" ? StaticDataHotness::Hot : StaticDataHotness::Cold;
+}
+
StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const {
switch (Hotness) {
case StaticDataHotness::Cold:
@@ -101,13 +116,66 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
StringRef StaticDataProfileInfo::getConstantSectionPrefix(
const Constant *C, const ProfileSummaryInfo *PSI) const {
std::optional<uint64_t> Count = getConstantProfileCount(C);
+
+#ifndef NDEBUG
+ auto DbgPrintPrefix = [](StringRef Prefix) {
+ return Prefix.empty() ? "<empty>" : Prefix;
+ };
+#endif
+
+ if (EnableDataAccessProf) {
+ // When module flag `EnableDataAccessProf` is 1, an empty section prefix
+ // means unknown hotness, except for string literals.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C);
+ GV && llvm::memprof::IsAnnotationOK(*GV) &&
+ !GV->getName().starts_with(".str")) {
+ auto HotnessFromDataAccessProf =
+ getSectionHotnessUsingDataAccessProfile(GV->getSectionPrefix());
+
+ if (!Count) {
+ StringRef Prefix = hotnessToStr(HotnessFromDataAccessProf);
+ LLVM_DEBUG(dbgs() << GV->getName() << " has section prefix "
+ << DbgPrintPrefix(Prefix)
+ << ", solely from data access profiles\n");
+ return Prefix;
+ }
+
+ // Both data access profiles and PGO counters are available. Use the
+ // hotter one.
+ auto HotnessFromPGO = getConstantHotnessUsingProfileCount(C, PSI, *Count);
+ StaticDataHotness GlobalVarHotness = StaticDataHotness::LukewarmOrUnknown;
+ if (HotnessFromDataAccessProf == StaticDataHotness::Hot ||
+ HotnessFromPGO == StaticDataHotness::Hot) {
+ GlobalVarHotness = StaticDataHotness::Hot;
+ } else if (HotnessFromDataAccessProf ==
+ StaticDataHotness::LukewarmOrUnknown ||
+ HotnessFromPGO == StaticDataHotness::LukewarmOrUnknown) {
+ GlobalVarHotness = StaticDataHotness::LukewarmOrUnknown;
+ } else {
+ GlobalVarHotness = StaticDataHotness::Cold;
+ }
+ StringRef Prefix = hotnessToStr(GlobalVarHotness);
+ LLVM_DEBUG(
+ dbgs() << GV->getName() << " has section prefix "
+ << DbgPrintPrefix(Prefix)
+ << ", the max from data access profiles as "
+ << DbgPrintPrefix(hotnessToStr(HotnessFromDataAccessProf))
+ << " and PGO counters as "
+ << DbgPrintPrefix(hotnessToStr(HotnessFromPGO)) << "\n");
+ return Prefix;
+ }
+ }
if (!Count)
return "";
return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count));
}
bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
- Info.reset(new StaticDataProfileInfo());
+ bool EnableDataAccessProf = false;
+ if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("EnableDataAccessProf")))
+ EnableDataAccessProf = MD->getZExtValue();
+ Info.reset(new StaticDataProfileInfo(EnableDataAccessProf));
return false;
}
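
The wrapper pass above gates the new behavior on the `EnableDataAccessProf` module flag. A minimal sketch of how a producer could set that flag, assuming the standard `Module::addModuleFlag` API (the `Override` behavior is an illustrative choice, not part of this patch):

    // Sketch: opt this module into data-access-profile-driven section prefixes.
    // The flag name matches the one read in doInitialization() above.
    M.addModuleFlag(llvm::Module::Override, "EnableDataAccessProf", 1);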
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index 25c1db9..ded4df4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -55,12 +55,10 @@ LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
}
LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
- LLT NewEltTy) {
+ ElementCount EC) {
return [=](const LegalityQuery &Query) {
const LLT OldTy = Query.Types[TypeIdx];
- ElementCount NewEltCount = NewEltTy.isVector() ? NewEltTy.getElementCount()
- : ElementCount::getFixed(1);
- return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+ return std::make_pair(TypeIdx, OldTy.changeElementCount(EC));
};
}
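
The new overload takes the target `ElementCount` directly rather than extracting it from an `LLT`. A hypothetical legalization rule using it (the rule itself is illustrative, not from this patch):

    // Sketch: grow the vector in type index 0 to four fixed elements.
    getActionDefinitionsBuilder(TargetOpcode::G_ADD)
        .moreElementsIf(
            [](const LegalityQuery &Q) { return Q.Types[0].isVector(); },
            LegalizeMutations::changeElementCountTo(0, ElementCount::getFixed(4)));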
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 055fdc6..ca82857 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -818,8 +818,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (!DefMI)
return false;
- const TargetMachine& TM = DefMI->getMF()->getTarget();
- if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
+ if (DefMI->getFlag(MachineInstr::FmNoNans))
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0f2b518..cb0038c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3971,8 +3971,14 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) {
}
void SelectionDAGBuilder::visitPtrToAddr(const User &I) {
- // FIXME: this is not correct for pointers with addr width != pointer width
- visitPtrToInt(I);
+ SDValue N = getValue(I.getOperand(0));
+ // By definition the type of the ptrtoaddr must be equal to the address type.
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ EVT AddrVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ // The address width must be less than or equal to the pointer representation
+ // width, so we lower ptrtoaddr as a truncate (possibly folded to a no-op).
+ N = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), AddrVT, N);
+ setValue(&I, N);
}
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
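
The truncate only does something on targets whose pointer representation is wider than the address (index) width. A sketch of querying both widths via `DataLayout` (the layout string is a made-up example):

    // Hypothetical target: 128-bit pointers in AS0 with a 64-bit index width.
    llvm::DataLayout DL("p:128:128:128:64");
    unsigned PtrBits = DL.getPointerSizeInBits(0); // 128
    unsigned AddrBits = DL.getIndexSizeInBits(0);  // 64: ptrtoaddr truncates to this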
diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
index 0ffe3ae..f343925 100644
--- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
@@ -56,6 +56,7 @@ add_llvm_component_library(LLVMOrcJIT
SectCreate.cpp
SelfExecutorProcessControl.cpp
SimpleRemoteEPC.cpp
+ SimpleRemoteMemoryMapper.cpp
Speculation.cpp
SpeculateAnalyses.cpp
ExecutorProcessControl.cpp
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
index 50e6b25..0833af7 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
@@ -57,16 +57,17 @@ public:
std::swap(FR.Actions, G.allocActions());
Parent.EPC.callSPSWrapperAsync<
- rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
- Parent.SAs.Finalize,
+ rt::SPSSimpleExecutorMemoryManagerInitializeSignature>(
+ Parent.SAs.Initialize,
[OnFinalize = std::move(OnFinalize), AllocAddr = this->AllocAddr](
- Error SerializationErr, Error FinalizeErr) mutable {
+ Error SerializationErr,
+ Expected<ExecutorAddr> InitializeKey) mutable {
// FIXME: Release abandoned alloc.
if (SerializationErr) {
- cantFail(std::move(FinalizeErr));
+ cantFail(InitializeKey.takeError());
OnFinalize(std::move(SerializationErr));
- } else if (FinalizeErr)
- OnFinalize(std::move(FinalizeErr));
+ } else if (!InitializeKey)
+ OnFinalize(InitializeKey.takeError());
else
OnFinalize(FinalizedAlloc(AllocAddr));
},
@@ -76,8 +77,8 @@ public:
void abandon(OnAbandonedFunction OnAbandoned) override {
// FIXME: Return memory to pool instead.
Parent.EPC.callSPSWrapperAsync<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
- Parent.SAs.Deallocate,
+ rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
+ Parent.SAs.Release,
[OnAbandoned = std::move(OnAbandoned)](Error SerializationErr,
Error DeallocateErr) mutable {
if (SerializationErr) {
@@ -123,9 +124,8 @@ void EPCGenericJITLinkMemoryManager::allocate(const JITLinkDylib *JD,
void EPCGenericJITLinkMemoryManager::deallocate(
std::vector<FinalizedAlloc> Allocs, OnDeallocatedFunction OnDeallocated) {
- EPC.callSPSWrapperAsync<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
- SAs.Deallocate,
+ EPC.callSPSWrapperAsync<rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
+ SAs.Release,
[OnDeallocated = std::move(OnDeallocated)](Error SerErr,
Error DeallocErr) mutable {
if (SerErr) {
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
index fec7062..cc72488 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
@@ -25,9 +25,9 @@ EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(
if (auto Err = EPC.getBootstrapSymbols(
{{SAs.Instance, rt::SimpleExecutorMemoryManagerInstanceName},
{SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
- {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
- {SAs.Deallocate,
- rt::SimpleExecutorMemoryManagerDeallocateWrapperName},
+ {SAs.Initialize,
+ rt::SimpleExecutorMemoryManagerInitializeWrapperName},
+ {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName},
{SAs.RegisterEHFrame, rt::RegisterEHFrameSectionAllocActionName},
{SAs.DeregisterEHFrame,
rt::DeregisterEHFrameSectionAllocActionName}}))
@@ -48,7 +48,7 @@ EPCGenericRTDyldMemoryManager::~EPCGenericRTDyldMemoryManager() {
Error Err = Error::success();
if (auto Err2 = EPC.callSPSWrapper<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
+ rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
SAs.Reserve, Err, SAs.Instance, FinalizedAllocs)) {
// FIXME: Report errors through EPC once that functionality is available.
logAllUnhandledErrors(std::move(Err2), errs(), "");
@@ -267,10 +267,10 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
// We'll also need to make an extra allocation for the eh-frame wrapper call
// arguments.
- Error FinalizeErr = Error::success();
+ Expected<ExecutorAddr> InitializeKey((ExecutorAddr()));
if (auto Err = EPC.callSPSWrapper<
- rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
- SAs.Finalize, FinalizeErr, SAs.Instance, std::move(FR))) {
+ rt::SPSSimpleExecutorMemoryManagerInitializeSignature>(
+ SAs.Initialize, InitializeKey, SAs.Instance, std::move(FR))) {
std::lock_guard<std::mutex> Lock(M);
this->ErrMsg = toString(std::move(Err));
dbgs() << "Serialization error: " << this->ErrMsg << "\n";
@@ -278,9 +278,9 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
*ErrMsg = this->ErrMsg;
return true;
}
- if (FinalizeErr) {
+ if (!InitializeKey) {
std::lock_guard<std::mutex> Lock(M);
- this->ErrMsg = toString(std::move(FinalizeErr));
+ this->ErrMsg = toString(InitializeKey.takeError());
dbgs() << "Finalization error: " << this->ErrMsg << "\n";
if (ErrMsg)
*ErrMsg = this->ErrMsg;
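
A side note on the hunk above: the doubled parentheses in `Expected<ExecutorAddr> InitializeKey((ExecutorAddr()));` are deliberate, avoiding C++'s most vexing parse:

    Expected<ExecutorAddr> K(ExecutorAddr());   // parsed as a function declaration
    Expected<ExecutorAddr> K((ExecutorAddr())); // variable holding a default value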
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
index 26e8f53..cc99d3c 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
@@ -23,10 +23,12 @@ const char *SimpleExecutorMemoryManagerInstanceName =
"__llvm_orc_SimpleExecutorMemoryManager_Instance";
const char *SimpleExecutorMemoryManagerReserveWrapperName =
"__llvm_orc_SimpleExecutorMemoryManager_reserve_wrapper";
-const char *SimpleExecutorMemoryManagerFinalizeWrapperName =
- "__llvm_orc_SimpleExecutorMemoryManager_finalize_wrapper";
-const char *SimpleExecutorMemoryManagerDeallocateWrapperName =
- "__llvm_orc_SimpleExecutorMemoryManager_deallocate_wrapper";
+const char *SimpleExecutorMemoryManagerInitializeWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_initialize_wrapper";
+const char *SimpleExecutorMemoryManagerDeinitializeWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_deinitialize_wrapper";
+const char *SimpleExecutorMemoryManagerReleaseWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_release_wrapper";
const char *ExecutorSharedMemoryMapperServiceInstanceName =
"__llvm_orc_ExecutorSharedMemoryMapperService_Instance";
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
index 87d7578..dec1df7 100644
--- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
@@ -216,9 +216,9 @@ SimpleRemoteEPC::createDefaultMemoryManager(SimpleRemoteEPC &SREPC) {
if (auto Err = SREPC.getBootstrapSymbols(
{{SAs.Allocator, rt::SimpleExecutorMemoryManagerInstanceName},
{SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
- {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
- {SAs.Deallocate,
- rt::SimpleExecutorMemoryManagerDeallocateWrapperName}}))
+ {SAs.Initialize,
+ rt::SimpleExecutorMemoryManagerInitializeWrapperName},
+ {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName}}))
return std::move(Err);
return std::make_unique<EPCGenericJITLinkMemoryManager>(SREPC, SAs);
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp
new file mode 100644
index 0000000..b82de3f
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.cpp
@@ -0,0 +1,104 @@
+//===---- SimpleRemoteMemoryMapper.cpp - Remote memory mapper ----*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/SimpleRemoteMemoryMapper.h"
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+
+namespace llvm::orc {
+
+SimpleRemoteMemoryMapper::SimpleRemoteMemoryMapper(ExecutorProcessControl &EPC,
+ SymbolAddrs SAs)
+ : EPC(EPC), SAs(SAs) {}
+
+void SimpleRemoteMemoryMapper::reserve(size_t NumBytes,
+ OnReservedFunction OnReserved) {
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapReserveSignature>(
+ SAs.Reserve,
+ [NumBytes, OnReserved = std::move(OnReserved)](
+ Error SerializationErr, Expected<ExecutorAddr> Result) mutable {
+ if (SerializationErr) {
+ cantFail(Result.takeError());
+ return OnReserved(std::move(SerializationErr));
+ }
+
+ if (Result)
+ OnReserved(ExecutorAddrRange(*Result, NumBytes));
+ else
+ OnReserved(Result.takeError());
+ },
+ SAs.Instance, static_cast<uint64_t>(NumBytes));
+}
+
+char *SimpleRemoteMemoryMapper::prepare(jitlink::LinkGraph &G,
+ ExecutorAddr Addr, size_t ContentSize) {
+ return G.allocateBuffer(ContentSize).data();
+}
+
+void SimpleRemoteMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
+ OnInitializedFunction OnInitialized) {
+
+ tpctypes::FinalizeRequest FR;
+
+ std::swap(FR.Actions, AI.Actions);
+ FR.Segments.reserve(AI.Segments.size());
+
+ for (auto Seg : AI.Segments)
+ FR.Segments.push_back({Seg.AG, AI.MappingBase + Seg.Offset,
+ Seg.ContentSize + Seg.ZeroFillSize,
+ ArrayRef<char>(Seg.WorkingMem, Seg.ContentSize)});
+
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapInitializeSignature>(
+ SAs.Initialize,
+ [OnInitialized = std::move(OnInitialized)](
+ Error SerializationErr, Expected<ExecutorAddr> Result) mutable {
+ if (SerializationErr) {
+ cantFail(Result.takeError());
+ return OnInitialized(std::move(SerializationErr));
+ }
+
+ OnInitialized(std::move(Result));
+ },
+ SAs.Instance, std::move(FR));
+}
+
+void SimpleRemoteMemoryMapper::deinitialize(
+ ArrayRef<ExecutorAddr> Allocations,
+ MemoryMapper::OnDeinitializedFunction OnDeinitialized) {
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapDeinitializeSignature>(
+ SAs.Deinitialize,
+ [OnDeinitialized = std::move(OnDeinitialized)](Error SerializationErr,
+ Error Result) mutable {
+ if (SerializationErr) {
+ cantFail(std::move(Result));
+ return OnDeinitialized(std::move(SerializationErr));
+ }
+
+ OnDeinitialized(std::move(Result));
+ },
+ SAs.Instance, Allocations);
+}
+
+void SimpleRemoteMemoryMapper::release(ArrayRef<ExecutorAddr> Bases,
+ OnReleasedFunction OnReleased) {
+ EPC.callSPSWrapperAsync<rt::SPSSimpleRemoteMemoryMapReleaseSignature>(
+ SAs.Release,
+ [OnReleased = std::move(OnReleased)](Error SerializationErr,
+ Error Result) mutable {
+ if (SerializationErr) {
+ cantFail(std::move(Result));
+ return OnReleased(std::move(SerializationErr));
+ }
+
+ return OnReleased(std::move(Result));
+ },
+ SAs.Instance, Bases);
+}
+
+} // namespace llvm::orc
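
Callers are expected to fill in `SymbolAddrs` before constructing the mapper. A sketch, assuming the field set implied by the usages above (Instance, Reserve, Initialize, Deinitialize, Release) and reusing the wrapper names this commit adds to OrcRTBridge.cpp:

    SimpleRemoteMemoryMapper::SymbolAddrs SAs;
    if (auto Err = EPC.getBootstrapSymbols(
            {{SAs.Instance, rt::SimpleExecutorMemoryManagerInstanceName},
             {SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
             {SAs.Initialize, rt::SimpleExecutorMemoryManagerInitializeWrapperName},
             {SAs.Deinitialize,
              rt::SimpleExecutorMemoryManagerDeinitializeWrapperName},
             {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName}}))
      return std::move(Err);
    auto Mapper = std::make_unique<SimpleRemoteMemoryMapper>(EPC, SAs);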
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
index 3cdffb8..fe881a1 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
@@ -8,6 +8,7 @@
#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
#include "llvm/Support/FormatVariadic.h"
@@ -18,166 +19,167 @@ namespace orc {
namespace rt_bootstrap {
SimpleExecutorMemoryManager::~SimpleExecutorMemoryManager() {
- assert(Allocations.empty() && "shutdown not called?");
+ assert(Slabs.empty() && "shutdown not called?");
}
-Expected<ExecutorAddr> SimpleExecutorMemoryManager::allocate(uint64_t Size) {
+Expected<ExecutorAddr> SimpleExecutorMemoryManager::reserve(uint64_t Size) {
std::error_code EC;
auto MB = sys::Memory::allocateMappedMemory(
Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
if (EC)
return errorCodeToError(EC);
std::lock_guard<std::mutex> Lock(M);
- assert(!Allocations.count(MB.base()) && "Duplicate allocation addr");
- Allocations[MB.base()].Size = Size;
+ assert(!Slabs.count(MB.base()) && "Duplicate allocation addr");
+ Slabs[MB.base()].Size = Size;
return ExecutorAddr::fromPtr(MB.base());
}
-Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) {
- ExecutorAddr Base(~0ULL);
+Expected<ExecutorAddr>
+SimpleExecutorMemoryManager::initialize(tpctypes::FinalizeRequest &FR) {
std::vector<shared::WrapperFunctionCall> DeallocationActions;
- size_t SuccessfulFinalizationActions = 0;
if (FR.Segments.empty()) {
- // NOTE: Finalizing nothing is currently a no-op. Should it be an error?
if (FR.Actions.empty())
- return Error::success();
+ return make_error<StringError>("Finalization request is empty",
+ inconvertibleErrorCode());
else
return make_error<StringError>("Finalization actions attached to empty "
"finalization request",
inconvertibleErrorCode());
}
- for (auto &Seg : FR.Segments)
- Base = std::min(Base, Seg.Addr);
-
- for (auto &ActPair : FR.Actions)
- if (ActPair.Dealloc)
- DeallocationActions.push_back(ActPair.Dealloc);
-
- // Get the Allocation for this finalization.
- size_t AllocSize = 0;
- {
- std::lock_guard<std::mutex> Lock(M);
- auto I = Allocations.find(Base.toPtr<void *>());
- if (I == Allocations.end())
- return make_error<StringError>("Attempt to finalize unrecognized "
- "allocation " +
- formatv("{0:x}", Base.getValue()),
- inconvertibleErrorCode());
- AllocSize = I->second.Size;
- I->second.DeallocationActions = std::move(DeallocationActions);
- }
- ExecutorAddr AllocEnd = Base + ExecutorAddrDiff(AllocSize);
-
- // Bail-out function: this will run deallocation actions corresponding to any
- // completed finalization actions, then deallocate memory.
- auto BailOut = [&](Error Err) {
- std::pair<void *, Allocation> AllocToDestroy;
-
- // Get allocation to destroy.
- {
- std::lock_guard<std::mutex> Lock(M);
- auto I = Allocations.find(Base.toPtr<void *>());
-
- // Check for missing allocation (effective a double free).
- if (I == Allocations.end())
- return joinErrors(
- std::move(Err),
- make_error<StringError>("No allocation entry found "
- "for " +
- formatv("{0:x}", Base.getValue()),
- inconvertibleErrorCode()));
- AllocToDestroy = std::move(*I);
- Allocations.erase(I);
- }
+ ExecutorAddrRange RR(FR.Segments.front().Addr, FR.Segments.front().Addr);
- // Run deallocation actions for all completed finalization actions.
- while (SuccessfulFinalizationActions)
- Err =
- joinErrors(std::move(Err), FR.Actions[--SuccessfulFinalizationActions]
- .Dealloc.runWithSPSRetErrorMerged());
-
- // Deallocate memory.
- sys::MemoryBlock MB(AllocToDestroy.first, AllocToDestroy.second.Size);
- if (auto EC = sys::Memory::releaseMappedMemory(MB))
- Err = joinErrors(std::move(Err), errorCodeToError(EC));
-
- return Err;
- };
+ std::vector<sys::MemoryBlock> MBsToReset;
+ auto ResetMBs = make_scope_exit([&]() {
+ for (auto &MB : MBsToReset)
+ sys::Memory::protectMappedMemory(MB, sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE);
+ sys::Memory::InvalidateInstructionCache(RR.Start.toPtr<void *>(),
+ RR.size());
+ });
// Copy content and apply permissions.
for (auto &Seg : FR.Segments) {
+ RR.Start = std::min(RR.Start, Seg.Addr);
+ RR.End = std::max(RR.End, Seg.Addr + Seg.Size);
// Check segment ranges.
if (LLVM_UNLIKELY(Seg.Size < Seg.Content.size()))
- return BailOut(make_error<StringError>(
+ return make_error<StringError>(
formatv("Segment {0:x} content size ({1:x} bytes) "
"exceeds segment size ({2:x} bytes)",
Seg.Addr.getValue(), Seg.Content.size(), Seg.Size),
- inconvertibleErrorCode()));
+ inconvertibleErrorCode());
ExecutorAddr SegEnd = Seg.Addr + ExecutorAddrDiff(Seg.Size);
- if (LLVM_UNLIKELY(Seg.Addr < Base || SegEnd > AllocEnd))
- return BailOut(make_error<StringError>(
+ if (LLVM_UNLIKELY(Seg.Addr < RR.Start || SegEnd > RR.End))
+ return make_error<StringError>(
formatv("Segment {0:x} -- {1:x} crosses boundary of "
"allocation {2:x} -- {3:x}",
- Seg.Addr.getValue(), SegEnd.getValue(), Base.getValue(),
- AllocEnd.getValue()),
- inconvertibleErrorCode()));
+ Seg.Addr, SegEnd, RR.Start, RR.End),
+ inconvertibleErrorCode());
char *Mem = Seg.Addr.toPtr<char *>();
if (!Seg.Content.empty())
memcpy(Mem, Seg.Content.data(), Seg.Content.size());
memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size());
assert(Seg.Size <= std::numeric_limits<size_t>::max());
+
+ sys::MemoryBlock MB(Mem, Seg.Size);
if (auto EC = sys::Memory::protectMappedMemory(
- {Mem, static_cast<size_t>(Seg.Size)},
- toSysMemoryProtectionFlags(Seg.RAG.Prot)))
- return BailOut(errorCodeToError(EC));
+ MB, toSysMemoryProtectionFlags(Seg.RAG.Prot)))
+ return errorCodeToError(EC);
+
+ MBsToReset.push_back(MB);
+
if ((Seg.RAG.Prot & MemProt::Exec) == MemProt::Exec)
sys::Memory::InvalidateInstructionCache(Mem, Seg.Size);
}
- // Run finalization actions.
- for (auto &ActPair : FR.Actions) {
- if (auto Err = ActPair.Finalize.runWithSPSRetErrorMerged())
- return BailOut(std::move(Err));
- ++SuccessfulFinalizationActions;
+ auto DeallocActions = runFinalizeActions(FR.Actions);
+ if (!DeallocActions)
+ return DeallocActions.takeError();
+
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ auto Region = createRegionInfo(RR, "In initialize");
+ if (!Region)
+ return Region.takeError();
+ Region->DeallocActions = std::move(*DeallocActions);
}
- return Error::success();
+ // Successful initialization.
+ ResetMBs.release();
+
+ return RR.Start;
}
-Error SimpleExecutorMemoryManager::deallocate(
- const std::vector<ExecutorAddr> &Bases) {
- std::vector<std::pair<void *, Allocation>> AllocPairs;
- AllocPairs.reserve(Bases.size());
+Error SimpleExecutorMemoryManager::deinitialize(
+ const std::vector<ExecutorAddr> &InitKeys) {
+ Error Err = Error::success();
- // Get allocation to destroy.
+ for (auto &KeyAddr : llvm::reverse(InitKeys)) {
+ std::vector<shared::WrapperFunctionCall> DeallocActions;
+ {
+ std::scoped_lock<std::mutex> Lock(M);
+ auto Slab = getSlabInfo(KeyAddr, "In deinitialize");
+ if (!Slab) {
+ Err = joinErrors(std::move(Err), Slab.takeError());
+ continue;
+ }
+
+ auto RI = getRegionInfo(*Slab, KeyAddr, "In deinitialize");
+ if (!RI) {
+ Err = joinErrors(std::move(Err), RI.takeError());
+ continue;
+ }
+
+ DeallocActions = std::move(RI->DeallocActions);
+ }
+
+ Err = joinErrors(std::move(Err),
+ runDeallocActions(std::move(DeallocActions)));
+ }
+
+ return Err;
+}
+
+Error SimpleExecutorMemoryManager::release(
+ const std::vector<ExecutorAddr> &Bases) {
Error Err = Error::success();
- {
- std::lock_guard<std::mutex> Lock(M);
- for (auto &Base : Bases) {
- auto I = Allocations.find(Base.toPtr<void *>());
-
- // Check for missing allocation (effective a double free).
- if (I != Allocations.end()) {
- AllocPairs.push_back(std::move(*I));
- Allocations.erase(I);
- } else
+
+ // TODO: Prohibit new initializations within the slabs being removed?
+ for (auto &Base : llvm::reverse(Bases)) {
+ std::vector<shared::WrapperFunctionCall> DeallocActions;
+ sys::MemoryBlock MB;
+
+ {
+ std::scoped_lock<std::mutex> Lock(M);
+
+ auto SlabI = Slabs.find(Base.toPtr<void *>());
+ if (SlabI == Slabs.end()) {
Err = joinErrors(
std::move(Err),
- make_error<StringError>("No allocation entry found "
- "for " +
- formatv("{0:x}", Base.getValue()),
+ make_error<StringError>("In release, " + formatv("{0:x}", Base) +
+ " is not part of any reserved "
+ "address range",
inconvertibleErrorCode()));
+ continue;
+ }
+
+ auto &Slab = SlabI->second;
+
+ for (auto &[Addr, Region] : Slab.Regions)
+ llvm::copy(Region.DeallocActions, back_inserter(DeallocActions));
+
+ MB = {Base.toPtr<void *>(), Slab.Size};
+
+ Slabs.erase(SlabI);
}
- }
- while (!AllocPairs.empty()) {
- auto &P = AllocPairs.back();
- Err = joinErrors(std::move(Err), deallocateImpl(P.first, P.second));
- AllocPairs.pop_back();
+ Err = joinErrors(std::move(Err), runDeallocActions(DeallocActions));
+ if (auto EC = sys::Memory::releaseMappedMemory(MB))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
}
return Err;
@@ -185,16 +187,15 @@ Error SimpleExecutorMemoryManager::deallocate(
Error SimpleExecutorMemoryManager::shutdown() {
- AllocationsMap AM;
+ // TODO: Prevent new allocations during shutdown.
+ std::vector<ExecutorAddr> Bases;
{
- std::lock_guard<std::mutex> Lock(M);
- AM = std::move(Allocations);
+ std::scoped_lock<std::mutex> Lock(M);
+ for (auto &[Base, Slab] : Slabs)
+ Bases.push_back(ExecutorAddr::fromPtr(Base));
}
- Error Err = Error::success();
- for (auto &KV : AM)
- Err = joinErrors(std::move(Err), deallocateImpl(KV.first, KV.second));
- return Err;
+ return release(Bases);
}
void SimpleExecutorMemoryManager::addBootstrapSymbols(
@@ -202,58 +203,150 @@ void SimpleExecutorMemoryManager::addBootstrapSymbols(
M[rt::SimpleExecutorMemoryManagerInstanceName] = ExecutorAddr::fromPtr(this);
M[rt::SimpleExecutorMemoryManagerReserveWrapperName] =
ExecutorAddr::fromPtr(&reserveWrapper);
- M[rt::SimpleExecutorMemoryManagerFinalizeWrapperName] =
- ExecutorAddr::fromPtr(&finalizeWrapper);
- M[rt::SimpleExecutorMemoryManagerDeallocateWrapperName] =
- ExecutorAddr::fromPtr(&deallocateWrapper);
+ M[rt::SimpleExecutorMemoryManagerInitializeWrapperName] =
+ ExecutorAddr::fromPtr(&initializeWrapper);
+ M[rt::SimpleExecutorMemoryManagerDeinitializeWrapperName] =
+ ExecutorAddr::fromPtr(&deinitializeWrapper);
+ M[rt::SimpleExecutorMemoryManagerReleaseWrapperName] =
+ ExecutorAddr::fromPtr(&releaseWrapper);
}
-Error SimpleExecutorMemoryManager::deallocateImpl(void *Base, Allocation &A) {
- Error Err = Error::success();
+Expected<SimpleExecutorMemoryManager::SlabInfo &>
+SimpleExecutorMemoryManager::getSlabInfo(ExecutorAddr A, StringRef Context) {
+ auto MakeBadSlabError = [&]() {
+ return make_error<StringError>(
+ Context + ", address " + formatv("{0:x}", A) +
+ " is not part of any reserved address range",
+ inconvertibleErrorCode());
+ };
- while (!A.DeallocationActions.empty()) {
- Err = joinErrors(std::move(Err),
- A.DeallocationActions.back().runWithSPSRetErrorMerged());
- A.DeallocationActions.pop_back();
+ auto I = Slabs.upper_bound(A.toPtr<void *>());
+ if (I == Slabs.begin())
+ return MakeBadSlabError();
+ --I;
+ if (!ExecutorAddrRange(ExecutorAddr::fromPtr(I->first), I->second.Size)
+ .contains(A))
+ return MakeBadSlabError();
+
+ return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::SlabInfo &>
+SimpleExecutorMemoryManager::getSlabInfo(ExecutorAddrRange R,
+ StringRef Context) {
+ auto MakeBadSlabError = [&]() {
+ return make_error<StringError>(
+ Context + ", range " + formatv("{0:x}", R) +
+ " is not part of any reserved address range",
+ inconvertibleErrorCode());
+ };
+
+ auto I = Slabs.upper_bound(R.Start.toPtr<void *>());
+ if (I == Slabs.begin())
+ return MakeBadSlabError();
+ --I;
+ if (!ExecutorAddrRange(ExecutorAddr::fromPtr(I->first), I->second.Size)
+ .contains(R))
+ return MakeBadSlabError();
+
+ return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::createRegionInfo(ExecutorAddrRange R,
+ StringRef Context) {
+
+ auto Slab = getSlabInfo(R, Context);
+ if (!Slab)
+ return Slab.takeError();
+
+ auto MakeBadRegionError = [&](ExecutorAddrRange Other, bool Prev) {
+ return make_error<StringError>(Context + ", region " + formatv("{0:x}", R) +
+ " overlaps " +
+ (Prev ? "previous" : "following") +
+ " region " + formatv("{0:x}", Other),
+ inconvertibleErrorCode());
+ };
+
+ auto I = Slab->Regions.upper_bound(R.Start);
+ if (I != Slab->Regions.begin()) {
+ auto J = std::prev(I);
+ ExecutorAddrRange PrevRange(J->first, J->second.Size);
+ if (PrevRange.overlaps(R))
+ return MakeBadRegionError(PrevRange, true);
+ }
+ if (I != Slab->Regions.end()) {
+ ExecutorAddrRange NextRange(I->first, I->second.Size);
+ if (NextRange.overlaps(R))
+ return MakeBadRegionError(NextRange, false);
}
- sys::MemoryBlock MB(Base, A.Size);
- if (auto EC = sys::Memory::releaseMappedMemory(MB))
- Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ auto &RInfo = Slab->Regions[R.Start];
+ RInfo.Size = R.size();
+ return RInfo;
+}
- return Err;
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::getRegionInfo(SlabInfo &Slab, ExecutorAddr A,
+ StringRef Context) {
+ auto I = Slab.Regions.find(A);
+ if (I == Slab.Regions.end())
+ return make_error<StringError>(
+ Context + ", address " + formatv("{0:x}", A) +
+ " does not correspond to the start of any initialized region",
+ inconvertibleErrorCode());
+
+ return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::getRegionInfo(ExecutorAddr A, StringRef Context) {
+ auto Slab = getSlabInfo(A, Context);
+ if (!Slab)
+ return Slab.takeError();
+
+ return getRegionInfo(*Slab, A, Context);
}
llvm::orc::shared::CWrapperFunctionResult
SimpleExecutorMemoryManager::reserveWrapper(const char *ArgData,
size_t ArgSize) {
- return shared::WrapperFunction<
- rt::SPSSimpleExecutorMemoryManagerReserveSignature>::
+ return shared::WrapperFunction<rt::SPSSimpleRemoteMemoryMapReserveSignature>::
handle(ArgData, ArgSize,
shared::makeMethodWrapperHandler(
- &SimpleExecutorMemoryManager::allocate))
+ &SimpleExecutorMemoryManager::reserve))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorMemoryManager::initializeWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::
+ WrapperFunction<rt::SPSSimpleRemoteMemoryMapInitializeSignature>::handle(
+ ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorMemoryManager::initialize))
.release();
}
llvm::orc::shared::CWrapperFunctionResult
-SimpleExecutorMemoryManager::finalizeWrapper(const char *ArgData,
- size_t ArgSize) {
+SimpleExecutorMemoryManager::deinitializeWrapper(const char *ArgData,
+ size_t ArgSize) {
return shared::WrapperFunction<
- rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>::
+ rt::SPSSimpleRemoteMemoryMapDeinitializeSignature>::
handle(ArgData, ArgSize,
shared::makeMethodWrapperHandler(
- &SimpleExecutorMemoryManager::finalize))
+ &SimpleExecutorMemoryManager::deinitialize))
.release();
}
llvm::orc::shared::CWrapperFunctionResult
-SimpleExecutorMemoryManager::deallocateWrapper(const char *ArgData,
- size_t ArgSize) {
- return shared::WrapperFunction<
- rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>::
+SimpleExecutorMemoryManager::releaseWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<rt::SPSSimpleRemoteMemoryMapReleaseSignature>::
handle(ArgData, ArgSize,
shared::makeMethodWrapperHandler(
- &SimpleExecutorMemoryManager::deallocate))
+ &SimpleExecutorMemoryManager::release))
.release();
}
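
The rework splits the old allocate/finalize/deallocate flow into reserve/initialize/deinitialize/release, and one reserved slab may now hold multiple initialized regions. An in-process lifecycle sketch using the signatures above (error handling elided; `makeRequestFor` is a hypothetical helper):

    SimpleExecutorMemoryManager MemMgr;
    ExecutorAddr Slab = cantFail(MemMgr.reserve(1 << 20)); // map a 1 MiB slab
    tpctypes::FinalizeRequest FR = makeRequestFor(Slab);   // hypothetical
    ExecutorAddr Key = cantFail(MemMgr.initialize(FR));    // region key
    cantFail(MemMgr.deinitialize({Key})); // run the region's dealloc actions
    cantFail(MemMgr.release({Slab}));     // unmap the slab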
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index be2f2e4..91c1f59 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1561,6 +1561,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
@@ -1717,6 +1718,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -7775,6 +7777,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
return LowerVECREDUCE(Op, DAG);
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_FMUL:
+ return LowerVECREDUCE_MUL(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
@@ -16794,6 +16799,33 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
}
}
+SDValue AArch64TargetLowering::LowerVECREDUCE_MUL(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ assert(SrcVT.isScalableVector() && "Unexpected operand type!");
+
+ SDVTList SrcVTs = DAG.getVTList(SrcVT, SrcVT);
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
+ SDValue Identity = DAG.getNeutralElement(BaseOpc, DL, SrcVT, Op->getFlags());
+
+ // Whilst we don't know the size of the vector we do know the maximum size, so
+ // we can perform a tree reduction with an identity vector, which means once
+ // we arrive at the result the remaining stages (when the vector is smaller
+ // than the maximum) have no effect.
+
+ unsigned Segments = AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
+ unsigned Stages = llvm::Log2_32(Segments * SrcVT.getVectorMinNumElements());
+
+ for (unsigned I = 0; I < Stages; ++I) {
+ Src = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, SrcVTs, Src, Identity);
+ Src = DAG.getNode(BaseOpc, DL, SrcVT, Src.getValue(0), Src.getValue(1));
+ }
+
+ return DAG.getExtractVectorElt(DL, Op.getValueType(), Src, 0);
+}
+
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
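
A scalar model of the deinterleave-based tree reduction above (names and shapes are illustrative, not from this patch): identity padding guarantees the stages past the runtime vector length leave the running product unchanged.

    // MaxLanes is the compile-time maximum (a power of two); the real vector
    // may be shorter, with the tail padded by the identity element 1.0f.
    float reduceMul(std::vector<float> V, unsigned MaxLanes) {
      V.resize(MaxLanes, 1.0f);           // identity padding
      for (unsigned N = MaxLanes; N > 1; N /= 2)
        for (unsigned I = 0; I < N / 2; ++I)
          V[I] = V[2 * I] * V[2 * I + 1]; // deinterleave + pairwise multiply
      return V[0];
    }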
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 00956fd..9495c9f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -752,6 +752,7 @@ private:
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECREDUCE_MUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7322212..fe84193 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -233,6 +233,12 @@ def G_SDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_USDOT : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -278,6 +284,7 @@ def : GINodeEquiv<G_UADDLV, AArch64uaddlv>;
def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
+def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9e2d698..05a4313 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1855,6 +1855,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerTriOp(AArch64::G_UDOT);
case Intrinsic::aarch64_neon_sdot:
return LowerTriOp(AArch64::G_SDOT);
+ case Intrinsic::aarch64_neon_usdot:
+ return LowerTriOp(AArch64::G_USDOT);
case Intrinsic::aarch64_neon_sqxtn:
return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
case Intrinsic::aarch64_neon_sqxtun:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index be62395..e0375ea 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -513,8 +513,7 @@ void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
}
if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
- Op = decodeLiteralConstant(
- Desc, OpDesc, OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64);
+ Op = decodeLiteralConstant(Desc, OpDesc);
continue;
}
@@ -1545,21 +1544,21 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
if (HasLiteral) {
- if (Literal64 != Val)
+ if (Literal != Val)
return errOperand(Val, "More than one unique literal is illegal");
}
HasLiteral = true;
- Literal = Literal64 = Val;
+ Literal = Val;
- bool UseLit64 = Hi_32(Literal64) == 0;
+ bool UseLit64 = Hi_32(Literal) == 0;
return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
- LitModifier::Lit64, Literal64, getContext()))
- : MCOperand::createImm(Literal64);
+ LitModifier::Lit64, Literal, getContext()))
+ : MCOperand::createImm(Literal);
}
-MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
- const MCOperandInfo &OpDesc,
- bool ExtendFP64) const {
+MCOperand
+AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
+ const MCOperandInfo &OpDesc) const {
// For now all literal constants are supposed to be unsigned integer
// ToDo: deal with signed/unsigned 64-bit integer constants
// ToDo: deal with float/double constants
@@ -1569,35 +1568,79 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
Twine(Bytes.size()));
}
HasLiteral = true;
- Literal = Literal64 = eatBytes<uint32_t>(Bytes);
- if (ExtendFP64)
- Literal64 <<= 32;
+ Literal = eatBytes<uint32_t>(Bytes);
}
- int64_t Val = ExtendFP64 ? Literal64 : Literal;
+ // For disassembling, always assume all inline constants are available.
+ bool HasInv2Pi = true;
- bool CanUse64BitLiterals =
- STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
- !(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
-
- bool UseLit64 = false;
- if (CanUse64BitLiterals) {
- if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64)
- UseLit64 = false;
- else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64)
- UseLit64 = Hi_32(Literal64) == 0;
+ // Invalid instruction codes may contain literals for inline-only
+ // operands, so we support them here as well.
+ int64_t Val = Literal;
+ bool UseLit = false;
+ switch (OpDesc.OperandType) {
+ default:
+ llvm_unreachable("Unexpected operand type!");
+ case AMDGPU::OPERAND_REG_IMM_BF16:
+ case AMDGPU::OPERAND_REG_INLINE_C_BF16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
+ UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2BF16:
+ UseLit = AMDGPU::isInlinableLiteralV2BF16(Val);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
+ break;
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_IMM_V2FP32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT32:
+ case AMDGPU::OPERAND_KIMM32:
+ UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
+ Val <<= 32;
+ break;
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
+ break;
+ case MCOI::OPERAND_REGISTER:
+ // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
+ // decoding a literal in a position of a register operand. Give
+ // it special handling in the caller, decodeImmOperands(), instead
+ // of quietly allowing it here.
+ break;
}
- return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
- LitModifier::Lit64, Val, getContext()))
- : MCOperand::createImm(Val);
+ return UseLit ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
+ LitModifier::Lit, Val, getContext()))
+ : MCOperand::createImm(Val);
}
-MCOperand
-AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
+MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
if (!HasLiteral) {
@@ -1606,25 +1649,13 @@ AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
Twine(Bytes.size()));
}
HasLiteral = true;
- Literal64 = eatBytes<uint64_t>(Bytes);
- }
-
- bool UseLit64 = false;
- const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
- const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()];
- if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) {
- UseLit64 = false;
- } else {
- assert(OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
- OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64);
- UseLit64 = Hi_32(Literal64) == 0;
+ Literal = eatBytes<uint64_t>(Bytes);
}
+ bool UseLit64 = Hi_32(Literal) == 0;
return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
- LitModifier::Lit64, Literal64, getContext()))
- : MCOperand::createImm(Literal64);
+ LitModifier::Lit64, Literal, getContext()))
+ : MCOperand::createImm(Literal);
}
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
@@ -1913,7 +1944,7 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
return MCOperand::createImm(Val);
if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
- return decodeLiteral64Constant(Inst);
+ return decodeLiteral64Constant();
}
switch (Width) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 2751857..d103d79 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -44,8 +44,7 @@ private:
const unsigned HwModeRegClass;
const unsigned TargetMaxInstBytes;
mutable ArrayRef<uint8_t> Bytes;
- mutable uint32_t Literal;
- mutable uint64_t Literal64;
+ mutable uint64_t Literal;
mutable bool HasLiteral;
mutable std::optional<bool> EnableWavefrontSize32;
unsigned CodeObjectVersion;
@@ -144,9 +143,8 @@ public:
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const;
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc,
- const MCOperandInfo &OpDesc,
- bool ExtendFP64) const;
- MCOperand decodeLiteral64Constant(const MCInst &Inst) const;
+ const MCOperandInfo &OpDesc) const;
+ MCOperand decodeLiteral64Constant() const;
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 228114c..44c4830 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -57,6 +57,7 @@ def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
def BinaryWithCarryTy : DXILOpParamType;
+def DimensionsTy : DXILOpParamType;
class DXILOpClass;
@@ -901,6 +902,13 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
}
+def GetDimensions : DXILOp<72, getDimensions> {
+ let Doc = "gets the dimensions of a buffer or texture";
+ let arguments = [HandleTy, Int32Ty];
+ let result = DimensionsTy;
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
def Barrier : DXILOp<80, barrier> {
let Doc = "inserts a memory barrier in the shader";
let intrinsics = [
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 1aed8f9..944b2e6 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -261,6 +261,12 @@ static StructType *getBinaryWithCarryType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
}
+static StructType *getDimensionsType(LLVMContext &Ctx) {
+ Type *Int32Ty = Type::getInt32Ty(Ctx);
+ return getOrCreateStructType("dx.types.Dimensions",
+ {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, Ctx);
+}
+
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
Type *OverloadTy) {
switch (Kind) {
@@ -318,6 +324,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getSplitDoubleType(Ctx);
case OpParamType::BinaryWithCarryTy:
return getBinaryWithCarryType(Ctx);
+ case OpParamType::DimensionsTy:
+ return getDimensionsType(Ctx);
}
llvm_unreachable("Invalid parameter kind");
return nullptr;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 610d8b6..e46a393 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -627,6 +627,28 @@ public:
});
}
+ [[nodiscard]] bool lowerGetDimensionsX(Function &F) {
+ IRBuilder<> &IRB = OpBuilder.getIRB();
+ Type *Int32Ty = IRB.getInt32Ty();
+
+ return replaceFunction(F, [&](CallInst *CI) -> Error {
+ IRB.SetInsertPoint(CI);
+ Value *Handle =
+ createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+ Value *Undef = UndefValue::get(Int32Ty);
+
+ Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+ OpCode::GetDimensions, {Handle, Undef}, CI->getName(), Int32Ty);
+ if (Error E = OpCall.takeError())
+ return E;
+ Value *Dim = IRB.CreateExtractValue(*OpCall, 0);
+
+ CI->replaceAllUsesWith(Dim);
+ CI->eraseFromParent();
+ return Error::success();
+ });
+ }
+
[[nodiscard]] bool lowerGetPointer(Function &F) {
// These should have already been handled in DXILResourceAccess, so we can
// just clean up the dead prototype.
@@ -934,6 +956,9 @@ public:
case Intrinsic::dx_resource_updatecounter:
HasErrors |= lowerUpdateCounter(F);
break;
+ case Intrinsic::dx_resource_getdimensions_x:
+ HasErrors |= lowerGetDimensionsX(F);
+ break;
case Intrinsic::ctpop:
HasErrors |= lowerCtpopToCountBits(F);
break;
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 82c43ff..26a8728 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -1165,12 +1165,15 @@ void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {}
/// Returns the bit offset to backpatch with the location of the real VST.
void DXILBitcodeWriter::writeModuleInfo() {
// Emit various pieces of data attached to a module.
- if (!M.getTargetTriple().empty())
- writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE,
- M.getTargetTriple().str(), 0 /*TODO*/);
- const std::string &DL = M.getDataLayoutStr();
- if (!DL.empty())
- writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+
+ // We need to hardcode a triple and datalayout that are compatible with the
+ // historical DXIL triple and datalayout from DXC.
+ StringRef Triple = "dxil-ms-dx";
+ StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-"
+ "f16:32-f32:32-f64:64-n8:16:32:64";
+ writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/);
+ writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+
if (!M.getModuleInlineAsm().empty())
writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
0 /*TODO*/);
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
index 1eb03bf..725f2b1 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
@@ -149,11 +149,6 @@ public:
std::string Data;
llvm::raw_string_ostream OS(Data);
- Triple OriginalTriple = M.getTargetTriple();
- // Set to DXIL triple when write to bitcode.
- // Only the output bitcode need to be DXIL triple.
- M.setTargetTriple(Triple("dxil-ms-dx"));
-
// Perform late legalization of lifetime intrinsics that would otherwise
// fail the Module Verifier if performed in an earlier pass
legalizeLifetimeIntrinsics(M);
@@ -165,9 +160,6 @@ public:
// not-so-legal legalizations
removeLifetimeIntrinsics(M);
- // Recover triple.
- M.setTargetTriple(OriginalTriple);
-
Constant *ModuleConstant =
ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data));
auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true,
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 5f180d6..3bd6ed4 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -66,6 +66,10 @@ public:
void remapInstruction(MCInst &Instr) const;
+ Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const override;
+
private:
bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
uint64_t &BytesToSkip, raw_ostream &CS) const;
@@ -567,6 +571,18 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
return Result;
}
+Expected<bool> HexagonDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
+ uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const {
+ // At the start of a symbol, force a fresh packet by resetting any
+ // in-progress bundle state. This prevents packets from straddling label
+ // boundaries when data (e.g. jump tables) appears in between.
+ Size = 0;
+ resetBundle();
+ return true;
+}
+
static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
ArrayRef<MCPhysReg> Table) {
if (RegNo < Table.size()) {
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 662d3f6..b1794b7 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -717,6 +717,18 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.clampScalar(0, sXLen, sXLen)
.lower();
+ LegalityPredicate InsertVectorEltPred = [=](const LegalityQuery &Query) {
+ LLT VecTy = Query.Types[0];
+ LLT EltTy = Query.Types[1];
+ return VecTy.getElementType() == EltTy;
+ };
+
+ getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
+ .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+ InsertVectorEltPred, typeIs(2, sXLen)))
+ .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), InsertVectorEltPred,
+ typeIs(2, sXLen)));
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
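
The new `InsertVectorEltPred` ties type index 1 to the vector's element type, while `typeIs(2, sXLen)` constrains the index operand. Illustrative LLTs that the rules accept on RV64 (example values, not from the patch):

    llvm::LLT VecTy = llvm::LLT::scalable_vector(4, llvm::LLT::scalar(32));
    llvm::LLT EltTy = llvm::LLT::scalar(32); // == VecTy.getElementType()
    llvm::LLT IdxTy = llvm::LLT::scalar(64); // sXLen on RV64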
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 0f6e1ca..eedfdb3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1771,6 +1771,10 @@ defm RELAXED_DOT_ADD :
"i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc",
"i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>;
+def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$lhs),
+ (v16i8 V128:$rhs))),
+ (RELAXED_DOT_ADD $lhs, $rhs, $acc)>, Requires<[HasRelaxedSIMD]>;
+
//===----------------------------------------------------------------------===//
// Relaxed BFloat16 dot product
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 0fd44b7..ec31675 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1256,8 +1256,17 @@ def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+
+// If the globaladdr is an absolute_symbol, don't use the sign-extending
+// instruction, since sign extension provides no benefit for absolute symbols.
+def globalAddrNoAbsSym : PatLeaf<(tglobaladdr:$dst), [{
+ auto *GA = cast<GlobalAddressSDNode>(N);
+ return !GA->getGlobal()->getAbsoluteSymbolRange();
+}]>;
+def : Pat<(i64 (X86Wrapper globalAddrNoAbsSym:$dst)),
+ (MOV64ri32 tglobaladdr:$dst)>,
+ Requires<[KernelCode]>;
+
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper mcsym:$dst)),
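Note: a global counts as an absolute symbol when it carries !absolute_symbol metadata; GlobalValue::getAbsoluteSymbolRange() is non-empty exactly in that case, which is what globalAddrNoAbsSym screens out. A minimal sketch (names illustrative, not from this patch) of how such a global is built:

    LLVMContext Ctx;
    Module M("m", Ctx);
    auto *GV = new GlobalVariable(M, Type::getInt8Ty(Ctx), /*isConstant=*/false,
                                  GlobalValue::ExternalLinkage, /*Init=*/nullptr,
                                  "abs_sym");
    Type *I64 = Type::getInt64Ty(Ctx);
    Metadata *Lo = ConstantAsMetadata::get(ConstantInt::get(I64, 0));
    Metadata *Hi = ConstantAsMetadata::get(ConstantInt::get(I64, 1ULL << 32));
    GV->setMetadata(LLVMContext::MD_absolute_symbol, MDNode::get(Ctx, {Lo, Hi}));
    // GV->getAbsoluteSymbolRange() now returns a range, so the MOV64ri32
    // pattern above no longer matches this global.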
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 481a9be..1fca466f 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1946,16 +1946,14 @@ static void addConstantComments(const MachineInstr *MI,
CASE_ARITH_RM(PMADDUBSW) {
unsigned SrcIdx = getSrcIdx(MI, 1);
if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
- if (C->getType()->getScalarSizeInBits() == 8) {
- std::string Comment;
- raw_string_ostream CS(Comment);
- unsigned VectorWidth =
- X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
- CS << "[";
- printConstant(C, VectorWidth, CS);
- CS << "]";
- OutStreamer.AddComment(CS.str());
- }
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ unsigned VectorWidth =
+ X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
+ CS << "[";
+ printConstant(C, VectorWidth, CS);
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
}
break;
}
@@ -1967,16 +1965,14 @@ static void addConstantComments(const MachineInstr *MI,
CASE_ARITH_RM(PMULHRSW) {
unsigned SrcIdx = getSrcIdx(MI, 1);
if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
- if (C->getType()->getScalarSizeInBits() == 16) {
- std::string Comment;
- raw_string_ostream CS(Comment);
- unsigned VectorWidth =
- X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
- CS << "[";
- printConstant(C, VectorWidth, CS);
- CS << "]";
- OutStreamer.AddComment(CS.str());
- }
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ unsigned VectorWidth =
+ X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
+ CS << "[";
+ printConstant(C, VectorWidth, CS);
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
}
break;
}
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index 950bb2b..d765d9c 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -548,8 +548,11 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {
case Triple::csky:
return computeCSKYDataLayout(*this);
case Triple::dxil:
+ // TODO: We need to align vectors on the element size generally, but for now
+ // we hard-code this for 3-element 32- and 64-bit vectors as a workaround.
+ // See https://github.com/llvm/llvm-project/issues/123968
return "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-"
- "f32:32-f64:64-n8:16:32:64";
+ "f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64";
case Triple::hexagon:
return "e-m:e-p:32:32:32-a:0-n16:32-"
"i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-"
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 4c9b10a..cdc559b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -156,9 +156,9 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
Type *Ty = CI.getType();
- if (auto *SrcC = dyn_cast<Constant>(Src))
- if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
- return replaceInstUsesWith(CI, Res);
+ if (Value *Res =
+ simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI)))
+ return replaceInstUsesWith(CI, Res);
// Try to eliminate a cast of a cast.
if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index d99f1eb..ddb99a5 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -75,8 +75,6 @@ static cl::opt<bool>
"expressed as branches by widenable conditions"),
cl::init(true));
-namespace {
-
// Get the condition of \p I. It can either be a guard or a conditional branch.
static Value *getCondition(Instruction *I) {
if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
@@ -130,6 +128,8 @@ findInsertionPointForWideCondition(Instruction *WCOrGuard) {
return std::nullopt;
}
+namespace {
+
class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
@@ -328,7 +328,7 @@ public:
/// The entry point for this pass.
bool run();
};
-}
+} // namespace
static bool isSupportedGuardInstruction(const Instruction *Insn) {
if (isGuard(Insn))
diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
index 3c14036e..6fb8197 100644
--- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
@@ -26,8 +26,6 @@
using namespace llvm;
-namespace llvm {
-
static cl::opt<unsigned>
JumpTableSizeThreshold("jump-table-to-switch-size-threshold", cl::Hidden,
cl::desc("Only split jump tables with size less or "
@@ -43,8 +41,8 @@ static cl::opt<unsigned> FunctionSizeThreshold(
"or equal than this threshold."),
cl::init(50));
+namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
} // end namespace llvm
#define DEBUG_TYPE "jump-table-to-switch"
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 9655173..b2c526b 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -116,8 +116,6 @@ STATISTIC(NumIntAssociationsHoisted,
STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
"reassociated and hoisted out of the loop");
-namespace llvm {
-
/// Memory promotion is enabled by default.
static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
@@ -156,7 +154,7 @@ static cl::opt<unsigned> IntAssociationUpperLimit(
// which may not be precise, since optimizeUses is capped. The result is
// correct, but we may not get as "far up" as possible to get which access is
// clobbering the one queried.
-cl::opt<unsigned> SetLicmMssaOptCap(
+cl::opt<unsigned> llvm::SetLicmMssaOptCap(
"licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
"for faster compile. Caps the MemorySSA clobbering calls."));
@@ -164,15 +162,15 @@ cl::opt<unsigned> SetLicmMssaOptCap(
// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
-cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap(
+cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
"licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
"effect. When MSSA in LICM is enabled, then this is the maximum "
"number of accesses allowed to be present in a loop in order to "
"enable memory promotion."));
+namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
} // end namespace llvm
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
@@ -1120,11 +1118,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
return false;
}
-namespace {
/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality
/// concerns such as aliasing or speculation safety.
-bool isHoistableAndSinkableInst(Instruction &I) {
+static bool isHoistableAndSinkableInst(Instruction &I) {
// Only these instructions are hoistable/sinkable.
return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
@@ -1136,8 +1133,8 @@ bool isHoistableAndSinkableInst(Instruction &I) {
}
/// Return true if I is the only Instruction with a MemoryAccess in L.
-bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
- const MemorySSAUpdater &MSSAU) {
+static bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
+ const MemorySSAUpdater &MSSAU) {
for (auto *BB : L->getBlocks())
if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
int NotAPhi = 0;
@@ -1151,7 +1148,6 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
}
return true;
}
-}
static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
BatchAAResults &BAA,
diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index 73f1942..7706de8 100644
--- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -21,8 +21,7 @@
#define DEBUG_TYPE "loop-bound-split"
-namespace llvm {
-
+using namespace llvm;
using namespace PatternMatch;
namespace {
@@ -358,8 +357,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
IRBuilder<> Builder(&PostLoopPreHeader->front());
// Update phi nodes in header of post-loop.
- bool isExitingLatch =
- (L.getExitingBlock() == L.getLoopLatch()) ? true : false;
+ bool isExitingLatch = L.getExitingBlock() == L.getLoopLatch();
Value *ExitingCondLCSSAPhi = nullptr;
for (PHINode &PN : L.getHeader()->phis()) {
// Create LCSSA phi node in preheader of post-loop.
@@ -472,8 +470,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
- Function &F = *L.getHeader()->getParent();
- (void)F;
+ [[maybe_unused]] Function &F = *L.getHeader()->getParent();
LLVM_DEBUG(dbgs() << "Splitting bound of loop in " << F.getName() << ": " << L
<< "\n");
@@ -486,5 +483,3 @@ PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM,
return getLoopPassPreservedAnalyses();
}
-
-} // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 62a81ba..280eb20 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7957,9 +7957,9 @@ bool VPRecipeBuilder::getScaledReductions(
auto CollectExtInfo = [this, &Exts, &ExtOpTypes,
&ExtKinds](SmallVectorImpl<Value *> &Ops) -> bool {
for (const auto &[I, OpI] : enumerate(Ops)) {
- auto *CI = dyn_cast<ConstantInt>(OpI);
- if (I > 0 && CI &&
- canConstantBeExtended(CI, ExtOpTypes[0], ExtKinds[0])) {
+ const APInt *C;
+ if (I > 0 && match(OpI, m_APInt(C)) &&
+ canConstantBeExtended(C, ExtOpTypes[0], ExtKinds[0])) {
ExtOpTypes[I] = ExtOpTypes[0];
ExtKinds[I] = ExtKinds[0];
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0101942..d167009 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1753,14 +1753,14 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
}
#endif
-bool llvm::canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
TTI::PartialReductionExtendKind ExtKind) {
- APInt TruncatedVal = CI->getValue().trunc(NarrowType->getScalarSizeInBits());
- unsigned WideSize = CI->getType()->getScalarSizeInBits();
+ APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
+ unsigned WideSize = C->getBitWidth();
APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
? TruncatedVal.sext(WideSize)
: TruncatedVal.zext(WideSize);
- return ExtendedVal == CI->getValue();
+ return ExtendedVal == *C;
}
TargetTransformInfo::OperandValueInfo
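Note: a worked example (values illustrative) of the re-extend-and-compare check above, for a 64-bit constant 250 against an i8 narrow type:

    APInt C(64, 250);
    APInt Trunc = C.trunc(8);     // 0xFA
    assert(Trunc.zext(64) == C);  // PR_ZeroExtend: 250 round-trips, so OK
    assert(Trunc.sext(64) != C);  // PR_SignExtend: re-extends to -6, rejected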
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 3bcd7cc..0e0b042 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4267,12 +4267,14 @@ public:
BackedgeTakenCount = new VPValue();
return BackedgeTakenCount;
}
+ VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
/// The vector trip count.
VPValue &getVectorTripCount() { return VectorTripCount; }
/// Returns the VF of the vector loop region.
VPValue &getVF() { return VF; };
+ const VPValue &getVF() const { return VF; };
/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 1580a3b..2aaabd9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -474,7 +474,7 @@ public:
/// Check if a constant \p C can be safely treated as having been extended
/// from a narrower type with the given extension kind.
-bool canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+bool canConstantBeExtended(const APInt *C, Type *NarrowType,
TTI::PartialReductionExtendKind ExtKind);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index ecd5e96..d8203e2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -173,10 +173,10 @@ inline int_pred_ty<is_zero_int> m_ZeroInt() {
/// For vectors, this includes constants with undefined elements.
inline int_pred_ty<is_one> m_One() { return int_pred_ty<is_one>(); }
-struct bind_const_int {
- uint64_t &Res;
+struct bind_apint {
+ const APInt *&Res;
- bind_const_int(uint64_t &Res) : Res(Res) {}
+ bind_apint(const APInt *&Res) : Res(Res) {}
bool match(VPValue *VPV) const {
if (!VPV->isLiveIn())
@@ -188,7 +188,23 @@ struct bind_const_int {
const auto *CI = dyn_cast<ConstantInt>(V);
if (!CI)
return false;
- if (auto C = CI->getValue().tryZExtValue()) {
+ Res = &CI->getValue();
+ return true;
+ }
+};
+
+inline bind_apint m_APInt(const APInt *&C) { return C; }
+
+struct bind_const_int {
+ uint64_t &Res;
+
+ bind_const_int(uint64_t &Res) : Res(Res) {}
+
+ bool match(VPValue *VPV) const {
+ const APInt *APConst;
+ if (!bind_apint(APConst).match(VPV))
+ return false;
+ if (auto C = APConst->tryZExtValue()) {
Res = *C;
return true;
}
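Note: a minimal usage sketch of the new binder, mirroring the IR-level PatternMatch idiom; V is an assumed VPValue* whose live-in is a ConstantInt:

    const APInt *C;
    if (match(V, m_APInt(C)))
      dbgs() << "matched live-in constant: " << *C << "\n";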
@@ -400,6 +416,12 @@ m_AnyOf(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::AnyOf>(Op0);
}
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::FirstActiveLane, Op0_t>
+m_FirstActiveLane(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::FirstActiveLane>(Op0);
+}
+
template <unsigned Opcode, typename Op0_t>
inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) {
return AllRecipe_match<Opcode, Op0_t>(Op0);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 775837f..7a98c75 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -341,12 +341,12 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
ExtAType = GetExtendKind(ExtAR);
ExtBType = GetExtendKind(ExtBR);
- if (!ExtBR && Widen->getOperand(1)->isLiveIn()) {
- auto *CI = cast<ConstantInt>(Widen->getOperand(1)->getLiveInIRValue());
- if (canConstantBeExtended(CI, InputTypeA, ExtAType)) {
- InputTypeB = InputTypeA;
- ExtBType = ExtAType;
- }
+ using namespace VPlanPatternMatch;
+ const APInt *C;
+ if (!ExtBR && match(Widen->getOperand(1), m_APInt(C)) &&
+ canConstantBeExtended(C, InputTypeA, ExtAType)) {
+ InputTypeB = InputTypeA;
+ ExtBType = ExtAType;
}
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 011466b..7f5a41c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -788,9 +788,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
ScalarEvolution &SE) {
VPValue *Incoming, *Mask;
if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>(
- m_VPInstruction<VPInstruction::FirstActiveLane>(
- m_VPValue(Mask)),
- m_VPValue(Incoming))))
+ m_FirstActiveLane(m_VPValue(Mask)), m_VPValue(Incoming))))
return nullptr;
auto *WideIV = getOptimizableIVOf(Incoming, SE);
@@ -2124,9 +2122,13 @@ static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
// Return true if we do not know how to (mechanically) hoist a given recipe
- // out of a loop region. Does not address legality concerns such as aliasing
- // or speculation safety.
+ // out of a loop region.
auto CannotHoistRecipe = [](VPRecipeBase &R) {
+ // TODO: Relax checks in the future, e.g. we could also hoist reads, if
+ // their memory location is not modified in the vector loop.
+ if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
+ return true;
+
// Allocas cannot be hoisted.
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
return RepR && RepR->getOpcode() == Instruction::Alloca;
@@ -2134,17 +2136,18 @@ static void licm(VPlan &Plan) {
// Hoist any loop invariant recipes from the vector loop region to the
// preheader. Perform a shallow traversal of the vector loop region, to
- // exclude recipes in replicate regions.
+ // exclude recipes in replicate regions. Since the top-level blocks in the
+ // vector loop region are guaranteed to execute if the vector preheader is,
+ // we don't need to check speculation safety.
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ assert(Preheader->getSingleSuccessor() == LoopRegion &&
+ "Expected vector prehader's successor to be the vector loop region");
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
if (CannotHoistRecipe(R))
continue;
- // TODO: Relax checks in the future, e.g. we could also hoist reads, if
- // their memory location is not modified in the vector loop.
- if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
- any_of(R.operands(), [](VPValue *Op) {
+ if (any_of(R.operands(), [](VPValue *Op) {
return !Op->isDefinedOutsideLoopRegions();
}))
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 86a8b08..5aeda3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -352,8 +352,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// Compute*Result which combine all parts to compute the final value.
VPValue *Op1;
if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
- match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
- m_VPValue(Op1))) ||
+ match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 7240188..8b1b0e5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -53,7 +53,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
return Expanded;
}
-bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
+bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
if (isa<VPActiveLaneMaskPHIRecipe>(V))
return true;
@@ -74,7 +74,7 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
IsWideCanonicalIV(A));
return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
- B == Plan.getOrCreateBackedgeTakenCount();
+ B == Plan.getBackedgeTakenCount();
}
const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 0222b0a..cf95ac0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -90,7 +90,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
}
/// Return true if \p V is a header mask in \p Plan.
-bool isHeaderMask(const VPValue *V, VPlan &Plan);
+bool isHeaderMask(const VPValue *V, const VPlan &Plan);
/// Checks if \p V is uniform across all VF lanes and UF parts. It is considered
/// as such if it is either loop invariant (defined outside the vector region)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 5262af6..91734a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -298,11 +298,16 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
return false;
}
}
- if (const auto *EVL = dyn_cast<VPInstruction>(&R)) {
- if (EVL->getOpcode() == VPInstruction::ExplicitVectorLength &&
- !verifyEVLRecipe(*EVL)) {
- errs() << "EVL VPValue is not used correctly\n";
- return false;
+ if (const auto *VPI = dyn_cast<VPInstruction>(&R)) {
+ switch (VPI->getOpcode()) {
+ case VPInstruction::ExplicitVectorLength:
+ if (!verifyEVLRecipe(*VPI)) {
+ errs() << "EVL VPValue is not used correctly\n";
+ return false;
+ }
+ break;
+ default:
+ break;
}
}
}