Diffstat (limited to 'llvm/include')
-rw-r--r--  llvm/include/llvm/ADT/Any.h  1
-rw-r--r--  llvm/include/llvm/Analysis/DXILResource.h  82
-rw-r--r--  llvm/include/llvm/Analysis/LoopAccessAnalysis.h  29
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h  4
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfo.h  4
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h  2
-rw-r--r--  llvm/include/llvm/Analysis/VectorUtils.h  3
-rw-r--r--  llvm/include/llvm/BinaryFormat/ELF.h  3
-rw-r--r--  llvm/include/llvm/Bitstream/BitstreamWriter.h  5
-rw-r--r--  llvm/include/llvm/CodeGen/AsmPrinter.h  39
-rw-r--r--  llvm/include/llvm/CodeGen/MachineBasicBlock.h  9
-rw-r--r--  llvm/include/llvm/CodeGen/MachineFunction.h  10
-rw-r--r--  llvm/include/llvm/CodeGen/MachineInstrBuilder.h  92
-rw-r--r--  llvm/include/llvm/CodeGen/MachineScheduler.h  8
-rw-r--r--  llvm/include/llvm/CodeGen/SDPatternMatch.h  12
-rw-r--r--  llvm/include/llvm/CodeGen/ScheduleDAG.h  5
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGNodes.h  2
-rw-r--r--  llvm/include/llvm/CodeGen/ValueTypes.td  496
-rw-r--r--  llvm/include/llvm/Frontend/HLSL/HLSLBinding.h  162
-rw-r--r--  llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h  115
-rw-r--r--  llvm/include/llvm/Frontend/Offloading/PropertySet.h  33
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/ClauseT.h  5
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMP.td  4
-rw-r--r--  llvm/include/llvm/IR/IntrinsicInst.h  17
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAMDGPU.td  160
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsRISCV.td  10
-rw-r--r--  llvm/include/llvm/LTO/LTO.h  26
-rw-r--r--  llvm/include/llvm/MC/MCAsmBackend.h  13
-rw-r--r--  llvm/include/llvm/MC/MCAssembler.h  3
-rw-r--r--  llvm/include/llvm/MC/MCObjectFileInfo.h  5
-rw-r--r--  llvm/include/llvm/MC/MCObjectStreamer.h  8
-rw-r--r--  llvm/include/llvm/MC/MCSection.h  49
-rw-r--r--  llvm/include/llvm/ObjectYAML/ELFYAML.h  2
-rw-r--r--  llvm/include/llvm/ProfileData/MemProfData.inc  40
-rw-r--r--  llvm/include/llvm/Support/DebugLog.h  43
-rw-r--r--  llvm/include/llvm/Support/VirtualFileSystem.h  2
-rw-r--r--  llvm/include/llvm/Transforms/IPO/Attributor.h  2
-rw-r--r--  llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h  25
38 files changed, 996 insertions, 534 deletions
diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h
index 88dbce9..a29aaa3 100644
--- a/llvm/include/llvm/ADT/Any.h
+++ b/llvm/include/llvm/ADT/Any.h
@@ -119,7 +119,6 @@ private:
template <class T> friend T any_cast(Any &&Value);
template <class T> friend const T *any_cast(const Any *Value);
template <class T> friend T *any_cast(Any *Value);
- template <typename T> friend bool any_isa(const Any &Value);
std::unique_ptr<StorageBase> Storage;
};
diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h
index 956dcbc..93c6bfb 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Frontend/HLSL/HLSLBinding.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/PassManager.h"
@@ -633,86 +634,25 @@ LLVM_ABI ModulePass *createDXILResourceWrapperPassPass();
// register slots to resources with implicit bindings, and in a
// post-optimization validation pass that will raise diagnostics about
// overlapping bindings.
-//
-// For example for these resource bindings:
-//
-// RWBuffer<float> A[10] : register(u3);
-// RWBuffer<float> B[] : register(u5, space2)
-//
-// The analysis result for UAV binding type will look like this:
-//
-// UAVSpaces {
-// ResClass = ResourceClass::UAV,
-// Spaces = {
-// { Space = 0, FreeRanges = {{ 0, 2 }, { 13, UINT32_MAX }} },
-// { Space = 2, FreeRanges = {{ 0, 4 }} }
-// }
-// }
-//
class DXILResourceBindingInfo {
-public:
- struct BindingRange {
- uint32_t LowerBound;
- uint32_t UpperBound;
- BindingRange(uint32_t LB, uint32_t UB) : LowerBound(LB), UpperBound(UB) {}
- };
-
- struct RegisterSpace {
- uint32_t Space;
- SmallVector<BindingRange> FreeRanges;
- RegisterSpace(uint32_t Space) : Space(Space) {
- FreeRanges.emplace_back(0, UINT32_MAX);
- }
- // Size == -1 means unbounded array
- LLVM_ABI std::optional<uint32_t> findAvailableBinding(int32_t Size);
- };
-
- struct BindingSpaces {
- dxil::ResourceClass RC;
- llvm::SmallVector<RegisterSpace> Spaces;
- BindingSpaces(dxil::ResourceClass RC) : RC(RC) {}
- LLVM_ABI RegisterSpace &getOrInsertSpace(uint32_t Space);
- };
-
-private:
- BindingSpaces SRVSpaces, UAVSpaces, CBufferSpaces, SamplerSpaces;
- bool ImplicitBinding;
- bool OverlappingBinding;
+ hlsl::BindingInfo Bindings;
+ bool HasImplicitBinding = false;
+ bool HasOverlappingBinding = false;
// Populate the resource binding info given explicit resource binding calls
// in the module.
void populate(Module &M, DXILResourceTypeMap &DRTM);
public:
- DXILResourceBindingInfo()
- : SRVSpaces(dxil::ResourceClass::SRV),
- UAVSpaces(dxil::ResourceClass::UAV),
- CBufferSpaces(dxil::ResourceClass::CBuffer),
- SamplerSpaces(dxil::ResourceClass::Sampler), ImplicitBinding(false),
- OverlappingBinding(false) {}
-
- bool hasImplicitBinding() const { return ImplicitBinding; }
- void setHasImplicitBinding(bool Value) { ImplicitBinding = Value; }
- bool hasOverlappingBinding() const { return OverlappingBinding; }
-
- BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) {
- switch (RC) {
- case dxil::ResourceClass::SRV:
- return SRVSpaces;
- case dxil::ResourceClass::UAV:
- return UAVSpaces;
- case dxil::ResourceClass::CBuffer:
- return CBufferSpaces;
- case dxil::ResourceClass::Sampler:
- return SamplerSpaces;
- }
+ bool hasImplicitBinding() const { return HasImplicitBinding; }
+ void setHasImplicitBinding(bool Value) { HasImplicitBinding = Value; }
+ bool hasOverlappingBinding() const { return HasOverlappingBinding; }
+ void setHasOverlappingBinding(bool Value) { HasOverlappingBinding = Value; }
- llvm_unreachable("Invalid resource class");
- }
-
- // Size == -1 means unbounded array
LLVM_ABI std::optional<uint32_t>
- findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size);
+ findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size) {
+ return Bindings.findAvailableBinding(RC, Space, Size);
+ }
friend class DXILResourceBindingAnalysis;
friend class DXILResourceBindingWrapperPass;
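
With the range bookkeeping moved into the reusable hlsl::BindingInfo (added
below in HLSLBinding.h), DXILResourceBindingInfo::findAvailableBinding becomes
a thin forwarder. A minimal usage sketch; the surrounding pass code is
hypothetical:

    // Hypothetical caller reserving a register for an implicit binding;
    // Size == -1 would request space for an unbounded array.
    std::optional<uint32_t> Slot =
        DRBI.findAvailableBinding(dxil::ResourceClass::UAV, /*Space=*/0,
                                  /*Size=*/10);
    if (Slot) {
      // *Slot is the lower bound of a free range large enough for 10 slots,
      // e.g. 13 given the UAVSpaces example that moves to HLSLBinding.h.
    }
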
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index af6e534..92304ed 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -180,10 +180,12 @@ public:
const SmallVectorImpl<Instruction *> &Instrs) const;
};
- MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
+ MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC,
+ DominatorTree *DT, const Loop *L,
const DenseMap<Value *, const SCEV *> &SymbolicStrides,
unsigned MaxTargetVectorWidthInBits)
- : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
+ : PSE(PSE), AC(AC), DT(DT), InnermostLoop(L),
+ SymbolicStrides(SymbolicStrides),
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
/// Register the location (instructions are given increasing numbers)
@@ -288,6 +290,15 @@ public:
return PointerBounds;
}
+ DominatorTree *getDT() const {
+ assert(DT && "requested DT, but it is not available");
+ return DT;
+ }
+ AssumptionCache *getAC() const {
+ assert(AC && "requested AC, but it is not available");
+ return AC;
+ }
+
private:
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
/// applies dynamic knowledge to simplify SCEV expressions and convert them
@@ -296,6 +307,10 @@ private:
/// example we might assume a unit stride for a pointer in order to prove
/// that a memory access is strided and doesn't wrap.
PredicatedScalarEvolution &PSE;
+
+ AssumptionCache *AC;
+ DominatorTree *DT;
+
const Loop *InnermostLoop;
/// Reference to map of pointer values to
@@ -670,7 +685,7 @@ public:
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetTransformInfo *TTI,
const TargetLibraryInfo *TLI, AAResults *AA,
- DominatorTree *DT, LoopInfo *LI,
+ DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC,
bool AllowPartial = false);
/// Return true if we can analyze the memory accesses in the loop and there are
@@ -922,7 +937,8 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
const SCEV *MaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
- std::pair<const SCEV *, const SCEV *>> *PointerBounds);
+ std::pair<const SCEV *, const SCEV *>> *PointerBounds,
+ DominatorTree *DT, AssumptionCache *AC);
class LoopAccessInfoManager {
/// The cache.
@@ -935,12 +951,13 @@ class LoopAccessInfoManager {
LoopInfo &LI;
TargetTransformInfo *TTI;
const TargetLibraryInfo *TLI = nullptr;
+ AssumptionCache *AC;
public:
LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
LoopInfo &LI, TargetTransformInfo *TTI,
- const TargetLibraryInfo *TLI)
- : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
+ const TargetLibraryInfo *TLI, AssumptionCache *AC)
+ : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {}
LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false);
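
Call sites constructing LoopAccessInfoManager now have to thread an
AssumptionCache through as well. A sketch of an updated construction, assuming
the other analysis results are already in scope:

    // Hypothetical updated call site: AC is the function's AssumptionCache.
    LoopAccessInfoManager LAIs(SE, AA, DT, LI, &TTI, TLI, &AC);
    const LoopAccessInfo &LAI = LAIs.getInfo(*L);
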
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
index bff7707..011d599 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
@@ -91,6 +91,10 @@ inline bind_ty<const SCEVUnknown> m_SCEVUnknown(const SCEVUnknown *&V) {
return V;
}
+inline bind_ty<const SCEVAddExpr> m_scev_Add(const SCEVAddExpr *&V) {
+ return V;
+}
+
/// Match a specified const SCEV *.
struct specificscev_ty {
const SCEV *Expr;
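
The new binder hands back the matched node as a const SCEVAddExpr *, so its
operands can be inspected without a cast. A minimal sketch, assuming the usual
SCEVPatternMatch::match entry point:

    // Bind an add expression and walk its operands directly.
    const SCEVAddExpr *Add;
    if (SCEVPatternMatch::match(S, m_scev_Add(Add))) {
      for (const SCEV *Op : Add->operands()) {
        // ... inspect each addend ...
      }
    }
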
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7928835..aa4550d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1950,6 +1950,10 @@ public:
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
+ /// Returns true if the target allows indexing into vector elements using
+ /// GEPs.
+ LLVM_ABI bool allowVectorElementIndexingUsingGEP() const;
+
private:
std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
};
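
The default implementation (in the TargetTransformInfoImpl.h hunk below)
returns true; a target where GEP-based element access is unsupported would
override the hook. A hypothetical override, in a target's TTI implementation:

    // Hypothetical target override: opt out of GEPs into vector elements.
    bool allowVectorElementIndexingUsingGEP() const override { return false; }
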
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 2ea87b3..abdbca0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1145,6 +1145,8 @@ public:
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
+ virtual bool allowVectorElementIndexingUsingGEP() const { return true; }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index b55c4e0..6781cd5 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -633,6 +633,9 @@ public:
return true;
}
+ /// Return true if this group is full, i.e. it has no gaps.
+ bool isFull() const { return getNumMembers() == getFactor(); }
+
private:
uint32_t Factor; // Interleave Factor.
bool Reverse;
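
A full group has a member at every index 0..Factor-1, so consumers can act on
the whole group at once. A hedged sketch of the intended use:

    // Hypothetical cost-model check:
    if (Group->isFull()) {
      // No gaps: a single wide load/store covers every member, so no
      // masking or gap-skipping is needed.
    }
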
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index ad35d7f..749971e 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -973,7 +973,10 @@ enum : unsigned {
// SM based processor values.
EF_CUDA_SM100 = 0x6400,
+ EF_CUDA_SM101 = 0x6500,
+ EF_CUDA_SM103 = 0x6700,
EF_CUDA_SM120 = 0x7800,
+ EF_CUDA_SM121 = 0x7900,
// Set when using an accelerator variant like sm_100a.
EF_CUDA_ACCELERATORS = 0x8,
diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h
index 78f5eb4..5f53681 100644
--- a/llvm/include/llvm/Bitstream/BitstreamWriter.h
+++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h
@@ -466,7 +466,7 @@ private:
EmitCode(Abbrev);
- unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
+ unsigned i = 0, e = Abbv->getNumOperandInfos();
if (Code) {
assert(e && "Expected non-empty abbreviation");
const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++);
@@ -632,8 +632,7 @@ private:
void EncodeAbbrev(const BitCodeAbbrev &Abbv) {
EmitCode(bitc::DEFINE_ABBREV);
EmitVBR(Abbv.getNumOperandInfos(), 5);
- for (unsigned i = 0, e = static_cast<unsigned>(Abbv.getNumOperandInfos());
- i != e; ++i) {
+ for (unsigned i = 0, e = Abbv.getNumOperandInfos(); i != e; ++i) {
const BitCodeAbbrevOp &Op = Abbv.getOperandInfo(i);
Emit(Op.isLiteral(), 1);
if (Op.isLiteral()) {
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index faab2503..91c0142 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -190,6 +190,36 @@ private:
/// Emit comments in assembly output if this is true.
bool VerboseAsm;
+ /// Store symbols and type identifiers used to create callgraph section
+ /// entries related to a function.
+ struct FunctionInfo {
+ /// Numeric type identifier used in callgraph section for indirect calls
+ /// and targets.
+ using CGTypeId = uint64_t;
+
+ /// Enumeration of function kinds, and their mapping to function kind values
+ /// stored in callgraph section entries.
+ /// Must match the enum in llvm/tools/llvm-objdump/llvm-objdump.cpp.
+ enum class FunctionKind : uint64_t {
+ /// Function cannot be target to indirect calls.
+ NOT_INDIRECT_TARGET = 0,
+
+ /// Function may be target to indirect calls but its type id is unknown.
+ INDIRECT_TARGET_UNKNOWN_TID = 1,
+
+ /// Function may be target to indirect calls and its type id is known.
+ INDIRECT_TARGET_KNOWN_TID = 2,
+ };
+
+ /// Map type identifiers to callsite labels. Labels are generated for each
+ /// indirect callsite in the function.
+ SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels;
+ };
+
+ enum CallGraphSectionFormatVersion : uint64_t {
+ V_0 = 0,
+ };
+
/// Output stream for the stack usage file (i.e., .su file).
std::unique_ptr<raw_fd_ostream> StackUsageStream;
@@ -355,6 +385,13 @@ public:
/// are available. Returns empty string otherwise.
StringRef getConstantSectionSuffix(const Constant *C) const;
+ /// Generate and emit labels for callees of the indirect callsites which will
+ /// be used to populate the .callgraph section.
+ void emitIndirectCalleeLabels(
+ FunctionInfo &FuncInfo,
+ const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
+ const MachineInstr &MI);
+
//===------------------------------------------------------------------===//
// XRay instrumentation implementation.
//===------------------------------------------------------------------===//
@@ -442,6 +479,8 @@ public:
void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol);
virtual void emitKCFITypeId(const MachineFunction &MF);
+ void emitCallGraphSection(const MachineFunction &MF, FunctionInfo &FuncInfo);
+
void emitPseudoProbe(const MachineInstr &MI);
void emitRemarksSection(remarks::RemarkStreamer &RS);
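
Taken together: FunctionInfo accumulates one label per indirect call site,
emitIndirectCalleeLabels records them while the function body is emitted, and
emitCallGraphSection writes the section. A rough sketch of the flow; the loop
structure here is an assumption:

    // Hypothetical emission flow inside AsmPrinter:
    FunctionInfo FuncInfo;
    for (const MachineBasicBlock &MBB : MF)
      for (const MachineInstr &MI : MBB)
        if (MI.isCall())
          emitIndirectCalleeLabels(FuncInfo, MF.getCallSitesInfo(), MI);
    emitCallGraphSection(MF, FuncInfo);
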
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 938d71d..9e3d919 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -323,10 +323,11 @@ public:
const MachineFunction *getParent() const { return xParent; }
MachineFunction *getParent() { return xParent; }
- /// Returns true if the original IR terminator is an `indirectbr`. This
- /// typically corresponds to a `goto` in C, rather than jump tables.
- bool terminatorIsComputedGoto() const {
- return back().isIndirectBranch() &&
+ /// Returns true if the original IR terminator is an `indirectbr` with
+ /// successor blocks. This typically corresponds to a `goto` in C, rather than
+ /// jump tables.
+ bool terminatorIsComputedGotoWithSuccessors() const {
+ return back().isIndirectBranch() && !succ_empty() &&
llvm::all_of(successors(), [](const MachineBasicBlock *Succ) {
return Succ->isIRBlockAddressTaken();
});
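
The added !succ_empty() guard closes a vacuous-truth hole: all_of over an
empty successor list returns true, so an indirectbr block with no recorded
successors previously satisfied the predicate. Illustrated:

    // all_of over an empty range is vacuously true, regardless of the
    // predicate, so the old check passed with zero successors:
    assert(llvm::all_of(ArrayRef<int>(), [](int) { return false; }));
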
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 7f88323..06c4daf 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -517,6 +517,13 @@ public:
SmallVector<ArgRegPair, 1> ArgRegPairs;
/// Callee type ids.
SmallVector<ConstantInt *, 4> CalleeTypeIds;
+
+ CallSiteInfo() = default;
+
+ /// Extracts the numeric type id from the CallBase's callee_type Metadata,
+ /// and sets CalleeTypeIds. This is used as type id for the indirect call in
+ /// the call graph section.
+ CallSiteInfo(const CallBase &CB);
};
struct CalledGlobalInfo {
@@ -524,11 +531,12 @@ public:
unsigned TargetFlags;
};
+ using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
+
private:
Delegate *TheDelegate = nullptr;
GISelChangeObserver *Observer = nullptr;
- using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
/// Map a call instruction to call site arguments forwarding info.
CallSiteInfoMap CallSitesInfo;
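
With the converting constructor, call lowering can populate the map straight
from the IR call. A hedged sketch; the guard condition is an assumption:

    // Hypothetical use during call lowering:
    if (CB.isIndirectCall())
      MF.addCallSiteInfo(&CallMI, MachineFunction::CallSiteInfo(CB));
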
diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index e63e77a..e705d7d9 100644
--- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -69,6 +69,32 @@ enum {
} // end namespace RegState
+/// Set of metadata that should be preserved when using BuildMI(). This provides
+/// a more convenient way of preserving DebugLoc, PCSections and MMRA.
+class MIMetadata {
+public:
+ MIMetadata() = default;
+ MIMetadata(DebugLoc DL, MDNode *PCSections = nullptr, MDNode *MMRA = nullptr)
+ : DL(std::move(DL)), PCSections(PCSections), MMRA(MMRA) {}
+ MIMetadata(const DILocation *DI, MDNode *PCSections = nullptr,
+ MDNode *MMRA = nullptr)
+ : DL(DI), PCSections(PCSections), MMRA(MMRA) {}
+ explicit MIMetadata(const Instruction &From)
+ : DL(From.getDebugLoc()),
+ PCSections(From.getMetadata(LLVMContext::MD_pcsections)) {}
+ explicit MIMetadata(const MachineInstr &From)
+ : DL(From.getDebugLoc()), PCSections(From.getPCSections()) {}
+
+ const DebugLoc &getDL() const { return DL; }
+ MDNode *getPCSections() const { return PCSections; }
+ MDNode *getMMRAMetadata() const { return MMRA; }
+
+private:
+ DebugLoc DL;
+ MDNode *PCSections = nullptr;
+ MDNode *MMRA = nullptr;
+};
+
class MachineInstrBuilder {
MachineFunction *MF = nullptr;
MachineInstr *MI = nullptr;
@@ -317,15 +343,11 @@ public:
}
}
- const MachineInstrBuilder &setPCSections(MDNode *MD) const {
- if (MD)
- MI->setPCSections(*MF, MD);
- return *this;
- }
-
- const MachineInstrBuilder &setMMRAMetadata(MDNode *MMRA) const {
- if (MMRA)
- MI->setMMRAMetadata(*MF, MMRA);
+ const MachineInstrBuilder &copyMIMetadata(const MIMetadata &MIMD) const {
+ if (MIMD.getPCSections())
+ MI->setPCSections(*MF, MIMD.getPCSections());
+ if (MIMD.getMMRAMetadata())
+ MI->setMMRAMetadata(*MF, MIMD.getMMRAMetadata());
return *this;
}
@@ -343,38 +365,11 @@ public:
}
};
-/// Set of metadata that should be preserved when using BuildMI(). This provides
-/// a more convenient way of preserving DebugLoc, PCSections and MMRA.
-class MIMetadata {
-public:
- MIMetadata() = default;
- MIMetadata(DebugLoc DL, MDNode *PCSections = nullptr, MDNode *MMRA = nullptr)
- : DL(std::move(DL)), PCSections(PCSections), MMRA(MMRA) {}
- MIMetadata(const DILocation *DI, MDNode *PCSections = nullptr,
- MDNode *MMRA = nullptr)
- : DL(DI), PCSections(PCSections), MMRA(MMRA) {}
- explicit MIMetadata(const Instruction &From)
- : DL(From.getDebugLoc()),
- PCSections(From.getMetadata(LLVMContext::MD_pcsections)) {}
- explicit MIMetadata(const MachineInstr &From)
- : DL(From.getDebugLoc()), PCSections(From.getPCSections()) {}
-
- const DebugLoc &getDL() const { return DL; }
- MDNode *getPCSections() const { return PCSections; }
- MDNode *getMMRAMetadata() const { return MMRA; }
-
-private:
- DebugLoc DL;
- MDNode *PCSections = nullptr;
- MDNode *MMRA = nullptr;
-};
-
/// Builder interface. Specify how to create the initial instruction itself.
inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD,
const MCInstrDesc &MCID) {
return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, MIMD.getDL()))
- .setPCSections(MIMD.getPCSections())
- .setMMRAMetadata(MIMD.getMMRAMetadata());
+ .copyMIMetadata(MIMD);
}
/// This version of the builder sets up the first operand as a
@@ -382,8 +377,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD,
inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD,
const MCInstrDesc &MCID, Register DestReg) {
return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, MIMD.getDL()))
- .setPCSections(MIMD.getPCSections())
- .setMMRAMetadata(MIMD.getMMRAMetadata())
+ .copyMIMetadata(MIMD)
.addReg(DestReg, RegState::Define);
}
@@ -397,10 +391,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL());
BB.insert(I, MI);
- return MachineInstrBuilder(MF, MI)
- .setPCSections(MIMD.getPCSections())
- .setMMRAMetadata(MIMD.getMMRAMetadata())
- .addReg(DestReg, RegState::Define);
+ return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD).addReg(
+ DestReg, RegState::Define);
}
/// This version of the builder inserts the newly-built instruction before
@@ -416,10 +408,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL());
BB.insert(I, MI);
- return MachineInstrBuilder(MF, MI)
- .setPCSections(MIMD.getPCSections())
- .setMMRAMetadata(MIMD.getMMRAMetadata())
- .addReg(DestReg, RegState::Define);
+ return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD).addReg(
+ DestReg, RegState::Define);
}
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I,
@@ -449,9 +439,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL());
BB.insert(I, MI);
- return MachineInstrBuilder(MF, MI)
- .setPCSections(MIMD.getPCSections())
- .setMMRAMetadata(MIMD.getMMRAMetadata());
+ return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD);
}
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
@@ -461,9 +449,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL());
BB.insert(I, MI);
- return MachineInstrBuilder(MF, MI)
- .setPCSections(MIMD.getPCSections())
- .setMMRAMetadata(MIMD.getMMRAMetadata());
+ return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD);
}
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I,
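
MIMetadata itself is unchanged, just moved ahead of MachineInstrBuilder so the
new copyMIMetadata() helper can take it by reference; callers keep the same
shape. Typical usage, with illustrative names:

    // DebugLoc, pcsections and MMRA metadata from OrigMI are propagated to
    // the new instruction in one step:
    BuildMI(MBB, InsertPt, MIMetadata(OrigMI), TII->get(Opcode), DestReg)
        .addReg(SrcReg);
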
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index efda7eb..5a2aee2 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -1303,8 +1303,8 @@ protected:
SchedBoundary Top;
SchedBoundary Bot;
- ClusterInfo *TopCluster;
- ClusterInfo *BotCluster;
+ unsigned TopClusterID;
+ unsigned BotClusterID;
/// Candidate last picked from Top boundary.
SchedCandidate TopCand;
@@ -1346,8 +1346,8 @@ protected:
/// Candidate last picked from Bot boundary.
SchedCandidate BotCand;
- ClusterInfo *TopCluster;
- ClusterInfo *BotCluster;
+ unsigned TopClusterID;
+ unsigned BotClusterID;
public:
PostGenericScheduler(const MachineSchedContext *C)
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 2967532..be90250 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -578,6 +578,18 @@ m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx) {
return TernaryOpc_match<LHS, RHS, IDX>(ISD::INSERT_SUBVECTOR, Base, Sub, Idx);
}
+template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy>
+inline auto m_SelectCC(const LTy &L, const RTy &R, const TTy &T, const FTy &F,
+ const CCTy &CC) {
+ return m_Node(ISD::SELECT_CC, L, R, T, F, CC);
+}
+
+template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy>
+inline auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T,
+ const FTy &F, const CCTy &CC) {
+ return m_AnyOf(m_Select(m_SetCC(L, R, CC), T, F), m_SelectCC(L, R, T, F, CC));
+}
+
// === Binary operations ===
template <typename LHS_P, typename RHS_P, bool Commutable = false,
bool ExcludeChain = false>
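
m_SelectCCLike matches either shape a conditional select can take in the DAG:
a SELECT whose predicate is a SETCC, or a fused SELECT_CC. A minimal sketch,
assuming the usual sd_match entry point:

    using namespace SDPatternMatch;
    SDValue L, R, T, F;
    ISD::CondCode CC;
    if (sd_match(N, m_SelectCCLike(m_Value(L), m_Value(R), m_Value(T),
                                   m_Value(F), m_CondCode(CC)))) {
      // Handles both select(setcc(L, R, CC), T, F) and
      // selectcc(L, R, T, F, CC) with one pattern.
    }
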
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h
index 3a0a31b..122b7be 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -240,6 +240,11 @@ class TargetRegisterInfo;
typedef SmallSet<SUnit *, 8> ClusterInfo;
constexpr unsigned InvalidClusterId = ~0u;
+ /// Return whether the input cluster IDs are the same and valid.
+ inline bool isTheSameCluster(unsigned A, unsigned B) {
+ return A != InvalidClusterId && A == B;
+ }
+
/// Scheduling unit. This is a node in the scheduling DAG.
class SUnit {
private:
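
With the ClusterInfo * members replaced by numeric IDs in the schedulers (see
the MachineScheduler.h hunk above), cluster-membership checks go through this
helper instead of pointer equality, which also prevents two invalid IDs from
comparing equal. A sketch; CandClusterID is hypothetical:

    // Keep picking from the same cluster only when both IDs are valid
    // and equal:
    if (isTheSameCluster(TopClusterID, CandClusterID)) {
      // ... prefer this candidate to keep the cluster together ...
    }
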
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 8f88811..11ae8cd 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -182,7 +182,7 @@ public:
return SDValue(Node, R);
}
- /// Return true if this node is an operand of N.
+ /// Return true if the referenced return value is an operand of N.
LLVM_ABI bool isOperandOf(const SDNode *N) const;
/// Return the ValueType of the referenced return value.
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 4551e7e..b06158d 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -92,258 +92,270 @@ def v1i1 : VTVec<1, i1, 17>; // 1 x i1 vector value
def v2i1 : VTVec<2, i1, 18>; // 2 x i1 vector value
def v3i1 : VTVec<3, i1, 19>; // 3 x i1 vector value
def v4i1 : VTVec<4, i1, 20>; // 4 x i1 vector value
-def v8i1 : VTVec<8, i1, 21>; // 8 x i1 vector value
-def v16i1 : VTVec<16, i1, 22>; // 16 x i1 vector value
-def v32i1 : VTVec<32, i1, 23>; // 32 x i1 vector value
-def v64i1 : VTVec<64, i1, 24>; // 64 x i1 vector value
-def v128i1 : VTVec<128, i1, 25>; // 128 x i1 vector value
-def v256i1 : VTVec<256, i1, 26>; // 256 x i1 vector value
-def v512i1 : VTVec<512, i1, 27>; // 512 x i1 vector value
-def v1024i1 : VTVec<1024, i1, 28>; // 1024 x i1 vector value
-def v2048i1 : VTVec<2048, i1, 29>; // 2048 x i1 vector value
-def v4096i1 : VTVec<4096, i1, 30>; // 4096 x i1 vector value
-
-def v128i2 : VTVec<128, i2, 31>; // 128 x i2 vector value
-def v256i2 : VTVec<256, i2, 32>; // 256 x i2 vector value
-
-def v64i4 : VTVec<64, i4, 33>; // 64 x i4 vector value
-def v128i4 : VTVec<128, i4, 34>; // 128 x i4 vector value
-
-def v1i8 : VTVec<1, i8, 35>; // 1 x i8 vector value
-def v2i8 : VTVec<2, i8, 36>; // 2 x i8 vector value
-def v3i8 : VTVec<3, i8, 37>; // 3 x i8 vector value
-def v4i8 : VTVec<4, i8, 38>; // 4 x i8 vector value
-def v8i8 : VTVec<8, i8, 39>; // 8 x i8 vector value
-def v16i8 : VTVec<16, i8, 40>; // 16 x i8 vector value
-def v32i8 : VTVec<32, i8, 41>; // 32 x i8 vector value
-def v64i8 : VTVec<64, i8, 42>; // 64 x i8 vector value
-def v128i8 : VTVec<128, i8, 43>; // 128 x i8 vector value
-def v256i8 : VTVec<256, i8, 44>; // 256 x i8 vector value
-def v512i8 : VTVec<512, i8, 45>; // 512 x i8 vector value
-def v1024i8 : VTVec<1024, i8, 46>; // 1024 x i8 vector value
-
-def v1i16 : VTVec<1, i16, 47>; // 1 x i16 vector value
-def v2i16 : VTVec<2, i16, 48>; // 2 x i16 vector value
-def v3i16 : VTVec<3, i16, 49>; // 3 x i16 vector value
-def v4i16 : VTVec<4, i16, 50>; // 4 x i16 vector value
-def v8i16 : VTVec<8, i16, 51>; // 8 x i16 vector value
-def v16i16 : VTVec<16, i16, 52>; // 16 x i16 vector value
-def v32i16 : VTVec<32, i16, 53>; // 32 x i16 vector value
-def v64i16 : VTVec<64, i16, 54>; // 64 x i16 vector value
-def v128i16 : VTVec<128, i16, 55>; // 128 x i16 vector value
-def v256i16 : VTVec<256, i16, 56>; // 256 x i16 vector value
-def v512i16 : VTVec<512, i16, 57>; // 512 x i16 vector value
-def v4096i16 : VTVec<4096, i16, 58>; // 4096 x i16 vector value
-
-def v1i32 : VTVec<1, i32, 59>; // 1 x i32 vector value
-def v2i32 : VTVec<2, i32, 60>; // 2 x i32 vector value
-def v3i32 : VTVec<3, i32, 61>; // 3 x i32 vector value
-def v4i32 : VTVec<4, i32, 62>; // 4 x i32 vector value
-def v5i32 : VTVec<5, i32, 63>; // 5 x i32 vector value
-def v6i32 : VTVec<6, i32, 64>; // 6 x f32 vector value
-def v7i32 : VTVec<7, i32, 65>; // 7 x f32 vector value
-def v8i32 : VTVec<8, i32, 66>; // 8 x i32 vector value
-def v9i32 : VTVec<9, i32, 67>; // 9 x i32 vector value
-def v10i32 : VTVec<10, i32, 68>; // 10 x i32 vector value
-def v11i32 : VTVec<11, i32, 69>; // 11 x i32 vector value
-def v12i32 : VTVec<12, i32, 70>; // 12 x i32 vector value
-def v16i32 : VTVec<16, i32, 71>; // 16 x i32 vector value
-def v32i32 : VTVec<32, i32, 72>; // 32 x i32 vector value
-def v64i32 : VTVec<64, i32, 73>; // 64 x i32 vector value
-def v128i32 : VTVec<128, i32, 74>; // 128 x i32 vector value
-def v256i32 : VTVec<256, i32, 75>; // 256 x i32 vector value
-def v512i32 : VTVec<512, i32, 76>; // 512 x i32 vector value
-def v1024i32 : VTVec<1024, i32, 77>; // 1024 x i32 vector value
-def v2048i32 : VTVec<2048, i32, 78>; // 2048 x i32 vector value
-def v4096i32 : VTVec<4096, i32, 79>; // 4096 x i32 vector value
-
-def v1i64 : VTVec<1, i64, 80>; // 1 x i64 vector value
-def v2i64 : VTVec<2, i64, 81>; // 2 x i64 vector value
-def v3i64 : VTVec<3, i64, 82>; // 3 x i64 vector value
-def v4i64 : VTVec<4, i64, 83>; // 4 x i64 vector value
-def v8i64 : VTVec<8, i64, 84>; // 8 x i64 vector value
-def v16i64 : VTVec<16, i64, 85>; // 16 x i64 vector value
-def v32i64 : VTVec<32, i64, 86>; // 32 x i64 vector value
-def v64i64 : VTVec<64, i64, 87>; // 64 x i64 vector value
-def v128i64 : VTVec<128, i64, 88>; // 128 x i64 vector value
-def v256i64 : VTVec<256, i64, 89>; // 256 x i64 vector value
-
-def v1i128 : VTVec<1, i128, 90>; // 1 x i128 vector value
-
-def v1f16 : VTVec<1, f16, 91>; // 1 x f16 vector value
-def v2f16 : VTVec<2, f16, 92>; // 2 x f16 vector value
-def v3f16 : VTVec<3, f16, 93>; // 3 x f16 vector value
-def v4f16 : VTVec<4, f16, 94>; // 4 x f16 vector value
-def v8f16 : VTVec<8, f16, 95>; // 8 x f16 vector value
-def v16f16 : VTVec<16, f16, 96>; // 16 x f16 vector value
-def v32f16 : VTVec<32, f16, 97>; // 32 x f16 vector value
-def v64f16 : VTVec<64, f16, 98>; // 64 x f16 vector value
-def v128f16 : VTVec<128, f16, 99>; // 128 x f16 vector value
-def v256f16 : VTVec<256, f16, 100>; // 256 x f16 vector value
-def v512f16 : VTVec<512, f16, 101>; // 512 x f16 vector value
-def v4096f16 : VTVec<4096, f16, 102>; // 4096 x f16 vector value
-
-def v1bf16 : VTVec<1, bf16, 103>; // 1 x bf16 vector value
-def v2bf16 : VTVec<2, bf16, 104>; // 2 x bf16 vector value
-def v3bf16 : VTVec<3, bf16, 105>; // 3 x bf16 vector value
-def v4bf16 : VTVec<4, bf16, 106>; // 4 x bf16 vector value
-def v8bf16 : VTVec<8, bf16, 107>; // 8 x bf16 vector value
-def v16bf16 : VTVec<16, bf16, 108>; // 16 x bf16 vector value
-def v32bf16 : VTVec<32, bf16, 109>; // 32 x bf16 vector value
-def v64bf16 : VTVec<64, bf16, 110>; // 64 x bf16 vector value
-def v128bf16 : VTVec<128, bf16, 111>; // 128 x bf16 vector value
-def v4096bf16 : VTVec<4096, bf16, 112>; // 4096 x bf16 vector value
-
-def v1f32 : VTVec<1, f32, 113>; // 1 x f32 vector value
-def v2f32 : VTVec<2, f32, 114>; // 2 x f32 vector value
-def v3f32 : VTVec<3, f32, 115>; // 3 x f32 vector value
-def v4f32 : VTVec<4, f32, 116>; // 4 x f32 vector value
-def v5f32 : VTVec<5, f32, 117>; // 5 x f32 vector value
-def v6f32 : VTVec<6, f32, 118>; // 6 x f32 vector value
-def v7f32 : VTVec<7, f32, 119>; // 7 x f32 vector value
-def v8f32 : VTVec<8, f32, 120>; // 8 x f32 vector value
-def v9f32 : VTVec<9, f32, 121>; // 9 x f32 vector value
-def v10f32 : VTVec<10, f32, 122>; // 10 x f32 vector value
-def v11f32 : VTVec<11, f32, 123>; // 11 x f32 vector value
-def v12f32 : VTVec<12, f32, 124>; // 12 x f32 vector value
-def v16f32 : VTVec<16, f32, 125>; // 16 x f32 vector value
-def v32f32 : VTVec<32, f32, 126>; // 32 x f32 vector value
-def v64f32 : VTVec<64, f32, 127>; // 64 x f32 vector value
-def v128f32 : VTVec<128, f32, 128>; // 128 x f32 vector value
-def v256f32 : VTVec<256, f32, 129>; // 256 x f32 vector value
-def v512f32 : VTVec<512, f32, 130>; // 512 x f32 vector value
-def v1024f32 : VTVec<1024, f32, 131>; // 1024 x f32 vector value
-def v2048f32 : VTVec<2048, f32, 132>; // 2048 x f32 vector value
-
-def v1f64 : VTVec<1, f64, 133>; // 1 x f64 vector value
-def v2f64 : VTVec<2, f64, 134>; // 2 x f64 vector value
-def v3f64 : VTVec<3, f64, 135>; // 3 x f64 vector value
-def v4f64 : VTVec<4, f64, 136>; // 4 x f64 vector value
-def v8f64 : VTVec<8, f64, 137>; // 8 x f64 vector value
-def v16f64 : VTVec<16, f64, 138>; // 16 x f64 vector value
-def v32f64 : VTVec<32, f64, 139>; // 32 x f64 vector value
-def v64f64 : VTVec<64, f64, 140>; // 64 x f64 vector value
-def v128f64 : VTVec<128, f64, 141>; // 128 x f64 vector value
-def v256f64 : VTVec<256, f64, 142>; // 256 x f64 vector value
-
-def nxv1i1 : VTScalableVec<1, i1, 143>; // n x 1 x i1 vector value
-def nxv2i1 : VTScalableVec<2, i1, 144>; // n x 2 x i1 vector value
-def nxv4i1 : VTScalableVec<4, i1, 145>; // n x 4 x i1 vector value
-def nxv8i1 : VTScalableVec<8, i1, 146>; // n x 8 x i1 vector value
-def nxv16i1 : VTScalableVec<16, i1, 147>; // n x 16 x i1 vector value
-def nxv32i1 : VTScalableVec<32, i1, 148>; // n x 32 x i1 vector value
-def nxv64i1 : VTScalableVec<64, i1, 149>; // n x 64 x i1 vector value
-
-def nxv1i8 : VTScalableVec<1, i8, 150>; // n x 1 x i8 vector value
-def nxv2i8 : VTScalableVec<2, i8, 151>; // n x 2 x i8 vector value
-def nxv4i8 : VTScalableVec<4, i8, 152>; // n x 4 x i8 vector value
-def nxv8i8 : VTScalableVec<8, i8, 153>; // n x 8 x i8 vector value
-def nxv16i8 : VTScalableVec<16, i8, 154>; // n x 16 x i8 vector value
-def nxv32i8 : VTScalableVec<32, i8, 155>; // n x 32 x i8 vector value
-def nxv64i8 : VTScalableVec<64, i8, 156>; // n x 64 x i8 vector value
-
-def nxv1i16 : VTScalableVec<1, i16, 157>; // n x 1 x i16 vector value
-def nxv2i16 : VTScalableVec<2, i16, 158>; // n x 2 x i16 vector value
-def nxv4i16 : VTScalableVec<4, i16, 159>; // n x 4 x i16 vector value
-def nxv8i16 : VTScalableVec<8, i16, 160>; // n x 8 x i16 vector value
-def nxv16i16 : VTScalableVec<16, i16, 161>; // n x 16 x i16 vector value
-def nxv32i16 : VTScalableVec<32, i16, 162>; // n x 32 x i16 vector value
-
-def nxv1i32 : VTScalableVec<1, i32, 163>; // n x 1 x i32 vector value
-def nxv2i32 : VTScalableVec<2, i32, 164>; // n x 2 x i32 vector value
-def nxv4i32 : VTScalableVec<4, i32, 165>; // n x 4 x i32 vector value
-def nxv8i32 : VTScalableVec<8, i32, 166>; // n x 8 x i32 vector value
-def nxv16i32 : VTScalableVec<16, i32, 167>; // n x 16 x i32 vector value
-def nxv32i32 : VTScalableVec<32, i32, 168>; // n x 32 x i32 vector value
-
-def nxv1i64 : VTScalableVec<1, i64, 169>; // n x 1 x i64 vector value
-def nxv2i64 : VTScalableVec<2, i64, 170>; // n x 2 x i64 vector value
-def nxv4i64 : VTScalableVec<4, i64, 171>; // n x 4 x i64 vector value
-def nxv8i64 : VTScalableVec<8, i64, 172>; // n x 8 x i64 vector value
-def nxv16i64 : VTScalableVec<16, i64, 173>; // n x 16 x i64 vector value
-def nxv32i64 : VTScalableVec<32, i64, 174>; // n x 32 x i64 vector value
-
-def nxv1f16 : VTScalableVec<1, f16, 175>; // n x 1 x f16 vector value
-def nxv2f16 : VTScalableVec<2, f16, 176>; // n x 2 x f16 vector value
-def nxv4f16 : VTScalableVec<4, f16, 177>; // n x 4 x f16 vector value
-def nxv8f16 : VTScalableVec<8, f16, 178>; // n x 8 x f16 vector value
-def nxv16f16 : VTScalableVec<16, f16, 179>; // n x 16 x f16 vector value
-def nxv32f16 : VTScalableVec<32, f16, 180>; // n x 32 x f16 vector value
-
-def nxv1bf16 : VTScalableVec<1, bf16, 181>; // n x 1 x bf16 vector value
-def nxv2bf16 : VTScalableVec<2, bf16, 182>; // n x 2 x bf16 vector value
-def nxv4bf16 : VTScalableVec<4, bf16, 183>; // n x 4 x bf16 vector value
-def nxv8bf16 : VTScalableVec<8, bf16, 184>; // n x 8 x bf16 vector value
-def nxv16bf16 : VTScalableVec<16, bf16, 185>; // n x 16 x bf16 vector value
-def nxv32bf16 : VTScalableVec<32, bf16, 186>; // n x 32 x bf16 vector value
-
-def nxv1f32 : VTScalableVec<1, f32, 187>; // n x 1 x f32 vector value
-def nxv2f32 : VTScalableVec<2, f32, 188>; // n x 2 x f32 vector value
-def nxv4f32 : VTScalableVec<4, f32, 189>; // n x 4 x f32 vector value
-def nxv8f32 : VTScalableVec<8, f32, 190>; // n x 8 x f32 vector value
-def nxv16f32 : VTScalableVec<16, f32, 191>; // n x 16 x f32 vector value
-
-def nxv1f64 : VTScalableVec<1, f64, 192>; // n x 1 x f64 vector value
-def nxv2f64 : VTScalableVec<2, f64, 193>; // n x 2 x f64 vector value
-def nxv4f64 : VTScalableVec<4, f64, 194>; // n x 4 x f64 vector value
-def nxv8f64 : VTScalableVec<8, f64, 195>; // n x 8 x f64 vector value
+def v5i1 : VTVec<5, i1, 21>; // 5 x i1 vector value
+def v6i1 : VTVec<6, i1, 22>; // 6 x i1 vector value
+def v7i1 : VTVec<7, i1, 23>; // 7 x i1 vector value
+def v8i1 : VTVec<8, i1, 24>; // 8 x i1 vector value
+def v16i1 : VTVec<16, i1, 25>; // 16 x i1 vector value
+def v32i1 : VTVec<32, i1, 26>; // 32 x i1 vector value
+def v64i1 : VTVec<64, i1, 27>; // 64 x i1 vector value
+def v128i1 : VTVec<128, i1, 28>; // 128 x i1 vector value
+def v256i1 : VTVec<256, i1, 29>; // 256 x i1 vector value
+def v512i1 : VTVec<512, i1, 30>; // 512 x i1 vector value
+def v1024i1 : VTVec<1024, i1, 31>; // 1024 x i1 vector value
+def v2048i1 : VTVec<2048, i1, 32>; // 2048 x i1 vector value
+def v4096i1 : VTVec<4096, i1, 33>; // 4096 x i1 vector value
+
+def v128i2 : VTVec<128, i2, 34>; // 128 x i2 vector value
+def v256i2 : VTVec<256, i2, 35>; // 256 x i2 vector value
+
+def v64i4 : VTVec<64, i4, 36>; // 64 x i4 vector value
+def v128i4 : VTVec<128, i4, 37>; // 128 x i4 vector value
+
+def v1i8 : VTVec<1, i8, 38>; // 1 x i8 vector value
+def v2i8 : VTVec<2, i8, 39>; // 2 x i8 vector value
+def v3i8 : VTVec<3, i8, 40>; // 3 x i8 vector value
+def v4i8 : VTVec<4, i8, 41>; // 4 x i8 vector value
+def v5i8 : VTVec<5, i8, 42>; // 5 x i8 vector value
+def v6i8 : VTVec<6, i8, 43>; // 6 x i8 vector value
+def v7i8 : VTVec<7, i8, 44>; // 7 x i8 vector value
+def v8i8 : VTVec<8, i8, 45>; // 8 x i8 vector value
+def v16i8 : VTVec<16, i8, 46>; // 16 x i8 vector value
+def v32i8 : VTVec<32, i8, 47>; // 32 x i8 vector value
+def v64i8 : VTVec<64, i8, 48>; // 64 x i8 vector value
+def v128i8 : VTVec<128, i8, 49>; // 128 x i8 vector value
+def v256i8 : VTVec<256, i8, 50>; // 256 x i8 vector value
+def v512i8 : VTVec<512, i8, 51>; // 512 x i8 vector value
+def v1024i8 : VTVec<1024, i8, 52>; // 1024 x i8 vector value
+
+def v1i16 : VTVec<1, i16, 53>; // 1 x i16 vector value
+def v2i16 : VTVec<2, i16, 54>; // 2 x i16 vector value
+def v3i16 : VTVec<3, i16, 55>; // 3 x i16 vector value
+def v4i16 : VTVec<4, i16, 56>; // 4 x i16 vector value
+def v5i16 : VTVec<5, i16, 57>; // 5 x i16 vector value
+def v6i16 : VTVec<6, i16, 58>; // 6 x i16 vector value
+def v7i16 : VTVec<7, i16, 59>; // 7 x i16 vector value
+def v8i16 : VTVec<8, i16, 60>; // 8 x i16 vector value
+def v16i16 : VTVec<16, i16, 61>; // 16 x i16 vector value
+def v32i16 : VTVec<32, i16, 62>; // 32 x i16 vector value
+def v64i16 : VTVec<64, i16, 63>; // 64 x i16 vector value
+def v128i16 : VTVec<128, i16, 64>; // 128 x i16 vector value
+def v256i16 : VTVec<256, i16, 65>; // 256 x i16 vector value
+def v512i16 : VTVec<512, i16, 66>; // 512 x i16 vector value
+def v4096i16 : VTVec<4096, i16, 67>; // 4096 x i16 vector value
+
+def v1i32 : VTVec<1, i32, 68>; // 1 x i32 vector value
+def v2i32 : VTVec<2, i32, 69>; // 2 x i32 vector value
+def v3i32 : VTVec<3, i32, 70>; // 3 x i32 vector value
+def v4i32 : VTVec<4, i32, 71>; // 4 x i32 vector value
+def v5i32 : VTVec<5, i32, 72>; // 5 x i32 vector value
+def v6i32 : VTVec<6, i32, 73>; // 6 x i32 vector value
+def v7i32 : VTVec<7, i32, 74>; // 7 x i32 vector value
+def v8i32 : VTVec<8, i32, 75>; // 8 x i32 vector value
+def v9i32 : VTVec<9, i32, 76>; // 9 x i32 vector value
+def v10i32 : VTVec<10, i32, 77>; // 10 x i32 vector value
+def v11i32 : VTVec<11, i32, 78>; // 11 x i32 vector value
+def v12i32 : VTVec<12, i32, 79>; // 12 x i32 vector value
+def v16i32 : VTVec<16, i32, 80>; // 16 x i32 vector value
+def v32i32 : VTVec<32, i32, 81>; // 32 x i32 vector value
+def v64i32 : VTVec<64, i32, 82>; // 64 x i32 vector value
+def v128i32 : VTVec<128, i32, 83>; // 128 x i32 vector value
+def v256i32 : VTVec<256, i32, 84>; // 256 x i32 vector value
+def v512i32 : VTVec<512, i32, 85>; // 512 x i32 vector value
+def v1024i32 : VTVec<1024, i32, 86>; // 1024 x i32 vector value
+def v2048i32 : VTVec<2048, i32, 87>; // 2048 x i32 vector value
+def v4096i32 : VTVec<4096, i32, 88>; // 4096 x i32 vector value
+
+def v1i64 : VTVec<1, i64, 89>; // 1 x i64 vector value
+def v2i64 : VTVec<2, i64, 90>; // 2 x i64 vector value
+def v3i64 : VTVec<3, i64, 91>; // 3 x i64 vector value
+def v4i64 : VTVec<4, i64, 92>; // 4 x i64 vector value
+def v8i64 : VTVec<8, i64, 93>; // 8 x i64 vector value
+def v16i64 : VTVec<16, i64, 94>; // 16 x i64 vector value
+def v32i64 : VTVec<32, i64, 95>; // 32 x i64 vector value
+def v64i64 : VTVec<64, i64, 96>; // 64 x i64 vector value
+def v128i64 : VTVec<128, i64, 97>; // 128 x i64 vector value
+def v256i64 : VTVec<256, i64, 98>; // 256 x i64 vector value
+
+def v1i128 : VTVec<1, i128, 99>; // 1 x i128 vector value
+
+def v1f16 : VTVec<1, f16, 100>; // 1 x f16 vector value
+def v2f16 : VTVec<2, f16, 101>; // 2 x f16 vector value
+def v3f16 : VTVec<3, f16, 102>; // 3 x f16 vector value
+def v4f16 : VTVec<4, f16, 103>; // 4 x f16 vector value
+def v5f16 : VTVec<5, f16, 104>; // 5 x f16 vector value
+def v6f16 : VTVec<6, f16, 105>; // 6 x f16 vector value
+def v7f16 : VTVec<7, f16, 106>; // 7 x f16 vector value
+def v8f16 : VTVec<8, f16, 107>; // 8 x f16 vector value
+def v16f16 : VTVec<16, f16, 108>; // 16 x f16 vector value
+def v32f16 : VTVec<32, f16, 109>; // 32 x f16 vector value
+def v64f16 : VTVec<64, f16, 110>; // 64 x f16 vector value
+def v128f16 : VTVec<128, f16, 111>; // 128 x f16 vector value
+def v256f16 : VTVec<256, f16, 112>; // 256 x f16 vector value
+def v512f16 : VTVec<512, f16, 113>; // 512 x f16 vector value
+def v4096f16 : VTVec<4096, f16, 114>; // 4096 x f16 vector value
+
+def v1bf16 : VTVec<1, bf16, 115>; // 1 x bf16 vector value
+def v2bf16 : VTVec<2, bf16, 116>; // 2 x bf16 vector value
+def v3bf16 : VTVec<3, bf16, 117>; // 3 x bf16 vector value
+def v4bf16 : VTVec<4, bf16, 118>; // 4 x bf16 vector value
+def v8bf16 : VTVec<8, bf16, 119>; // 8 x bf16 vector value
+def v16bf16 : VTVec<16, bf16, 120>; // 16 x bf16 vector value
+def v32bf16 : VTVec<32, bf16, 121>; // 32 x bf16 vector value
+def v64bf16 : VTVec<64, bf16, 122>; // 64 x bf16 vector value
+def v128bf16 : VTVec<128, bf16, 123>; // 128 x bf16 vector value
+def v4096bf16 : VTVec<4096, bf16, 124>; // 4096 x bf16 vector value
+
+def v1f32 : VTVec<1, f32, 125>; // 1 x f32 vector value
+def v2f32 : VTVec<2, f32, 126>; // 2 x f32 vector value
+def v3f32 : VTVec<3, f32, 127>; // 3 x f32 vector value
+def v4f32 : VTVec<4, f32, 128>; // 4 x f32 vector value
+def v5f32 : VTVec<5, f32, 129>; // 5 x f32 vector value
+def v6f32 : VTVec<6, f32, 130>; // 6 x f32 vector value
+def v7f32 : VTVec<7, f32, 131>; // 7 x f32 vector value
+def v8f32 : VTVec<8, f32, 132>; // 8 x f32 vector value
+def v9f32 : VTVec<9, f32, 133>; // 9 x f32 vector value
+def v10f32 : VTVec<10, f32, 134>; // 10 x f32 vector value
+def v11f32 : VTVec<11, f32, 135>; // 11 x f32 vector value
+def v12f32 : VTVec<12, f32, 136>; // 12 x f32 vector value
+def v16f32 : VTVec<16, f32, 137>; // 16 x f32 vector value
+def v32f32 : VTVec<32, f32, 138>; // 32 x f32 vector value
+def v64f32 : VTVec<64, f32, 139>; // 64 x f32 vector value
+def v128f32 : VTVec<128, f32, 140>; // 128 x f32 vector value
+def v256f32 : VTVec<256, f32, 141>; // 256 x f32 vector value
+def v512f32 : VTVec<512, f32, 142>; // 512 x f32 vector value
+def v1024f32 : VTVec<1024, f32, 143>; // 1024 x f32 vector value
+def v2048f32 : VTVec<2048, f32, 144>; // 2048 x f32 vector value
+
+def v1f64 : VTVec<1, f64, 145>; // 1 x f64 vector value
+def v2f64 : VTVec<2, f64, 146>; // 2 x f64 vector value
+def v3f64 : VTVec<3, f64, 147>; // 3 x f64 vector value
+def v4f64 : VTVec<4, f64, 148>; // 4 x f64 vector value
+def v8f64 : VTVec<8, f64, 149>; // 8 x f64 vector value
+def v16f64 : VTVec<16, f64, 150>; // 16 x f64 vector value
+def v32f64 : VTVec<32, f64, 151>; // 32 x f64 vector value
+def v64f64 : VTVec<64, f64, 152>; // 64 x f64 vector value
+def v128f64 : VTVec<128, f64, 153>; // 128 x f64 vector value
+def v256f64 : VTVec<256, f64, 154>; // 256 x f64 vector value
+
+def nxv1i1 : VTScalableVec<1, i1, 155>; // n x 1 x i1 vector value
+def nxv2i1 : VTScalableVec<2, i1, 156>; // n x 2 x i1 vector value
+def nxv4i1 : VTScalableVec<4, i1, 157>; // n x 4 x i1 vector value
+def nxv8i1 : VTScalableVec<8, i1, 158>; // n x 8 x i1 vector value
+def nxv16i1 : VTScalableVec<16, i1, 159>; // n x 16 x i1 vector value
+def nxv32i1 : VTScalableVec<32, i1, 160>; // n x 32 x i1 vector value
+def nxv64i1 : VTScalableVec<64, i1, 161>; // n x 64 x i1 vector value
+
+def nxv1i8 : VTScalableVec<1, i8, 162>; // n x 1 x i8 vector value
+def nxv2i8 : VTScalableVec<2, i8, 163>; // n x 2 x i8 vector value
+def nxv4i8 : VTScalableVec<4, i8, 164>; // n x 4 x i8 vector value
+def nxv8i8 : VTScalableVec<8, i8, 165>; // n x 8 x i8 vector value
+def nxv16i8 : VTScalableVec<16, i8, 166>; // n x 16 x i8 vector value
+def nxv32i8 : VTScalableVec<32, i8, 167>; // n x 32 x i8 vector value
+def nxv64i8 : VTScalableVec<64, i8, 168>; // n x 64 x i8 vector value
+
+def nxv1i16 : VTScalableVec<1, i16, 169>; // n x 1 x i16 vector value
+def nxv2i16 : VTScalableVec<2, i16, 170>; // n x 2 x i16 vector value
+def nxv4i16 : VTScalableVec<4, i16, 171>; // n x 4 x i16 vector value
+def nxv8i16 : VTScalableVec<8, i16, 172>; // n x 8 x i16 vector value
+def nxv16i16 : VTScalableVec<16, i16, 173>; // n x 16 x i16 vector value
+def nxv32i16 : VTScalableVec<32, i16, 174>; // n x 32 x i16 vector value
+
+def nxv1i32 : VTScalableVec<1, i32, 175>; // n x 1 x i32 vector value
+def nxv2i32 : VTScalableVec<2, i32, 176>; // n x 2 x i32 vector value
+def nxv4i32 : VTScalableVec<4, i32, 177>; // n x 4 x i32 vector value
+def nxv8i32 : VTScalableVec<8, i32, 178>; // n x 8 x i32 vector value
+def nxv16i32 : VTScalableVec<16, i32, 179>; // n x 16 x i32 vector value
+def nxv32i32 : VTScalableVec<32, i32, 180>; // n x 32 x i32 vector value
+
+def nxv1i64 : VTScalableVec<1, i64, 181>; // n x 1 x i64 vector value
+def nxv2i64 : VTScalableVec<2, i64, 182>; // n x 2 x i64 vector value
+def nxv4i64 : VTScalableVec<4, i64, 183>; // n x 4 x i64 vector value
+def nxv8i64 : VTScalableVec<8, i64, 184>; // n x 8 x i64 vector value
+def nxv16i64 : VTScalableVec<16, i64, 185>; // n x 16 x i64 vector value
+def nxv32i64 : VTScalableVec<32, i64, 186>; // n x 32 x i64 vector value
+
+def nxv1f16 : VTScalableVec<1, f16, 187>; // n x 1 x f16 vector value
+def nxv2f16 : VTScalableVec<2, f16, 188>; // n x 2 x f16 vector value
+def nxv4f16 : VTScalableVec<4, f16, 189>; // n x 4 x f16 vector value
+def nxv8f16 : VTScalableVec<8, f16, 190>; // n x 8 x f16 vector value
+def nxv16f16 : VTScalableVec<16, f16, 191>; // n x 16 x f16 vector value
+def nxv32f16 : VTScalableVec<32, f16, 192>; // n x 32 x f16 vector value
+
+def nxv1bf16 : VTScalableVec<1, bf16, 193>; // n x 1 x bf16 vector value
+def nxv2bf16 : VTScalableVec<2, bf16, 194>; // n x 2 x bf16 vector value
+def nxv4bf16 : VTScalableVec<4, bf16, 195>; // n x 4 x bf16 vector value
+def nxv8bf16 : VTScalableVec<8, bf16, 196>; // n x 8 x bf16 vector value
+def nxv16bf16 : VTScalableVec<16, bf16, 197>; // n x 16 x bf16 vector value
+def nxv32bf16 : VTScalableVec<32, bf16, 198>; // n x 32 x bf16 vector value
+
+def nxv1f32 : VTScalableVec<1, f32, 199>; // n x 1 x f32 vector value
+def nxv2f32 : VTScalableVec<2, f32, 200>; // n x 2 x f32 vector value
+def nxv4f32 : VTScalableVec<4, f32, 201>; // n x 4 x f32 vector value
+def nxv8f32 : VTScalableVec<8, f32, 202>; // n x 8 x f32 vector value
+def nxv16f32 : VTScalableVec<16, f32, 203>; // n x 16 x f32 vector value
+
+def nxv1f64 : VTScalableVec<1, f64, 204>; // n x 1 x f64 vector value
+def nxv2f64 : VTScalableVec<2, f64, 205>; // n x 2 x f64 vector value
+def nxv4f64 : VTScalableVec<4, f64, 206>; // n x 4 x f64 vector value
+def nxv8f64 : VTScalableVec<8, f64, 207>; // n x 8 x f64 vector value
// Sz = NF * MinNumElts * 8(bits)
-def riscv_nxv1i8x2 : VTVecTup<16, 2, i8, 196>; // RISCV vector tuple(min_num_elts=1, nf=2)
-def riscv_nxv1i8x3 : VTVecTup<24, 3, i8, 197>; // RISCV vector tuple(min_num_elts=1, nf=3)
-def riscv_nxv1i8x4 : VTVecTup<32, 4, i8, 198>; // RISCV vector tuple(min_num_elts=1, nf=4)
-def riscv_nxv1i8x5 : VTVecTup<40, 5, i8, 199>; // RISCV vector tuple(min_num_elts=1, nf=5)
-def riscv_nxv1i8x6 : VTVecTup<48, 6, i8, 200>; // RISCV vector tuple(min_num_elts=1, nf=6)
-def riscv_nxv1i8x7 : VTVecTup<56, 7, i8, 201>; // RISCV vector tuple(min_num_elts=1, nf=7)
-def riscv_nxv1i8x8 : VTVecTup<64, 8, i8, 202>; // RISCV vector tuple(min_num_elts=1, nf=8)
-def riscv_nxv2i8x2 : VTVecTup<32, 2, i8, 203>; // RISCV vector tuple(min_num_elts=2, nf=2)
-def riscv_nxv2i8x3 : VTVecTup<48, 3, i8, 204>; // RISCV vector tuple(min_num_elts=2, nf=3)
-def riscv_nxv2i8x4 : VTVecTup<64, 4, i8, 205>; // RISCV vector tuple(min_num_elts=2, nf=4)
-def riscv_nxv2i8x5 : VTVecTup<80, 5, i8, 206>; // RISCV vector tuple(min_num_elts=2, nf=5)
-def riscv_nxv2i8x6 : VTVecTup<96, 6, i8, 207>; // RISCV vector tuple(min_num_elts=2, nf=6)
-def riscv_nxv2i8x7 : VTVecTup<112, 7, i8, 208>; // RISCV vector tuple(min_num_elts=2, nf=7)
-def riscv_nxv2i8x8 : VTVecTup<128, 8, i8, 209>; // RISCV vector tuple(min_num_elts=2, nf=8)
-def riscv_nxv4i8x2 : VTVecTup<64, 2, i8, 210>; // RISCV vector tuple(min_num_elts=4, nf=2)
-def riscv_nxv4i8x3 : VTVecTup<96, 3, i8, 211>; // RISCV vector tuple(min_num_elts=4, nf=3)
-def riscv_nxv4i8x4 : VTVecTup<128, 4, i8, 212>; // RISCV vector tuple(min_num_elts=4, nf=4)
-def riscv_nxv4i8x5 : VTVecTup<160, 5, i8, 213>; // RISCV vector tuple(min_num_elts=4, nf=5)
-def riscv_nxv4i8x6 : VTVecTup<192, 6, i8, 214>; // RISCV vector tuple(min_num_elts=4, nf=6)
-def riscv_nxv4i8x7 : VTVecTup<224, 7, i8, 215>; // RISCV vector tuple(min_num_elts=4, nf=7)
-def riscv_nxv4i8x8 : VTVecTup<256, 8, i8, 216>; // RISCV vector tuple(min_num_elts=4, nf=8)
-def riscv_nxv8i8x2 : VTVecTup<128, 2, i8, 217>; // RISCV vector tuple(min_num_elts=8, nf=2)
-def riscv_nxv8i8x3 : VTVecTup<192, 3, i8, 218>; // RISCV vector tuple(min_num_elts=8, nf=3)
-def riscv_nxv8i8x4 : VTVecTup<256, 4, i8, 219>; // RISCV vector tuple(min_num_elts=8, nf=4)
-def riscv_nxv8i8x5 : VTVecTup<320, 5, i8, 220>; // RISCV vector tuple(min_num_elts=8, nf=5)
-def riscv_nxv8i8x6 : VTVecTup<384, 6, i8, 221>; // RISCV vector tuple(min_num_elts=8, nf=6)
-def riscv_nxv8i8x7 : VTVecTup<448, 7, i8, 222>; // RISCV vector tuple(min_num_elts=8, nf=7)
-def riscv_nxv8i8x8 : VTVecTup<512, 8, i8, 223>; // RISCV vector tuple(min_num_elts=8, nf=8)
-def riscv_nxv16i8x2 : VTVecTup<256, 2, i8, 224>; // RISCV vector tuple(min_num_elts=16, nf=2)
-def riscv_nxv16i8x3 : VTVecTup<384, 3, i8, 225>; // RISCV vector tuple(min_num_elts=16, nf=3)
-def riscv_nxv16i8x4 : VTVecTup<512, 4, i8, 226>; // RISCV vector tuple(min_num_elts=16, nf=4)
-def riscv_nxv32i8x2 : VTVecTup<512, 2, i8, 227>; // RISCV vector tuple(min_num_elts=32, nf=2)
-
-def x86mmx : ValueType<64, 228>; // X86 MMX value
-def Glue : ValueType<0, 229>; // Pre-RA sched glue
-def isVoid : ValueType<0, 230>; // Produces no value
-def untyped : ValueType<8, 231> { // Produces an untyped value
+def riscv_nxv1i8x2 : VTVecTup<16, 2, i8, 208>; // RISCV vector tuple(min_num_elts=1, nf=2)
+def riscv_nxv1i8x3 : VTVecTup<24, 3, i8, 209>; // RISCV vector tuple(min_num_elts=1, nf=3)
+def riscv_nxv1i8x4 : VTVecTup<32, 4, i8, 210>; // RISCV vector tuple(min_num_elts=1, nf=4)
+def riscv_nxv1i8x5 : VTVecTup<40, 5, i8, 211>; // RISCV vector tuple(min_num_elts=1, nf=5)
+def riscv_nxv1i8x6 : VTVecTup<48, 6, i8, 212>; // RISCV vector tuple(min_num_elts=1, nf=6)
+def riscv_nxv1i8x7 : VTVecTup<56, 7, i8, 213>; // RISCV vector tuple(min_num_elts=1, nf=7)
+def riscv_nxv1i8x8 : VTVecTup<64, 8, i8, 214>; // RISCV vector tuple(min_num_elts=1, nf=8)
+def riscv_nxv2i8x2 : VTVecTup<32, 2, i8, 215>; // RISCV vector tuple(min_num_elts=2, nf=2)
+def riscv_nxv2i8x3 : VTVecTup<48, 3, i8, 216>; // RISCV vector tuple(min_num_elts=2, nf=3)
+def riscv_nxv2i8x4 : VTVecTup<64, 4, i8, 217>; // RISCV vector tuple(min_num_elts=2, nf=4)
+def riscv_nxv2i8x5 : VTVecTup<80, 5, i8, 218>; // RISCV vector tuple(min_num_elts=2, nf=5)
+def riscv_nxv2i8x6 : VTVecTup<96, 6, i8, 219>; // RISCV vector tuple(min_num_elts=2, nf=6)
+def riscv_nxv2i8x7 : VTVecTup<112, 7, i8, 220>; // RISCV vector tuple(min_num_elts=2, nf=7)
+def riscv_nxv2i8x8 : VTVecTup<128, 8, i8, 221>; // RISCV vector tuple(min_num_elts=2, nf=8)
+def riscv_nxv4i8x2 : VTVecTup<64, 2, i8, 222>; // RISCV vector tuple(min_num_elts=4, nf=2)
+def riscv_nxv4i8x3 : VTVecTup<96, 3, i8, 223>; // RISCV vector tuple(min_num_elts=4, nf=3)
+def riscv_nxv4i8x4 : VTVecTup<128, 4, i8, 224>; // RISCV vector tuple(min_num_elts=4, nf=4)
+def riscv_nxv4i8x5 : VTVecTup<160, 5, i8, 225>; // RISCV vector tuple(min_num_elts=4, nf=5)
+def riscv_nxv4i8x6 : VTVecTup<192, 6, i8, 226>; // RISCV vector tuple(min_num_elts=4, nf=6)
+def riscv_nxv4i8x7 : VTVecTup<224, 7, i8, 227>; // RISCV vector tuple(min_num_elts=4, nf=7)
+def riscv_nxv4i8x8 : VTVecTup<256, 8, i8, 228>; // RISCV vector tuple(min_num_elts=4, nf=8)
+def riscv_nxv8i8x2 : VTVecTup<128, 2, i8, 229>; // RISCV vector tuple(min_num_elts=8, nf=2)
+def riscv_nxv8i8x3 : VTVecTup<192, 3, i8, 230>; // RISCV vector tuple(min_num_elts=8, nf=3)
+def riscv_nxv8i8x4 : VTVecTup<256, 4, i8, 231>; // RISCV vector tuple(min_num_elts=8, nf=4)
+def riscv_nxv8i8x5 : VTVecTup<320, 5, i8, 232>; // RISCV vector tuple(min_num_elts=8, nf=5)
+def riscv_nxv8i8x6 : VTVecTup<384, 6, i8, 233>; // RISCV vector tuple(min_num_elts=8, nf=6)
+def riscv_nxv8i8x7 : VTVecTup<448, 7, i8, 234>; // RISCV vector tuple(min_num_elts=8, nf=7)
+def riscv_nxv8i8x8 : VTVecTup<512, 8, i8, 235>; // RISCV vector tuple(min_num_elts=8, nf=8)
+def riscv_nxv16i8x2 : VTVecTup<256, 2, i8, 236>; // RISCV vector tuple(min_num_elts=16, nf=2)
+def riscv_nxv16i8x3 : VTVecTup<384, 3, i8, 237>; // RISCV vector tuple(min_num_elts=16, nf=3)
+def riscv_nxv16i8x4 : VTVecTup<512, 4, i8, 238>; // RISCV vector tuple(min_num_elts=16, nf=4)
+def riscv_nxv32i8x2 : VTVecTup<512, 2, i8, 239>; // RISCV vector tuple(min_num_elts=32, nf=2)
+
+def x86mmx : ValueType<64, 240>; // X86 MMX value
+def Glue : ValueType<0, 241>; // Pre-RA sched glue
+def isVoid : ValueType<0, 242>; // Produces no value
+def untyped : ValueType<8, 243> { // Produces an untyped value
let LLVMName = "Untyped";
}
-def funcref : ValueType<0, 232>; // WebAssembly's funcref type
-def externref : ValueType<0, 233>; // WebAssembly's externref type
-def exnref : ValueType<0, 234>; // WebAssembly's exnref type
-def x86amx : ValueType<8192, 235>; // X86 AMX value
-def i64x8 : ValueType<512, 236>; // 8 Consecutive GPRs (AArch64)
+def funcref : ValueType<0, 244>; // WebAssembly's funcref type
+def externref : ValueType<0, 245>; // WebAssembly's externref type
+def exnref : ValueType<0, 246>; // WebAssembly's exnref type
+def x86amx : ValueType<8192, 247>; // X86 AMX value
+def i64x8 : ValueType<512, 248>; // 8 Consecutive GPRs (AArch64)
def aarch64svcount
- : ValueType<16, 237>; // AArch64 predicate-as-counter
-def spirvbuiltin : ValueType<0, 238>; // SPIR-V's builtin type
+ : ValueType<16, 249>; // AArch64 predicate-as-counter
+def spirvbuiltin : ValueType<0, 250>; // SPIR-V's builtin type
// AMDGPU buffer fat pointer, buffer rsrc + offset, rewritten before MIR translation.
// FIXME: Remove this and the getPointerType() override if MVT::i160 is added.
-def amdgpuBufferFatPointer : ValueType<160, 239>;
+def amdgpuBufferFatPointer : ValueType<160, 251>;
// AMDGPU buffer strided pointer, buffer rsrc + index + offset, doesn't reach MIR.
// FIXME: Remove this and the getPointerType() override if MVT::i82 is added.
-def amdgpuBufferStridedPointer : ValueType<192, 240>;
+def amdgpuBufferStridedPointer : ValueType<192, 252>;
-def aarch64mfp8 : ValueType<8, 241>; // 8-bit value in FPR (AArch64)
+def aarch64mfp8 : ValueType<8, 253>; // 8-bit value in FPR (AArch64)
let isNormalValueType = false in {
def token : ValueType<0, 504>; // TokenTy
diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h b/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h
new file mode 100644
index 0000000..70a2eeb
--- /dev/null
+++ b/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h
@@ -0,0 +1,162 @@
+//===- HLSLBinding.h - Representation for resource bindings in HLSL -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains objects to represent resource bindings.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FRONTEND_HLSL_HLSLBINDING_H
+#define LLVM_FRONTEND_HLSL_HLSLBINDING_H
+
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DXILABI.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+namespace hlsl {
+
+/// BindingInfo represents the ranges of bindings and free space for each
+/// `dxil::ResourceClass`. This can represent HLSL-level bindings as well as
+/// bindings described in root signatures, and can be used for analysis of
+/// overlapping or missing bindings as well as for finding space for implicit
+/// bindings.
+///
+/// As an example, given these resource bindings:
+///
+/// RWBuffer<float> A[10] : register(u3);
+/// RWBuffer<float> B[] : register(u5, space2)
+///
+/// The binding info for UAV bindings should look like this:
+///
+/// UAVSpaces {
+/// ResClass = ResourceClass::UAV,
+/// Spaces = {
+/// { Space = 0u, FreeRanges = {{ 0u, 2u }, { 13u, ~0u }} },
+/// { Space = 2u, FreeRanges = {{ 0u, 4u }} }
+/// }
+/// }
+class BindingInfo {
+public:
+ struct BindingRange {
+ uint32_t LowerBound;
+ uint32_t UpperBound;
+ BindingRange(uint32_t LB, uint32_t UB) : LowerBound(LB), UpperBound(UB) {}
+ };
+
+ struct RegisterSpace {
+ uint32_t Space;
+ SmallVector<BindingRange> FreeRanges;
+ RegisterSpace(uint32_t Space) : Space(Space) {
+ FreeRanges.emplace_back(0, ~0u);
+ }
+ // Size == -1 means unbounded array
+ LLVM_ABI std::optional<uint32_t> findAvailableBinding(int32_t Size);
+ };
+
+ struct BindingSpaces {
+ dxil::ResourceClass RC;
+ llvm::SmallVector<RegisterSpace> Spaces;
+ BindingSpaces(dxil::ResourceClass RC) : RC(RC) {}
+ LLVM_ABI RegisterSpace &getOrInsertSpace(uint32_t Space);
+ };
+
+private:
+ BindingSpaces SRVSpaces{dxil::ResourceClass::SRV};
+ BindingSpaces UAVSpaces{dxil::ResourceClass::UAV};
+ BindingSpaces CBufferSpaces{dxil::ResourceClass::CBuffer};
+ BindingSpaces SamplerSpaces{dxil::ResourceClass::Sampler};
+
+public:
+ BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) {
+ switch (RC) {
+ case dxil::ResourceClass::SRV:
+ return SRVSpaces;
+ case dxil::ResourceClass::UAV:
+ return UAVSpaces;
+ case dxil::ResourceClass::CBuffer:
+ return CBufferSpaces;
+ case dxil::ResourceClass::Sampler:
+ return SamplerSpaces;
+ }
+
+ llvm_unreachable("Invalid resource class");
+ }
+ const BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) const {
+ return const_cast<BindingInfo *>(this)->getBindingSpaces(RC);
+ }
+
+ // Size == -1 means unbounded array
+ LLVM_ABI std::optional<uint32_t>
+ findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size);
+
+ friend class BindingInfoBuilder;
+};
+
+/// Builder class for creating a /c BindingInfo.
+class BindingInfoBuilder {
+public:
+ struct Binding {
+ dxil::ResourceClass RC;
+ uint32_t Space;
+ uint32_t LowerBound;
+ uint32_t UpperBound;
+ const void *Cookie;
+
+ Binding(dxil::ResourceClass RC, uint32_t Space, uint32_t LowerBound,
+ uint32_t UpperBound, const void *Cookie)
+ : RC(RC), Space(Space), LowerBound(LowerBound), UpperBound(UpperBound),
+ Cookie(Cookie) {}
+
+ bool isUnbounded() const { return UpperBound == ~0U; }
+
+ bool operator==(const Binding &RHS) const {
+ return std::tie(RC, Space, LowerBound, UpperBound, Cookie) ==
+ std::tie(RHS.RC, RHS.Space, RHS.LowerBound, RHS.UpperBound,
+ RHS.Cookie);
+ }
+ bool operator!=(const Binding &RHS) const { return !(*this == RHS); }
+
+ bool operator<(const Binding &RHS) const {
+ return std::tie(RC, Space, LowerBound) <
+ std::tie(RHS.RC, RHS.Space, RHS.LowerBound);
+ }
+ };
+
+private:
+ SmallVector<Binding> Bindings;
+
+public:
+ void trackBinding(dxil::ResourceClass RC, uint32_t Space, uint32_t LowerBound,
+ uint32_t UpperBound, const void *Cookie) {
+ Bindings.emplace_back(RC, Space, LowerBound, UpperBound, Cookie);
+ }
+ /// Calculate the binding info - \c ReportOverlap will be called once for each
+ /// overlapping binding.
+ BindingInfo calculateBindingInfo(
+ llvm::function_ref<void(const BindingInfoBuilder &Builder,
+ const Binding &Overlapping)>
+ ReportOverlap);
+
+ /// Calculate the binding info - \c HasOverlap will be set to indicate whether
+ /// there are any overlapping bindings.
+ BindingInfo calculateBindingInfo(bool &HasOverlap) {
+ HasOverlap = false;
+ return calculateBindingInfo(
+ [&HasOverlap](auto, auto) { HasOverlap = true; });
+ }
+
+ /// For use in the \c ReportOverlap callback of \c calculateBindingInfo -
+ /// finds a binding that the \c ReportedBinding overlaps with.
+ const Binding &findOverlapping(const Binding &ReportedBinding) const;
+};
+
+} // namespace hlsl
+} // namespace llvm
+
+#endif // LLVM_FRONTEND_HLSL_HLSLBINDING_H
diff --git a/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h b/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h
index 6fa51ed..0bd0774 100644
--- a/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h
+++ b/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h
@@ -14,6 +14,7 @@
#ifndef LLVM_FRONTEND_HLSL_ROOTSIGNATUREMETADATA_H
#define LLVM_FRONTEND_HLSL_ROOTSIGNATUREMETADATA_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Frontend/HLSL/HLSLRootSignature.h"
#include "llvm/IR/Constants.h"
#include "llvm/MC/DXContainerRootSignature.h"
@@ -26,6 +27,80 @@ class Metadata;
namespace hlsl {
namespace rootsig {
+template <typename T>
+class RootSignatureValidationError
+ : public ErrorInfo<RootSignatureValidationError<T>> {
+public:
+ static char ID;
+ StringRef ParamName;
+ T Value;
+
+ RootSignatureValidationError(StringRef ParamName, T Value)
+ : ParamName(ParamName), Value(Value) {}
+
+ void log(raw_ostream &OS) const override {
+ OS << "Invalid value for " << ParamName << ": " << Value;
+ }
+
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+};
+
+class GenericRSMetadataError : public ErrorInfo<GenericRSMetadataError> {
+public:
+ static char ID;
+ StringRef Message;
+ MDNode *MD;
+
+ GenericRSMetadataError(StringRef Message, MDNode *MD)
+ : Message(Message), MD(MD) {}
+
+ void log(raw_ostream &OS) const override {
+ OS << Message;
+ if (MD) {
+ OS << "\n";
+ MD->printTree(OS);
+ }
+ }
+
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+};
+
+class InvalidRSMetadataFormat : public ErrorInfo<InvalidRSMetadataFormat> {
+public:
+ static char ID;
+ StringRef ElementName;
+
+ InvalidRSMetadataFormat(StringRef ElementName) : ElementName(ElementName) {}
+
+ void log(raw_ostream &OS) const override {
+ OS << "Invalid format for " << ElementName;
+ }
+
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+};
+
+class InvalidRSMetadataValue : public ErrorInfo<InvalidRSMetadataValue> {
+public:
+ static char ID;
+ StringRef ParamName;
+
+ InvalidRSMetadataValue(StringRef ParamName) : ParamName(ParamName) {}
+
+ void log(raw_ostream &OS) const override {
+ OS << "Invalid value for " << ParamName;
+ }
+
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+};
+
class MetadataBuilder {
public:
MetadataBuilder(llvm::LLVMContext &Ctx, ArrayRef<RootElement> Elements)
@@ -66,29 +141,27 @@ class MetadataParser {
public:
MetadataParser(MDNode *Root) : Root(Root) {}
- LLVM_ABI bool ParseRootSignature(LLVMContext *Ctx,
- mcdxbc::RootSignatureDesc &RSD);
+ LLVM_ABI llvm::Expected<llvm::mcdxbc::RootSignatureDesc>
+ ParseRootSignature(uint32_t Version);
private:
- bool parseRootFlags(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
- MDNode *RootFlagNode);
- bool parseRootConstants(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
- MDNode *RootConstantNode);
- bool parseRootDescriptors(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
- MDNode *RootDescriptorNode,
- RootSignatureElementKind ElementKind);
- bool parseDescriptorRange(LLVMContext *Ctx, mcdxbc::DescriptorTable &Table,
- MDNode *RangeDescriptorNode);
- bool parseDescriptorTable(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
- MDNode *DescriptorTableNode);
- bool parseRootSignatureElement(LLVMContext *Ctx,
- mcdxbc::RootSignatureDesc &RSD,
- MDNode *Element);
- bool parseStaticSampler(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
- MDNode *StaticSamplerNode);
-
- bool validateRootSignature(LLVMContext *Ctx,
- const llvm::mcdxbc::RootSignatureDesc &RSD);
+ llvm::Error parseRootFlags(mcdxbc::RootSignatureDesc &RSD,
+ MDNode *RootFlagNode);
+ llvm::Error parseRootConstants(mcdxbc::RootSignatureDesc &RSD,
+ MDNode *RootConstantNode);
+ llvm::Error parseRootDescriptors(mcdxbc::RootSignatureDesc &RSD,
+ MDNode *RootDescriptorNode,
+ RootSignatureElementKind ElementKind);
+ llvm::Error parseDescriptorRange(mcdxbc::DescriptorTable &Table,
+ MDNode *RangeDescriptorNode);
+ llvm::Error parseDescriptorTable(mcdxbc::RootSignatureDesc &RSD,
+ MDNode *DescriptorTableNode);
+ llvm::Error parseRootSignatureElement(mcdxbc::RootSignatureDesc &RSD,
+ MDNode *Element);
+ llvm::Error parseStaticSampler(mcdxbc::RootSignatureDesc &RSD,
+ MDNode *StaticSamplerNode);
+
+ llvm::Error validateRootSignature(const llvm::mcdxbc::RootSignatureDesc &RSD);
MDNode *Root;
};
diff --git a/llvm/include/llvm/Frontend/Offloading/PropertySet.h b/llvm/include/llvm/Frontend/Offloading/PropertySet.h
new file mode 100644
index 0000000..d198d3e
--- /dev/null
+++ b/llvm/include/llvm/Frontend/Offloading/PropertySet.h
@@ -0,0 +1,33 @@
+///===- llvm/Frontend/Offloading/PropertySet.h ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+///===---------------------------------------------------------------------===//
+/// \file This file defines PropertySetRegistry and PropertyValue types and
+/// provides helper functions to translate PropertySetRegistry from/to JSON.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Error.h"
+
+#include <map>
+#include <variant>
+
+namespace llvm {
+class raw_ostream;
+class MemoryBufferRef;
+
+namespace offloading {
+
+using ByteArray = SmallVector<unsigned char, 0>;
+using PropertyValue = std::variant<uint32_t, ByteArray>;
+using PropertySet = std::map<std::string, PropertyValue>;
+using PropertySetRegistry = std::map<std::string, PropertySet>;
+
+void writePropertiesToJSON(const PropertySetRegistry &P, raw_ostream &O);
+Expected<PropertySetRegistry> readPropertiesFromJSON(MemoryBufferRef Buf);
+
+} // namespace offloading
+} // namespace llvm
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index 7919f7a..ce1cedc 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -578,8 +578,9 @@ struct DynamicAllocatorsT {
template <typename T, typename I, typename E> //
struct EnterT {
using List = ObjectListT<I, E>;
- using WrapperTrait = std::true_type;
- List v;
+ ENUM(Modifier, Automap);
+ using TupleTrait = std::true_type;
+ std::tuple<OPT(Modifier), List> t;
};
// V5.2: [5.6.2] `exclusive` clause
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 1b94657..79f25bb 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -179,7 +179,7 @@ def OMPC_DynamicAllocators : Clause<[Spelling<"dynamic_allocators">]> {
let clangClass = "OMPDynamicAllocatorsClause";
}
def OMPC_Enter : Clause<[Spelling<"enter">]> {
- let flangClass = "OmpObjectList";
+ let flangClass = "OmpEnterClause";
}
def OMPC_Exclusive : Clause<[Spelling<"exclusive">]> {
let clangClass = "OMPExclusiveClause";
@@ -1460,7 +1460,7 @@ def OMP_DoSimd : Directive<[Spelling<"do simd">]> {
];
let allowedOnceClauses = [
VersionedClause<OMPC_Collapse>,
- VersionedClause<OMPC_If>,
+ VersionedClause<OMPC_If, 50>,
VersionedClause<OMPC_NoWait>,
VersionedClause<OMPC_Order, 50>,
VersionedClause<OMPC_Ordered>,
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 0318427..2e13896 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -996,14 +996,6 @@ public:
return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
}
- /// FIXME: Remove this function once transition to Align is over.
- /// Use getDestAlign() instead.
- LLVM_DEPRECATED("Use getDestAlign() instead", "getDestAlign")
- unsigned getDestAlignment() const {
- if (auto MA = getParamAlign(ARG_DEST))
- return MA->value();
- return 0;
- }
MaybeAlign getDestAlign() const { return getParamAlign(ARG_DEST); }
/// Set the specified arguments of the instruction.
@@ -1057,15 +1049,6 @@ public:
return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
}
- /// FIXME: Remove this function once transition to Align is over.
- /// Use getSourceAlign() instead.
- LLVM_DEPRECATED("Use getSourceAlign() instead", "getSourceAlign")
- unsigned getSourceAlignment() const {
- if (auto MA = BaseCL::getParamAlign(ARG_SOURCE))
- return MA->value();
- return 0;
- }
-
MaybeAlign getSourceAlign() const {
return BaseCL::getParamAlign(ARG_SOURCE);
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 1da4e36..469bdb4 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -593,6 +593,14 @@ def int_amdgcn_tanh : DefaultAttrsIntrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
+def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic<
+ [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">;
+
+def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic<
+ [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">;
+
def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;
@@ -601,32 +609,108 @@ def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
-class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
- [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pk_fp8_f16
+ : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
+ [IntrNoMem, IntrSpeculatable]>,
+ ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">;
+
+def int_amdgcn_cvt_pk_bf8_f16
+ : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
+ [IntrNoMem, IntrSpeculatable]>,
+ ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">;
+
+// llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
+// byte_sel selects byte to write in vdst.
+def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic<
+ [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">;
+
+// llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
+// byte_sel selects byte to write in vdst.
+def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic<
+ [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">;
+
+// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..7]
+class AMDGPUCvtScaleIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
+ [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic<
- [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
+ [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScaleF32SRIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
[DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
+def int_amdgcn_cvt_scale_pk8_f16_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_v2i32_ty, "cvt_scale_pk8_f16_fp8">;
+def int_amdgcn_cvt_scale_pk8_bf16_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_v2i32_ty, "cvt_scale_pk8_bf16_fp8">;
+def int_amdgcn_cvt_scale_pk8_f16_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_v2i32_ty, "cvt_scale_pk8_f16_bf8">;
+def int_amdgcn_cvt_scale_pk8_bf16_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_v2i32_ty, "cvt_scale_pk8_bf16_bf8">;
+def int_amdgcn_cvt_scale_pk8_f16_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_i32_ty, "cvt_scale_pk8_f16_fp4">;
+def int_amdgcn_cvt_scale_pk8_bf16_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_i32_ty, "cvt_scale_pk8_bf16_fp4">;
+def int_amdgcn_cvt_scale_pk8_f32_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_v2i32_ty, "cvt_scale_pk8_f32_fp8">;
+def int_amdgcn_cvt_scale_pk8_f32_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_v2i32_ty, "cvt_scale_pk8_f32_bf8">;
+def int_amdgcn_cvt_scale_pk8_f32_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_i32_ty, "cvt_scale_pk8_f32_fp4">;
+def int_amdgcn_cvt_scale_pk16_f16_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16f16_ty, llvm_v3i32_ty, "cvt_scale_pk16_f16_bf6">;
+def int_amdgcn_cvt_scale_pk16_bf16_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16bf16_ty, llvm_v3i32_ty, "cvt_scale_pk16_bf16_bf6">;
+def int_amdgcn_cvt_scale_pk16_f16_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16f16_ty, llvm_v3i32_ty, "cvt_scale_pk16_f16_fp6">;
+def int_amdgcn_cvt_scale_pk16_bf16_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16bf16_ty, llvm_v3i32_ty, "cvt_scale_pk16_bf16_fp6">;
+def int_amdgcn_cvt_scale_pk16_f32_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty, llvm_v3i32_ty, "cvt_scale_pk16_f32_fp6">;
+def int_amdgcn_cvt_scale_pk16_f32_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty, llvm_v3i32_ty, "cvt_scale_pk16_f32_bf6">;
+
+class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic<
+ [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+>, ClangBuiltin<"__builtin_amdgcn_"#name>;
+
def int_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_fp6_f16">;
def int_amdgcn_cvt_scalef32_pk32_bf6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_bf6_f16">;
+def int_amdgcn_cvt_scalef32_pk8_fp8_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_fp8_bf16">;
+def int_amdgcn_cvt_scalef32_pk8_bf8_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_bf8_bf16">;
def int_amdgcn_cvt_scalef32_pk32_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_fp6_bf16">;
def int_amdgcn_cvt_scalef32_pk32_bf6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_bf6_bf16">;
+def int_amdgcn_cvt_scalef32_pk8_fp8_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_fp8_f16">;
+def int_amdgcn_cvt_scalef32_pk8_bf8_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_bf8_f16">;
+def int_amdgcn_cvt_scalef32_pk8_fp8_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_fp8_f32">;
+def int_amdgcn_cvt_scalef32_pk8_bf8_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_bf8_f32">;
+def int_amdgcn_cvt_scalef32_pk8_fp4_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_fp4_f32">;
+def int_amdgcn_cvt_scalef32_pk8_fp4_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_fp4_f16">;
+def int_amdgcn_cvt_scalef32_pk8_fp4_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_fp4_bf16">;
+def int_amdgcn_cvt_scalef32_pk16_fp6_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_pk16_fp6_f32">;
+def int_amdgcn_cvt_scalef32_pk16_bf6_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_pk16_bf6_f32">;
+def int_amdgcn_cvt_scalef32_pk16_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_pk16_fp6_f16">;
+def int_amdgcn_cvt_scalef32_pk16_bf6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_pk16_bf6_f16">;
+def int_amdgcn_cvt_scalef32_pk16_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_pk16_fp6_bf16">;
+def int_amdgcn_cvt_scalef32_pk16_bf6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_pk16_bf6_bf16">;
+
+def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_fp6_f32">;
+def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_bf6_f32">;
+def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_fp6_f16">;
+def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_bf6_f16">;
+def int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_fp6_bf16">;
+def int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_bf6_bf16">;
+def int_amdgcn_cvt_scalef32_sr_pk8_fp8_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_sr_pk8_fp8_bf16">;
+def int_amdgcn_cvt_scalef32_sr_pk8_bf8_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_sr_pk8_bf8_bf16">;
+def int_amdgcn_cvt_scalef32_sr_pk8_fp8_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_sr_pk8_fp8_f16">;
+def int_amdgcn_cvt_scalef32_sr_pk8_bf8_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_sr_pk8_bf8_f16">;
+def int_amdgcn_cvt_scalef32_sr_pk8_fp8_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_sr_pk8_fp8_f32">;
+def int_amdgcn_cvt_scalef32_sr_pk8_bf8_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_sr_pk8_bf8_f32">;
+def int_amdgcn_cvt_scalef32_sr_pk8_fp4_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_i32_ty, llvm_v8f32_ty, "cvt_scalef32_sr_pk8_fp4_f32">;
+def int_amdgcn_cvt_scalef32_sr_pk8_fp4_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_i32_ty, llvm_v8f16_ty, "cvt_scalef32_sr_pk8_fp4_f16">;
+def int_amdgcn_cvt_scalef32_sr_pk8_fp4_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_i32_ty, llvm_v8bf16_ty, "cvt_scalef32_sr_pk8_fp4_bf16">;
+def int_amdgcn_cvt_scalef32_sr_pk16_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_sr_pk16_fp6_f32">;
+def int_amdgcn_cvt_scalef32_sr_pk16_bf6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_sr_pk16_bf6_f32">;
+def int_amdgcn_cvt_scalef32_sr_pk16_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_sr_pk16_fp6_f16">;
+def int_amdgcn_cvt_scalef32_sr_pk16_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_sr_pk16_bf6_f16">;
+def int_amdgcn_cvt_scalef32_sr_pk16_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_sr_pk16_fp6_bf16">;
+def int_amdgcn_cvt_scalef32_sr_pk16_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_sr_pk16_bf6_bf16">;
+
def int_amdgcn_cvt_scalef32_2xpk16_fp6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_fp6_f32">;
def int_amdgcn_cvt_scalef32_2xpk16_bf6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_bf6_f32">;
-def int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_bf6_bf16">;
-def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_bf6_f16">;
-def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_bf6_f32">;
-def int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_fp6_bf16">;
-def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_fp6_f16">;
-def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_fp6_f32">;
-
class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
[DstTy],
[llvm_i32_ty, // src
@@ -3473,6 +3557,12 @@ def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">,
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
+// llvm.amdgcn.cvt.pk.fp8.f32.e5m3 int vdst, float srcA, float srcB, int old, imm word_sel
+def int_amdgcn_cvt_pk_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32_e5m3">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+
// llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
// byte_sel selects byte to write into vdst.
def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">,
@@ -3486,6 +3576,12 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
+// llvm.amdgcn.cvt.sr.fp8.f32.e5m3 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
+def int_amdgcn_cvt_sr_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32_e5m3">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+
// llvm.amdgcn.cvt.off.fp32.i4 int srcA
def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">,
DefaultAttrsIntrinsic<[llvm_float_ty],
@@ -3597,6 +3693,48 @@ def int_amdgcn_sat_pk4_i4_i8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_i4_i8">,
def int_amdgcn_sat_pk4_u4_u8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_u4_u8">,
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
+// llvm.amdgcn.permlane.bcast <src0> <src1> <src2>
+def int_amdgcn_permlane_bcast : ClangBuiltin<"__builtin_amdgcn_permlane_bcast">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+
+// llvm.amdgcn.permlane.up <src0> <src1> <src2>
+def int_amdgcn_permlane_up : ClangBuiltin<"__builtin_amdgcn_permlane_up">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+
+// llvm.amdgcn.permlane.down <src0> <src1> <src2>
+def int_amdgcn_permlane_down : ClangBuiltin<"__builtin_amdgcn_permlane_down">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+
+// llvm.amdgcn.permlane.xor <src0> <src1> <src2>
+def int_amdgcn_permlane_xor : ClangBuiltin<"__builtin_amdgcn_permlane_xor">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+
+// llvm.amdgcn.permlane.idx.gen <src0> <src1>
+def int_amdgcn_permlane_idx_gen : ClangBuiltin<"__builtin_amdgcn_permlane_idx_gen">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+
+def int_amdgcn_perm_pk16_b4_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b4_u4">,
+ DefaultAttrsIntrinsic<[llvm_v2i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_amdgcn_perm_pk16_b6_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b6_u4">,
+ DefaultAttrsIntrinsic<[llvm_v3i32_ty], [llvm_i32_ty, llvm_i64_ty, llvm_v2i32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_amdgcn_perm_pk16_b8_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b8_u4">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_v2i32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index e63a41f..99f975f 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1717,6 +1717,16 @@ let TargetPrefix = "riscv" in {
llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>;
+ // Input: (pointer, offset, mask, vl)
+ def int_riscv_sseg # nf # _load_mask
+ : DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty],
+ !listsplat(LLVMMatchType<0>,
+ !add(nf, -1))),
+ [llvm_anyptr_ty, llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty],
+ [NoCapture<ArgIndex<0>>, IntrReadMem]>;
+
// Input: (<stored values>..., pointer, mask, vl)
def int_riscv_seg # nf # _store_mask
: DefaultAttrsIntrinsic<[],
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index d8e632b..323c478 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -542,21 +542,23 @@ private:
ArrayRef<SymbolResolution> Res, unsigned Partition,
bool InSummary);
- // These functions take a range of symbol resolutions [ResI, ResE) and consume
- // the resolutions used by a single input module by incrementing ResI. After
- // these functions return, [ResI, ResE) will refer to the resolution range for
- // the remaining modules in the InputFile.
- Error addModule(InputFile &Input, unsigned ModI,
- const SymbolResolution *&ResI, const SymbolResolution *ResE);
-
- Expected<RegularLTOState::AddedModule>
- addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
- const SymbolResolution *&ResI, const SymbolResolution *ResE);
+ // These functions take a range of symbol resolutions and consume the
+ // resolutions used by a single input module. Functions return ranges refering
+ // to the resolutions for the remaining modules in the InputFile.
+ Expected<ArrayRef<SymbolResolution>>
+ addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
+ unsigned ModI, ArrayRef<SymbolResolution> Res);
+
+ Expected<std::pair<RegularLTOState::AddedModule, ArrayRef<SymbolResolution>>>
+ addRegularLTO(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
+ BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
+ ArrayRef<SymbolResolution> Res);
Error linkRegularLTO(RegularLTOState::AddedModule Mod,
bool LivenessFromIndex);
- Error addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
- const SymbolResolution *&ResI, const SymbolResolution *ResE);
+ Expected<ArrayRef<SymbolResolution>>
+ addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
+ ArrayRef<SymbolResolution> Res);
Error runRegularLTO(AddStreamFn AddStream);
Error runThinLTO(AddStreamFn AddStream, FileCache Cache,
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index bfc1175..311d9ca 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -117,14 +117,13 @@ public:
void maybeAddReloc(const MCFragment &, const MCFixup &, const MCValue &,
uint64_t &Value, bool IsResolved);
- /// Determine if a relocation is required. In addition,
- /// Apply the \p Value for given \p Fixup into the provided data fragment, at
- /// the offset specified by the fixup and following the fixup kind as
- /// appropriate. Errors (such as an out of range fixup value) should be
- /// reported via \p Ctx.
+ // Determine if a relocation is required. In addition, apply `Value` to the
+ // `Data` fragment at the specified fixup offset if applicable. `Data` points
+ // to the first byte of the fixup offset, which may be at the content's end if
+ // the fixup is zero-sized.
virtual void applyFixup(const MCFragment &, const MCFixup &,
- const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsResolved) = 0;
+ const MCValue &Target, uint8_t *Data, uint64_t Value,
+ bool IsResolved) = 0;
/// @}
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index 4853701..ffde5ca2 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -99,8 +99,7 @@ private:
/// \param RecordReloc Record relocation if needed.
/// relocation.
bool evaluateFixup(const MCFragment &F, MCFixup &Fixup, MCValue &Target,
- uint64_t &Value, bool RecordReloc,
- MutableArrayRef<char> Contents) const;
+ uint64_t &Value, bool RecordReloc, uint8_t *Data) const;
/// Check whether a fixup can be satisfied, or whether it needs to be relaxed
/// (increased in size, in order to hold its value correctly).
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 5ce58ae..d69560c 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -69,6 +69,9 @@ protected:
/// Language Specific Data Area information is emitted to.
MCSection *LSDASection = nullptr;
+ /// Section containing call graph metadata.
+ MCSection *CallGraphSection = nullptr;
+
/// If exception handling is supported by the target and the target can
/// support a compact representation of the CIE and FDE, this is the section
/// to emit them into.
@@ -359,6 +362,8 @@ public:
MCSection *getFaultMapSection() const { return FaultMapSection; }
MCSection *getRemarksSection() const { return RemarksSection; }
+ MCSection *getCallGraphSection(const MCSection &TextSec) const;
+
MCSection *getStackSizesSection(const MCSection &TextSec) const;
MCSection *getBBAddrMapSection(const MCSection &TextSec) const;
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index 5ac7aba..eb875a7 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -52,7 +52,7 @@ class MCObjectStreamer : public MCStreamer {
DenseMap<const MCSymbol *, SmallVector<PendingAssignment, 1>>
pendingAssignments;
- SmallVector<std::unique_ptr<char[]>, 0> FragStorage;
+ SmallVector<std::unique_ptr<uint8_t[]>, 0> FragStorage;
// Available bytes in the current block for trailing data or new fragments.
size_t FragSpace = 0;
@@ -88,8 +88,8 @@ public:
// Add a fragment with a variable-size tail and start a new empty fragment.
void insert(MCFragment *F);
- char *getCurFragEnd() const {
- return reinterpret_cast<char *>(CurFrag + 1) + CurFrag->getFixedSize();
+ uint8_t *getCurFragEnd() const {
+ return reinterpret_cast<uint8_t *>(CurFrag + 1) + CurFrag->getFixedSize();
}
MCFragment *allocFragSpace(size_t Headroom);
// Add a new fragment to the current section without a variable-size tail.
@@ -97,7 +97,7 @@ public:
void ensureHeadroom(size_t Headroom);
void appendContents(ArrayRef<char> Contents);
- void appendContents(size_t Num, char Elt);
+ void appendContents(size_t Num, uint8_t Elt);
// Add a fixup to the current fragment. Call ensureHeadroom beforehand to
// ensure the fixup and appended content apply to the same fragment.
void addFixup(const MCExpr *Value, MCFixupKind Kind);
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 2e929d8..4022ea7 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -80,19 +80,22 @@ private:
FragmentType Kind;
-protected:
+ //== Used by certain fragment types for better packing.
+
+ // The number of fixups for the optional variable-size tail must be small.
+ uint8_t VarFixupSize = 0;
+
bool LinkerRelaxable : 1;
- /// Used by certain fragment types for better packing.
- ///
/// FT_Data, FT_Relaxable
bool HasInstructions : 1;
/// FT_Relaxable, x86-specific
bool AllowAutoPadding : 1;
// Track content and fixups for the fixed-size part as fragments are
- // appended to the section. The content remains immutable, except when
- // modified by applyFixup.
+ // appended to the section. The content is stored as trailing data of the
+ // MCFragment. The content remains immutable, except when modified by
+ // applyFixup.
uint32_t FixedSize = 0;
uint32_t FixupStart = 0;
uint32_t FixupEnd = 0;
@@ -102,7 +105,6 @@ protected:
uint32_t VarContentStart = 0;
uint32_t VarContentEnd = 0;
uint32_t VarFixupStart = 0;
- uint32_t VarFixupEnd = 0;
const MCSubtargetInfo *STI = nullptr;
@@ -296,13 +298,8 @@ public:
}
};
-/// Interface implemented by fragments that contain encoded instructions and/or
-/// data.
-class MCEncodedFragment : public MCFragment {
-protected:
- MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions)
- : MCFragment(FType, HasInstructions) {}
-};
+// MCFragment subclasses do not use the fixed-size part or variable-size tail of
+// MCFragment. Instead, they encode content in a specialized way.
class MCFillFragment : public MCFragment {
uint8_t ValueSize;
@@ -318,7 +315,7 @@ class MCFillFragment : public MCFragment {
public:
MCFillFragment(uint64_t Value, uint8_t VSize, const MCExpr &NumValues,
SMLoc Loc)
- : MCFragment(FT_Fill, false), ValueSize(VSize), Value(Value),
+ : MCFragment(FT_Fill), ValueSize(VSize), Value(Value),
NumValues(NumValues), Loc(Loc) {}
uint64_t getValue() const { return Value; }
@@ -349,7 +346,7 @@ class MCNopsFragment : public MCFragment {
public:
MCNopsFragment(int64_t NumBytes, int64_t ControlledNopLength, SMLoc L,
const MCSubtargetInfo &STI)
- : MCFragment(FT_Nops, false), Size(NumBytes),
+ : MCFragment(FT_Nops), Size(NumBytes),
ControlledNopLength(ControlledNopLength), Loc(L), STI(STI) {}
int64_t getNumBytes() const { return Size; }
@@ -376,7 +373,7 @@ class MCOrgFragment : public MCFragment {
public:
MCOrgFragment(const MCExpr &Offset, int8_t Value, SMLoc Loc)
- : MCFragment(FT_Org, false), Value(Value), Offset(&Offset), Loc(Loc) {}
+ : MCFragment(FT_Org), Value(Value), Offset(&Offset), Loc(Loc) {}
const MCExpr &getOffset() const { return *Offset; }
@@ -394,8 +391,7 @@ class MCSymbolIdFragment : public MCFragment {
const MCSymbol *Sym;
public:
- MCSymbolIdFragment(const MCSymbol *Sym)
- : MCFragment(FT_SymbolId, false), Sym(Sym) {}
+ MCSymbolIdFragment(const MCSymbol *Sym) : MCFragment(FT_SymbolId), Sym(Sym) {}
const MCSymbol *getSymbol() { return Sym; }
const MCSymbol *getSymbol() const { return Sym; }
@@ -407,7 +403,7 @@ public:
/// Fragment representing the binary annotations produced by the
/// .cv_inline_linetable directive.
-class MCCVInlineLineTableFragment : public MCEncodedFragment {
+class MCCVInlineLineTableFragment : public MCFragment {
unsigned SiteFuncId;
unsigned StartFileId;
unsigned StartLineNum;
@@ -422,7 +418,7 @@ public:
MCCVInlineLineTableFragment(unsigned SiteFuncId, unsigned StartFileId,
unsigned StartLineNum, const MCSymbol *FnStartSym,
const MCSymbol *FnEndSym)
- : MCEncodedFragment(FT_CVInlineLines, false), SiteFuncId(SiteFuncId),
+ : MCFragment(FT_CVInlineLines), SiteFuncId(SiteFuncId),
StartFileId(StartFileId), StartLineNum(StartLineNum),
FnStartSym(FnStartSym), FnEndSym(FnEndSym) {}
@@ -435,7 +431,7 @@ public:
};
/// Fragment representing the .cv_def_range directive.
-class MCCVDefRangeFragment : public MCEncodedFragment {
+class MCCVDefRangeFragment : public MCFragment {
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
StringRef FixedSizePortion;
@@ -447,8 +443,7 @@ public:
MCCVDefRangeFragment(
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion)
- : MCEncodedFragment(FT_CVDefRange, false),
- Ranges(Ranges.begin(), Ranges.end()),
+ : MCFragment(FT_CVDefRange), Ranges(Ranges.begin(), Ranges.end()),
FixedSizePortion(FixedSizePortion) {}
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> getRanges() const {
@@ -479,8 +474,7 @@ class MCBoundaryAlignFragment : public MCFragment {
public:
MCBoundaryAlignFragment(Align AlignBoundary, const MCSubtargetInfo &STI)
- : MCFragment(FT_BoundaryAlign, false), AlignBoundary(AlignBoundary),
- STI(STI) {}
+ : MCFragment(FT_BoundaryAlign), AlignBoundary(AlignBoundary), STI(STI) {}
uint64_t getSize() const { return Size; }
void setSize(uint64_t Value) { Size = Value; }
@@ -650,11 +644,10 @@ inline ArrayRef<MCFixup> MCFragment::getFixups() const {
inline MutableArrayRef<MCFixup> MCFragment::getVarFixups() {
return MutableArrayRef(getParent()->FixupStorage)
- .slice(VarFixupStart, VarFixupEnd - VarFixupStart);
+ .slice(VarFixupStart, VarFixupSize);
}
inline ArrayRef<MCFixup> MCFragment::getVarFixups() const {
- return ArrayRef(getParent()->FixupStorage)
- .slice(VarFixupStart, VarFixupEnd - VarFixupStart);
+ return ArrayRef(getParent()->FixupStorage).slice(VarFixupStart, VarFixupSize);
}
//== FT_Relaxable functions
diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h
index e883f2f..3bf8c29 100644
--- a/llvm/include/llvm/ObjectYAML/ELFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -117,7 +117,7 @@ struct FileHeader {
llvm::yaml::Hex8 ABIVersion;
ELF_ET Type;
std::optional<ELF_EM> Machine;
- ELF_EF Flags;
+ std::optional<ELF_EF> Flags;
llvm::yaml::Hex64 Entry;
std::optional<StringRef> SectionHeaderStringTable;
diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc
index 3f785bd..26baddd 100644
--- a/llvm/include/llvm/ProfileData/MemProfData.inc
+++ b/llvm/include/llvm/ProfileData/MemProfData.inc
@@ -33,11 +33,10 @@
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
// The version number of the raw binary format.
-#define MEMPROF_RAW_VERSION 4ULL
+#define MEMPROF_RAW_VERSION 5ULL
// Currently supported versions.
-#define MEMPROF_RAW_SUPPORTED_VERSIONS \
- { 3ULL, 4ULL }
+#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL}
#define MEMPROF_V3_MIB_SIZE 132ULL;
@@ -229,6 +228,41 @@ void Merge(const MemInfoBlock &newMIB) {
} __attribute__((__packed__));
#endif
+constexpr int MantissaBits = 12;
+constexpr int ExponentBits = 4;
+constexpr uint16_t MaxMantissa = (1U << MantissaBits) - 1;
+constexpr uint16_t MaxExponent = (1U << ExponentBits) - 1;
+constexpr uint64_t MaxRepresentableValue = static_cast<uint64_t>(MaxMantissa)
+ << MaxExponent;
+
+// Encodes a 64-bit unsigned integer into a 16-bit scaled integer format.
+inline uint16_t encodeHistogramCount(uint64_t Count) {
+ if (Count == 0)
+ return 0;
+
+ if (Count > MaxRepresentableValue)
+ Count = MaxRepresentableValue;
+
+ if (Count <= MaxMantissa)
+ return Count;
+
+ uint64_t M = Count;
+ uint16_t E = 0;
+ while (M > MaxMantissa) {
+ M = (M + 1) >> 1;
+ E++;
+ }
+ return (E << MantissaBits) | static_cast<uint16_t>(M);
+}
+
+// Decodes a 16-bit scaled integer and returns the
+// decoded 64-bit unsigned integer.
+inline uint64_t decodeHistogramCount(uint16_t EncodedValue) {
+ const uint16_t E = EncodedValue >> MantissaBits;
+ const uint16_t M = EncodedValue & MaxMantissa;
+ return static_cast<uint64_t>(M) << E;
+}
+
} // namespace memprof
} // namespace llvm
diff --git a/llvm/include/llvm/Support/DebugLog.h b/llvm/include/llvm/Support/DebugLog.h
index 8fca2d5..a331295 100644
--- a/llvm/include/llvm/Support/DebugLog.h
+++ b/llvm/include/llvm/Support/DebugLog.h
@@ -61,8 +61,10 @@ namespace llvm {
for (bool _c = \
(::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE, LEVEL)); \
_c; _c = false) \
+ for (::llvm::impl::RAIINewLineStream NewLineStream{(STREAM)}; _c; \
+ _c = false) \
::llvm::impl::raw_ldbg_ostream{ \
- ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), (STREAM)} \
+ ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), NewLineStream} \
.asLvalue()
#define DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, FILE) \
@@ -81,14 +83,15 @@ namespace llvm {
namespace impl {
-/// A raw_ostream that tracks `\n` and print the prefix.
+/// A raw_ostream that tracks `\n` and print the prefix after each
+/// newline.
class LLVM_ABI raw_ldbg_ostream final : public raw_ostream {
std::string Prefix;
raw_ostream &Os;
- bool HasPendingNewline = true;
+ bool HasPendingNewline;
- /// Split the line on newlines and insert the prefix before each newline.
- /// Forward everything to the underlying stream.
+ /// Split the line on newlines and insert the prefix before each
+ /// newline. Forward everything to the underlying stream.
void write_impl(const char *Ptr, size_t Size) final {
auto Str = StringRef(Ptr, Size);
// Handle the initial prefix.
@@ -109,22 +112,18 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream {
}
void emitPrefix() { Os.write(Prefix.c_str(), Prefix.size()); }
void writeWithPrefix(StringRef Str) {
- if (HasPendingNewline) {
- emitPrefix();
- HasPendingNewline = false;
- }
+ flushEol();
Os.write(Str.data(), Str.size());
}
public:
- explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os)
- : Prefix(std::move(Prefix)), Os(Os) {
+ explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os,
+ bool HasPendingNewline = true)
+ : Prefix(std::move(Prefix)), Os(Os),
+ HasPendingNewline(HasPendingNewline) {
SetUnbuffered();
}
- ~raw_ldbg_ostream() final {
- flushEol();
- Os << '\n';
- }
+ ~raw_ldbg_ostream() final { flushEol(); }
void flushEol() {
if (HasPendingNewline) {
emitPrefix();
@@ -135,10 +134,22 @@ public:
/// Forward the current_pos method to the underlying stream.
uint64_t current_pos() const final { return Os.tell(); }
- /// Some of the `<<` operators expect an lvalue, so we trick the type system.
+ /// Some of the `<<` operators expect an lvalue, so we trick the type
+ /// system.
raw_ldbg_ostream &asLvalue() { return *this; }
};
+/// A raw_ostream that prints a newline on destruction, useful for LDBG()
+class RAIINewLineStream final : public raw_ostream {
+ raw_ostream &Os;
+
+public:
+ RAIINewLineStream(raw_ostream &Os) : Os(Os) { SetUnbuffered(); }
+ ~RAIINewLineStream() { Os << '\n'; }
+ void write_impl(const char *Ptr, size_t Size) final { Os.write(Ptr, Size); }
+ uint64_t current_pos() const final { return Os.tell(); }
+};
+
/// Remove the path prefix from the file name.
static LLVM_ATTRIBUTE_UNUSED constexpr const char *
getShortFileName(const char *path) {
diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index 734b795..d976773 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -1069,7 +1069,7 @@ public:
/// Redirect each of the remapped files from first to second.
static std::unique_ptr<RedirectingFileSystem>
create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles,
- bool UseExternalNames, FileSystem &ExternalFS);
+ bool UseExternalNames, IntrusiveRefCntPtr<FileSystem> ExternalFS);
ErrorOr<Status> status(const Twine &Path) override;
bool exists(const Twine &Path) override;
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 719c0ee..e57032a 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6494,7 +6494,7 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
}
constexpr static const std::optional<TypeSize> HasNoAllocationSize =
- std::optional<TypeSize>(TypeSize(-1, true));
+ std::make_optional<TypeSize>(-1, true);
LLVM_ABI static const char ID;
};
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index cba3736..43ff084 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -400,19 +400,11 @@ public:
/// Returns true if the loop has exactly one uncountable early exit, i.e. an
/// uncountable exit that isn't the latch block.
- bool hasUncountableEarlyExit() const {
- return getUncountableEdge().has_value();
- }
+ bool hasUncountableEarlyExit() const { return UncountableExitingBB; }
/// Returns the uncountable early exiting block, if there is exactly one.
BasicBlock *getUncountableEarlyExitingBlock() const {
- return hasUncountableEarlyExit() ? getUncountableEdge()->first : nullptr;
- }
-
- /// Returns the destination of the uncountable early exiting block, if there
- /// is exactly one.
- BasicBlock *getUncountableEarlyExitBlock() const {
- return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
+ return UncountableExitingBB;
}
/// Return true if there is store-load forwarding dependencies.
@@ -473,13 +465,6 @@ public:
return CountableExitingBlocks;
}
- /// Returns the loop edge to an uncountable exit, or std::nullopt if there
- /// isn't a single such edge.
- std::optional<std::pair<BasicBlock *, BasicBlock *>>
- getUncountableEdge() const {
- return UncountableEdge;
- }
-
private:
/// Return true if the pre-header, exiting and latch blocks of \p Lp and all
/// its nested loops are considered legal for vectorization. These legal
@@ -659,9 +644,9 @@ private:
/// the exact backedge taken count is not computable.
SmallVector<BasicBlock *, 4> CountableExitingBlocks;
- /// Keep track of the loop edge to an uncountable exit, comprising a pair
- /// of (Exiting, Exit) blocks, if there is exactly one early exit.
- std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
+ /// Keep track of an uncountable exiting block, if there is exactly one early
+ /// exit.
+ BasicBlock *UncountableExitingBB = nullptr;
};
} // namespace llvm