Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Analysis/InstructionSimplify.cpp | 92
-rw-r--r-- llvm/lib/AsmParser/LLParser.cpp | 1
-rw-r--r-- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/CodeGen.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/CommandFlags.cpp | 9
-rw-r--r-- llvm/lib/CodeGen/MIR2Vec.cpp | 166
-rw-r--r-- llvm/lib/CodeGen/MachineLICM.cpp | 18
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 15
-rw-r--r-- llvm/lib/CodeGen/TargetOptionsImpl.cpp | 2
-rw-r--r-- llvm/lib/IR/AutoUpgrade.cpp | 114
-rw-r--r-- llvm/lib/Linker/IRMover.cpp | 10
-rw-r--r-- llvm/lib/Support/AllocToken.cpp | 50
-rw-r--r-- llvm/lib/Support/CMakeLists.txt | 1
-rw-r--r-- llvm/lib/Support/SpecialCaseList.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8
-rw-r--r-- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 2
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 5
-rw-r--r-- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 15
-rw-r--r-- llvm/lib/Target/ARM/ARMSubtarget.h | 1
-rw-r--r-- llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 10
-rw-r--r-- llvm/lib/Target/ARM/Thumb2InstrInfo.h | 3
-rw-r--r-- llvm/lib/Target/DirectX/DXILPrepare.cpp | 8
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 40
-rw-r--r-- llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 3
-rw-r--r-- llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 5
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp | 229
-rw-r--r-- llvm/lib/Target/RISCV/RISCVFeatures.td | 41
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 25
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.h | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 70
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 80
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 8
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 50
-rw-r--r-- llvm/lib/Target/TargetMachine.cpp | 1
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 7
-rw-r--r-- llvm/lib/TargetParser/ARMTargetParser.cpp | 6
-rw-r--r-- llvm/lib/TargetParser/RISCVISAInfo.cpp | 6
-rw-r--r-- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 2
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 30
-rw-r--r-- llvm/lib/Transforms/Instrumentation/AllocToken.cpp | 130
-rw-r--r-- llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp | 1
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 34
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.h | 26
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 27
49 files changed, 1109 insertions, 258 deletions
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 8da51d0..b573023 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4866,6 +4866,89 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
return nullptr;
}
+/// Look for the following pattern and simplify %to_fold to %identicalPhi.
+/// Here %phi, %to_fold and %phi.next all compute the same value as
+/// %identicalPhi, and hence the select instruction %to_fold can be folded
+/// into %identicalPhi.
+///
+/// BB1:
+/// %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ]
+/// %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
+/// ...
+/// %identicalPhi.next = select %cmp, %val, %identicalPhi
+/// (or select %cmp, %identicalPhi, %val)
+/// %to_fold = select %cmp2, %identicalPhi, %phi
+/// %phi.next = select %cmp, %val, %to_fold
+/// (or select %cmp, %to_fold, %val)
+///
+/// Prove that %phi and %identicalPhi are the same by induction:
+///
+/// Base case: Both %phi and %identicalPhi are equal on entry to the loop.
+/// Inductive case:
+/// Suppose %phi and %identicalPhi are equal at iteration i.
+///  Consider their values at iteration i+1, namely %phi.next and
+///  %identicalPhi.next. They could become different only if %cmp is false
+///  and the corresponding operands %to_fold and %identicalPhi differ (the
+///  reasoning for the parenthesized "or" forms is analogous).
+///
+/// %to_fold could differ from %identicalPhi only when %cmp2 is false, in
+/// which case %to_fold is %phi, which by the inductive hypothesis equals
+/// %identicalPhi, so no difference can arise. Thus %phi and %identicalPhi
+/// are always equal at iteration i+1.
+bool isSimplifierIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) {
+ if (PN.getParent() != IdenticalPN.getParent())
+ return false;
+ if (PN.getNumIncomingValues() != 2)
+ return false;
+
+ // Check that only the backedge incoming value is different.
+ unsigned DiffVals = 0;
+ BasicBlock *DiffValBB = nullptr;
+ for (unsigned i = 0; i < 2; i++) {
+ BasicBlock *PredBB = PN.getIncomingBlock(i);
+ if (PN.getIncomingValueForBlock(PredBB) !=
+ IdenticalPN.getIncomingValueForBlock(PredBB)) {
+ DiffVals++;
+ DiffValBB = PredBB;
+ }
+ }
+ if (DiffVals != 1)
+ return false;
+ // Now check that the backedge incoming values are two select
+ // instructions with the same condition. Either their true
+ // values are the same, or their false values are the same.
+ auto *SI = dyn_cast<SelectInst>(PN.getIncomingValueForBlock(DiffValBB));
+ auto *IdenticalSI =
+ dyn_cast<SelectInst>(IdenticalPN.getIncomingValueForBlock(DiffValBB));
+ if (!SI || !IdenticalSI)
+ return false;
+ if (SI->getCondition() != IdenticalSI->getCondition())
+ return false;
+
+ SelectInst *SIOtherVal = nullptr;
+ Value *IdenticalSIOtherVal = nullptr;
+ if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
+ SIOtherVal = dyn_cast<SelectInst>(SI->getFalseValue());
+ IdenticalSIOtherVal = IdenticalSI->getFalseValue();
+ } else if (SI->getFalseValue() == IdenticalSI->getFalseValue()) {
+ SIOtherVal = dyn_cast<SelectInst>(SI->getTrueValue());
+ IdenticalSIOtherVal = IdenticalSI->getTrueValue();
+ } else {
+ return false;
+ }
+
+ // Now check that the other values in select, i.e., %to_fold and
+ // %identicalPhi, are essentially the same value.
+ if (!SIOtherVal || IdenticalSIOtherVal != &IdenticalPN)
+ return false;
+ if (!(SIOtherVal->getTrueValue() == &IdenticalPN &&
+ SIOtherVal->getFalseValue() == &PN) &&
+ !(SIOtherVal->getTrueValue() == &PN &&
+ SIOtherVal->getFalseValue() == &IdenticalPN))
+ return false;
+ return true;
+}
+
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -5041,7 +5124,14 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
std::optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL);
if (Imp)
return *Imp ? TrueVal : FalseVal;
-
+  // Look for identical PHIs in the true and false values.
+ if (auto *TruePHI = dyn_cast<PHINode>(TrueVal))
+ if (auto *FalsePHI = dyn_cast<PHINode>(FalseVal)) {
+ if (isSimplifierIdenticalPHI(*TruePHI, *FalsePHI))
+ return FalseVal;
+ if (isSimplifierIdenticalPHI(*FalsePHI, *TruePHI))
+ return TrueVal;
+ }
return nullptr;
}
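
For illustration (not part of the patch): the following self-contained sketch builds the pattern documented above with IRBuilder and asks InstructionSimplify about %to_fold. All names are hypothetical and mirror the doc comment; with this fold in place, simplifyInstruction is expected to return the provably-equal PHI.

```cpp
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Returns true if %to_fold simplifies (expected with this patch).
static bool simplifiesIdenticalPhiSelect(LLVMContext &Ctx) {
  Module M("demo", Ctx);
  auto *I32 = Type::getInt32Ty(Ctx);
  auto *I1 = Type::getInt1Ty(Ctx);
  auto *FT = FunctionType::get(I32, {I32, I32, I1, I1}, /*isVarArg=*/false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "f", M);
  Value *X = F->getArg(0), *Val = F->getArg(1);
  Value *Cmp = F->getArg(2), *Cmp2 = F->getArg(3);

  auto *BB0 = BasicBlock::Create(Ctx, "BB0", F);
  auto *BB1 = BasicBlock::Create(Ctx, "BB1", F);
  auto *Exit = BasicBlock::Create(Ctx, "exit", F);

  IRBuilder<> B(BB0);
  B.CreateBr(BB1);

  // Build the documented pattern inside the loop block BB1.
  B.SetInsertPoint(BB1);
  PHINode *IdenticalPhi = B.CreatePHI(I32, 2, "identicalPhi");
  PHINode *Phi = B.CreatePHI(I32, 2, "phi");
  Value *IdenticalNext =
      B.CreateSelect(Cmp, Val, IdenticalPhi, "identicalPhi.next");
  Value *ToFold = B.CreateSelect(Cmp2, IdenticalPhi, Phi, "to_fold");
  Value *PhiNext = B.CreateSelect(Cmp, Val, ToFold, "phi.next");
  B.CreateCondBr(Cmp, BB1, Exit);

  B.SetInsertPoint(Exit);
  B.CreateRet(Phi);

  IdenticalPhi->addIncoming(X, BB0);
  IdenticalPhi->addIncoming(IdenticalNext, BB1);
  Phi->addIncoming(X, BB0);
  Phi->addIncoming(PhiNext, BB1);

  // %to_fold should fold to the identical PHI.
  return simplifyInstruction(cast<Instruction>(ToFold),
                             SimplifyQuery(M.getDataLayout())) != nullptr;
}
```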
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index cf63285..f71a534 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -451,6 +451,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
UpgradeModuleFlags(*M);
UpgradeNVVMAnnotations(*M);
UpgradeSectionAttributes(*M);
+ copyModuleAttrToFunctions(*M);
if (!Slots)
return false;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index aaee1f0..cf7efbfa 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7143,6 +7143,8 @@ Error BitcodeReader::materializeModule() {
UpgradeARCRuntime(*TheModule);
+ copyModuleAttrToFunctions(*TheModule);
+
return Error::success();
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e2af0c5..fefde64f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1438,6 +1438,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
BBFreqEnabled,
BrProbEnabled,
MF.hasBBSections() && NumMBBSectionRanges > 1,
+      // Use static_cast to avoid breaking tests on Windows.
static_cast<bool>(BBAddrMapSkipEmitBBEntries),
HasCalls,
false};
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index c438eae..9795a0b 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -98,6 +98,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineUniformityAnalysisPassPass(Registry);
initializeMIR2VecVocabLegacyAnalysisPass(Registry);
initializeMIR2VecVocabPrinterLegacyPassPass(Registry);
+ initializeMIR2VecPrinterLegacyPassPass(Registry);
initializeMachineUniformityInfoPrinterPassPass(Registry);
initializeMachineVerifierLegacyPassPass(Registry);
initializeObjCARCContractLegacyPassPass(Registry);
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index 0522698..c1365f4 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -64,7 +64,6 @@ CGOPT_EXP(uint64_t, LargeDataThreshold)
CGOPT(ExceptionHandling, ExceptionModel)
CGOPT_EXP(CodeGenFileType, FileType)
CGOPT(FramePointerKind, FramePointerUsage)
-CGOPT(bool, EnableUnsafeFPMath)
CGOPT(bool, EnableNoInfsFPMath)
CGOPT(bool, EnableNoNaNsFPMath)
CGOPT(bool, EnableNoSignedZerosFPMath)
@@ -219,12 +218,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"Enable frame pointer elimination")));
CGBINDOPT(FramePointerUsage);
- static cl::opt<bool> EnableUnsafeFPMath(
- "enable-unsafe-fp-math",
- cl::desc("Enable optimizations that may decrease FP precision"),
- cl::init(false));
- CGBINDOPT(EnableUnsafeFPMath);
-
static cl::opt<bool> EnableNoInfsFPMath(
"enable-no-infs-fp-math",
cl::desc("Enable FP math optimizations that assume no +-Infs"),
@@ -552,7 +545,6 @@ TargetOptions
codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
TargetOptions Options;
Options.AllowFPOpFusion = getFuseFPOps();
- Options.UnsafeFPMath = getEnableUnsafeFPMath();
Options.NoInfsFPMath = getEnableNoInfsFPMath();
Options.NoNaNsFPMath = getEnableNoNaNsFPMath();
Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath();
@@ -706,7 +698,6 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
if (getStackRealign())
NewAttrs.addAttribute("stackrealign");
- HANDLE_BOOL_ATTR(EnableUnsafeFPMathView, "unsafe-fp-math");
HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math");
HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math");
HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math");
diff --git a/llvm/lib/CodeGen/MIR2Vec.cpp b/llvm/lib/CodeGen/MIR2Vec.cpp
index 5c78d98..99be1fc0 100644
--- a/llvm/lib/CodeGen/MIR2Vec.cpp
+++ b/llvm/lib/CodeGen/MIR2Vec.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MIR2Vec.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Module.h"
@@ -29,20 +30,30 @@ using namespace mir2vec;
STATISTIC(MIRVocabMissCounter,
"Number of lookups to MIR entities not present in the vocabulary");
-cl::OptionCategory llvm::mir2vec::MIR2VecCategory("MIR2Vec Options");
+namespace llvm {
+namespace mir2vec {
+cl::OptionCategory MIR2VecCategory("MIR2Vec Options");
// FIXME: Use a default vocab when not specified
static cl::opt<std::string>
VocabFile("mir2vec-vocab-path", cl::Optional,
cl::desc("Path to the vocabulary file for MIR2Vec"), cl::init(""),
cl::cat(MIR2VecCategory));
-cl::opt<float>
- llvm::mir2vec::OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0),
- cl::desc("Weight for machine opcode embeddings"),
- cl::cat(MIR2VecCategory));
+cl::opt<float> OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0),
+ cl::desc("Weight for machine opcode embeddings"),
+ cl::cat(MIR2VecCategory));
+cl::opt<MIR2VecKind> MIR2VecEmbeddingKind(
+ "mir2vec-kind", cl::Optional,
+ cl::values(clEnumValN(MIR2VecKind::Symbolic, "symbolic",
+ "Generate symbolic embeddings for MIR")),
+ cl::init(MIR2VecKind::Symbolic), cl::desc("MIR2Vec embedding kind"),
+ cl::cat(MIR2VecCategory));
+
+} // namespace mir2vec
+} // namespace llvm
//===----------------------------------------------------------------------===//
-// Vocabulary Implementation
+// Vocabulary
//===----------------------------------------------------------------------===//
MIRVocabulary::MIRVocabulary(VocabMap &&OpcodeEntries,
@@ -188,6 +199,28 @@ void MIRVocabulary::buildCanonicalOpcodeMapping() {
<< " unique base opcodes\n");
}
+Expected<MIRVocabulary>
+MIRVocabulary::createDummyVocabForTest(const TargetInstrInfo &TII,
+ unsigned Dim) {
+ assert(Dim > 0 && "Dimension must be greater than zero");
+
+ float DummyVal = 0.1f;
+
+ // Create dummy embeddings for all canonical opcode names
+ VocabMap DummyVocabMap;
+ for (unsigned Opcode = 0; Opcode < TII.getNumOpcodes(); ++Opcode) {
+ std::string BaseOpcode = extractBaseOpcodeName(TII.getName(Opcode));
+ if (DummyVocabMap.count(BaseOpcode) == 0) {
+ // Only add if not already present
+ DummyVocabMap[BaseOpcode] = Embedding(Dim, DummyVal);
+ DummyVal += 0.1f;
+ }
+ }
+
+ // Create and return vocabulary with dummy embeddings
+ return MIRVocabulary::create(std::move(DummyVocabMap), TII);
+}
+
//===----------------------------------------------------------------------===//
// MIR2VecVocabLegacyAnalysis Implementation
//===----------------------------------------------------------------------===//
@@ -258,7 +291,73 @@ MIR2VecVocabLegacyAnalysis::getMIR2VecVocabulary(const Module &M) {
}
//===----------------------------------------------------------------------===//
-// Printer Passes Implementation
+// MIREmbedder and its subclasses
+//===----------------------------------------------------------------------===//
+
+std::unique_ptr<MIREmbedder> MIREmbedder::create(MIR2VecKind Mode,
+ const MachineFunction &MF,
+ const MIRVocabulary &Vocab) {
+ switch (Mode) {
+ case MIR2VecKind::Symbolic:
+ return std::make_unique<SymbolicMIREmbedder>(MF, Vocab);
+ }
+ return nullptr;
+}
+
+Embedding MIREmbedder::computeEmbeddings(const MachineBasicBlock &MBB) const {
+ Embedding MBBVector(Dimension, 0);
+
+ // Get instruction info for opcode name resolution
+ const auto &Subtarget = MF.getSubtarget();
+ const auto *TII = Subtarget.getInstrInfo();
+ if (!TII) {
+ MF.getFunction().getContext().emitError(
+ "MIR2Vec: No TargetInstrInfo available; cannot compute embeddings");
+ return MBBVector;
+ }
+
+ // Process each machine instruction in the basic block
+ for (const auto &MI : MBB) {
+ // Skip debug instructions and other metadata
+ if (MI.isDebugInstr())
+ continue;
+ MBBVector += computeEmbeddings(MI);
+ }
+
+ return MBBVector;
+}
+
+Embedding MIREmbedder::computeEmbeddings() const {
+ Embedding MFuncVector(Dimension, 0);
+
+ // Consider all reachable machine basic blocks in the function
+ for (const auto *MBB : depth_first(&MF))
+ MFuncVector += computeEmbeddings(*MBB);
+ return MFuncVector;
+}
+
+SymbolicMIREmbedder::SymbolicMIREmbedder(const MachineFunction &MF,
+ const MIRVocabulary &Vocab)
+ : MIREmbedder(MF, Vocab) {}
+
+std::unique_ptr<SymbolicMIREmbedder>
+SymbolicMIREmbedder::create(const MachineFunction &MF,
+ const MIRVocabulary &Vocab) {
+ return std::make_unique<SymbolicMIREmbedder>(MF, Vocab);
+}
+
+Embedding SymbolicMIREmbedder::computeEmbeddings(const MachineInstr &MI) const {
+ // Skip debug instructions and other metadata
+ if (MI.isDebugInstr())
+ return Embedding(Dimension, 0);
+
+  // TODO: Add operand/argument contributions
+
+ return Vocab[MI.getOpcode()];
+}
+
+//===----------------------------------------------------------------------===//
+// Printer Passes
//===----------------------------------------------------------------------===//
char MIR2VecVocabPrinterLegacyPass::ID = 0;
@@ -297,3 +396,56 @@ MachineFunctionPass *
llvm::createMIR2VecVocabPrinterLegacyPass(raw_ostream &OS) {
return new MIR2VecVocabPrinterLegacyPass(OS);
}
+
+char MIR2VecPrinterLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(MIR2VecPrinterLegacyPass, "print-mir2vec",
+ "MIR2Vec Embedder Printer Pass", false, true)
+INITIALIZE_PASS_DEPENDENCY(MIR2VecVocabLegacyAnalysis)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(MIR2VecPrinterLegacyPass, "print-mir2vec",
+ "MIR2Vec Embedder Printer Pass", false, true)
+
+bool MIR2VecPrinterLegacyPass::runOnMachineFunction(MachineFunction &MF) {
+ auto &Analysis = getAnalysis<MIR2VecVocabLegacyAnalysis>();
+ auto VocabOrErr =
+ Analysis.getMIR2VecVocabulary(*MF.getFunction().getParent());
+ assert(VocabOrErr && "Failed to get MIR2Vec vocabulary");
+ auto &MIRVocab = *VocabOrErr;
+
+ auto Emb = mir2vec::MIREmbedder::create(MIR2VecEmbeddingKind, MF, MIRVocab);
+ if (!Emb) {
+ OS << "Error creating MIR2Vec embeddings for function " << MF.getName()
+ << "\n";
+ return false;
+ }
+
+ OS << "MIR2Vec embeddings for machine function " << MF.getName() << ":\n";
+ OS << "Machine Function vector: ";
+ Emb->getMFunctionVector().print(OS);
+
+ OS << "Machine basic block vectors:\n";
+ for (const MachineBasicBlock &MBB : MF) {
+ OS << "Machine basic block: " << MBB.getFullName() << ":\n";
+ Emb->getMBBVector(MBB).print(OS);
+ }
+
+ OS << "Machine instruction vectors:\n";
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+      // Skip debug instructions as they are not embedded
+ if (MI.isDebugInstr())
+ continue;
+
+ OS << "Machine instruction: ";
+ MI.print(OS);
+ Emb->getMInstVector(MI).print(OS);
+ }
+ }
+
+ return false;
+}
+
+MachineFunctionPass *llvm::createMIR2VecPrinterLegacyPass(raw_ostream &OS) {
+ return new MIR2VecPrinterLegacyPass(OS);
+}
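
For illustration, a minimal sketch of how the embedder API added above might be driven, assuming a MachineFunction and MIRVocabulary are already in hand; the helper function and its namespace qualifications are assumptions inferred from the using-directives above, not part of the patch.

```cpp
#include "llvm/CodeGen/MIR2Vec.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper, not part of the patch.
void printFunctionEmbedding(const MachineFunction &MF,
                            const mir2vec::MIRVocabulary &Vocab,
                            raw_ostream &OS) {
  // Symbolic is currently the only MIR2VecKind; create() returns nullptr
  // for unhandled kinds.
  auto Emb = mir2vec::MIREmbedder::create(MIR2VecKind::Symbolic, MF, Vocab);
  if (!Emb)
    return;
  OS << "Machine Function vector: ";
  Emb->getMFunctionVector().print(OS);
}
```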
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 7acddff..729e73c 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -932,12 +932,11 @@ void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI,
bool ConsiderUnseenAsDef) {
auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
- for (const auto &RPIdAndCost : Cost) {
- unsigned Class = RPIdAndCost.first;
- if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
+ for (const auto &[Class, Weight] : Cost) {
+ if (static_cast<int>(RegPressure[Class]) < -Weight)
RegPressure[Class] = 0;
else
- RegPressure[Class] += RPIdAndCost.second;
+ RegPressure[Class] += Weight;
}
}
@@ -1215,11 +1214,10 @@ bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {
/// given cost matrix can cause high register pressure.
bool MachineLICMImpl::CanCauseHighRegPressure(
const SmallDenseMap<unsigned, int> &Cost, bool CheapInstr) {
- for (const auto &RPIdAndCost : Cost) {
- if (RPIdAndCost.second <= 0)
+ for (const auto &[Class, Weight] : Cost) {
+ if (Weight <= 0)
continue;
- unsigned Class = RPIdAndCost.first;
int Limit = RegLimit[Class];
// Don't hoist cheap instructions if they would increase register pressure,
@@ -1228,7 +1226,7 @@ bool MachineLICMImpl::CanCauseHighRegPressure(
return true;
for (const auto &RP : BackTrace)
- if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
+ if (static_cast<int>(RP[Class]) + Weight >= Limit)
return true;
}
@@ -1246,8 +1244,8 @@ void MachineLICMImpl::UpdateBackTraceRegPressure(const MachineInstr *MI) {
// Update register pressure of blocks from loop header to current block.
for (auto &RP : BackTrace)
- for (const auto &RPIdAndCost : Cost)
- RP[RPIdAndCost.first] += RPIdAndCost.second;
+ for (const auto &[Class, Weight] : Cost)
+ RP[Class] += Weight;
}
/// Return true if it is potentially profitable to hoist the given loop
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 437d0f4..bf1abfe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3765,6 +3765,8 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::LRINT:
case ISD::LLRINT:
+ case ISD::LROUND:
+ case ISD::LLROUND:
Res = SoftPromoteHalfOp_Op0WithStrict(N);
break;
case ISD::FP_TO_SINT_SAT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 88a4a8b..b1776ea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -429,7 +429,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
- SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op2 = N->getOperand(2);
+ switch (TLI.getExtendForAtomicRMWArg(N->getOpcode())) {
+ case ISD::SIGN_EXTEND:
+ Op2 = SExtPromotedInteger(Op2);
+ break;
+ case ISD::ZERO_EXTEND:
+ Op2 = ZExtPromotedInteger(Op2);
+ break;
+ case ISD::ANY_EXTEND:
+ Op2 = GetPromotedInteger(Op2);
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
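
As a scalar illustration of why the choice of extension matters when a narrow atomic operand is promoted (values and names here are hypothetical, not from the patch): a sign-extended operand preserves signed comparisons, while an any-extended one may not.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  int8_t Incr = -1; // 0xff as an i8 atomic min/max operand
  // One legal ANY_EXTEND outcome leaves the high bits arbitrary (here
  // zero), so a signed comparison would see 255:
  int32_t AnyExt = 0xff;
  // SIGN_EXTEND preserves the signed value, so comparisons still see -1:
  int32_t SExt = static_cast<int32_t>(Incr);
  std::printf("anyext=%d sext=%d\n", AnyExt, SExt); // anyext=255 sext=-1
}
```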
diff --git a/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 5eb86e7..049efe8 100644
--- a/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -51,7 +51,7 @@ bool TargetOptions::FramePointerIsReserved(const MachineFunction &MF) const {
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
/// that the rounding mode of the FPU can change from its default.
bool TargetOptions::HonorSignDependentRoundingFPMath() const {
- return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+ return HonorSignDependentRoundingFPMathOption;
}
/// NOTE: There are targets that still do not support the debug entry values
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 10f915d..7e5e7b5 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -6045,6 +6045,120 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
}
}
+// Set the function attribute to the given value if it is not already present.
+static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
+ StringRef Value) {
+ if (!F.hasFnAttribute(FnAttrName))
+ F.addFnAttr(FnAttrName, Value);
+}
+
+// Add the function attribute if requested and not already present.
+// If the attribute is present with value "false", remove it.
+// If it is present with value "true", reset it to a valueless attribute.
+static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
+ if (!F.hasFnAttribute(FnAttrName)) {
+ if (Set)
+ F.addFnAttr(FnAttrName);
+ } else {
+ auto A = F.getFnAttribute(FnAttrName);
+ if ("false" == A.getValueAsString())
+ F.removeFnAttr(FnAttrName);
+ else if ("true" == A.getValueAsString()) {
+ F.removeFnAttr(FnAttrName);
+ F.addFnAttr(FnAttrName);
+ }
+ }
+}
+
+void llvm::copyModuleAttrToFunctions(Module &M) {
+ Triple T(M.getTargetTriple());
+ if (!T.isThumb() && !T.isARM() && !T.isAArch64())
+ return;
+
+ uint64_t BTEValue = 0;
+ uint64_t BPPLRValue = 0;
+ uint64_t GCSValue = 0;
+ uint64_t SRAValue = 0;
+ uint64_t SRAALLValue = 0;
+ uint64_t SRABKeyValue = 0;
+
+ NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
+ if (ModFlags) {
+ for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
+ MDNode *Op = ModFlags->getOperand(I);
+ if (Op->getNumOperands() != 3)
+ continue;
+
+ MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
+ auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
+ if (!ID || !CI)
+ continue;
+
+ StringRef IDStr = ID->getString();
+ uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
+ : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
+ : IDStr == "guarded-control-stack" ? &GCSValue
+ : IDStr == "sign-return-address" ? &SRAValue
+ : IDStr == "sign-return-address-all" ? &SRAALLValue
+ : IDStr == "sign-return-address-with-bkey"
+ ? &SRABKeyValue
+ : nullptr;
+ if (!ValPtr)
+ continue;
+
+ *ValPtr = CI->getZExtValue();
+ if (*ValPtr == 2)
+ return;
+ }
+ }
+
+ bool BTE = BTEValue == 1;
+ bool BPPLR = BPPLRValue == 1;
+ bool GCS = GCSValue == 1;
+ bool SRA = SRAValue == 1;
+
+ StringRef SignTypeValue = "non-leaf";
+ if (SRA && SRAALLValue == 1)
+ SignTypeValue = "all";
+
+ StringRef SignKeyValue = "a_key";
+ if (SRA && SRABKeyValue == 1)
+ SignKeyValue = "b_key";
+
+ for (Function &F : M.getFunctionList()) {
+ if (F.isDeclaration())
+ continue;
+
+ if (SRA) {
+ setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
+ setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
+ } else {
+ if (auto A = F.getFnAttribute("sign-return-address");
+ A.isValid() && "none" == A.getValueAsString()) {
+ F.removeFnAttr("sign-return-address");
+ F.removeFnAttr("sign-return-address-key");
+ }
+ }
+ ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
+ ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
+ ConvertFunctionAttr(F, GCS, "guarded-control-stack");
+ }
+
+ if (BTE)
+ M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
+ if (BPPLR)
+ M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
+ if (GCS)
+ M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
+ if (SRA) {
+ M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
+ if (SRAALLValue == 1)
+ M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
+ if (SRABKeyValue == 1)
+ M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
+ }
+}
+
static bool isOldLoopArgument(Metadata *MD) {
auto *T = dyn_cast_or_null<MDTuple>(MD);
if (!T)
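
For illustration, a hedged sketch of how a consumer might query one of the module flags this upgrade reads; the flag name is taken from the code above, while the helper itself is hypothetical.

```cpp
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Hypothetical helper, not part of the patch.
bool hasBranchTargetEnforcement(const Module &M) {
  auto *CI = mdconst::extract_or_null<ConstantInt>(
      M.getModuleFlag("branch-target-enforcement"));
  // Per the upgrade above: 1 means enabled module-wide, 2 means the setting
  // has already been copied to per-function attributes.
  return CI && !CI->isZero();
}
```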
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 1bff6cd..f78d9b0 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -1512,6 +1512,11 @@ Error IRLinker::run() {
// Loop over all of the linked values to compute type mappings.
computeTypeMapping();
+  // Convert module-level attributes to function-level attributes because,
+  // after modules are merged, the flags may change and would then affect the
+  // functions differently than they did in the original module.
+ copyModuleAttrToFunctions(*SrcM);
+
std::reverse(Worklist.begin(), Worklist.end());
while (!Worklist.empty()) {
GlobalValue *GV = Worklist.back();
@@ -1677,6 +1682,11 @@ IRMover::IRMover(Module &M) : Composite(M) {
for (const auto *MD : StructTypes.getVisitedMetadata()) {
SharedMDs[MD].reset(const_cast<MDNode *>(MD));
}
+
+  // Convert module-level attributes to function-level attributes because,
+  // after modules are merged, the flags may change and would then affect the
+  // functions differently than they did in the original module.
+ copyModuleAttrToFunctions(M);
}
Error IRMover::move(std::unique_ptr<Module> Src,
diff --git a/llvm/lib/Support/AllocToken.cpp b/llvm/lib/Support/AllocToken.cpp
new file mode 100644
index 0000000..95ecda2
--- /dev/null
+++ b/llvm/lib/Support/AllocToken.cpp
@@ -0,0 +1,50 @@
+//===- AllocToken.cpp - Allocation Token Calculation ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of AllocToken modes and shared calculation of stateless token IDs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/AllocToken.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SipHash.h"
+
+using namespace llvm;
+
+static uint64_t getStableHash(const AllocTokenMetadata &Metadata,
+ uint64_t MaxTokens) {
+ return getStableSipHash(Metadata.TypeName) % MaxTokens;
+}
+
+std::optional<uint64_t> llvm::getAllocToken(AllocTokenMode Mode,
+ const AllocTokenMetadata &Metadata,
+ uint64_t MaxTokens) {
+ assert(MaxTokens && "Must provide non-zero max tokens");
+
+ switch (Mode) {
+ case AllocTokenMode::Increment:
+ case AllocTokenMode::Random:
+ // Stateful modes cannot be implemented as a pure function.
+ return std::nullopt;
+
+ case AllocTokenMode::TypeHash:
+ return getStableHash(Metadata, MaxTokens);
+
+ case AllocTokenMode::TypeHashPointerSplit: {
+ if (MaxTokens == 1)
+ return 0;
+ const uint64_t HalfTokens = MaxTokens / 2;
+ uint64_t Hash = getStableHash(Metadata, HalfTokens);
+ if (Metadata.ContainsPointer)
+ Hash += HalfTokens;
+ return Hash;
+ }
+ }
+
+  llvm_unreachable("Unhandled AllocTokenMode");
+}
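
A hedged usage sketch of the new helper; the aggregate shape of AllocTokenMetadata is assumed from the two fields referenced above (TypeName, ContainsPointer), and the worked arithmetic follows the TypeHashPointerSplit case directly.

```cpp
#include "llvm/Support/AllocToken.h"
#include <cstdint>
#include <cstdio>
#include <optional>

using namespace llvm;

int main() {
  // Field names taken from the code above; the exact struct layout and
  // field types are assumptions.
  AllocTokenMetadata MD;
  MD.TypeName = "MyStruct*";
  MD.ContainsPointer = true;
  // With MaxTokens = 1024: HalfTokens = 512, so the result is
  // sipHash(TypeName) % 512, plus 512 because the type contains a pointer.
  if (std::optional<uint64_t> Tok =
          getAllocToken(AllocTokenMode::TypeHashPointerSplit, MD, 1024))
    std::printf("token = %llu\n", (unsigned long long)*Tok);
  return 0;
}
```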
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 42b21b5..671a5fe 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -149,6 +149,7 @@ add_llvm_component_library(LLVMSupport
AArch64BuildAttributes.cpp
ARMAttributeParser.cpp
ARMWinEH.cpp
+ AllocToken.cpp
Allocator.cpp
AutoConvert.cpp
Base64.cpp
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 549c418..f74e52a 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -111,7 +111,7 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) {
return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
}
-LLVM_ABI void SpecialCaseList::Matcher::preprocess(bool BySize) {
+void SpecialCaseList::Matcher::preprocess(bool BySize) {
return std::visit([&](auto &V) { return V.preprocess(BySize); }, M);
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 42ec8ba..7cce033 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
- defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
}
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 1f773e2..3368a50 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -820,7 +820,7 @@ void ARMAsmPrinter::emitAttributes() {
auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
SourceModule->getModuleFlag("branch-target-enforcement"));
- if (BTIValue && BTIValue->isOne()) {
+ if (BTIValue && !BTIValue->isZero()) {
// If "+pacbti" is used as an architecture extension,
// Tag_BTI_extension is emitted in
// ARMTargetStreamer::emitTargetAttributes().
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 35e1127..b1a668e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1089,7 +1089,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
// Register based DivRem for AEABI (RTABI 4.2)
if (TT.isTargetAEABI() || TT.isAndroid() || TT.isTargetGNUAEABI() ||
- TT.isTargetMuslAEABI() || TT.isOSWindows()) {
+ TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isOSWindows()) {
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
HasStandaloneRem = false;
@@ -1353,6 +1353,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::LRINT, MVT::f16, Expand);
+ setOperationAction(ISD::LROUND, MVT::f16, Expand);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
@@ -20574,7 +20575,7 @@ static TargetLowering::ArgListTy getDivRemArgList(
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
- Subtarget->isTargetWindows()) &&
+ Subtarget->isTargetFuchsia() || Subtarget->isTargetWindows()) &&
"Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 406f4c1..597d311 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -1036,6 +1036,7 @@ bool LowOverheadLoop::ValidateLiveOuts() {
while (!Worklist.empty()) {
MachineInstr *MI = Worklist.pop_back_val();
if (MI->getOpcode() == ARM::MQPRCopy) {
+ LLVM_DEBUG(dbgs() << " Must generate copy as VMOV: " << *MI);
VMOVCopies.insert(MI);
MachineInstr *CopySrc =
RDI.getUniqueReachingMIDef(MI, MI->getOperand(1).getReg());
@@ -1045,6 +1046,20 @@ bool LowOverheadLoop::ValidateLiveOuts() {
LLVM_DEBUG(dbgs() << " Unable to handle live out: " << *MI);
VMOVCopies.clear();
return false;
+ } else if (isVectorPredicated(MI)) {
+ // If this is a predicated instruction with merging semantics,
+ // check where it gets its false lanes from, if any.
+ int InactiveIdx = findVPTInactiveOperandIdx(*MI);
+ if (InactiveIdx != -1) {
+ SmallPtrSet<MachineInstr *, 2> Defs;
+ MachineInstr *FalseSrc = RDI.getUniqueReachingMIDef(
+ MI, MI->getOperand(InactiveIdx).getReg());
+ if (FalseSrc) {
+ LLVM_DEBUG(dbgs()
+ << " Must check source of false lanes for: " << *MI);
+ Worklist.push_back(FalseSrc);
+ }
+ }
}
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index b2d368e..4a0883c 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -343,6 +343,7 @@ public:
bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); }
bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); }
+ bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 431ce38..f5653d4 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -805,6 +805,16 @@ int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) {
return -1;
}
+int llvm::findVPTInactiveOperandIdx(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+ if (MCID.operands()[i].OperandType == ARM::OPERAND_VPRED_R)
+ return i + ARM::SUBOP_vpred_r_inactive;
+
+ return -1;
+}
+
ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI,
Register &PredReg) {
int PIdx = findFirstVPTPredOperandIdx(MI);
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 3ec3a621..1b0bf2d 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -90,6 +90,9 @@ inline ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI) {
Register PredReg;
return getVPTInstrPredicate(MI, PredReg);
}
+// Identify the input operand in an MVE predicated instruction which
+// contributes the values of any inactive vector lanes.
+int findVPTInactiveOperandIdx(const MachineInstr &MI);
// Recomputes the Block Mask of Instr, a VPT or VPST instruction.
// This rebuilds the block mask of the instruction depending on the predicates
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index c8866bf..42e90f0 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -294,6 +294,14 @@ public:
if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures"))
RootSignature->eraseFromParent();
+ // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and
+ // causes all tests using the DXIL Validator to fail.
+ //
+  // This is a temporary fix and should be replaced with a whitelist once
+  // we have determined all metadata that the DXIL Validator allows.
+ if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa"))
+ ErrNo->eraseFromParent();
+
return true;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index a94e131..54c8972 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -117,8 +117,10 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- if (Subtarget.useHVX128BOps())
+ if (Subtarget.useHVX128BOps()) {
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v64i1, Custom);
+ }
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
Subtarget.useHVXFloatingPoint()) {
@@ -2024,13 +2026,9 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
// Handle bitcast from i32, v2i16, and v4i8 to v32i1.
// Splat the input into a 32-element i32 vector, then AND each element
// with a unique bitmask to isolate individual bits.
- if (ResTy == MVT::v32i1 &&
- (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
- Subtarget.useHVX128BOps()) {
- SDValue Val32 = Val;
- if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
- Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
-
+ auto bitcastI32ToV32I1 = [&](SDValue Val32) {
+ assert(Val32.getValueType().getSizeInBits() == 32 &&
+ "Input must be 32 bits");
MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
SmallVector<SDValue, 32> Mask;
@@ -2039,7 +2037,31 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
- return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
+ return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
+ };
+ // === Case: v32i1 ===
+ if (ResTy == MVT::v32i1 &&
+ (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
+ Subtarget.useHVX128BOps()) {
+ SDValue Val32 = Val;
+ if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
+ Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
+ return bitcastI32ToV32I1(Val32);
+ }
+ // === Case: v64i1 ===
+ if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
+ // Split i64 into lo/hi 32-bit halves.
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
+ SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
+
+ // Reuse the same 32-bit logic twice.
+ SDValue LoRes = bitcastI32ToV32I1(Lo);
+ SDValue HiRes = bitcastI32ToV32I1(Hi);
+
+ // Concatenate into a v64i1 predicate.
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
}
if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
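
A scalar model (illustrative only, not from the patch) of the splat+mask trick used above: lane i of the resulting predicate is bit i of the 32-bit input, and the v64i1 case simply runs the same trick on each half of the i64.

```cpp
#include <cstdint>

// Lane i of the predicate is bit i of Val, isolated via a unique bitmask.
void bitsToLanes(uint32_t Val, bool Lanes[32]) {
  for (int I = 0; I < 32; ++I)
    Lanes[I] = (Val & (UINT32_C(1) << I)) != 0;
}

// For the v64i1 case, split the i64 and reuse the 32-bit logic twice.
void bits64ToLanes(uint64_t Val, bool Lanes[64]) {
  bitsToLanes(static_cast<uint32_t>(Val), Lanes);            // lo half
  bitsToLanes(static_cast<uint32_t>(Val >> 32), Lanes + 32); // hi half
}
```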
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index e857b2d..edde7ac 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2406,7 +2406,8 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
}
bool RISCVAsmParser::generateVTypeError(SMLoc ErrorLoc) {
- if (STI->hasFeature(RISCV::FeatureStdExtZvfbfa))
+ if (STI->hasFeature(RISCV::FeatureStdExtZvfbfa) ||
+ STI->hasFeature(RISCV::FeatureVendorXSfvfbfexp16e))
return Error(
ErrorLoc,
"operand must be "
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index b8ec0bb..4bea4c4 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -654,7 +654,10 @@ static constexpr FeatureBitset XqciFeatureGroup = {
static constexpr FeatureBitset XSfVectorGroup = {
RISCV::FeatureVendorXSfvcp, RISCV::FeatureVendorXSfvqmaccdod,
RISCV::FeatureVendorXSfvqmaccqoq, RISCV::FeatureVendorXSfvfwmaccqqq,
- RISCV::FeatureVendorXSfvfnrclipxfqf, RISCV::FeatureVendorXSfmmbase};
+ RISCV::FeatureVendorXSfvfnrclipxfqf, RISCV::FeatureVendorXSfmmbase,
+ RISCV::FeatureVendorXSfvfexpa, RISCV::FeatureVendorXSfvfexpa64e,
+ RISCV::FeatureVendorXSfvfbfexp16e, RISCV::FeatureVendorXSfvfexp16e,
+ RISCV::FeatureVendorXSfvfexp32e};
static constexpr FeatureBitset XSfSystemGroup = {
RISCV::FeatureVendorXSiFivecdiscarddlone,
RISCV::FeatureVendorXSiFivecflushdlone,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 50f5a5d..7b9c4b3 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -220,7 +220,8 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
if (RISCVVType::getVLMUL(Imm) == RISCVVType::VLMUL::LMUL_RESERVED ||
RISCVVType::getSEW(Imm) > 64 ||
(RISCVVType::isAltFmt(Imm) &&
- !STI.hasFeature(RISCV::FeatureStdExtZvfbfa)) ||
+ !(STI.hasFeature(RISCV::FeatureStdExtZvfbfa) ||
+ STI.hasFeature(RISCV::FeatureVendorXSfvfbfexp16e))) ||
(Imm >> 9) != 0) {
O << formatImm(Imm);
return;
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index 5dd4bf4..98b636e 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -109,12 +109,70 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
// expanded instructions for each pseudo is correct in the Size field of the
// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
+ case RISCV::PseudoAtomicSwap32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicSwap64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAdd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAdd64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadSub32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadSub64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAnd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAnd64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadOr32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
+ case RISCV::PseudoAtomicLoadOr64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 64, NextMBBI);
+ case RISCV::PseudoAtomicLoadXor32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadXor64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 64,
+ NextMBBI);
case RISCV::PseudoAtomicLoadNand32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
NextMBBI);
case RISCV::PseudoAtomicLoadNand64:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
NextMBBI);
+ case RISCV::PseudoAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadMin64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadMax64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMin64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMax64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 64,
+ NextMBBI);
case RISCV::PseudoMaskedAtomicSwap32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
NextMBBI);
@@ -277,6 +335,36 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(IncrReg)
+ .addImm(0);
+ break;
+ case AtomicRMWInst::Add:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Sub:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::And:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Or:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Xor:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
case AtomicRMWInst::Nand:
BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(DestReg)
@@ -433,38 +521,85 @@ static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
.addReg(ShamtReg);
}
-bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
- MachineBasicBlock::iterator &NextMBBI) {
- assert(IsMasked == true &&
- "Should only need to expand masked atomic max/min");
- assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+static void doAtomicMinMaxOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
+ MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
+ const RISCVSubtarget *STI) {
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register IncrReg = MI.getOperand(3).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
- MachineInstr &MI = *MBBI;
- DebugLoc DL = MI.getDebugLoc();
- MachineFunction *MF = MBB.getParent();
- auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ // .loophead:
+ // lr.[w|d] dest, (addr)
+ // mv scratch, dest
+ // ifnochangeneeded scratch, incr, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(DestReg)
+ .addImm(0);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Max: {
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(ScratchReg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::Min: {
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(IncrReg)
+ .addReg(ScratchReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::UMax:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(ScratchReg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ case AtomicRMWInst::UMin:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(IncrReg)
+ .addReg(ScratchReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
- // Insert new MBBs.
- MF->insert(++MBB.getIterator(), LoopHeadMBB);
- MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
- MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
- MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+ // .loopifbody:
+ // mv scratch, incr
+ BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(IncrReg)
+ .addImm(0);
- // Set up successors and transfer remaining instructions to DoneMBB.
- LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
- LoopHeadMBB->addSuccessor(LoopTailMBB);
- LoopIfBodyMBB->addSuccessor(LoopTailMBB);
- LoopTailMBB->addSuccessor(LoopHeadMBB);
- LoopTailMBB->addSuccessor(DoneMBB);
- DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
- DoneMBB->transferSuccessors(&MBB);
- MBB.addSuccessor(LoopHeadMBB);
+ // .looptail:
+ // sc.[w|d] scratch, scratch, (addr)
+ // bnez scratch, loop
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+ ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(AddrReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+}
+static void doMaskedAtomicMinMaxOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
+ MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
+ const RISCVSubtarget *STI) {
+ assert(Width == 32 && "Should never need to expand masked 64-bit operations");
Register DestReg = MI.getOperand(0).getReg();
Register Scratch1Reg = MI.getOperand(1).getReg();
Register Scratch2Reg = MI.getOperand(2).getReg();
@@ -541,6 +676,44 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
.addReg(Scratch1Reg)
.addReg(RISCV::X0)
.addMBB(LoopHeadMBB);
+}
+
+bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+ MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ if (!IsMasked)
+ doAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, LoopIfBodyMBB,
+ LoopTailMBB, DoneMBB, BinOp, Width, STI);
+ else
+ doMaskedAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB,
+ LoopIfBodyMBB, LoopTailMBB, DoneMBB, BinOp,
+ Width, STI);
NextMBBI = MBB.end();
MI.eraseFromParent();
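
A scalar C++ model (illustrative, not the emitted code) of the unmasked min expansion above: load-reserved becomes a plain load, and the store-conditional retry is modeled with compare_exchange_weak.

```cpp
#include <algorithm>
#include <atomic>
#include <cstdint>

int32_t atomicMin(std::atomic<int32_t> &Addr, int32_t Incr) {
  int32_t Dest = Addr.load();                      // lr.w dest, (addr)
  while (true) {
    int32_t Scratch = std::min(Dest, Incr);        // mv + conditional skip
    if (Addr.compare_exchange_weak(Dest, Scratch)) // sc.w + bnez retry
      return Dest;                                 // old value is returned
  }
}
```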
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 19992e6..9e6b7f0 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -218,6 +218,7 @@ def HasStdExtZaamo
: Predicate<"Subtarget->hasStdExtZaamo()">,
AssemblerPredicate<(any_of FeatureStdExtZaamo),
"'Zaamo' (Atomic Memory Operations)">;
+def NoStdExtZaamo : Predicate<"!Subtarget->hasStdExtZaamo()">;
def FeatureStdExtZalrsc
: RISCVExtension<1, 0, "Load-Reserved/Store-Conditional">;
@@ -1334,6 +1335,44 @@ def HasVendorXSfvfnrclipxfqf
AssemblerPredicate<(all_of FeatureVendorXSfvfnrclipxfqf),
"'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)">;
+// Note: XSfvfbfexp16e depends on either Zvfbfmin _or_ Zvfbfa, which cannot be expressed here in
+// TableGen. Instead, we check that in RISCVISAInfo.
+def FeatureVendorXSfvfbfexp16e
+ : RISCVExtension<0, 5,
+ "SiFive Vector Floating-Point Exponential Function Instruction, BFloat16">;
+def HasVendorXSfvfbfexp16e : Predicate<"Subtarget->hasVendorXSfvfbfexp16e()">;
+
+def FeatureVendorXSfvfexp16e
+ : RISCVExtension<0, 5,
+ "SiFive Vector Floating-Point Exponential Function Instruction, Half Precision",
+ [FeatureStdExtZvfh]>;
+def HasVendorXSfvfexp16e : Predicate<"Subtarget->hasVendorXSfvfexp16e()">;
+
+def FeatureVendorXSfvfexp32e
+ : RISCVExtension<0, 5,
+ "SiFive Vector Floating-Point Exponential Function Instruction, Single Precision",
+ [FeatureStdExtZve32f]>;
+def HasVendorXSfvfexp32e : Predicate<"Subtarget->hasVendorXSfvfexp32e()">;
+
+def HasVendorXSfvfexpAnyFloat : Predicate<"Subtarget->hasVendorXSfvfexp16e() || Subtarget->hasVendorXSfvfexp32e()">;
+def HasVendorXSfvfexpAny : Predicate<"Subtarget->hasVendorXSfvfbfexp16e() || Subtarget->hasVendorXSfvfexp16e() || Subtarget->hasVendorXSfvfexp32e()">,
+ AssemblerPredicate<(any_of FeatureVendorXSfvfbfexp16e, FeatureVendorXSfvfexp16e, FeatureVendorXSfvfexp32e),
+ "'Xsfvfbfexp16e', 'Xsfvfexp16e', or 'Xsfvfexp32e' (SiFive Vector Floating-Point Exponential Function Instruction)">;
+
+def FeatureVendorXSfvfexpa
+ : RISCVExtension<0, 2,
+ "SiFive Vector Floating-Point Exponential Approximation Instruction",
+ [FeatureStdExtZve32f]>;
+def HasVendorXSfvfexpa : Predicate<"Subtarget->hasVendorXSfvfexpa()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvfexpa),
+ "'Xsfvfexpa' (SiFive Vector Floating-Point Exponential Approximation Instruction)">;
+
+def FeatureVendorXSfvfexpa64e
+ : RISCVExtension<0, 2,
+ "SiFive Vector Floating-Point Exponential Approximation Instruction with Double-Precision",
+ [FeatureVendorXSfvfexpa, FeatureStdExtZve64d]>;
+def HasVendorXSfvfexpa64e : Predicate<"Subtarget->hasVendorXSfvfexpa64e()">;
+
def FeatureVendorXSiFivecdiscarddlone
: RISCVExtension<1, 0,
"SiFive sf.cdiscard.d.l1 Instruction", []>;
@@ -1864,7 +1903,7 @@ def FeatureForcedAtomics : SubtargetFeature<
"forced-atomics", "HasForcedAtomics", "true",
"Assume that lock-free native-width atomics are available">;
def HasAtomicLdSt
- : Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;
+ : Predicate<"Subtarget->hasStdExtZalrsc() || Subtarget->hasForcedAtomics()">;
def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"AllowTaggedGlobals",
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a77d765..26fe9ed 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -688,7 +688,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
else if (Subtarget.hasStdExtZicbop())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
- if (Subtarget.hasStdExtA()) {
+ if (Subtarget.hasStdExtZalrsc()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
setMinCmpXchgSizeInBits(8);
@@ -1558,7 +1558,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget.hasStdExtA())
+ if (Subtarget.hasStdExtZaamo())
setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
if (Subtarget.hasForcedAtomics()) {
@@ -21875,7 +21875,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
// result is then sign extended to XLEN. With +A, the minimum width is
// 32 for both 64 and 32.
assert(getMinCmpXchgSizeInBits() == 32);
- assert(Subtarget.hasStdExtA());
+ assert(Subtarget.hasStdExtZalrsc());
return Op.getValueSizeInBits() - 31;
}
break;
@@ -24471,6 +24471,25 @@ ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
}
+ISD::NodeType RISCVTargetLowering::getExtendForAtomicRMWArg(unsigned Op) const {
+ // Zaamo will use amo<op>.w which does not require extension.
+ if (Subtarget.hasStdExtZaamo() || Subtarget.hasForcedAtomics())
+ return ISD::ANY_EXTEND;
+
+  // Zalrsc pseudo expansions that compare values require sign-extension.
+ assert(Subtarget.hasStdExtZalrsc());
+ switch (Op) {
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ return ISD::SIGN_EXTEND;
+ default:
+ break;
+ }
+ return ISD::ANY_EXTEND;
+}
+
Register RISCVTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return RISCV::X10;
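
A note on the SIGN_EXTEND choice in getExtendForAtomicRMWArg above: the Zalrsc-only expansion of the min/max RMW pseudos compares full XLEN-wide registers, so a 32-bit payload must arrive sign-extended for the comparison to agree with i32 semantics. A minimal standalone C++ illustration of the hazard (toy values, not LLVM code):

#include <cassert>
#include <cstdint>

// A 32-bit value living in a 64-bit register: any-extended (garbage or zero
// upper bits) versus properly sign-extended.
int64_t signExtend32(uint64_t X) { return (int64_t)(int32_t)X; }

int main() {
  uint64_t A = 0xFFFFFFFFu; // i32 -1, any-extended with zero upper bits
  uint64_t B = 0x00000001u; // i32 1
  assert((int64_t)A > (int64_t)B);           // 64-bit signed compare: wrong order
  assert(signExtend32(A) < signExtend32(B)); // sign-extended: -1 < 1, as i32
  return 0;
}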
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3f81ed7..9e3e2a9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -245,6 +245,7 @@ public:
}
ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
+ ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const override;
bool shouldTransformSignedTruncationCheck(EVT XVT,
unsigned KeptBits) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 12f776b..912b82d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1689,42 +1689,44 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
// TODO: Support more operations.
unsigned getPredicatedOpcode(unsigned Opcode) {
+ // clang-format off
switch (Opcode) {
- case RISCV::ADD: return RISCV::PseudoCCADD; break;
- case RISCV::SUB: return RISCV::PseudoCCSUB; break;
- case RISCV::SLL: return RISCV::PseudoCCSLL; break;
- case RISCV::SRL: return RISCV::PseudoCCSRL; break;
- case RISCV::SRA: return RISCV::PseudoCCSRA; break;
- case RISCV::AND: return RISCV::PseudoCCAND; break;
- case RISCV::OR: return RISCV::PseudoCCOR; break;
- case RISCV::XOR: return RISCV::PseudoCCXOR; break;
-
- case RISCV::ADDI: return RISCV::PseudoCCADDI; break;
- case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;
- case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;
- case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;
- case RISCV::ANDI: return RISCV::PseudoCCANDI; break;
- case RISCV::ORI: return RISCV::PseudoCCORI; break;
- case RISCV::XORI: return RISCV::PseudoCCXORI; break;
-
- case RISCV::ADDW: return RISCV::PseudoCCADDW; break;
- case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;
- case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;
- case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;
- case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;
-
- case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
- case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
- case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
- case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
-
- case RISCV::ANDN: return RISCV::PseudoCCANDN; break;
- case RISCV::ORN: return RISCV::PseudoCCORN; break;
- case RISCV::XNOR: return RISCV::PseudoCCXNOR; break;
-
- case RISCV::NDS_BFOS: return RISCV::PseudoCCNDS_BFOS; break;
- case RISCV::NDS_BFOZ: return RISCV::PseudoCCNDS_BFOZ; break;
+ case RISCV::ADD: return RISCV::PseudoCCADD;
+ case RISCV::SUB: return RISCV::PseudoCCSUB;
+ case RISCV::SLL: return RISCV::PseudoCCSLL;
+ case RISCV::SRL: return RISCV::PseudoCCSRL;
+ case RISCV::SRA: return RISCV::PseudoCCSRA;
+ case RISCV::AND: return RISCV::PseudoCCAND;
+ case RISCV::OR: return RISCV::PseudoCCOR;
+ case RISCV::XOR: return RISCV::PseudoCCXOR;
+
+ case RISCV::ADDI: return RISCV::PseudoCCADDI;
+ case RISCV::SLLI: return RISCV::PseudoCCSLLI;
+ case RISCV::SRLI: return RISCV::PseudoCCSRLI;
+ case RISCV::SRAI: return RISCV::PseudoCCSRAI;
+ case RISCV::ANDI: return RISCV::PseudoCCANDI;
+ case RISCV::ORI: return RISCV::PseudoCCORI;
+ case RISCV::XORI: return RISCV::PseudoCCXORI;
+
+ case RISCV::ADDW: return RISCV::PseudoCCADDW;
+ case RISCV::SUBW: return RISCV::PseudoCCSUBW;
+ case RISCV::SLLW: return RISCV::PseudoCCSLLW;
+ case RISCV::SRLW: return RISCV::PseudoCCSRLW;
+ case RISCV::SRAW: return RISCV::PseudoCCSRAW;
+
+ case RISCV::ADDIW: return RISCV::PseudoCCADDIW;
+ case RISCV::SLLIW: return RISCV::PseudoCCSLLIW;
+ case RISCV::SRLIW: return RISCV::PseudoCCSRLIW;
+ case RISCV::SRAIW: return RISCV::PseudoCCSRAIW;
+
+ case RISCV::ANDN: return RISCV::PseudoCCANDN;
+ case RISCV::ORN: return RISCV::PseudoCCORN;
+ case RISCV::XNOR: return RISCV::PseudoCCXNOR;
+
+ case RISCV::NDS_BFOS: return RISCV::PseudoCCNDS_BFOS;
+ case RISCV::NDS_BFOZ: return RISCV::PseudoCCNDS_BFOZ;
}
+ // clang-format on
return RISCV::INSTRUCTION_LIST_END;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 571d72f..5c81a09 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -158,9 +158,9 @@ class seq_cst_store<PatFrag base>
}
} // IsAtomic = 1
-// Atomic load/store are available under both +a and +force-atomics.
-// Fences will be inserted for atomic load/stores according to the logic in
-// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
+// Atomic load/store are available under +zalrsc (thus also +a) and
+// +force-atomics. Fences will be inserted for atomic load/stores according to
+// the logic in RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
// The normal loads/stores are relaxed (unordered) loads/stores that don't have
// any ordering. This is necessary because AtomicExpandPass has added fences to
// atomic load/stores and changed them to unordered ones.
@@ -308,7 +308,65 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering)>;
-let Predicates = [HasStdExtA] in {
+let Predicates = [HasStdExtZalrsc, NoStdExtZaamo] in {
+
+let Size = 16 in {
+def PseudoAtomicSwap32 : PseudoAMO;
+def PseudoAtomicLoadAdd32 : PseudoAMO;
+def PseudoAtomicLoadSub32 : PseudoAMO;
+def PseudoAtomicLoadAnd32 : PseudoAMO;
+def PseudoAtomicLoadOr32 : PseudoAMO;
+def PseudoAtomicLoadXor32 : PseudoAMO;
+} // Size = 16
+let Size = 24 in {
+def PseudoAtomicLoadMax32 : PseudoAMO;
+def PseudoAtomicLoadMin32 : PseudoAMO;
+def PseudoAtomicLoadUMax32 : PseudoAMO;
+def PseudoAtomicLoadUMin32 : PseudoAMO;
+} // Size = 24
+
+defm : PseudoAMOPat<"atomic_swap_i32", PseudoAtomicSwap32>;
+defm : PseudoAMOPat<"atomic_load_add_i32", PseudoAtomicLoadAdd32>;
+defm : PseudoAMOPat<"atomic_load_sub_i32", PseudoAtomicLoadSub32>;
+defm : PseudoAMOPat<"atomic_load_and_i32", PseudoAtomicLoadAnd32>;
+defm : PseudoAMOPat<"atomic_load_or_i32", PseudoAtomicLoadOr32>;
+defm : PseudoAMOPat<"atomic_load_xor_i32", PseudoAtomicLoadXor32>;
+defm : PseudoAMOPat<"atomic_load_max_i32", PseudoAtomicLoadMax32>;
+defm : PseudoAMOPat<"atomic_load_min_i32", PseudoAtomicLoadMin32>;
+defm : PseudoAMOPat<"atomic_load_umax_i32", PseudoAtomicLoadUMax32>;
+defm : PseudoAMOPat<"atomic_load_umin_i32", PseudoAtomicLoadUMin32>;
+} // Predicates = [HasStdExtZalrsc, NoStdExtZaamo]
+
+let Predicates = [HasStdExtZalrsc, NoStdExtZaamo, IsRV64] in {
+
+let Size = 16 in {
+def PseudoAtomicSwap64 : PseudoAMO;
+def PseudoAtomicLoadAdd64 : PseudoAMO;
+def PseudoAtomicLoadSub64 : PseudoAMO;
+def PseudoAtomicLoadAnd64 : PseudoAMO;
+def PseudoAtomicLoadOr64 : PseudoAMO;
+def PseudoAtomicLoadXor64 : PseudoAMO;
+} // Size = 16
+let Size = 24 in {
+def PseudoAtomicLoadMax64 : PseudoAMO;
+def PseudoAtomicLoadMin64 : PseudoAMO;
+def PseudoAtomicLoadUMax64 : PseudoAMO;
+def PseudoAtomicLoadUMin64 : PseudoAMO;
+} // Size = 24
+
+defm : PseudoAMOPat<"atomic_swap_i64", PseudoAtomicSwap64, i64>;
+defm : PseudoAMOPat<"atomic_load_add_i64", PseudoAtomicLoadAdd64, i64>;
+defm : PseudoAMOPat<"atomic_load_sub_i64", PseudoAtomicLoadSub64, i64>;
+defm : PseudoAMOPat<"atomic_load_and_i64", PseudoAtomicLoadAnd64, i64>;
+defm : PseudoAMOPat<"atomic_load_or_i64", PseudoAtomicLoadOr64, i64>;
+defm : PseudoAMOPat<"atomic_load_xor_i64", PseudoAtomicLoadXor64, i64>;
+defm : PseudoAMOPat<"atomic_load_max_i64", PseudoAtomicLoadMax64, i64>;
+defm : PseudoAMOPat<"atomic_load_min_i64", PseudoAtomicLoadMin64, i64>;
+defm : PseudoAMOPat<"atomic_load_umax_i64", PseudoAtomicLoadUMax64, i64>;
+defm : PseudoAMOPat<"atomic_load_umin_i64", PseudoAtomicLoadUMin64, i64>;
+} // Predicates = [HasStdExtZalrsc, NoStdExtZaamo, IsRV64]
+
+let Predicates = [HasStdExtZalrsc] in {
let Size = 20 in
def PseudoAtomicLoadNand32 : PseudoAMO;
@@ -347,14 +405,14 @@ def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax,
PseudoMaskedAtomicLoadUMax32>;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin,
PseudoMaskedAtomicLoadUMin32>;
-} // Predicates = [HasStdExtA]
+} // Predicates = [HasStdExtZalrsc]
-let Predicates = [HasStdExtA, IsRV64] in {
+let Predicates = [HasStdExtZalrsc, IsRV64] in {
let Size = 20 in
def PseudoAtomicLoadNand64 : PseudoAMO;
defm : PseudoAMOPat<"atomic_load_nand_i64", PseudoAtomicLoadNand64, i64>;
-} // Predicates = [HasStdExtA, IsRV64]
+} // Predicates = [HasStdExtZalrsc, IsRV64]
/// Compare and exchange
@@ -385,17 +443,17 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
}
-let Predicates = [HasStdExtA, NoStdExtZacas] in {
+let Predicates = [HasStdExtZalrsc, NoStdExtZacas] in {
def PseudoCmpXchg32 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
}
-let Predicates = [HasStdExtA, NoStdExtZacas, IsRV64] in {
+let Predicates = [HasStdExtZalrsc, NoStdExtZacas, IsRV64] in {
def PseudoCmpXchg64 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_i64", PseudoCmpXchg64, i64>;
}
-let Predicates = [HasStdExtA] in {
+let Predicates = [HasStdExtZalrsc] in {
def PseudoMaskedCmpXchg32
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
@@ -412,4 +470,4 @@ def : Pat<(XLenVT (int_riscv_masked_cmpxchg
(XLenVT GPR:$mask), (XLenVT timm:$ordering))),
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
-} // Predicates = [HasStdExtA]
+} // Predicates = [HasStdExtZalrsc]
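
For context on the Size annotations above: with Zalrsc but no Zaamo, each pseudo expands into an LR/SC retry loop. The Size = 16 ops plausibly correspond to four 4-byte instructions (lr, the ALU op, sc, and the retry branch), and the Size = 24 min/max ops to six, since selecting the new value takes an extra compare-and-branch. A hedged C++ emulation of the min/max loop shape, with a CAS standing in for lr/sc:

#include <atomic>
#include <cstdint>

// compare_exchange_weak may fail spuriously, like sc.w, which forces the
// same retry-loop structure as the pseudo expansion.
int32_t atomicMax32(std::atomic<int32_t> &Mem, int32_t Incr) {
  int32_t Old = Mem.load(std::memory_order_relaxed);
  while (true) {
    int32_t New = Old > Incr ? Old : Incr; // the extra compare min/max needs
    if (Mem.compare_exchange_weak(Old, New, std::memory_order_seq_cst,
                                  std::memory_order_relaxed))
      return Old; // atomicrmw yields the original value
  }
}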
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 6a4119a..4104abd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -217,6 +217,14 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
defm FVW : CustomSiFiveVCIX<"fvw", VCIX_XVW, VR, VR, FPR32>, Sched<[]>;
}
+let Predicates = [HasVendorXSfvfexpAny], DecoderNamespace = "XSfvector" in {
+ def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">;
+}
+
+let Predicates = [HasVendorXSfvfexpa], DecoderNamespace = "XSfvector" in {
+ def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">;
+}
+
let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvector",
DestEEW = EEWSEWx4, RVVConstraint=VS2Constraint in {
def SF_VQMACCU_2x8x2 : CustomSiFiveVMACC<0b101100, OPMVV, "sf.vqmaccu.2x8x2">;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 5591d9f..021353a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -355,9 +355,9 @@ private:
SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const;
bool extractSubvector(Register &ResVReg, const SPIRVType *ResType,
Register &ReadReg, MachineInstr &InsertionPoint) const;
- bool generateImageRead(Register &ResVReg, const SPIRVType *ResType,
- Register ImageReg, Register IdxReg, DebugLoc Loc,
- MachineInstr &Pos) const;
+ bool generateImageReadOrFetch(Register &ResVReg, const SPIRVType *ResType,
+ Register ImageReg, Register IdxReg,
+ DebugLoc Loc, MachineInstr &Pos) const;
bool BuildCOPY(Register DestReg, Register SrcReg, MachineInstr &I) const;
bool loadVec3BuiltinInputID(SPIRV::BuiltIn::BuiltIn BuiltInValue,
Register ResVReg, const SPIRVType *ResType,
@@ -1321,8 +1321,8 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
}
Register IdxReg = IntPtrDef->getOperand(3).getReg();
- return generateImageRead(ResVReg, ResType, NewHandleReg, IdxReg,
- I.getDebugLoc(), I);
+ return generateImageReadOrFetch(ResVReg, ResType, NewHandleReg, IdxReg,
+ I.getDebugLoc(), I);
}
}
@@ -3639,27 +3639,33 @@ bool SPIRVInstructionSelector::selectReadImageIntrinsic(
DebugLoc Loc = I.getDebugLoc();
MachineInstr &Pos = I;
- return generateImageRead(ResVReg, ResType, NewImageReg, IdxReg, Loc, Pos);
+ return generateImageReadOrFetch(ResVReg, ResType, NewImageReg, IdxReg, Loc,
+ Pos);
}
-bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg,
- const SPIRVType *ResType,
- Register ImageReg,
- Register IdxReg, DebugLoc Loc,
- MachineInstr &Pos) const {
+bool SPIRVInstructionSelector::generateImageReadOrFetch(
+ Register &ResVReg, const SPIRVType *ResType, Register ImageReg,
+ Register IdxReg, DebugLoc Loc, MachineInstr &Pos) const {
SPIRVType *ImageType = GR.getSPIRVTypeForVReg(ImageReg);
assert(ImageType && ImageType->getOpcode() == SPIRV::OpTypeImage &&
"ImageReg is not an image type.");
+
bool IsSignedInteger =
sampledTypeIsSignedInteger(GR.getTypeForSPIRVType(ImageType));
+ // Check if the "sampled" operand of the image type is 1.
+ // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpImageFetch
+ auto SampledOp = ImageType->getOperand(6);
+ bool IsFetch = (SampledOp.getImm() == 1);
uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType);
if (ResultSize == 4) {
- auto BMI = BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(ImageReg)
- .addUse(IdxReg);
+ auto BMI =
+ BuildMI(*Pos.getParent(), Pos, Loc,
+ TII.get(IsFetch ? SPIRV::OpImageFetch : SPIRV::OpImageRead))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(ImageReg)
+ .addUse(IdxReg);
if (IsSignedInteger)
BMI.addImm(0x1000); // SignExtend
@@ -3668,11 +3674,13 @@ bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg,
SPIRVType *ReadType = widenTypeToVec4(ResType, Pos);
Register ReadReg = MRI->createVirtualRegister(GR.getRegClass(ReadType));
- auto BMI = BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead))
- .addDef(ReadReg)
- .addUse(GR.getSPIRVTypeID(ReadType))
- .addUse(ImageReg)
- .addUse(IdxReg);
+ auto BMI =
+ BuildMI(*Pos.getParent(), Pos, Loc,
+ TII.get(IsFetch ? SPIRV::OpImageFetch : SPIRV::OpImageRead))
+ .addDef(ReadReg)
+ .addUse(GR.getSPIRVTypeID(ReadType))
+ .addUse(ImageReg)
+ .addUse(IdxReg);
if (IsSignedInteger)
BMI.addImm(0x1000); // SignExtend
bool Succeed = BMI.constrainAllUses(TII, TRI, RBI);
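
The fetch-versus-read decision above keys off the Sampled operand of OpTypeImage: in SPIR-V, Sampled == 1 declares an image used with a sampler (so OpImageFetch applies), while Sampled == 2 declares one read or written without a sampler (OpImageRead). A minimal sketch of that decision, with illustrative names:

#include <cstdint>

enum class ImageAccessOp { Fetch, Read };

// Operand 6 of the OpTypeImage instruction in this selector's numbering.
ImageAccessOp pickImageAccessOp(int64_t SampledOperand) {
  return SampledOperand == 1 ? ImageAccessOp::Fetch : ImageAccessOp::Read;
}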
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index cf85691..9bda8a4 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -158,7 +158,6 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
Options.X = F.getFnAttribute(Y).getValueAsBool(); \
} while (0)
- RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math");
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b5f8ee5..b54a1e7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29516,11 +29516,8 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
if (IgnoreNaN || DAG.isKnownNeverNaN(IsNum ? NewY : NewX))
return MinMax;
- if (DAG.isKnownNeverNaN(NewX))
- NewX = NewY;
-
- SDValue IsNaN =
- DAG.getSetCC(DL, SetCCType, NewX, NewX, IsNum ? ISD::SETO : ISD::SETUO);
+ SDValue NaNSrc = IsNum ? MinMax : NewX;
+ SDValue IsNaN = DAG.getSetCC(DL, SetCCType, NaNSrc, NaNSrc, ISD::SETUO);
return DAG.getSelect(DL, VT, IsNaN, NewX, MinMax);
}
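
For reference on the IsNum case above: the *_num variants (presumably FMINIMUMNUM/FMAXIMUMNUM) return the non-NaN operand when exactly one input is NaN, so after the hardware min/max only a both-NaN input can leave a NaN in MinMax; checking MinMax itself with SETUO therefore suffices. A hedged scalar sketch of minimumNumber semantics (ignoring signed-zero ordering and signaling-NaN details):

#include <cmath>

double minimumNum(double X, double Y) {
  if (std::isnan(X))
    return std::isnan(Y) ? X : Y; // NaN result only when both inputs are NaN
  if (std::isnan(Y))
    return X;
  return X < Y ? X : Y;
}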
diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp
index 7882045..0fce5b9 100644
--- a/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -567,8 +567,8 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT) {
default:
if (TT.isOSNetBSD())
return "apcs-gnu";
- if (TT.isOSFreeBSD() || TT.isOSOpenBSD() || TT.isOSHaiku() ||
- TT.isOHOSFamily())
+ if (TT.isOSFreeBSD() || TT.isOSFuchsia() || TT.isOSOpenBSD() ||
+ TT.isOSHaiku() || TT.isOHOSFamily())
return "aapcs-linux";
return "aapcs";
}
@@ -648,6 +648,8 @@ StringRef ARM::getARMCPUForArch(const llvm::Triple &Triple, StringRef MArch) {
}
case llvm::Triple::OpenBSD:
return "cortex-a8";
+ case llvm::Triple::Fuchsia:
+ return "cortex-a53";
default:
switch (Triple.getEnvironment()) {
case llvm::Triple::EABIHF:
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index 31126cc..f08a0c0 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -765,6 +765,12 @@ Error RISCVISAInfo::checkDependency() {
if (HasZvl && !HasVector)
return getExtensionRequiresError("zvl*b", "v' or 'zve*");
+ if (Exts.count("xsfvfbfexp16e") &&
+ !(Exts.count("zvfbfmin") || Exts.count("zvfbfa")))
+ return createStringError(errc::invalid_argument,
+ "'xsfvfbfexp16e' requires 'zvfbfmin' or "
+ "'zvfbfa' extension to also be specified");
+
if (HasD && (HasC || Exts.count("zcd")))
for (auto Ext : ZcdOverlaps)
if (Exts.count(Ext.str()))
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 46fb567..aa1346d 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1271,7 +1271,7 @@ bool LowerTypeTestsModule::hasBranchTargetEnforcement() {
// the module flags.
if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("branch-target-enforcement")))
- HasBranchTargetEnforcement = (BTE->getZExtValue() != 0);
+ HasBranchTargetEnforcement = !BTE->isZero();
else
HasBranchTargetEnforcement = 0;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 975498f..5aa8de3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3455,27 +3455,45 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
// select a, false, b -> select !a, b, false
if (match(TrueVal, m_Specific(Zero))) {
Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
- return SelectInst::Create(NotCond, FalseVal, Zero);
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ SelectInst::Create(NotCond, FalseVal, Zero, "", nullptr, MDFrom);
+ NewSI->swapProfMetadata();
+ return NewSI;
}
// select a, b, true -> select !a, true, b
if (match(FalseVal, m_Specific(One))) {
Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
- return SelectInst::Create(NotCond, One, TrueVal);
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ SelectInst::Create(NotCond, One, TrueVal, "", nullptr, MDFrom);
+ NewSI->swapProfMetadata();
+ return NewSI;
}
// DeMorgan in select form: !a && !b --> !(a || b)
// select !a, !b, false --> not (select a, true, b)
if (match(&SI, m_LogicalAnd(m_Not(m_Value(A)), m_Not(m_Value(B)))) &&
(CondVal->hasOneUse() || TrueVal->hasOneUse()) &&
- !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr()))
- return BinaryOperator::CreateNot(Builder.CreateSelect(A, One, B));
+ !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) {
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ cast<SelectInst>(Builder.CreateSelect(A, One, B, "", MDFrom));
+ NewSI->swapProfMetadata();
+ return BinaryOperator::CreateNot(NewSI);
+ }
// DeMorgan in select form: !a || !b --> !(a && b)
// select !a, true, !b --> not (select a, b, false)
if (match(&SI, m_LogicalOr(m_Not(m_Value(A)), m_Not(m_Value(B)))) &&
(CondVal->hasOneUse() || FalseVal->hasOneUse()) &&
- !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr()))
- return BinaryOperator::CreateNot(Builder.CreateSelect(A, B, Zero));
+ !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) {
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ cast<SelectInst>(Builder.CreateSelect(A, B, Zero, "", MDFrom));
+ NewSI->swapProfMetadata();
+ return BinaryOperator::CreateNot(NewSI);
+ }
// select (select a, true, b), true, b -> select a, true, b
if (match(CondVal, m_Select(m_Value(A), m_One(), m_Value(B))) &&
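
The swapProfMetadata() calls above exist because each rewrite negates the condition and exchanges the select's arms; branch-weight metadata copied from the original instruction would otherwise describe the arms in the wrong order. A toy model of the invariant (illustrative struct, not the LLVM API):

#include <cassert>
#include <utility>

struct BranchWeights { unsigned True, False; };

// select a, false, b  ->  select !a, b, false: the arms swap, so the
// weights must swap too (mirrors swapProfMetadata()).
BranchWeights invertSelectWeights(BranchWeights W) {
  std::swap(W.True, W.False);
  return W;
}

int main() {
  BranchWeights W = invertSelectWeights({90, 10});
  assert(W.True == 10 && W.False == 90);
  return 0;
}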
diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
index 40720ae..0873845 100644
--- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
@@ -31,10 +31,12 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/AllocToken.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -53,29 +55,12 @@
#include <variant>
using namespace llvm;
+using TokenMode = AllocTokenMode;
#define DEBUG_TYPE "alloc-token"
namespace {
-//===--- Constants --------------------------------------------------------===//
-
-enum class TokenMode : unsigned {
- /// Incrementally increasing token ID.
- Increment = 0,
-
- /// Simple mode that returns a statically-assigned random token ID.
- Random = 1,
-
- /// Token ID based on allocated type hash.
- TypeHash = 2,
-
- /// Token ID based on allocated type hash, where the top half ID-space is
- /// reserved for types that contain pointers and the bottom half for types
- /// that do not contain pointers.
- TypeHashPointerSplit = 3,
-};
-
//===--- Command-line options ---------------------------------------------===//
cl::opt<TokenMode> ClMode(
@@ -131,7 +116,7 @@ cl::opt<uint64_t> ClFallbackToken(
//===--- Statistics -------------------------------------------------------===//
-STATISTIC(NumFunctionsInstrumented, "Functions instrumented");
+STATISTIC(NumFunctionsModified, "Functions modified");
STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
//===----------------------------------------------------------------------===//
@@ -140,9 +125,19 @@ STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
///
/// Expected format is: !{<type-name>, <contains-pointer>}
MDNode *getAllocTokenMetadata(const CallBase &CB) {
- MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
- if (!Ret)
- return nullptr;
+ MDNode *Ret = nullptr;
+ if (auto *II = dyn_cast<IntrinsicInst>(&CB);
+ II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+ auto *MDV = cast<MetadataAsValue>(II->getArgOperand(0));
+ Ret = cast<MDNode>(MDV->getMetadata());
+ // If the intrinsic has an empty MDNode, type inference failed.
+ if (Ret->getNumOperands() == 0)
+ return nullptr;
+ } else {
+ Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
+ if (!Ret)
+ return nullptr;
+ }
assert(Ret->getNumOperands() == 2 && "bad !alloc_token");
assert(isa<MDString>(Ret->getOperand(0)));
assert(isa<ConstantAsMetadata>(Ret->getOperand(1)));
@@ -206,22 +201,19 @@ public:
using ModeBase::ModeBase;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
- const auto [N, H] = getHash(CB, ORE);
- return N ? boundedToken(H) : H;
- }
-protected:
- std::pair<MDNode *, uint64_t> getHash(const CallBase &CB,
- OptimizationRemarkEmitter &ORE) {
if (MDNode *N = getAllocTokenMetadata(CB)) {
MDString *S = cast<MDString>(N->getOperand(0));
- return {N, getStableSipHash(S->getString())};
+ AllocTokenMetadata Metadata{S->getString(), containsPointer(N)};
+ if (auto Token = getAllocToken(TokenMode::TypeHash, Metadata, MaxTokens))
+ return *Token;
}
// Fallback.
remarkNoMetadata(CB, ORE);
- return {nullptr, ClFallbackToken};
+ return ClFallbackToken;
}
+protected:
/// Remark that there was no precise type information.
static void remarkNoMetadata(const CallBase &CB,
OptimizationRemarkEmitter &ORE) {
@@ -242,20 +234,18 @@ public:
using TypeHashMode::TypeHashMode;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
- if (MaxTokens == 1)
- return 0;
- const uint64_t HalfTokens = MaxTokens / 2;
- const auto [N, H] = getHash(CB, ORE);
- if (!N) {
- // Pick the fallback token (ClFallbackToken), which by default is 0,
- // meaning it'll fall into the pointer-less bucket. Override by setting
- // -alloc-token-fallback if that is the wrong choice.
- return H;
+ if (MDNode *N = getAllocTokenMetadata(CB)) {
+ MDString *S = cast<MDString>(N->getOperand(0));
+ AllocTokenMetadata Metadata{S->getString(), containsPointer(N)};
+ if (auto Token = getAllocToken(TokenMode::TypeHashPointerSplit, Metadata,
+ MaxTokens))
+ return *Token;
}
- uint64_t Hash = H % HalfTokens; // base hash
- if (containsPointer(N))
- Hash += HalfTokens;
- return Hash;
+ // Pick the fallback token (ClFallbackToken), which by default is 0, meaning
+ // it'll fall into the pointer-less bucket. Override by setting
+ // -alloc-token-fallback if that is the wrong choice.
+ remarkNoMetadata(CB, ORE);
+ return ClFallbackToken;
}
};
@@ -315,6 +305,9 @@ private:
FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID,
LibFunc OriginalFunc);
+ /// Lower alloc_token_* intrinsics.
+ void replaceIntrinsicInst(IntrinsicInst *II, OptimizationRemarkEmitter &ORE);
+
/// Return the token ID from metadata in the call.
uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode);
@@ -336,21 +329,32 @@ bool AllocToken::instrumentFunction(Function &F) {
// Do not apply any instrumentation for naked functions.
if (F.hasFnAttribute(Attribute::Naked))
return false;
- if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
- return false;
// Don't touch available_externally functions, their actual body is elsewhere.
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
return false;
- // Only instrument functions that have the sanitize_alloc_token attribute.
- if (!F.hasFnAttribute(Attribute::SanitizeAllocToken))
- return false;
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls;
+ SmallVector<IntrinsicInst *, 4> IntrinsicInsts;
+
+ // Only instrument functions that have the sanitize_alloc_token attribute.
+ const bool InstrumentFunction =
+ F.hasFnAttribute(Attribute::SanitizeAllocToken) &&
+ !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
// Collect all allocation calls to avoid iterator invalidation.
for (Instruction &I : instructions(F)) {
+ // Collect all alloc_token_* intrinsics.
+ if (auto *II = dyn_cast<IntrinsicInst>(&I);
+ II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+ IntrinsicInsts.emplace_back(II);
+ continue;
+ }
+
+ if (!InstrumentFunction)
+ continue;
+
auto *CB = dyn_cast<CallBase>(&I);
if (!CB)
continue;
@@ -359,11 +363,21 @@ bool AllocToken::instrumentFunction(Function &F) {
}
bool Modified = false;
- for (auto &[CB, Func] : AllocCalls)
- Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
- if (Modified)
- NumFunctionsInstrumented++;
+ if (!AllocCalls.empty()) {
+ for (auto &[CB, Func] : AllocCalls)
+ Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
+ if (Modified)
+ NumFunctionsModified++;
+ }
+
+ if (!IntrinsicInsts.empty()) {
+ for (auto *II : IntrinsicInsts)
+ replaceIntrinsicInst(II, ORE);
+ Modified = true;
+ NumFunctionsModified++;
+ }
+
return Modified;
}
@@ -381,7 +395,7 @@ AllocToken::shouldInstrumentCall(const CallBase &CB,
if (TLI.getLibFunc(*Callee, Func)) {
if (isInstrumentableLibFunc(Func, CB, TLI))
return Func;
- } else if (Options.Extended && getAllocTokenMetadata(CB)) {
+ } else if (Options.Extended && CB.getMetadata(LLVMContext::MD_alloc_token)) {
return NotLibFunc;
}
@@ -528,6 +542,16 @@ FunctionCallee AllocToken::getTokenAllocFunction(const CallBase &CB,
return TokenAlloc;
}
+void AllocToken::replaceIntrinsicInst(IntrinsicInst *II,
+ OptimizationRemarkEmitter &ORE) {
+ assert(II->getIntrinsicID() == Intrinsic::alloc_token_id);
+
+ uint64_t TokenID = getToken(*II, ORE);
+ Value *V = ConstantInt::get(IntPtrTy, TokenID);
+ II->replaceAllUsesWith(V);
+ II->eraseFromParent();
+}
+
} // namespace
AllocTokenPass::AllocTokenPass(AllocTokenOptions Opts)
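
The pointer-split partitioning that this file now obtains from Support/AllocToken.h matches what the deleted inline code computed: the bottom half of the token space is reserved for pointer-free types, the top half for pointer-containing ones. A self-contained sketch of that partitioning (assumed shape of the TypeHashPointerSplit mode, not its exact implementation):

#include <cstdint>

uint64_t typeHashPointerSplit(uint64_t TypeHash, bool ContainsPointer,
                              uint64_t MaxTokens) {
  if (MaxTokens == 1)
    return 0;
  const uint64_t HalfTokens = MaxTokens / 2;
  uint64_t Token = TypeHash % HalfTokens; // base hash in the lower half
  if (ContainsPointer)
    Token += HalfTokens; // pointer-containing types land in the upper half
  return Token;
}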
diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
index d18c0d0..80e77e09 100644
--- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -2020,7 +2020,6 @@ static void moveFastMathFlags(Function &F,
F.removeFnAttr(attr); \
FMF.set##setter(); \
}
- MOVE_FLAG("unsafe-fp-math", Fast)
MOVE_FLAG("no-infs-fp-math", NoInfs)
MOVE_FLAG("no-nans-fp-math", NoNaNs)
MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index b4e4dc2..c95c887 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -217,32 +217,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
return Parent->getEnclosingBlockWithPredecessors();
}
-bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
- const VPDominatorTree &VPDT) {
- auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
- if (!VPBB)
- return false;
-
- // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
- // VPBB as its entry, i.e., free of predecessors.
- if (auto *R = VPBB->getParent())
- return !R->isReplicator() && !VPBB->hasPredecessors();
-
- // A header dominates its second predecessor (the latch), with the other
- // predecessor being the preheader
- return VPB->getPredecessors().size() == 2 &&
- VPDT.dominates(VPB, VPB->getPredecessors()[1]);
-}
-
-bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
- const VPDominatorTree &VPDT) {
- // A latch has a header as its second successor, with its other successor
- // leaving the loop. A preheader OTOH has a header as its first (and only)
- // successor.
- return VPB->getNumSuccessors() == 2 &&
- VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
-}
-
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
iterator It = begin();
while (It != end() && It->isPhi())
@@ -768,8 +742,12 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
VPRegionBlock *VPRegionBlock::clone() {
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
- auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
- getName(), isReplicator());
+ VPlan &Plan = *getPlan();
+ VPRegionBlock *NewRegion =
+ isReplicator()
+ ? Plan.createReplicateRegion(NewEntry, NewExiting, getName())
+ : Plan.createLoopRegion(getName(), NewEntry, NewExiting);
+
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
Block->setParent(NewRegion);
return NewRegion;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8274431..167ba55 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4450,22 +4450,24 @@ public:
return VPB;
}
- /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
- /// IsReplicator is true, the region is a replicate region. The returned block
- /// is owned by the VPlan and deleted once the VPlan is destroyed.
- VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
- const std::string &Name = "",
- bool IsReplicator = false) {
- auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
+ /// Create a new loop region with \p Name and entry and exiting blocks set
+ /// to \p Entry and \p Exiting respectively, if set. The returned block is
+ /// owned by the VPlan and deleted once the VPlan is destroyed.
+ VPRegionBlock *createLoopRegion(const std::string &Name = "",
+ VPBlockBase *Entry = nullptr,
+ VPBlockBase *Exiting = nullptr) {
+ auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
+ : new VPRegionBlock(Name);
CreatedBlocks.push_back(VPB);
return VPB;
}
- /// Create a new loop VPRegionBlock with \p Name and entry and exiting blocks set
- /// to nullptr. The returned block is owned by the VPlan and deleted once the
- /// VPlan is destroyed.
- VPRegionBlock *createVPRegionBlock(const std::string &Name = "") {
- auto *VPB = new VPRegionBlock(Name);
+ /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
+ /// returned block is owned by the VPlan and deleted once the VPlan is
+ /// destroyed.
+ VPRegionBlock *createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting,
+ const std::string &Name = "") {
+ auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
CreatedBlocks.push_back(VPB);
return VPB;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 332791a..65688a3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -406,7 +406,7 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
// LatchExitVPB, taking care to preserve the original predecessor & successor
// order of blocks. Set region entry and exiting after both HeaderVPB and
// LatchVPBB have been disconnected from their predecessors/successors.
- auto *R = Plan.createVPRegionBlock();
+ auto *R = Plan.createLoopRegion();
VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
VPBlockUtils::disconnectBlocks(LatchVPBB, R);
VPBlockUtils::connectBlocks(PreheaderVPBB, R);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7bf8d83..ff25ef5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -372,7 +372,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
auto *Exiting =
Plan.createVPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
VPRegionBlock *Region =
- Plan.createVPRegionBlock(Entry, Exiting, RegionName, true);
+ Plan.createReplicateRegion(Entry, Exiting, RegionName);
// Note: first set Entry as region entry and then connect successors starting
// from it in order, to propagate the "parent" of each VPBasicBlock.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 10801c0..32e4b88 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -8,6 +8,7 @@
#include "VPlanUtils.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -253,3 +254,29 @@ vputils::getRecipesForUncountableExit(VPlan &Plan,
return UncountableCondition;
}
+
+bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
+ const VPDominatorTree &VPDT) {
+ auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
+ if (!VPBB)
+ return false;
+
+ // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
+ // VPBB as its entry, i.e., free of predecessors.
+ if (auto *R = VPBB->getParent())
+ return !R->isReplicator() && !VPBB->hasPredecessors();
+
+ // A header dominates its second predecessor (the latch), with the other
+ // predecessor being the preheader
+ return VPB->getPredecessors().size() == 2 &&
+ VPDT.dominates(VPB, VPB->getPredecessors()[1]);
+}
+
+bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
+ const VPDominatorTree &VPDT) {
+ // A latch has a header as its second successor, with its other successor
+ // leaving the loop. A preheader OTOH has a header as its first (and only)
+ // successor.
+ return VPB->getNumSuccessors() == 2 &&
+ VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
+}