path: root/llvm/lib
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/DXILResource.cpp | 19
-rw-r--r--  llvm/lib/Analysis/DependenceAnalysis.cpp | 11
-rw-r--r--  llvm/lib/Analysis/LoopAccessAnalysis.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 28
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 19
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 22
-rw-r--r--  llvm/lib/Passes/StandardInstrumentations.cpp | 2
-rw-r--r--  llvm/lib/Remarks/Remark.cpp | 14
-rw-r--r--  llvm/lib/Support/SpecialCaseList.cpp | 32
-rw-r--r--  llvm/lib/Target/AArch64/AArch64BranchTargets.cpp | 14
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 89
-rw-r--r--  llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp | 13
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.h | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/SOPInstructions.td | 7
-rw-r--r--  llvm/lib/Target/ARM/ARMProcessors.td | 11
-rw-r--r--  llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp | 2
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 43
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 155
-rw-r--r--  llvm/lib/Target/PowerPC/P10InstrResources.td | 13
-rw-r--r--  llvm/lib/Target/PowerPC/P9InstrResources.td | 2
-rw-r--r--  llvm/lib/Target/PowerPC/PPC.td | 1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCBack2BackFusion.def | 8
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrFormats.td | 30
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrFuture.td | 30
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.td | 23
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 86
-rw-r--r--  llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h | 5
-rw-r--r--  llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp | 30
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstrInfo.h | 4
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp | 2
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 2
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 6
-rw-r--r--  llvm/lib/Target/X86/X86.h | 15
-rw-r--r--  llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp | 1
-rw-r--r--  llvm/lib/Target/X86/X86LowerAMXType.cpp | 96
-rw-r--r--  llvm/lib/Target/X86/X86PassRegistry.def | 7
-rw-r--r--  llvm/lib/Target/X86/X86TargetMachine.cpp | 2
-rw-r--r--  llvm/lib/TargetParser/Host.cpp | 1
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 23
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp | 21
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp | 11
-rw-r--r--  llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/PredicateInfo.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 27
66 files changed, 824 insertions, 216 deletions
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 6f19a68..27114e0 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -206,6 +206,14 @@ static dxil::ElementType toDXILElementType(Type *Ty, bool IsSigned) {
return ElementType::Invalid;
}
+static dxil::ElementType toDXILStorageType(dxil::ElementType ET) {
+ if (ET == dxil::ElementType::U64 || ET == dxil::ElementType::F64 ||
+ ET == dxil::ElementType::I64 || ET == dxil::ElementType::SNormF64 ||
+ ET == dxil::ElementType::UNormF64)
+ return dxil::ElementType::U32;
+ return ET;
+}
+
ResourceTypeInfo::ResourceTypeInfo(TargetExtType *HandleTy,
const dxil::ResourceClass RC_,
const dxil::ResourceKind Kind_)
@@ -569,10 +577,11 @@ ResourceTypeInfo::TypedInfo ResourceTypeInfo::getTyped() const {
auto [ElTy, IsSigned] = getTypedElementType(Kind, HandleTy);
dxil::ElementType ET = toDXILElementType(ElTy, IsSigned);
+ dxil::ElementType DXILStorageTy = toDXILStorageType(ET);
uint32_t Count = 1;
if (auto *VTy = dyn_cast<FixedVectorType>(ElTy))
Count = VTy->getNumElements();
- return {ET, Count};
+ return {ET, DXILStorageTy, Count};
}
dxil::SamplerFeedbackType ResourceTypeInfo::getFeedbackType() const {
@@ -636,7 +645,10 @@ void ResourceTypeInfo::print(raw_ostream &OS, const DataLayout &DL) const {
OS << " Alignment: " << Struct.AlignLog2 << "\n";
} else if (isTyped()) {
TypedInfo Typed = getTyped();
- OS << " Element Type: " << getElementTypeName(Typed.ElementTy) << "\n"
+ OS << " Element Type: " << getElementTypeName(Typed.ElementTy);
+ if (Typed.ElementTy != Typed.DXILStorageTy)
+ OS << " (stored as " << getElementTypeName(Typed.DXILStorageTy) << ")";
+ OS << "\n"
<< " Element Count: " << Typed.ElementCount << "\n";
} else if (isFeedback())
OS << " Feedback Type: " << getSamplerFeedbackTypeName(getFeedbackType())
@@ -714,7 +726,8 @@ MDTuple *ResourceInfo::getAsMetadata(Module &M,
Tags.push_back(getIntMD(RTI.getStruct(DL).Stride));
} else if (RTI.isTyped()) {
Tags.push_back(getIntMD(llvm::to_underlying(ExtPropTags::ElementType)));
- Tags.push_back(getIntMD(llvm::to_underlying(RTI.getTyped().ElementTy)));
+ Tags.push_back(
+ getIntMD(llvm::to_underlying(RTI.getTyped().DXILStorageTy)));
} else if (RTI.isFeedback()) {
Tags.push_back(
getIntMD(llvm::to_underlying(ExtPropTags::SamplerFeedbackKind)));
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index a572eef..84ee8c0 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -1131,9 +1131,14 @@ bool DependenceInfo::haveSameSD(const Loop *SrcLoop,
if (SE->hasLoopInvariantBackedgeTakenCount(DstLoop))
DstUP = SE->getBackedgeTakenCount(DstLoop);
- if (SrcUB != nullptr && DstUP != nullptr &&
- SE->isKnownPredicate(ICmpInst::ICMP_EQ, SrcUB, DstUP))
- return true;
+ if (SrcUB != nullptr && DstUP != nullptr) {
+ Type *WiderType = SE->getWiderType(SrcUB->getType(), DstUP->getType());
+ SrcUB = SE->getNoopOrZeroExtend(SrcUB, WiderType);
+ DstUP = SE->getNoopOrZeroExtend(DstUP, WiderType);
+
+ if (SE->isKnownPredicate(ICmpInst::ICMP_EQ, SrcUB, DstUP))
+ return true;
+ }
return false;
}
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7adb25d..e27a9b1 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2982,6 +2982,10 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
if (!StrideExpr)
return;
+ if (auto *Unknown = dyn_cast<SCEVUnknown>(StrideExpr))
+ if (isa<UndefValue>(Unknown->getValue()))
+ return;
+
LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
"versioning:");
LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index 93ae548..7bef3a8 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -86,10 +86,7 @@ template <> struct llvm::DenseMapInfo<VariableID> {
using VarLocInsertPt = PointerUnion<const Instruction *, const DbgRecord *>;
template <> struct std::hash<VarLocInsertPt> {
- using argument_type = VarLocInsertPt;
- using result_type = std::size_t;
-
- result_type operator()(const argument_type &Arg) const {
+ std::size_t operator()(const VarLocInsertPt &Arg) const {
return std::hash<void *>()(Arg.getOpaqueValue());
}
};
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8676060..cf221bb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16481,10 +16481,34 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
DAG, DL);
}
break;
- case ISD::AVGFLOORS:
- case ISD::AVGFLOORU:
case ISD::AVGCEILS:
case ISD::AVGCEILU:
+ // trunc (avgceilu (sext (x), sext (y))) -> avgceils(x, y)
+ // trunc (avgceils (zext (x), zext (y))) -> avgceilu(x, y)
+ if (N0.hasOneUse()) {
+ SDValue Op0 = N0.getOperand(0);
+ SDValue Op1 = N0.getOperand(1);
+ if (N0.getOpcode() == ISD::AVGCEILU) {
+ if (TLI.isOperationLegalOrCustom(ISD::AVGCEILS, VT) &&
+ Op0.getOpcode() == ISD::SIGN_EXTEND &&
+ Op1.getOpcode() == ISD::SIGN_EXTEND &&
+ Op0.getOperand(0).getValueType() == VT &&
+ Op1.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::AVGCEILS, DL, VT, Op0.getOperand(0),
+ Op1.getOperand(0));
+ } else {
+ if (TLI.isOperationLegalOrCustom(ISD::AVGCEILU, VT) &&
+ Op0.getOpcode() == ISD::ZERO_EXTEND &&
+ Op1.getOpcode() == ISD::ZERO_EXTEND &&
+ Op0.getOperand(0).getValueType() == VT &&
+ Op1.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::AVGCEILU, DL, VT, Op0.getOperand(0),
+ Op1.getOperand(0));
+ }
+ }
+ [[fallthrough]];
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
case ISD::ABDS:
case ISD::ABDU:
// (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 73df62a..41cea45 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1344,9 +1344,20 @@ void DWARFContext::dump(
DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint64_t Hash, bool IsDWO) {
DWARFUnitVector &DWOUnits = State->getDWOUnits();
if (const auto &TUI = getTUIndex()) {
- if (const auto *R = TUI.getFromHash(Hash))
- return dyn_cast_or_null<DWARFTypeUnit>(
- DWOUnits.getUnitForIndexEntry(*R));
+ if (const auto *R = TUI.getFromHash(Hash)) {
+ if (TUI.getVersion() >= 5) {
+ return dyn_cast_or_null<DWARFTypeUnit>(
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
+ } else {
+ DWARFUnit *TypesUnit = nullptr;
+ getDWARFObj().forEachTypesDWOSections([&](const DWARFSection &S) {
+ if (!TypesUnit)
+ TypesUnit =
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_EXT_TYPES, &S);
+ });
+ return dyn_cast_or_null<DWARFTypeUnit>(TypesUnit);
+ }
+ }
return nullptr;
}
return State->getTypeUnitMap(IsDWO).lookup(Hash);
@@ -1358,7 +1369,7 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
if (const auto &CUI = getCUIndex()) {
if (const auto *R = CUI.getFromHash(Hash))
return dyn_cast_or_null<DWARFCompileUnit>(
- DWOUnits.getUnitForIndexEntry(*R));
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
return nullptr;
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index ef59c82..da0bf03 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -161,17 +161,24 @@ DWARFUnit *DWARFUnitVector::getUnitForOffset(uint64_t Offset) const {
return nullptr;
}
-DWARFUnit *
-DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
- const auto *CUOff = E.getContribution(DW_SECT_INFO);
+DWARFUnit *DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E,
+ DWARFSectionKind Sec,
+ const DWARFSection *Section) {
+ const auto *CUOff = E.getContribution(Sec);
if (!CUOff)
return nullptr;
uint64_t Offset = CUOff->getOffset();
- auto end = begin() + getNumInfoUnits();
+ auto begin = this->begin();
+ auto end = begin + getNumInfoUnits();
+
+ if (Sec == DW_SECT_EXT_TYPES) {
+ begin = end;
+ end = this->end();
+ }
auto *CU =
- std::upper_bound(begin(), end, CUOff->getOffset(),
+ std::upper_bound(begin, end, CUOff->getOffset(),
[](uint64_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
return LHS < RHS->getNextUnitOffset();
});
@@ -181,13 +188,14 @@ DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
if (!Parser)
return nullptr;
- auto U = Parser(Offset, DW_SECT_INFO, nullptr, &E);
+ auto U = Parser(Offset, Sec, Section, &E);
if (!U)
return nullptr;
auto *NewCU = U.get();
this->insert(CU, std::move(U));
- ++NumInfoUnits;
+ if (Sec == DW_SECT_INFO)
+ ++NumInfoUnits;
return NewCU;
}
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index de29330..7290a86 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -2499,7 +2499,7 @@ void PrintCrashIRInstrumentation::registerCallbacks(
[&PIC, this](StringRef PassID, Any IR) {
SavedIR.clear();
raw_string_ostream OS(SavedIR);
- OS << formatv("*** Dump of {0}IR Before Last Pass {1}",
+ OS << formatv("; *** Dump of {0}IR Before Last Pass {1}",
llvm::forcePrintModuleIR() ? "Module " : "", PassID);
if (!isInteresting(IR, PassID, PIC.getPassNameForClassName(PassID))) {
OS << " Filtered Out ***\n";
diff --git a/llvm/lib/Remarks/Remark.cpp b/llvm/lib/Remarks/Remark.cpp
index 0e98cad..09f24e9 100644
--- a/llvm/lib/Remarks/Remark.cpp
+++ b/llvm/lib/Remarks/Remark.cpp
@@ -13,6 +13,7 @@
#include "llvm/Remarks/Remark.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include <optional>
using namespace llvm;
@@ -26,16 +27,13 @@ std::string Remark::getArgsAsMsg() const {
return Str;
}
-/// Returns the value of a specified key parsed from StringRef.
-std::optional<int> Argument::getValAsInt() const {
- APInt KeyVal;
- if (Val.getAsInteger(10, KeyVal))
- return std::nullopt;
- return KeyVal.getSExtValue();
+Argument *Remark::getArgByKey(StringRef Key) {
+ auto *It = find_if(Args, [&](auto &Arg) { return Arg.Key == Key; });
+ if (It == Args.end())
+ return nullptr;
+ return &*It;
}
-bool Argument::isValInt() const { return getValAsInt().has_value(); }
-
void RemarkLocation::print(raw_ostream &OS) const {
OS << "{ "
<< "File: " << SourceFilePath << ", Line: " << SourceLine
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 3a97185..246d90c 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -94,6 +94,19 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
StringRef Prefix = G.Pattern.prefix();
StringRef Suffix = G.Pattern.suffix();
+ if (Suffix.empty() && Prefix.empty()) {
+ // If both prefix and suffix are empty put into special tree to search by
+ // substring in a middle.
+ StringRef Substr = G.Pattern.longest_substr();
+ if (!Substr.empty()) {
+ // But only if substring is not empty. Searching this tree is more
+ // expensive.
+ auto &V = SubstrToGlob.emplace(Substr).first->second;
+ V.emplace_back(&G);
+ continue;
+ }
+ }
+
auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second;
auto &V = SToGlob.emplace(reverse(Suffix)).first->second;
V.emplace_back(&G);
@@ -119,6 +132,25 @@ void SpecialCaseList::GlobMatcher::match(
}
}
}
+
+ if (!SubstrToGlob.empty()) {
+ // As we don't know when substring exactly starts, we will try all
+ // possibilities. In most cases search will fail on first characters.
+ for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) {
+ for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) {
+ for (const auto *G : V) {
+ if (G->Pattern.match(Query)) {
+ Cb(G->Name, G->LineNo);
+ // As soon as we find a match in the vector, we can break for this
+ // vector, since the globs are already sorted by priority within the
+ // prefix group. However, we continue searching other prefix groups
+ // in the map, as they may contain a better match overall.
+ break;
+ }
+ }
+ }
+ }
+ }
}
SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
diff --git a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
index 137ff89..f13554f 100644
--- a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -47,6 +47,8 @@ public:
StringRef getPassName() const override { return AARCH64_BRANCH_TARGETS_NAME; }
private:
+ const AArch64Subtarget *Subtarget;
+
void addBTI(MachineBasicBlock &MBB, bool CouldCall, bool CouldJump,
bool NeedsWinCFI);
};
@@ -75,6 +77,8 @@ bool AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) {
<< "********** Function: " << MF.getName() << '\n');
const Function &F = MF.getFunction();
+ Subtarget = &MF.getSubtarget<AArch64Subtarget>();
+
// LLVM does not consider basic blocks which are the targets of jump tables
// to be address-taken (the address can't escape anywhere else), but they are
// used for indirect branches, so need BTI instructions.
@@ -100,9 +104,8 @@ bool AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) {
// a BTI, and pointing the indirect branch at that. For non-ELF targets we
// can't rely on that, so we assume that `CouldCall` is _always_ true due
// to the risk of long-branch thunks at link time.
- if (&MBB == &*MF.begin() &&
- (!MF.getSubtarget<AArch64Subtarget>().isTargetELF() ||
- (F.hasAddressTaken() || !F.hasLocalLinkage())))
+ if (&MBB == &*MF.begin() && (!Subtarget->isTargetELF() ||
+ (F.hasAddressTaken() || !F.hasLocalLinkage())))
CouldCall = true;
// If the block itself is address-taken, it could be indirectly branched
@@ -132,9 +135,6 @@ void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall,
<< (CouldCall ? "c" : "") << " to " << MBB.getName()
<< "\n");
- const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
- MBB.getParent()->getSubtarget().getInstrInfo());
-
unsigned HintNum = 32;
if (CouldCall)
HintNum |= 2;
@@ -162,6 +162,8 @@ void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall,
MBBI->getOpcode() == AArch64::PACIBSP))
return;
+ const AArch64InstrInfo *TII = Subtarget->getInstrInfo();
+
// Insert BTI exactly at the first executable instruction.
const DebugLoc DL = MBB.findDebugLoc(MBBI);
MachineInstr *BTI = BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT))
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 1e607f4..f63981b 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1871,7 +1871,7 @@ bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
}
bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
bool Modified = false;
for (auto &MBB : MF)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c76689f..0f7b34c 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -644,10 +644,10 @@ bool AArch64FrameLowering::hasReservedCallFrame(
MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const AArch64InstrInfo *TII =
- static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
- const AArch64TargetLowering *TLI =
- MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
+
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64InstrInfo *TII = Subtarget.getInstrInfo();
+ const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
[[maybe_unused]] MachineFrameInfo &MFI = MF.getFrameInfo();
DebugLoc DL = I->getDebugLoc();
unsigned Opc = I->getOpcode();
@@ -1319,8 +1319,8 @@ StackOffset AArch64FrameLowering::getStackOffset(const MachineFunction &MF,
// TODO: This function currently does not work for scalable vectors.
int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
int FI) const {
- const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ const AArch64RegisterInfo *RegInfo =
+ MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
? getFPOffset(MF, ObjectOffset).getFixed()
@@ -1343,10 +1343,9 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
TargetStackID::Value StackID, Register &FrameReg, bool PreferFP,
bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
- const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
@@ -1466,7 +1465,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
return FPOffset;
}
FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
- : (unsigned)AArch64::SP;
+ : MCRegister(AArch64::SP);
return SPOffset;
}
@@ -1589,8 +1588,8 @@ static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
namespace {
struct RegPairInfo {
- unsigned Reg1 = AArch64::NoRegister;
- unsigned Reg2 = AArch64::NoRegister;
+ Register Reg1;
+ Register Reg2;
int FrameIdx;
int Offset;
enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
@@ -1598,21 +1597,21 @@ struct RegPairInfo {
RegPairInfo() = default;
- bool isPaired() const { return Reg2 != AArch64::NoRegister; }
+ bool isPaired() const { return Reg2.isValid(); }
bool isScalable() const { return Type == PPR || Type == ZPR; }
};
} // end anonymous namespace
-unsigned findFreePredicateReg(BitVector &SavedRegs) {
+MCRegister findFreePredicateReg(BitVector &SavedRegs) {
for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
if (SavedRegs.test(PReg)) {
unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
- return PNReg;
+ return MCRegister(PNReg);
}
}
- return AArch64::NoRegister;
+ return MCRegister();
}
// The multivector LD/ST are available only for SME or SVE2p1 targets
@@ -1930,8 +1929,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
}
bool PTrueCreated = false;
for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {
- unsigned Reg1 = RPI.Reg1;
- unsigned Reg2 = RPI.Reg2;
+ Register Reg1 = RPI.Reg1;
+ Register Reg2 = RPI.Reg2;
unsigned StrOpc;
// Issue sequence of spills for cs regs. The first spill may be converted
@@ -1967,7 +1966,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
break;
}
- unsigned X0Scratch = AArch64::NoRegister;
+ Register X0Scratch;
auto RestoreX0 = make_scope_exit([&] {
if (X0Scratch != AArch64::NoRegister)
BuildMI(MBB, MI, DL, TII.get(TargetOpcode::COPY), AArch64::X0)
@@ -2009,11 +2008,15 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
}
}
- LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
- if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
- dbgs() << ") -> fi#(" << RPI.FrameIdx;
- if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
- dbgs() << ")\n");
+ LLVM_DEBUG({
+ dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
+ if (RPI.isPaired())
+ dbgs() << ", " << printReg(Reg2, TRI);
+ dbgs() << ") -> fi#(" << RPI.FrameIdx;
+ if (RPI.isPaired())
+ dbgs() << ", " << RPI.FrameIdx + 1;
+ dbgs() << ")\n";
+ });
assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
"Windows unwdinding requires a consecutive (FP,LR) pair");
@@ -2143,8 +2146,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
bool PTrueCreated = false;
for (const RegPairInfo &RPI : RegPairs) {
- unsigned Reg1 = RPI.Reg1;
- unsigned Reg2 = RPI.Reg2;
+ Register Reg1 = RPI.Reg1;
+ Register Reg2 = RPI.Reg2;
// Issue sequence of restores for cs regs. The last restore may be converted
// to a post-increment load later by emitEpilogue if the callee-save stack
@@ -2176,11 +2179,15 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
case RegPairInfo::VG:
continue;
}
- LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
- if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
- dbgs() << ") -> fi#(" << RPI.FrameIdx;
- if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
- dbgs() << ")\n");
+ LLVM_DEBUG({
+ dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
+ if (RPI.isPaired())
+ dbgs() << ", " << printReg(Reg2, TRI);
+ dbgs() << ") -> fi#(" << RPI.FrameIdx;
+ if (RPI.isPaired())
+ dbgs() << ", " << RPI.FrameIdx + 1;
+ dbgs() << ")\n";
+ });
// Windows unwind codes require consecutive registers if registers are
// paired. Make the switch here, so that the code below will save (x,x+1)
@@ -2435,8 +2442,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -2444,9 +2450,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
- unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
- ? RegInfo->getBaseRegister()
- : (unsigned)AArch64::NoRegister;
+ MCRegister BasePointerReg =
+ RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() : MCRegister();
unsigned ExtraCSSpill = 0;
bool HasUnpairedGPR64 = false;
@@ -2456,7 +2461,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
- const unsigned Reg = CSRegs[i];
+ const MCRegister Reg = CSRegs[i];
// Add the base pointer register to SavedRegs if it is callee-save.
if (Reg == BasePointerReg)
@@ -2470,7 +2475,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
bool RegUsed = SavedRegs.test(Reg);
- unsigned PairedReg = AArch64::NoRegister;
+ MCRegister PairedReg;
const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR128RegClass.contains(Reg)) {
@@ -2522,8 +2527,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// Find a suitable predicate register for the multi-vector spill/fill
// instructions.
- unsigned PnReg = findFreePredicateReg(SavedRegs);
- if (PnReg != AArch64::NoRegister)
+ MCRegister PnReg = findFreePredicateReg(SavedRegs);
+ if (PnReg.isValid())
AFI->setPredicateRegForFillSpill(PnReg);
// If no free callee-save has been found assign one.
if (!AFI->getPredicateRegForFillSpill() &&
@@ -2558,7 +2563,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned PPRCSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : SavedRegs.set_bits()) {
- auto *RC = TRI->getMinimalPhysRegClass(Reg);
+ auto *RC = TRI->getMinimalPhysRegClass(MCRegister(Reg));
assert(RC && "expected register class!");
auto SpillSize = TRI->getSpillSize(*RC);
bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
@@ -2600,7 +2605,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
LLVM_DEBUG({
dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
for (unsigned Reg : SavedRegs.set_bits())
- dbgs() << ' ' << printReg(Reg, RegInfo);
+ dbgs() << ' ' << printReg(MCRegister(Reg), RegInfo);
dbgs() << "\n";
});
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index d67182d..03dd1cd 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -649,7 +649,7 @@ bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
}
bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
bool Modified = false;
for (auto &MBB : MF)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 79975b0..5bfb19d9 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -620,7 +620,7 @@ AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
return RC;
}
-unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
+MCRegister AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 47d76f3..3b0f4f6 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -124,7 +124,7 @@ public:
bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override;
bool hasBasePointer(const MachineFunction &MF) const;
- unsigned getBaseRegister() const;
+ MCRegister getBaseRegister() const;
bool isArgumentRegister(const MachineFunction &MF,
MCRegister Reg) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index d695f26..b4a4f4c 100644
--- a/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -33,6 +33,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -49,8 +50,8 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
-#include <unordered_map>
#include <map>
+#include <unordered_map>
using namespace llvm;
@@ -67,7 +68,7 @@ namespace {
struct AArch64SIMDInstrOpt : public MachineFunctionPass {
static char ID;
- const TargetInstrInfo *TII;
+ const AArch64InstrInfo *TII;
MachineRegisterInfo *MRI;
TargetSchedModel SchedModel;
@@ -694,13 +695,9 @@ bool AArch64SIMDInstrOpt::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- TII = MF.getSubtarget().getInstrInfo();
MRI = &MF.getRegInfo();
- const TargetSubtargetInfo &ST = MF.getSubtarget();
- const AArch64InstrInfo *AAII =
- static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
- if (!AAII)
- return false;
+ const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
+ TII = ST.getInstrInfo();
SchedModel.init(&ST);
if (!SchedModel.hasInstrSchedModel())
return false;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 5c3e26e..4cd51d6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1114,7 +1114,6 @@ bool AArch64InstPrinter::printSyslAlias(const MCInst *MI,
} else
return false;
- std::string Str;
llvm::transform(Name, Name.begin(), ::tolower);
O << '\t' << Ins << '\t' << Reg.str() << ", " << Name;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1c8383c..54d94b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1466,6 +1466,13 @@ def FeatureClusters : SubtargetFeature< "clusters",
"Has clusters of workgroups support"
>;
+def FeatureWaitsBeforeSystemScopeStores : SubtargetFeature<
+ "waits-before-system-scope-stores",
+ "RequiresWaitsBeforeSystemScopeStores",
+ "true",
+ "Target requires waits for loads and atomics before system scope stores"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -2060,7 +2067,8 @@ def FeatureISAVersion12 : FeatureSet<
FeatureMaxHardClauseLength32,
Feature1_5xVGPRs,
FeatureMemoryAtomicFAddF32DenormalSupport,
- FeatureBVHDualAndBVH8Insts
+ FeatureBVHDualAndBVH8Insts,
+ FeatureWaitsBeforeSystemScopeStores,
]>;
def FeatureISAVersion12_50 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 5580e4c..09338c5 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9028,6 +9028,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
+ // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
+ // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
+ Inst.addOperand(MCOperand::createImm(0));
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ac660d5..f377b8a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -290,6 +290,7 @@ protected:
bool Has45BitNumRecordsBufferResource = false;
bool HasClusters = false;
+ bool RequiresWaitsBeforeSystemScopeStores = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1861,6 +1862,10 @@ public:
bool has45BitNumRecordsBufferResource() const {
return Has45BitNumRecordsBufferResource;
}
+
+ bool requiresWaitsBeforeSystemScopeStores() const {
+ return RequiresWaitsBeforeSystemScopeStores;
+ }
};
class GCNUserSGPRUsageInfo {
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index a177a42..6ab8d552 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2673,7 +2673,8 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
const unsigned Scope = CPol->getImm() & CPol::SCOPE;
// GFX12.0 only: Extra waits needed before system scope stores.
- if (!ST.hasGFX1250Insts() && !Atomic && Scope == CPol::SCOPE_SYS)
+ if (ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
+ Scope == CPol::SCOPE_SYS)
Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());
return Changed;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index ebd2e7e..d80a6f3 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1874,9 +1874,13 @@ void SIRegisterInfo::buildSpillLoadStore(
}
bool IsSrcDstDef = SrcDstRegState & RegState::Define;
+ bool PartialReloadCopy = (RemEltSize != EltSize) && !IsStore;
if (NeedSuperRegImpOperand &&
- (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef)))
+ (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef))) {
MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
+ if (PartialReloadCopy)
+ MIB.addReg(ValueReg, RegState::Implicit);
+ }
// The epilog restore of a wwm-scratch register can cause undesired
// optimization during machine-cp post PrologEpilogInserter if the same
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 84287b6..1931e0b 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -838,9 +838,10 @@ def S_CBRANCH_G_FORK : SOP2_Pseudo <
let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
}
-let Defs = [SCC] in {
-def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
-} // End Defs = [SCC]
+let isCommutable = 1, Defs = [SCC] in
+def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32",
+ [(set i32:$sdst, (UniformUnaryFrag<abs> (sub_oneuse i32:$src0, i32:$src1)))]
+>;
let SubtargetPredicate = isGFX8GFX9 in {
def S_RFE_RESTORE_B64 : SOP2_Pseudo <
diff --git a/llvm/lib/Target/ARM/ARMProcessors.td b/llvm/lib/Target/ARM/ARMProcessors.td
index 7453727..b60569e 100644
--- a/llvm/lib/Target/ARM/ARMProcessors.td
+++ b/llvm/lib/Target/ARM/ARMProcessors.td
@@ -421,6 +421,17 @@ def : ProcessorModel<"cortex-m52", CortexM55Model, [ARMv81mMainline,
FeatureMVEVectorCostFactor1,
HasMVEFloatOps]>;
+def : ProcessorModel<"star-mc3", CortexM55Model, [ARMv81mMainline,
+ FeatureDSP,
+ FeatureFPARMv8_D16,
+ FeatureHasNoBranchPredictor,
+ FeaturePACBTI,
+ FeatureUseMISched,
+ FeaturePreferBranchAlign32,
+ FeatureHasSlowFPVMLx,
+ FeatureMVEVectorCostFactor1,
+ HasMVEFloatOps]>;
+
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDivThumb,
FeatureHWDivARM,
diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
index dc84ae4..9da3bdb 100644
--- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
@@ -49,7 +49,7 @@ static StringRef getRCPrefix(dxil::ResourceClass RC) {
static StringRef getFormatName(const dxil::ResourceTypeInfo &RI) {
if (RI.isTyped()) {
- switch (RI.getTyped().ElementTy) {
+ switch (RI.getTyped().DXILStorageTy) {
case dxil::ElementType::I1:
return "i1";
case dxil::ElementType::I16:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ca4a655..80c96c6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1701,6 +1701,43 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
DAG.getConstant(Imm, DL, GRLenVT));
}
+/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
+///
+/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
+/// reverse whose mask likes:
+/// <7, 6, 5, 4, 3, 2, 1, 0>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue
+lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
+ // Only vectors with i8/i16 elements which cannot match other patterns
+ // directly needs to do this.
+ if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
+ VT != MVT::v16i16)
+ return SDValue();
+
+ if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
+ return SDValue();
+
+ int WidenNumElts = VT.getVectorNumElements() / 4;
+ SmallVector<int, 16> WidenMask(WidenNumElts, -1);
+ for (int i = 0; i < WidenNumElts; ++i)
+ WidenMask[i] = WidenNumElts - 1 - i;
+
+ MVT WidenVT = MVT::getVectorVT(
+ VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
+ SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
+ SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
+ DAG.getUNDEF(WidenVT), WidenMask);
+
+ return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
+ DAG.getBitcast(VT, WidenRev),
+ DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
+}
+
/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
@@ -2004,6 +2041,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result =
lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
return Result;
+ if ((Result =
+ lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
+ return Result;
// TODO: This comment may be enabled in the future to better match the
// pattern for instruction selection.
@@ -2622,6 +2662,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
return Result;
+ if ((Result =
+ lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
+ return Result;
// TODO: This comment may be enabled in the future to better match the
// pattern for instruction selection.
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 598735f..c923f0e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1082,6 +1082,161 @@ let Predicates = [hasPTX<70>, hasSM<80>] in {
"mbarrier.pending_count.b64",
[(set i32:$res, (int_nvvm_mbarrier_pending_count i64:$state))]>;
}
+
+class MBAR_UTIL<string op, string scope,
+ string space = "", string sem = "",
+ bit tl = 0, bit parity = 0> {
+ // The mbarrier instructions in PTX ISA are of the general form:
+ // mbarrier.op.semantics.scope.space.b64 arg1, arg2 ...
+ // where:
+ // op -> arrive, expect_tx, complete_tx, arrive.expect_tx etc.
+ // semantics -> acquire, release, relaxed (default depends on the op)
+ // scope -> cta or cluster (default is cta-scope)
+ // space -> shared::cta or shared::cluster (default is shared::cta)
+ //
+ // The 'semantics' and 'scope' go together. If one is specified,
+ // then the other _must_ be specified. For example:
+ // (A) mbarrier.arrive <args> (valid, release and cta are default)
+ // (B) mbarrier.arrive.release.cta <args> (valid, sem/scope mentioned explicitly)
+ // (C) mbarrier.arrive.release <args> (invalid, needs scope)
+ // (D) mbarrier.arrive.cta <args> (invalid, needs order)
+ //
+ // Wherever possible, we prefer form (A) to (B) since it is available
+ // from early PTX versions. In most cases, explicitly specifying the
+ // scope requires a later version of PTX.
+ string _scope_asm = !cond(
+ !eq(scope, "scope_cluster") : "cluster",
+ !eq(scope, "scope_cta") : !if(!empty(sem), "", "cta"),
+ true : scope);
+ string _space_asm = !cond(
+ !eq(space, "space_cta") : "shared",
+ !eq(space, "space_cluster") : "shared::cluster",
+ true : space);
+
+ string _parity = !if(parity, "parity", "");
+ string asm_str = StrJoin<".", ["mbarrier", op, _parity,
+ sem, _scope_asm, _space_asm, "b64"]>.ret;
+
+ string _intr_suffix = StrJoin<"_", [!subst(".", "_", op), _parity,
+ !if(tl, "tl", ""),
+ sem, scope, space]>.ret;
+ string intr_name = "int_nvvm_mbarrier_" # _intr_suffix;
+
+ // Predicate checks:
+ // These are used only for the "test_wait/try_wait" variants as they
+ // have evolved since sm80 and are complex. The predicates for the
+ // remaining instructions are straightforward and have already been
+ // applied directly.
+ Predicate _sm_pred = !cond(!or(
+ !eq(op, "try_wait"),
+ !eq(scope, "scope_cluster"),
+ !eq(sem, "relaxed")) : hasSM<90>,
+ true : hasSM<80>);
+ Predicate _ptx_pred = !cond(
+ !eq(sem, "relaxed") : hasPTX<86>,
+ !ne(_scope_asm, "") : hasPTX<80>,
+ !eq(op, "try_wait") : hasPTX<78>,
+ parity : hasPTX<71>,
+ true : hasPTX<70>);
+ list<Predicate> preds = [_ptx_pred, _sm_pred];
+}
+
+foreach op = ["expect_tx", "complete_tx"] in {
+ foreach scope = ["scope_cta", "scope_cluster"] in {
+ foreach space = ["space_cta", "space_cluster"] in {
+ defvar intr = !cast<Intrinsic>(MBAR_UTIL<op, scope, space>.intr_name);
+ defvar suffix = StrJoin<"_", [op, scope, space]>.ret;
+ def mbar_ # suffix : BasicNVPTXInst<(outs), (ins ADDR:$addr, B32:$tx_count),
+ MBAR_UTIL<op, scope, space, "relaxed">.asm_str,
+ [(intr addr:$addr, i32:$tx_count)]>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ } // space
+ } // scope
+} // op
+
+multiclass MBAR_ARR_INTR<string op, string scope, string sem,
+ list<Predicate> pred = []> {
+ // When either of sem or scope is non-default, both have to
+ // be explicitly specified. So, explicitly state that
+ // sem is `release` when scope is `cluster`.
+ defvar asm_sem = !if(!and(!empty(sem), !eq(scope, "scope_cluster")),
+ "release", sem);
+
+ defvar asm_cta = MBAR_UTIL<op, scope, "space_cta", asm_sem>.asm_str;
+ defvar intr_cta = !cast<Intrinsic>(MBAR_UTIL<op, scope,
+ "space_cta", sem>.intr_name);
+
+ defvar asm_cluster = MBAR_UTIL<op, scope, "space_cluster", asm_sem>.asm_str;
+ defvar intr_cluster = !cast<Intrinsic>(MBAR_UTIL<op, scope,
+ "space_cluster", sem>.intr_name);
+
+ def _CTA : NVPTXInst<(outs B64:$state),
+ (ins ADDR:$addr, B32:$tx_count),
+ asm_cta # " $state, [$addr], $tx_count;",
+ [(set i64:$state, (intr_cta addr:$addr, i32:$tx_count))]>,
+ Requires<pred>;
+ def _CLUSTER : NVPTXInst<(outs),
+ (ins ADDR:$addr, B32:$tx_count),
+ asm_cluster # " _, [$addr], $tx_count;",
+ [(intr_cluster addr:$addr, i32:$tx_count)]>,
+ Requires<pred>;
+}
+foreach op = ["arrive", "arrive.expect_tx",
+ "arrive_drop", "arrive_drop.expect_tx"] in {
+ foreach scope = ["scope_cta", "scope_cluster"] in {
+ defvar suffix = !subst(".", "_", op) # scope;
+ defm mbar_ # suffix # _release : MBAR_ARR_INTR<op, scope, "", [hasPTX<80>, hasSM<90>]>;
+ defm mbar_ # suffix # _relaxed : MBAR_ARR_INTR<op, scope, "relaxed", [hasPTX<86>, hasSM<90>]>;
+ } // scope
+} // op
+
+multiclass MBAR_WAIT_INTR<string op, string scope, string sem, bit time_limit> {
+ // When either of sem or scope is non-default, both have to
+ // be explicitly specified. So, explicitly state that the
+ // semantics is `acquire` when the scope is `cluster`.
+ defvar asm_sem = !if(!and(!empty(sem), !eq(scope, "scope_cluster")),
+ "acquire", sem);
+
+ defvar asm_parity = MBAR_UTIL<op, scope, "space_cta", asm_sem,
+ time_limit, 1>.asm_str;
+ defvar pred_parity = MBAR_UTIL<op, scope, "space_cta", asm_sem,
+ time_limit, 1>.preds;
+ defvar intr_parity = !cast<Intrinsic>(MBAR_UTIL<op, scope, "space_cta",
+ sem, time_limit, 1>.intr_name);
+
+ defvar asm_state = MBAR_UTIL<op, scope, "space_cta", asm_sem,
+ time_limit>.asm_str;
+ defvar pred_state = MBAR_UTIL<op, scope, "space_cta", asm_sem,
+ time_limit>.preds;
+ defvar intr_state = !cast<Intrinsic>(MBAR_UTIL<op, scope, "space_cta",
+ sem, time_limit>.intr_name);
+
+ defvar ins_tl_dag = !if(time_limit, (ins B32:$tl), (ins));
+ defvar tl_suffix = !if(time_limit, ", $tl;", ";");
+ defvar intr_state_dag = !con((intr_state addr:$addr, i64:$state),
+ !if(time_limit, (intr_state i32:$tl), (intr_state)));
+ defvar intr_parity_dag = !con((intr_parity addr:$addr, i32:$phase),
+ !if(time_limit, (intr_parity i32:$tl), (intr_parity)));
+
+ def _STATE : NVPTXInst<(outs B1:$res), !con((ins ADDR:$addr, B64:$state), ins_tl_dag),
+ asm_state # " $res, [$addr], $state" # tl_suffix,
+ [(set i1:$res, intr_state_dag)]>,
+ Requires<pred_state>;
+ def _PARITY : NVPTXInst<(outs B1:$res), !con((ins ADDR:$addr, B32:$phase), ins_tl_dag),
+ asm_parity # " $res, [$addr], $phase" # tl_suffix,
+ [(set i1:$res, intr_parity_dag)]>,
+ Requires<pred_parity>;
+}
+foreach op = ["test_wait", "try_wait"] in {
+ foreach scope = ["scope_cta", "scope_cluster"] in {
+ foreach time_limit = !if(!eq(op, "try_wait"), [true, false], [false]) in {
+ defvar suffix = StrJoin<"_", [op, scope, !if(time_limit, "tl", "")]>.ret;
+ defm mbar_ # suffix # "_acquire" : MBAR_WAIT_INTR<op, scope, "", time_limit>;
+ defm mbar_ # suffix # "_relaxed" : MBAR_WAIT_INTR<op, scope, "relaxed", time_limit>;
+ } // time_limit
+ } // scope
+} // op
+
//-----------------------------------
// Math Functions
//-----------------------------------
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 92af04a..4695a6f 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -825,8 +825,7 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
(instrs
SRADI_rec,
- SRAWI_rec,
- SRAWI8_rec
+ SRAWI8_rec, SRAWI_rec
)>;
// Single crack instructions
@@ -834,8 +833,7 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
SRAD_rec,
- SRAW_rec,
- SRAW8_rec
+ SRAW8_rec, SRAW_rec
)>;
// 2-way crack instructions
@@ -883,7 +881,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
// 3 Cycles ALU operations, 1 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
(instrs
- ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, ADDItocL8, LI, LI8,
+ ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
ADDIC, ADDIC8,
ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
ADDME, ADDME8,
@@ -1864,7 +1862,7 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read]
(instrs
CP_PASTE8_rec, CP_PASTE_rec,
SLBIEG,
- TLBIE
+ TLBIE, TLBIE8P9, TLBIEP9
)>;
// Single crack instructions
@@ -1886,8 +1884,7 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read,
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
(instrs
ISYNC,
- SYNCP10,
- SYNC
+ SYNC, SYNCP10
)>;
// Expand instructions
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 801ae83..3f5f7d3 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -905,7 +905,7 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
SLBIEG,
STMW,
STSWI,
- TLBIE
+ TLBIE, TLBIEP9, TLBIE8P9
)>;
// Vector Store Instruction
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 4ff2f8a..5d9ec4a 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -409,6 +409,7 @@ def HasP10Vector : Predicate<"Subtarget->hasP10Vector()">;
def IsISA2_06 : Predicate<"Subtarget->isISA2_06()">;
def IsISA2_07 : Predicate<"Subtarget->isISA2_07()">;
def IsISA3_0 : Predicate<"Subtarget->isISA3_0()">;
+def IsNotISA3_0 : Predicate<"!Subtarget->isISA3_0()">;
def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
def IsNotISA3_1 : Predicate<"!Subtarget->isISA3_1()">;
def IsISAFuture : Predicate<"Subtarget->isISAFuture()">;
diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
index 6bb66bc..043c9e4 100644
--- a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
@@ -29,7 +29,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
ADDIStocHA8,
ADDIdtprelL32,
ADDItlsldLADDR32,
- ADDItocL8,
+ ADDItocL,
ADDME,
ADDME8,
ADDME8O,
@@ -209,7 +209,9 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
SRADI,
SRADI_32,
SRAW,
+ SRAW8,
SRAWI,
+ SRAWI8,
SRD,
SRD_rec,
SRW,
@@ -518,7 +520,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
ADDIStocHA8,
ADDIdtprelL32,
ADDItlsldLADDR32,
- ADDItocL8,
+ ADDItocL,
ADDME,
ADDME8,
ADDME8O,
@@ -747,7 +749,9 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
SRADI,
SRADI_32,
SRAW,
+ SRAW8,
SRAWI,
+ SRAWI8,
SRD,
SRD_rec,
SRW,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index fba1c66..1a77b00 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -850,6 +850,36 @@ class XForm_45<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_RSB5_UIMM2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, NoItinerary> {
+
+ bits<5> RS;
+ bits<5> RB;
+ bits<2> RIC;
+
+ let Pattern = pattern;
+
+ let Inst{6...10} = RS;
+ let Inst{11} = 0;
+ let Inst{12...13} = RIC;
+ let Inst{14...15} = 0;
+ let Inst{16...20} = RB;
+ let Inst{21...30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_RSB5_UIMM2_2UIMM1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : XForm_RSB5_UIMM2<opcode, xo, OOL, IOL, asmstr, pattern> {
+
+ bits<1> PRS;
+ bits<1> R;
+
+ let Inst{14} = PRS;
+ let Inst{15} = R;
+}
+
class X_FRT5_XO2_XO3_XO10<bits<6> opcode, bits<2> xo1, bits<3> xo2, bits<10> xo,
dag OOL, dag IOL, string asmstr, InstrItinClass itin,
list<dag> pattern>
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 1aefea1..b0bed71c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -11,6 +11,18 @@
//
//===----------------------------------------------------------------------===//
+class XForm_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern> : I<opcode, OOL, IOL, asmstr, NoItinerary> {
+ bits<5> RS;
+
+ let Pattern = pattern;
+
+ let Inst{6...10} = RS;
+ let Inst{11...20} = 0;
+ let Inst{21...30} = xo;
+ let Inst{31} = 0;
+}
+
class XOForm_RTAB5_L1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
string asmstr, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, NoItinerary> {
@@ -294,6 +306,24 @@ let Predicates = [IsISAFuture] in {
defm SUBFUS : XOForm_RTAB5_L1r<31, 72, (outs g8rc:$RT),
(ins g8rc:$RA, g8rc:$RB, u1imm:$L), "subfus",
"$RT, $L, $RA, $RB", []>;
+ def TLBSYNCIO
+ : XForm_RS5<31, 564, (outs), (ins g8rc:$RS), "tlbsyncio $RS", []>;
+ def PTESYNCIO
+ : XForm_RS5<31, 596, (outs), (ins g8rc:$RS), "ptesyncio $RS", []>;
+ def TLBIEP : XForm_RSB5_UIMM2_2UIMM1<31, 50, (outs),
+ (ins gprc:$RB, gprc:$RS, u2imm:$RIC,
+ u1imm:$PRS, u1imm:$R),
+ "tlbiep $RB, $RS, $RIC, $PRS, $R", []>;
+ def TLBIEIO
+ : XForm_RSB5_UIMM2<31, 18, (outs), (ins g8rc:$RB, g8rc:$RS, u2imm:$RIC),
+ "tlbieio $RB, $RS, $RIC", []>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def TLBIEP8
+ : XForm_RSB5_UIMM2_2UIMM1<31, 50, (outs),
+ (ins g8rc:$RB, g8rc:$RS, u2imm:$RIC,
+ u1imm:$PRS, u1imm:$R),
+ "tlbiep $RB, $RS, $RIC, $PRS, $R", []>;
+ }
}
let Predicates = [HasVSX, IsISAFuture] in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 44d1a44..f399811 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -4321,7 +4321,22 @@ def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB),
"tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RST, gprc:$RB),
- "tlbie $RB,$RST", IIC_SprTLBIE, []>;
+ "tlbie $RB, $RST", IIC_SprTLBIE, []>,
+ Requires<[IsNotISA3_0]>;
+
+let Predicates = [IsISA3_0] in {
+ def TLBIEP9 : XForm_RSB5_UIMM2_2UIMM1<31, 306, (outs),
+ (ins gprc:$RB, gprc:$RS, u2imm:$RIC,
+ u1imm:$PRS, u1imm:$R),
+ "tlbie $RB, $RS, $RIC, $PRS, $R", []>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def TLBIE8P9
+ : XForm_RSB5_UIMM2_2UIMM1<31, 306, (outs),
+ (ins g8rc:$RB, g8rc:$RS, u2imm:$RIC,
+ u1imm:$PRS, u1imm:$R),
+ "tlbie $RB, $RS, $RIC, $PRS, $R", []>;
+ }
+}
def TLBSX : XForm_tlb<914, (outs), (ins gprc:$RA, gprc:$RB), "tlbsx $RA, $RB",
IIC_LdStLoad>, Requires<[IsBookE]>;
@@ -4669,7 +4684,11 @@ def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
}
-def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
+def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>, Requires<[IsNotISA3_0]>;
+let Predicates = [IsISA3_0] in {
+ def : InstAlias<"tlbie $RB", (TLBIEP9 R0, gprc:$RB, 0, 0, 0)>;
+ def : InstAlias<"tlbie $RB, $RS", (TLBIEP9 gprc:$RB, gprc:$RS, 0, 0, 0)>;
+}
def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>,
Requires<[IsPPC4xx]>;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 53633ea..8198173 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -92,6 +92,8 @@ private:
void emitFence(AtomicOrdering FenceOrdering, SyncScope::ID FenceSSID,
MachineIRBuilder &MIB) const;
bool selectUnmergeValues(MachineInstr &MI, MachineIRBuilder &MIB) const;
+ bool selectIntrinsicWithSideEffects(MachineInstr &I,
+ MachineIRBuilder &MIB) const;
ComplexRendererFns selectShiftMask(MachineOperand &Root,
unsigned ShiftWidth) const;
@@ -714,6 +716,88 @@ static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {
return GenericOpc;
}
+bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
+ MachineInstr &I, MachineIRBuilder &MIB) const {
+ // Find the intrinsic ID.
+ unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
+ // Select the instruction.
+ switch (IntrinID) {
+ default:
+ return false;
+ case Intrinsic::riscv_vlm:
+ case Intrinsic::riscv_vle:
+ case Intrinsic::riscv_vle_mask:
+ case Intrinsic::riscv_vlse:
+ case Intrinsic::riscv_vlse_mask: {
+ bool IsMasked = IntrinID == Intrinsic::riscv_vle_mask ||
+ IntrinID == Intrinsic::riscv_vlse_mask;
+ bool IsStrided = IntrinID == Intrinsic::riscv_vlse ||
+ IntrinID == Intrinsic::riscv_vlse_mask;
+ LLT VT = MRI->getType(I.getOperand(0).getReg());
+ unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
+
+ // Result vector
+ const Register DstReg = I.getOperand(0).getReg();
+
+ // Sources
+ bool HasPassthruOperand = IntrinID != Intrinsic::riscv_vlm;
+ unsigned CurOp = 2;
+ SmallVector<SrcOp, 4> SrcOps; // Source registers.
+
+ // Passthru
+ if (HasPassthruOperand) {
+ auto PassthruReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PassthruReg);
+ } else {
+ SrcOps.push_back(Register(RISCV::NoRegister));
+ }
+
+ // Base Pointer
+ auto PtrReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PtrReg);
+
+ // Stride
+ if (IsStrided) {
+ auto StrideReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(StrideReg);
+ }
+
+ // Mask
+ if (IsMasked) {
+ auto MaskReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(MaskReg);
+ }
+
+ RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
+ const RISCV::VLEPseudo *P =
+ RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
+ static_cast<unsigned>(LMUL));
+
+ auto PseudoMI = MIB.buildInstr(P->Pseudo, {DstReg}, SrcOps);
+
+ // Select VL
+ auto VLOpFn = renderVLOp(I.getOperand(CurOp++));
+ for (auto &RenderFn : *VLOpFn)
+ RenderFn(PseudoMI);
+
+ // SEW
+ PseudoMI.addImm(Log2SEW);
+
+ // Policy
+ uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
+ if (IsMasked)
+ Policy = I.getOperand(CurOp++).getImm();
+ PseudoMI.addImm(Policy);
+
+ // Memref
+ PseudoMI.cloneMemRefs(I);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
+ }
+ }
+}
+
bool RISCVInstructionSelector::select(MachineInstr &MI) {
MachineIRBuilder MIB(MI);
@@ -984,6 +1068,8 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
return constrainSelectedInstRegOperands(*NewInst, TII, TRI, RBI);
}
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ return selectIntrinsicWithSideEffects(MI, MIB);
default:
return false;
}
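Note on the operand order assembled above: for the masked, strided form (llvm.riscv.vlse.mask) the generic intrinsic carries passthru, base pointer, stride, mask, VL and policy after the intrinsic ID, and the selected VLE pseudo receives them in that order followed by the log2(SEW) and policy immediates. A minimal standalone sketch that only mirrors this layout (plain C++, not LLVM code; the operand names are illustrative):

#include <cstdio>
#include <string>
#include <vector>

int main() {
  // Models selectIntrinsicWithSideEffects for llvm.riscv.vlse.mask:
  // both the stride and the mask operands are present.
  bool IsMasked = true, IsStrided = true, HasPassthru = true;

  std::vector<std::string> PseudoOps;
  PseudoOps.push_back(HasPassthru ? "passthru" : "NoRegister");
  PseudoOps.push_back("base-pointer");
  if (IsStrided) PseudoOps.push_back("stride");
  if (IsMasked)  PseudoOps.push_back("mask");
  PseudoOps.push_back("vl");         // rendered by renderVLOp
  PseudoOps.push_back("log2(sew)");  // SEW immediate
  PseudoOps.push_back("policy");     // tail/mask policy immediate

  for (const std::string &Op : PseudoOps)
    std::printf("%s\n", Op.c_str());
  return 0;
}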
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
index d76180c..ea41716 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
@@ -245,7 +245,10 @@ struct ExtendedBuiltin {
enum InstFlags {
// It is a half type
- INST_PRINTER_WIDTH16 = 1
+ INST_PRINTER_WIDTH16 = 1,
+ // It is a 64-bit type
+ INST_PRINTER_WIDTH64 = INST_PRINTER_WIDTH16 << 1,
+
};
} // namespace SPIRV
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
index 35a2ee1..62f5e47 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
@@ -167,6 +167,36 @@ void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
MI, FirstVariableIndex, OS);
printRemainingVariableOps(MI, FirstVariableIndex + 1, OS);
break;
+ case SPIRV::OpSwitch:
+ if (MI->getFlags() & SPIRV::INST_PRINTER_WIDTH64) {
+ // In binary format 64-bit types are split into two 32-bit operands,
+ // but in text format combine these into a single 64-bit value as
+ // this is what tools such as spirv-as require.
+ const unsigned NumOps = MI->getNumOperands();
+ for (unsigned OpIdx = NumFixedOps; OpIdx < NumOps;) {
+ if (OpIdx + 1 >= NumOps || !MI->getOperand(OpIdx).isImm() ||
+ !MI->getOperand(OpIdx + 1).isImm()) {
+ llvm_unreachable("Unexpected OpSwitch operands");
+ continue;
+ }
+ OS << ' ';
+ uint64_t LowBits = MI->getOperand(OpIdx).getImm();
+ uint64_t HighBits = MI->getOperand(OpIdx + 1).getImm();
+ uint64_t CombinedValue = (HighBits << 32) | LowBits;
+ OS << formatImm(CombinedValue);
+ OpIdx += 2;
+
+ // Next should be the label
+ if (OpIdx < NumOps) {
+ OS << ' ';
+ printOperand(MI, OpIdx, OS);
+ OpIdx++;
+ }
+ }
+ } else {
+ printRemainingVariableOps(MI, NumFixedOps, OS);
+ }
+ break;
case SPIRV::OpImageSampleImplicitLod:
case SPIRV::OpImageSampleDrefImplicitLod:
case SPIRV::OpImageSampleProjImplicitLod:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
index 4de9d6a..4c5b81f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
@@ -62,7 +62,9 @@ public:
namespace SPIRV {
enum AsmComments {
// It is a half type
- ASM_PRINTER_WIDTH16 = MachineInstr::TAsmComments
+ ASM_PRINTER_WIDTH16 = MachineInstr::TAsmComments,
+ // It is a 64 bit type
+ ASM_PRINTER_WIDTH64 = ASM_PRINTER_WIDTH16 << 1,
};
} // namespace SPIRV
diff --git a/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp
index e39666c..9aa07b5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp
@@ -25,6 +25,8 @@ void SPIRVMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI,
// Propagate previously set flags
if (MI->getAsmPrinterFlags() & SPIRV::ASM_PRINTER_WIDTH16)
OutMI.setFlags(SPIRV::INST_PRINTER_WIDTH16);
+ if (MI->getAsmPrinterFlags() & SPIRV::ASM_PRINTER_WIDTH64)
+ OutMI.setFlags(SPIRV::INST_PRINTER_WIDTH64);
const MachineFunction *MF = MI->getMF();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 4e2cc88..8f2fc01 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -105,6 +105,8 @@ void addNumImm(const APInt &Imm, MachineInstrBuilder &MIB) {
uint32_t LowBits = FullImm & 0xffffffff;
uint32_t HighBits = (FullImm >> 32) & 0xffffffff;
MIB.addImm(LowBits).addImm(HighBits);
+ // Asm Printer needs this info to print 64-bit operands correctly
+ MIB.getInstr()->setAsmPrinterFlag(SPIRV::ASM_PRINTER_WIDTH64);
return;
}
report_fatal_error("Unsupported constant bitwidth");
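Taken together, addNumImm above splits a 64-bit immediate into low/high 32-bit words for the binary encoding, and the OpSwitch printer earlier in this patch recombines them for the textual form. A minimal round-trip sketch of that arithmetic (plain C++; the literal is illustrative):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t FullImm = 0x123456789abcdef0ULL; // 64-bit OpSwitch literal

  // Encoding side (addNumImm): emit two 32-bit operands, low word first.
  uint32_t LowBits  = FullImm & 0xffffffff;
  uint32_t HighBits = (FullImm >> 32) & 0xffffffff;

  // Printing side (the OpSwitch case above): rebuild the 64-bit value so
  // tools such as spirv-as see a single literal.
  uint64_t Combined = (uint64_t(HighBits) << 32) | LowBits;

  assert(Combined == FullImm);
  return 0;
}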
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index de28faf..3da720f 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1714,7 +1714,7 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
}
if (Constraint[1] == '@') {
if (StringRef("{@cc}").compare(Constraint) == 0)
- return std::make_pair(0u, &SystemZ::GR32BitRegClass);
+ return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass);
}
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
@@ -1766,10 +1766,6 @@ SDValue SystemZTargetLowering::LowerAsmOutputForConstraint(
OpInfo.ConstraintVT.getSizeInBits() < 8)
report_fatal_error("Glue output operand is of invalid type");
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MRI.addLiveIn(SystemZ::CC);
-
if (Glue.getNode()) {
Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
Chain = Glue.getValue(1);
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 706ab2b..51b540a 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -14,7 +14,10 @@
#ifndef LLVM_LIB_TARGET_X86_X86_H
#define LLVM_LIB_TARGET_X86_X86_H
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -162,7 +165,17 @@ FunctionPass *createX86WinEHUnwindV2Pass();
/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics
/// or split the data to two <128 x i32>.
-FunctionPass *createX86LowerAMXTypePass();
+class X86LowerAMXTypePass : public PassInfoMixin<X86LowerAMXTypePass> {
+private:
+ const TargetMachine *TM;
+
+public:
+ X86LowerAMXTypePass(const TargetMachine *TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ static bool isRequired() { return true; }
+};
+
+FunctionPass *createX86LowerAMXTypeLegacyPass();
/// The pass transforms amx intrinsics to scalar operation if the function has
/// optnone attribute or it is O0.
diff --git a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
index d979517..2c0443d 100644
--- a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
@@ -10,6 +10,7 @@
/// TODO: Port CodeGen passes to new pass manager.
//===----------------------------------------------------------------------===//
+#include "X86.h"
#include "X86ISelDAGToDAG.h"
#include "X86TargetMachine.h"
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 0ba71ad..8ffd454 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -46,12 +46,14 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Analysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -64,7 +66,7 @@
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "lower-amx-type"
+#define DEBUG_TYPE "x86-lower-amx-type"
static bool isAMXCast(Instruction *II) {
return match(II,
@@ -137,7 +139,7 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) {
class ShapeCalculator {
private:
- TargetMachine *TM = nullptr;
+ const TargetMachine *TM = nullptr;
// In AMX intrinsics we let Shape = {Row, Col}, but the
// RealCol = Col / ElementSize. We may use the RealCol
@@ -145,7 +147,7 @@ private:
std::map<Value *, Value *> Col2Row, Row2Col;
public:
- ShapeCalculator(TargetMachine *TargetM) : TM(TargetM) {}
+ ShapeCalculator(const TargetMachine *TargetM) : TM(TargetM) {}
std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo);
std::pair<Value *, Value *> getShape(PHINode *Phi);
Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity);
@@ -1432,8 +1434,58 @@ bool X86LowerAMXCast::transformAllAMXCast() {
return Change;
}
+bool lowerAmxType(Function &F, const TargetMachine *TM,
+ TargetLibraryInfo *TLI) {
+ // Performance optimization: most code doesn't use AMX, so return early if
+ // there are no instructions that produce AMX values. This is sufficient, as
+ // AMX arguments and constants are not allowed -- so any producer of an AMX
+ // value must be an instruction.
+ // TODO: find a cheaper way for this, without looking at all instructions.
+ if (!containsAMXCode(F))
+ return false;
+
+ bool C = false;
+ ShapeCalculator SC(TM);
+ X86LowerAMXCast LAC(F, &SC);
+ C |= LAC.combineAMXcast(TLI);
+ // There might be remaining AMXcast after combineAMXcast and they should be
+ // handled elegantly.
+ C |= LAC.transformAllAMXCast();
+
+ X86LowerAMXType LAT(F, &SC);
+ C |= LAT.visit();
+
+ // Prepare for fast register allocation at O0.
+ // Todo: May better check the volatile model of AMX code, not just
+ // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
+ if (TM->getOptLevel() == CodeGenOptLevel::None) {
+ // If Front End not use O0 but the Mid/Back end use O0, (e.g.
+ // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
+ // sure the amx data is volatile, that is necessary for AMX fast
+ // register allocation.
+ if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
+ X86VolatileTileData VTD(F);
+ C = VTD.volatileTileData() || C;
+ }
+ }
+
+ return C;
+}
+
} // anonymous namespace
+PreservedAnalyses X86LowerAMXTypePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ bool Changed = lowerAmxType(F, TM, &TLI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
namespace {
class X86LowerAMXTypeLegacyPass : public FunctionPass {
@@ -1443,44 +1495,10 @@ public:
X86LowerAMXTypeLegacyPass() : FunctionPass(ID) {}
bool runOnFunction(Function &F) override {
- // Performance optimization: most code doesn't use AMX, so return early if
- // there are no instructions that produce AMX values. This is sufficient, as
- // AMX arguments and constants are not allowed -- so any producer of an AMX
- // value must be an instruction.
- // TODO: find a cheaper way for this, without looking at all instructions.
- if (!containsAMXCode(F))
- return false;
-
- bool C = false;
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
- ShapeCalculator SC(TM);
- X86LowerAMXCast LAC(F, &SC);
- C |= LAC.combineAMXcast(TLI);
- // There might be remaining AMXcast after combineAMXcast and they should be
- // handled elegantly.
- C |= LAC.transformAllAMXCast();
-
- X86LowerAMXType LAT(F, &SC);
- C |= LAT.visit();
-
- // Prepare for fast register allocation at O0.
- // Todo: May better check the volatile model of AMX code, not just
- // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
- if (TM->getOptLevel() == CodeGenOptLevel::None) {
- // If Front End not use O0 but the Mid/Back end use O0, (e.g.
- // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
- // sure the amx data is volatile, that is nessary for AMX fast
- // register allocation.
- if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
- X86VolatileTileData VTD(F);
- C = VTD.volatileTileData() || C;
- }
- }
-
- return C;
+ return lowerAmxType(F, TM, TLI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1501,6 +1519,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
false)
-FunctionPass *llvm::createX86LowerAMXTypePass() {
+FunctionPass *llvm::createX86LowerAMXTypeLegacyPass() {
return new X86LowerAMXTypeLegacyPass();
}
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index 3f2a433..fc25d55 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -12,11 +12,16 @@
// NOTE: NO INCLUDE GUARD DESIRED!
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+FUNCTION_PASS("x86-lower-amx-type", X86LowerAMXTypePass(this))
+#undef FUNCTION_PASS
+
#ifndef DUMMY_FUNCTION_PASS
#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this))
-DUMMY_FUNCTION_PASS("lower-amx-type", X86LowerAMXTypePass(*this))
DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction())
DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
#undef DUMMY_FUNCTION_PASS
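With the port above, the AMX type lowering is reachable through the new pass manager under the name x86-lower-amx-type, while codegen keeps using the legacy wrapper via createX86LowerAMXTypeLegacyPass(). A minimal driver sketch for the new-PM form, assuming it is compiled inside the X86 backend (so the private X86.h header is visible) and that an X86 TargetMachine *TM has been created elsewhere; the helper name is illustrative:

#include "X86.h"  // X86LowerAMXTypePass (backend-private header)
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

// Runs the new-PM AMX type lowering over every function in M.
static void runAMXTypeLowering(Module &M, const TargetMachine *TM) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM); // provides TargetLibraryAnalysis
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  FunctionPassManager FPM;
  FPM.addPass(X86LowerAMXTypePass(TM));
  for (Function &F : M)
    if (!F.isDeclaration())
      FPM.run(F, FAM);
}

Because the FUNCTION_PASS entry above is registered through the target's pass-builder callbacks, the same pass should also be reachable from opt pipelines on an X86 triple, e.g. -passes=x86-lower-amx-type.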
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 8dd6f3d..9a76abc 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -423,7 +423,7 @@ void X86PassConfig::addIRPasses() {
// We add both pass anyway and when these two passes run, we skip the pass
// based on the option level and option attribute.
addPass(createX86LowerAMXIntrinsicsPass());
- addPass(createX86LowerAMXTypePass());
+ addPass(createX86LowerAMXTypeLegacyPass());
TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 6065575..c8d1938 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -369,6 +369,7 @@ getHostCPUNameForARMFromComponents(StringRef Implementer, StringRef Hardware,
if (Implementer == "0x63") { // Arm China.
return StringSwitch<const char *>(Part)
.Case("0x132", "star-mc1")
+ .Case("0xd25", "star-mc3")
.Default("generic");
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 9b9fe26..f939e7a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1525,7 +1525,15 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
}
// Try to extend the entire expression tree to the wide destination type.
- if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
+ bool ShouldExtendExpression = true;
+ Value *TruncSrc = nullptr;
+ // It is not desirable to extend expression in the trunc + sext pattern when
+ // destination type is narrower than original (pre-trunc) type.
+ if (match(Src, m_Trunc(m_Value(TruncSrc))))
+ if (TruncSrc->getType()->getScalarSizeInBits() > DestBitSize)
+ ShouldExtendExpression = false;
+ if (ShouldExtendExpression && shouldChangeType(SrcTy, DestTy) &&
+ canEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
LLVM_DEBUG(
dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -1545,13 +1553,18 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
ShAmt);
}
- Value *X;
- if (match(Src, m_Trunc(m_Value(X)))) {
+ Value *X = TruncSrc;
+ if (X) {
// If the input has more sign bits than bits truncated, then convert
// directly to final type.
unsigned XBitSize = X->getType()->getScalarSizeInBits();
- if (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)
- return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
+ bool HasNSW = cast<TruncInst>(Src)->hasNoSignedWrap();
+ if (HasNSW || (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)) {
+ auto *Res = CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
+ if (auto *ResTrunc = dyn_cast<TruncInst>(Res); ResTrunc && HasNSW)
+ ResTrunc->setHasNoSignedWrap(true);
+ return Res;
+ }
// If input is a trunc from the destination type, then convert into shifts.
if (Src->hasOneUse() && X->getType() == DestTy) {
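The nsw case above relies on the fact that a truncation that cannot wrap in the signed sense preserves the signed value, so sign-extending the truncated value is the same as casting the original value directly. A small plain-C++ illustration of that identity (the concrete value is illustrative):

#include <cassert>
#include <cstdint>

int main() {
  int64_t X = -1234;                  // fits in int16_t, so the narrowing
                                      // conversion is "trunc nsw" in IR terms
  int16_t Narrow  = (int16_t)X;       // trunc nsw i64 -> i16
  int32_t ViaSExt = (int32_t)Narrow;  // sext i16 -> i32
  int32_t Direct  = (int32_t)X;       // one signed cast from the wide value

  assert(ViaSExt == Direct);          // the trunc+sext pair folds to one cast
  return 0;
}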
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 651e305..550dfc5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -105,6 +105,8 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I,
if (~KnownShrBits.Zero != ShlAmt)
return nullptr;
+ IRBuilderBase::InsertPointGuard Guard(IC.Builder);
+ IC.Builder.SetInsertPoint(I);
Value *ShrAmtZ =
IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()),
ShrAmt->getName() + ".z");
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9c8de45..67f837c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3358,21 +3358,21 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (TyAllocSize == 1) {
// Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
- // but only if the result pointer is only used as if it were an integer,
- // or both point to the same underlying object (otherwise provenance is
- // not necessarily retained).
+ // but only if the result pointer is only used as if it were an integer.
+ // (The case where the underlying object is the same is handled by
+ // InstSimplify.)
Value *X = GEP.getPointerOperand();
Value *Y;
- if (match(GEP.getOperand(1),
- m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
+ if (match(GEP.getOperand(1), m_Sub(m_PtrToIntOrAddr(m_Value(Y)),
+ m_PtrToIntOrAddr(m_Specific(X)))) &&
GEPType == Y->getType()) {
- bool HasSameUnderlyingObject =
- getUnderlyingObject(X) == getUnderlyingObject(Y);
+ bool HasNonAddressBits =
+ DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
bool Changed = false;
GEP.replaceUsesWithIf(Y, [&](Use &U) {
- bool ShouldReplace = HasSameUnderlyingObject ||
- isa<ICmpInst>(U.getUser()) ||
- isa<PtrToIntInst>(U.getUser());
+ bool ShouldReplace = isa<PtrToAddrInst>(U.getUser()) ||
+ (!HasNonAddressBits &&
+ isa<ICmpInst, PtrToIntInst>(U.getUser()));
Changed |= ShouldReplace;
return ShouldReplace;
});
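The canonicalization above rests on the address identity X + ((addr Y) - (addr X)) == addr Y, which only licenses replacing the GEP result for users that look at the address alone (icmp, ptrtoint, or ptrtoaddr when pointers carry non-address bits); the provenance of the GEP result is still X's, not Y's. A small plain-C++ sketch of just the address identity, kept inside one array so the arithmetic stays well defined:

#include <cassert>
#include <cstdint>

int main() {
  char Buf[16];
  char *X = Buf;
  char *Y = Buf + 5;

  // gep i8, X, (ptrtoint Y - ptrtoint X): same address as Y, derived from X.
  char *G = X + (reinterpret_cast<uintptr_t>(Y) - reinterpret_cast<uintptr_t>(X));

  // Address-only users (icmp, ptrtoint) cannot observe a difference, so the
  // replacement G -> Y is safe for them.
  assert(reinterpret_cast<uintptr_t>(G) == reinterpret_cast<uintptr_t>(Y));
  assert(G == Y);
  return 0;
}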
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index cb6ca72..7c364f8 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1539,7 +1539,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
IID == Intrinsic::experimental_vp_strided_load) {
Stride = VPI->getOperand(PtrOpNo + 1);
// Use the pointer alignment as the element alignment if the stride is a
- // mutiple of the pointer alignment. Otherwise, the element alignment
+ // multiple of the pointer alignment. Otherwise, the element alignment
// should be Align(1).
unsigned PointerAlign = Alignment.valueOrOne().value();
if (!isa<ConstantInt>(Stride) ||
@@ -2399,7 +2399,7 @@ void ModuleAddressSanitizer::instrumentGlobalsELF(
// Putting globals in a comdat changes the semantic and potentially cause
// false negative odr violations at link time. If odr indicators are used, we
- // keep the comdat sections, as link time odr violations will be dectected on
+ // keep the comdat sections, as link time odr violations will be detected on
// the odr indicator symbols.
bool UseComdatForGlobalsGC = UseOdrIndicator && !UniqueModuleId.empty();
@@ -3858,7 +3858,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
I->eraseFromParent();
}
- // Replace all uses of AddessReturnedByAlloca with NewAddressPtr.
+ // Replace all uses of AddressReturnedByAlloca with NewAddressPtr.
AI->replaceAllUsesWith(NewAddressPtr);
// We are done. Erase old alloca from parent.
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 72e8e50..0688bc7 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -359,7 +359,7 @@ class CHR {
unsigned Count = 0;
// Find out how many times region R is cloned. Note that if the parent
// of R is cloned, R is also cloned, but R's clone count is not updated
- // from the clone of the parent. We need to accumlate all the counts
+ // from the clone of the parent. We need to accumulate all the counts
// from the ancestors to get the clone count.
while (R) {
Count += DuplicationCount[R];
@@ -1513,7 +1513,7 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
BI->swapSuccessors();
// Don't need to swap this in terms of
// TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
- // mean whehter the branch is likely go into the if-then rather than
+ // mean whether the branch is likely go into the if-then rather than
// successor0/successor1 and because we can tell which edge is the then or
// the else one by comparing the destination to the region exit block.
continue;
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index cf87e35..1e5946a 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -83,7 +83,7 @@ static cl::opt<unsigned>
// ICP the candidate function even when only a declaration is present.
static cl::opt<bool> ICPAllowDecls(
"icp-allow-decls", cl::init(false), cl::Hidden,
- cl::desc("Promote the target candidate even when the defintion "
+ cl::desc("Promote the target candidate even when the definition "
" is not available"));
// ICP hot candidate functions only. When setting to false, non-cold functions
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 5e7548b..7795cce 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -139,7 +139,7 @@ cl::opt<bool> ConditionalCounterUpdate(
cl::init(false));
// If the option is not specified, the default behavior about whether
-// counter promotion is done depends on how instrumentaiton lowering
+// counter promotion is done depends on how instrumentation lowering
// pipeline is setup, i.e., the default value of true of this option
// does not mean the promotion will be done by default. Explicitly
// setting this option can override the default behavior.
@@ -1052,7 +1052,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
GlobalVariable *Name = Ind->getName();
auto It = ProfileDataMap.find(Name);
assert(It != ProfileDataMap.end() && It->second.DataVar &&
- "value profiling detected in function with no counter incerement");
+ "value profiling detected in function with no counter increment");
GlobalVariable *DataVar = It->second.DataVar;
uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
index 3c0f185..05616d8 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
@@ -490,7 +490,7 @@ void createProfileFileNameVar(Module &M) {
}
}
-// Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible
+// Set MemprofHistogramFlag as a Global variable in IR. This makes it accessible
// to the runtime, changing shadow count behavior.
void createMemprofHistogramFlagVar(Module &M) {
const StringRef VarName(MemProfHistogramFlagVar);
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 10b03bb..471c6ec 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3136,7 +3136,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// If we don't instrument it and it gets inlined,
/// our interceptor will not kick in and we will lose the memmove.
/// If we instrument the call here, but it does not get inlined,
- /// we will memove the shadow twice: which is bad in case
+ /// we will memmove the shadow twice: which is bad in case
/// of overlapping regions. So, we simply lower the intrinsic to a call.
///
/// Similar situation exists for memcpy and memset.
@@ -4775,7 +4775,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// _mm_round_ps / _mm_round_ps.
// Similar to maybeHandleSimpleNomemIntrinsic except
- // the second argument is guranteed to be a constant integer.
+ // the second argument is guaranteed to be a constant integer.
void handleRoundPdPsIntrinsic(IntrinsicInst &I) {
assert(I.getArgOperand(0)->getType() == I.getType());
assert(I.arg_size() == 2);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index f5b6686..5f87ed6 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -176,7 +176,7 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
assert(areAllBBsReachable(
F, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M)
.getManager()) &&
- "Function has unreacheable basic blocks. The expectation was that "
+ "Function has unreachable basic blocks. The expectation was that "
"DCE was run before.");
auto It = FlattenedProfile.find(AssignGUIDPass::getGUID(F));
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index 0a358d4..de7c169 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -253,7 +253,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
Value *RealContext = nullptr;
StructType *ThisContextType = nullptr;
- Value *TheRootFuctionData = nullptr;
+ Value *TheRootFunctionData = nullptr;
Value *ExpectedCalleeTLSAddr = nullptr;
Value *CallsiteInfoTLSAddr = nullptr;
const bool HasMusttail = [&F]() {
@@ -283,7 +283,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
Guid = Builder.getInt64(
AssignGUIDPass::getGUID(cast<Function>(*Mark->getNameValue())));
// The type of the context of this function is now knowable since we have
- // NumCallsites and NumCounters. We delcare it here because it's more
+ // NumCallsites and NumCounters. We declare it here because it's more
// convenient - we have the Builder.
ThisContextType = StructType::get(
F.getContext(),
@@ -291,28 +291,27 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
ArrayType::get(Builder.getPtrTy(), NumCallsites)});
// Figure out which way we obtain the context object for this function -
// if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the
- // former case, we also set TheRootFuctionData since we need to release it
- // at the end (plus it can be used to know if we have an entrypoint or a
- // regular function)
- // Don't set a name, they end up taking a lot of space and we don't need
- // them.
+ // former case, we also set TheRootFunctionData since we need to release
+ // it at the end (plus it can be used to know if we have an entrypoint or
+ // a regular function). Don't set a name, they end up taking a lot of
+ // space and we don't need them.
// Zero-initialize the FunctionData, except for functions that have
// musttail calls. There, we set the CtxRoot field to 1, which will be
// treated as a "can't be set as root".
- TheRootFuctionData = new GlobalVariable(
+ TheRootFunctionData = new GlobalVariable(
M, FunctionDataTy, false, GlobalVariable::InternalLinkage,
HasMusttail ? CannotBeRootInitializer
: Constant::getNullValue(FunctionDataTy));
if (ContextRootSet.contains(&F)) {
Context = Builder.CreateCall(
- StartCtx, {TheRootFuctionData, Guid, Builder.getInt32(NumCounters),
+ StartCtx, {TheRootFunctionData, Guid, Builder.getInt32(NumCounters),
Builder.getInt32(NumCallsites)});
ORE.emit(
[&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); });
} else {
- Context = Builder.CreateCall(GetCtx, {TheRootFuctionData, &F, Guid,
+ Context = Builder.CreateCall(GetCtx, {TheRootFunctionData, &F, Guid,
Builder.getInt32(NumCounters),
Builder.getInt32(NumCallsites)});
ORE.emit([&] {
@@ -399,7 +398,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
} else if (!HasMusttail && isa<ReturnInst>(I)) {
// Remember to release the context if we are an entrypoint.
IRBuilder<> Builder(&I);
- Builder.CreateCall(ReleaseCtx, {TheRootFuctionData});
+ Builder.CreateCall(ReleaseCtx, {TheRootFunctionData});
ContextWasReleased = true;
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 120c4f6..71736cf 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1957,7 +1957,7 @@ static bool InstrumentAllFunctions(
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
function_ref<LoopInfo *(Function &)> LookupLI,
PGOInstrumentationType InstrumentationType) {
- // For the context-sensitve instrumentation, we should have a separated pass
+ // For the context-sensitive instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (InstrumentationType == PGOInstrumentationType::FDO)
createIRLevelProfileFlagVar(M, InstrumentationType);
@@ -2248,7 +2248,7 @@ static bool annotateAllFunctions(
Func.populateCoverage();
continue;
}
- // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
+ // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
// it means the profile for the function is unrepresentative and this
// function is actually hot / warm. We will reset the function hot / cold
// attribute and drop all the profile counters.
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
index 4801ac7..210b126 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -481,15 +481,18 @@ StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
} // namespace
SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
- SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
- : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
+ SanitizerBinaryMetadataOptions Opts,
+ IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+ ArrayRef<std::string> IgnorelistFiles)
+ : Options(std::move(Opts)),
+ VFS(VFS ? std::move(VFS) : vfs::getRealFileSystem()),
+ IgnorelistFiles(std::move(IgnorelistFiles)) {}
PreservedAnalyses
SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
std::unique_ptr<SpecialCaseList> Ignorelist;
if (!IgnorelistFiles.empty()) {
- Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles,
- *vfs::getRealFileSystem());
+ Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, *VFS);
if (Ignorelist->inSection("metadata", "src", M.getSourceFileName()))
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index b74a070..09abf6a 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -318,6 +318,18 @@ private:
};
} // namespace
+SanitizerCoveragePass::SanitizerCoveragePass(
+ SanitizerCoverageOptions Options, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+ const std::vector<std::string> &AllowlistFiles,
+ const std::vector<std::string> &BlocklistFiles)
+ : Options(std::move(Options)),
+ VFS(VFS ? std::move(VFS) : vfs::getRealFileSystem()) {
+ if (AllowlistFiles.size() > 0)
+ Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, *this->VFS);
+ if (BlocklistFiles.size() > 0)
+ Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, *this->VFS);
+}
+
PreservedAnalyses SanitizerCoveragePass::run(Module &M,
ModuleAnalysisManager &MAM) {
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
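The new constructor lets callers supply their own virtual file system for the allow/block lists, falling back to the real file system when none is given. A minimal sketch of constructing the pass against an in-memory VFS (assumes an LLVM build environment; the file name and list contents are purely illustrative):

#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"

using namespace llvm;

// Builds a SanitizerCoveragePass whose allowlist lives only in memory.
SanitizerCoveragePass makeSanCovPass() {
  auto FS = makeIntrusiveRefCnt<vfs::InMemoryFileSystem>();
  FS->addFile("/sancov-allowlist.txt", /*ModificationTime=*/0,
              MemoryBuffer::getMemBuffer("src:*\n"));

  SanitizerCoverageOptions Opts;
  Opts.CoverageType = SanitizerCoverageOptions::SCK_Edge;
  return SanitizerCoveragePass(Opts, FS, {"/sancov-allowlist.txt"},
                               /*BlocklistFiles=*/{});
}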
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 0d48a35..fd0e9f1 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -355,7 +355,7 @@ static bool isVtableAccess(Instruction *I) {
}
// Do not instrument known races/"benign races" that come from compiler
-// instrumentatin. The user has no way of suppressing them.
+// instrumentation. The user has no way of suppressing them.
static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) {
// Peel off GEPs and BitCasts.
Addr = Addr->stripInBoundsOffsets();
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 9471ae3..78d4a57e 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -643,7 +643,7 @@ bool TypeSanitizer::instrumentWithShadowUpdate(
// doesn't match, then we call the runtime (which may yet determine that
// the mismatch is okay).
//
- // The checks generated below have the following strucutre.
+ // The checks generated below have the following structure.
//
// ; First we load the descriptor for the load from shadow memory and
// ; compare it against the type descriptor for the current access type.
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 371d9e6..a9ab3b3 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -819,7 +819,7 @@ public:
OS << "]";
} else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) {
OS << "; switch predicate info { CaseValue: " << *PS->CaseValue
- << " Switch:" << *PS->Switch << " Edge: [";
+ << " Edge: [";
PS->From->printAsOperand(OS);
OS << ",";
PS->To->printAsOperand(OS);
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index d831c27..c537be5c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7551,6 +7551,7 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
+ DomTreeUpdater *DTU,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
Value *Condition = SI->getCondition();
@@ -7573,12 +7574,6 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
if (SI->getNumCases() < 4)
return false;
- // We perform this optimization only for switches with
- // unreachable default case.
- // This assumtion will save us from checking if `Condition` is a power of two.
- if (!SI->defaultDestUnreachable())
- return false;
-
// Check that switch cases are powers of two.
SmallVector<uint64_t, 4> Values;
for (const auto &Case : SI->cases()) {
@@ -7598,6 +7593,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
Builder.SetInsertPoint(SI);
+ if (!SI->defaultDestUnreachable()) {
+ // Let non-power-of-two inputs jump to the default case, when the latter is
+ // reachable.
+ auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
+ auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
+
+ auto *OrigBB = SI->getParent();
+ auto *DefaultCaseBB = SI->getDefaultDest();
+ BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
+ auto It = OrigBB->getTerminator()->getIterator();
+ BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
+ It->eraseFromParent();
+
+ addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
+ }
+
// Replace each case with its trailing zeros number.
for (auto &Case : SI->cases()) {
auto *OrigValue = Case.getCaseValue();
@@ -7953,7 +7966,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
Options.ConvertSwitchToLookupTable))
return requestResimplify();
- if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
+ if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (reduceSwitchRange(SI, Builder, DL, TTI))
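At the source level, the change above lets the powers-of-two switch optimization fire even when the default destination is reachable: non-power-of-two inputs are first filtered out with a ctpop (population count) check, and the remaining cases are re-indexed by their trailing-zero counts. A plain-C++ sketch of the resulting shape (C++20 <bit>; the case bodies and values are illustrative):

#include <bit>
#include <cstdint>
#include <cstdio>

// Before: switch (C) { case 1: ... case 2: ... case 4: ... case 8: ... default: ... }
// After the transform, conceptually:
int classify(uint32_t C) {
  // Guard emitted when the default destination is reachable: anything that
  // is not a power of two branches straight to the default case.
  if (std::popcount(C) != 1)
    return -1;                     // default
  switch (std::countr_zero(C)) {   // cases re-indexed by log2 of the value
  case 0: return 10;               // was case 1
  case 1: return 20;               // was case 2
  case 2: return 30;               // was case 4
  case 3: return 40;               // was case 8
  default: return -1;
  }
}

int main() { std::printf("%d %d\n", classify(4), classify(5)); }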