Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/AssumptionCache.cpp | 8
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 38
-rw-r--r--  llvm/lib/Analysis/TrainingLogger.cpp | 5
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 53
-rw-r--r--  llvm/lib/CodeGen/CalcSpillWeights.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/InlineSpiller.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/SpillPlacement.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/SpillPlacement.h | 2
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp | 6
-rw-r--r--  llvm/lib/IR/DebugInfoMetadata.cpp | 33
-rw-r--r--  llvm/lib/MC/ELFObjectWriter.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.td | 43
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.h | 3
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 4
-rw-r--r--  llvm/lib/TargetParser/Host.cpp | 3
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfileProbe.cpp | 3
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 27
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 25
22 files changed, 168 insertions, 120 deletions
diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp
index bbcf25e..81b2667 100644
--- a/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/llvm/lib/Analysis/AssumptionCache.cpp
@@ -94,13 +94,7 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
if (Pred == ICmpInst::ICMP_EQ) {
// For equality comparisons, we handle the case of bit inversion.
auto AddAffectedFromEq = [&AddAffected](Value *V) {
- Value *A;
- if (match(V, m_Not(m_Value(A)))) {
- AddAffected(A);
- V = A;
- }
-
- Value *B;
+ Value *A, *B;
// (A & B) or (A | B) or (A ^ B).
if (match(V, m_BitwiseLogic(m_Value(A), m_Value(B)))) {
AddAffected(A);
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 8b3fc8d..9670e53 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8410,6 +8410,44 @@ void ScalarEvolution::forgetValue(Value *V) {
forgetMemoizedResults(ToForget);
}
+void ScalarEvolution::forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V) {
+ if (!isSCEVable(V->getType()))
+ return;
+
+  // If SCEV looked through a trivial LCSSA phi node, we might have SCEVs
+  // directly using a SCEVUnknown/SCEVAddRec defined in the loop. After an
+  // extra predecessor is added, this is no longer valid. Find all Unknowns and
+  // AddRecs defined in the loop and invalidate any SCEVs making use of them.
+ if (const SCEV *S = getExistingSCEV(V)) {
+ struct InvalidationRootCollector {
+ Loop *L;
+ SmallVector<const SCEV *, 8> Roots;
+
+ InvalidationRootCollector(Loop *L) : L(L) {}
+
+ bool follow(const SCEV *S) {
+ if (auto *SU = dyn_cast<SCEVUnknown>(S)) {
+ if (auto *I = dyn_cast<Instruction>(SU->getValue()))
+ if (L->contains(I))
+ Roots.push_back(S);
+ } else if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (L->contains(AddRec->getLoop()))
+ Roots.push_back(S);
+ }
+ return true;
+ }
+ bool isDone() const { return false; }
+ };
+
+ InvalidationRootCollector C(L);
+ visitAll(S, C);
+ forgetMemoizedResults(C.Roots);
+ }
+
+ // Also perform the normal invalidation.
+ forgetValue(V);
+}
+
void ScalarEvolution::forgetLoopDispositions() { LoopDispositions.clear(); }
void ScalarEvolution::forgetBlockAndLoopDispositions(Value *V) {
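The new forgetLcssaPhiWithNewPredecessor walks the phi's SCEV with the generic
SCEV traversal interface: a visitor handed to visitAll exposes follow(), which
is called on every reachable expression and returns whether the walk should
also descend into that expression's operands, and isDone(), which is polled
between visits to cut the traversal short. A minimal sketch of that contract
(the CountAddRecs visitor below is a hypothetical example, not part of this
patch):

    #include "llvm/Analysis/ScalarEvolutionExpressions.h"
    using namespace llvm;

    // Hypothetical visitor: counts the addrec expressions reachable from a SCEV.
    struct CountAddRecs {
      unsigned Count = 0;
      bool follow(const SCEV *S) {
        if (isa<SCEVAddRecExpr>(S))
          ++Count;
        return true;                        // keep descending into operands
      }
      bool isDone() const { return false; } // never stop the walk early
    };

    // Usage, given some const SCEV *S:
    //   CountAddRecs C;
    //   visitAll(S, C);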
diff --git a/llvm/lib/Analysis/TrainingLogger.cpp b/llvm/lib/Analysis/TrainingLogger.cpp
index e236890..344ca92 100644
--- a/llvm/lib/Analysis/TrainingLogger.cpp
+++ b/llvm/lib/Analysis/TrainingLogger.cpp
@@ -27,11 +27,6 @@
using namespace llvm;
-// FIXME(mtrofin): remove the flag altogether
-static cl::opt<bool>
- UseSimpleLogger("tfutils-use-simplelogger", cl::init(true), cl::Hidden,
- cl::desc("Output simple (non-protobuf) log."));
-
void Logger::writeHeader(std::optional<TensorSpec> AdviceSpec) {
json::OStream JOS(*OS);
JOS.object([&]() {
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5f5d7e07..e25aa9c 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -662,16 +662,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
// known bits from the RHS to V.
Known.Zero |= RHSKnown.Zero & MaskKnown.One;
Known.One |= RHSKnown.One & MaskKnown.One;
- // assume(~(v & b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
- m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits MaskKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in the mask that are known to be one, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & MaskKnown.One;
- Known.One |= RHSKnown.Zero & MaskKnown.One;
// assume(v | b = a)
} else if (match(Cmp,
m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A)))) {
@@ -682,16 +672,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
// bits from the RHS to V.
Known.Zero |= RHSKnown.Zero & BKnown.Zero;
Known.One |= RHSKnown.One & BKnown.Zero;
- // assume(~(v | b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
- m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
// assume(v ^ b = a)
} else if (match(Cmp,
m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A)))) {
@@ -705,19 +685,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
Known.One |= RHSKnown.One & BKnown.Zero;
Known.Zero |= RHSKnown.One & BKnown.One;
Known.One |= RHSKnown.Zero & BKnown.One;
- // assume(~(v ^ b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
- m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V. For those bits in B that are
- // known to be one, we can propagate known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
- Known.Zero |= RHSKnown.Zero & BKnown.One;
- Known.One |= RHSKnown.One & BKnown.One;
// assume(v << c = a)
} else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
m_Value(A))) &&
@@ -729,17 +696,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
RHSKnown.Zero.lshrInPlace(C);
RHSKnown.One.lshrInPlace(C);
Known = Known.unionWith(RHSKnown);
- // assume(~(v << c) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- C < BitWidth) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- RHSKnown.One.lshrInPlace(C);
- Known.Zero |= RHSKnown.One;
- RHSKnown.Zero.lshrInPlace(C);
- Known.One |= RHSKnown.Zero;
// assume(v >> c = a)
} else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
m_Value(A))) &&
@@ -749,15 +705,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
// bits in V shifted to the right by C.
Known.Zero |= RHSKnown.Zero << C;
Known.One |= RHSKnown.One << C;
- // assume(~(v >> c) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- C < BitWidth) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.One << C;
- Known.One |= RHSKnown.Zero << C;
}
break;
case ICmpInst::ICMP_NE: {
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index bf11492..f3cb7fa 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -163,8 +163,6 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineBasicBlock *MBB = nullptr;
- MachineLoop *Loop = nullptr;
- bool IsExiting = false;
float TotalWeight = 0;
unsigned NumInstr = 0; // Number of instructions using LI
SmallPtrSet<MachineInstr *, 8> Visited;
@@ -221,6 +219,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
}
};
+ bool IsExiting = false;
std::set<CopyHint> CopyHints;
DenseMap<unsigned, float> Hint;
for (MachineRegisterInfo::reg_instr_nodbg_iterator
@@ -262,7 +261,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// Get loop info for mi.
if (MI->getParent() != MBB) {
MBB = MI->getParent();
- Loop = Loops.getLoopFor(MBB);
+ const MachineLoop *Loop = Loops.getLoopFor(MBB);
IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
}
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 2740265..213dc1e 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -33,7 +33,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -86,7 +85,6 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
LiveIntervals &LIS;
LiveStacks &LSS;
MachineDominatorTree &MDT;
- MachineLoopInfo &Loops;
VirtRegMap &VRM;
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
@@ -138,8 +136,7 @@ public:
VirtRegMap &vrm)
: MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
LSS(pass.getAnalysis<LiveStacks>()),
- MDT(pass.getAnalysis<MachineDominatorTree>()),
- Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
+ MDT(pass.getAnalysis<MachineDominatorTree>()), VRM(vrm),
MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
@@ -157,7 +154,6 @@ class InlineSpiller : public Spiller {
LiveIntervals &LIS;
LiveStacks &LSS;
MachineDominatorTree &MDT;
- MachineLoopInfo &Loops;
VirtRegMap &VRM;
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
@@ -197,8 +193,7 @@ public:
VirtRegAuxInfo &VRAI)
: MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()),
LSS(Pass.getAnalysis<LiveStacks>()),
- MDT(Pass.getAnalysis<MachineDominatorTree>()),
- Loops(Pass.getAnalysis<MachineLoopInfo>()), VRM(VRM),
+ MDT(Pass.getAnalysis<MachineDominatorTree>()), VRM(VRM),
MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
TRI(*MF.getSubtarget().getRegisterInfo()),
MBFI(Pass.getAnalysis<MachineBlockFrequencyInfo>()),
diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp
index 6e74e51..cdb8099 100644
--- a/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -32,7 +32,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -52,7 +51,6 @@ char &llvm::SpillPlacementID = SpillPlacement::ID;
INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
@@ -60,7 +58,6 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequiredTransitive<EdgeBundles>();
- AU.addRequiredTransitive<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -195,7 +192,6 @@ struct SpillPlacement::Node {
bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
bundles = &getAnalysis<EdgeBundles>();
- loops = &getAnalysis<MachineLoopInfo>();
assert(!nodes && "Leaking node array");
nodes = new Node[bundles->getNumBundles()];
diff --git a/llvm/lib/CodeGen/SpillPlacement.h b/llvm/lib/CodeGen/SpillPlacement.h
index 2a298c7..5fd9b08 100644
--- a/llvm/lib/CodeGen/SpillPlacement.h
+++ b/llvm/lib/CodeGen/SpillPlacement.h
@@ -38,13 +38,11 @@ class BitVector;
class EdgeBundles;
class MachineBlockFrequencyInfo;
class MachineFunction;
-class MachineLoopInfo;
class SpillPlacement : public MachineFunctionPass {
struct Node;
const MachineFunction *MF = nullptr;
const EdgeBundles *bundles = nullptr;
- const MachineLoopInfo *loops = nullptr;
const MachineBlockFrequencyInfo *MBFI = nullptr;
Node *nodes = nullptr;
diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index 6d8e239..164adce 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -469,11 +469,11 @@ MachOPlatform::MachOPlatform(
// itself (to build the allocation actions that will call the registration
// functions). Further complicating the situation (a) the graph containing
// the registration functions is allowed to depend on other graphs (e.g. the
- // graph containing the ORC runtime RTTI support) so we need to handle with
- // an unknown set of dependencies during bootstrap, and (b) these graphs may
+ // graph containing the ORC runtime RTTI support) so we need to handle an
+ // unknown set of dependencies during bootstrap, and (b) these graphs may
// be linked concurrently if the user has installed a concurrent dispatcher.
//
- // We satisfy these constraint by implementing a bootstrap phase during which
+ // We satisfy these constraints by implementing a bootstrap phase during which
// allocation actions generated by MachOPlatform are appended to a list of
// deferred allocation actions, rather than to the graphs themselves. At the
// end of the bootstrap process the deferred actions are attached to a final
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 943826c..927aefb 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -915,11 +915,11 @@ DICompileUnit::DICompileUnit(LLVMContext &C, StorageType Storage,
bool DebugInfoForProfiling, unsigned NameTableKind,
bool RangesBaseAddress, ArrayRef<Metadata *> Ops)
: DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
- SourceLanguage(SourceLanguage), IsOptimized(IsOptimized),
- RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind), DWOId(DWOId),
- SplitDebugInlining(SplitDebugInlining),
+ SourceLanguage(SourceLanguage), RuntimeVersion(RuntimeVersion),
+ DWOId(DWOId), EmissionKind(EmissionKind), NameTableKind(NameTableKind),
+ IsOptimized(IsOptimized), SplitDebugInlining(SplitDebugInlining),
DebugInfoForProfiling(DebugInfoForProfiling),
- NameTableKind(NameTableKind), RangesBaseAddress(RangesBaseAddress) {
+ RangesBaseAddress(RangesBaseAddress) {
assert(Storage != Uniqued);
}
@@ -1181,8 +1181,9 @@ DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context,
DINamespace::DINamespace(LLVMContext &Context, StorageType Storage,
bool ExportSymbols, ArrayRef<Metadata *> Ops)
- : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops),
- ExportSymbols(ExportSymbols) {}
+ : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops) {
+ SubclassData1 = ExportSymbols;
+}
DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
MDString *Name, bool ExportSymbols,
StorageType Storage, bool ShouldCreate) {
@@ -1196,8 +1197,9 @@ DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
DICommonBlock::DICommonBlock(LLVMContext &Context, StorageType Storage,
unsigned LineNo, ArrayRef<Metadata *> Ops)
: DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block,
- Ops),
- LineNo(LineNo) {}
+ Ops) {
+ SubclassData32 = LineNo;
+}
DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *Decl, MDString *Name,
Metadata *File, unsigned LineNo,
@@ -1211,8 +1213,10 @@ DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
DIModule::DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo,
bool IsDecl, ArrayRef<Metadata *> Ops)
- : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops),
- LineNo(LineNo), IsDecl(IsDecl) {}
+ : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops) {
+ SubclassData1 = IsDecl;
+ SubclassData32 = LineNo;
+}
DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *File,
Metadata *Scope, MDString *Name,
MDString *ConfigurationMacros,
@@ -1301,8 +1305,9 @@ DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
DIVariable::DIVariable(LLVMContext &C, unsigned ID, StorageType Storage,
signed Line, ArrayRef<Metadata *> Ops,
uint32_t AlignInBits)
- : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line),
- AlignInBits(AlignInBits) {}
+ : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line) {
+ SubclassData32 = AlignInBits;
+}
std::optional<uint64_t> DIVariable::getSizeInBits() const {
// This is used by the Verifier so be mindful of broken types.
const Metadata *RawType = getRawType();
@@ -1328,7 +1333,9 @@ std::optional<uint64_t> DIVariable::getSizeInBits() const {
DILabel::DILabel(LLVMContext &C, StorageType Storage, unsigned Line,
ArrayRef<Metadata *> Ops)
- : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {}
+ : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops) {
+ SubclassData32 = Line;
+}
DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
Metadata *File, unsigned Line, StorageType Storage,
bool ShouldCreate) {
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 8490fef..e4d18d8 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -843,7 +843,7 @@ bool ELFWriter::maybeWriteCompression(
uint32_t ChType, uint64_t Size,
SmallVectorImpl<uint8_t> &CompressedContents, Align Alignment) {
uint64_t HdrSize =
- is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr);
+ is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr);
if (Size <= HdrSize + CompressedContents.size())
return false;
// Platform specific header is followed by compressed data.
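The one-line ELFObjectWriter change fixes a swapped 32-/64-bit condition: the
64-bit compression header (Elf64_Chdr) is twice the size of the 32-bit one, so
using the 32-bit size for 64-bit objects under-estimated the header overhead
when deciding whether compressing a section pays off. A hypothetical check,
not part of the patch, stating the layout assumed here:

    #include "llvm/BinaryFormat/ELF.h"

    // Elf64_Chdr: ch_type + ch_reserved (4+4 bytes) + ch_size + ch_addralign (8+8).
    // Elf32_Chdr: ch_type + ch_size + ch_addralign (4+4+4 bytes).
    static_assert(sizeof(llvm::ELF::Elf64_Chdr) == 24, "64-bit compression header");
    static_assert(sizeof(llvm::ELF::Elf32_Chdr) == 12, "32-bit compression header");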
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index dd2ae3e..f1f3d56 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -852,6 +852,12 @@ def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
FeaturePostRAScheduler
]>;
+def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520",
+ "Cortex-A520 ARM processors", [
+ FeatureFuseAES,
+ FeatureFuseAdrpAdd,
+ FeaturePostRAScheduler]>;
+
def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors", [
FeatureFuseAES,
@@ -957,6 +963,17 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
+def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720",
+ "Cortex-A720 ARM processors", [
+ FeatureFuseAES,
+ FeaturePostRAScheduler,
+ FeatureCmpBccFusion,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureFuseAdrpAdd,
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
+
def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
"CortexR82",
"Cortex-R82 ARM processors", [
@@ -994,6 +1011,16 @@ def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
+def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",
+ "Cortex-X4 ARM processors", [
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureFuseAdrpAdd,
+ FeatureFuseAES,
+ FeaturePostRAScheduler,
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
+
def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
"Fujitsu A64FX processors", [
FeaturePostRAScheduler,
@@ -1329,6 +1356,9 @@ def ProcessorFeatures {
FeatureMatMulInt8, FeatureBF16, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
FeatureFP16FML];
+ list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM,
+ FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
+ FeatureFP16FML];
list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureFullFP16, FeatureDotProd,
FeatureRCPC, FeatureSSBS, FeatureRAS,
@@ -1355,6 +1385,9 @@ def ProcessorFeatures {
FeatureFP16FML, FeatureSVE, FeatureTRBE,
FeatureSVE2BitPerm, FeatureBF16, FeatureETE,
FeaturePerfMon, FeatureMatMulInt8, FeatureSPE];
+ list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+ FeatureTRBE, FeatureSVE2BitPerm, FeatureETE,
+ FeaturePerfMon, FeatureSPE, FeatureSPE_EEF];
list<SubtargetFeature> R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16,
FeatureFP16FML, FeatureSSBS, FeaturePredRes,
FeatureSB];
@@ -1376,6 +1409,10 @@ def ProcessorFeatures {
FeatureSPE, FeatureBF16, FeatureMatMulInt8,
FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16,
FeatureFP16FML];
+ list<SubtargetFeature> X4 = [HasV9_2aOps,
+ FeaturePerfMon, FeatureETE, FeatureTRBE,
+ FeatureSPE, FeatureMTE, FeatureSVE2BitPerm,
+ FeatureFP16FML, FeatureSPE_EEF];
list<SubtargetFeature> A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON,
FeatureSHA2, FeaturePerfMon, FeatureFullFP16,
FeatureSVE, FeatureComplxNum];
@@ -1480,6 +1517,8 @@ def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55,
[TuneA55]>;
def : ProcessorModel<"cortex-a510", CortexA510Model, ProcessorFeatures.A510,
[TuneA510]>;
+def : ProcessorModel<"cortex-a520", CortexA510Model, ProcessorFeatures.A520,
+ [TuneA520]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53,
[TuneA57]>;
def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65,
@@ -1506,6 +1545,8 @@ def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710,
[TuneA710]>;
def : ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715,
[TuneA715]>;
+def : ProcessorModel<"cortex-a720", NeoverseN2Model, ProcessorFeatures.A720,
+ [TuneA720]>;
def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,
[TuneR82]>;
def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,
@@ -1516,6 +1557,8 @@ def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2,
[TuneX2]>;
def : ProcessorModel<"cortex-x3", NeoverseN2Model, ProcessorFeatures.X3,
[TuneX3]>;
+def : ProcessorModel<"cortex-x4", NeoverseN2Model, ProcessorFeatures.X4,
+ [TuneX4]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model,
ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>;
def : ProcessorModel<"neoverse-n1", NeoverseN1Model,
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index ff14fcf..beae9b5 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -149,6 +149,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
MaxBytesForLoopAlignment = 16;
break;
case CortexA510:
+ case CortexA520:
PrefFunctionAlignment = Align(16);
VScaleForTuning = 1;
PrefLoopAlignment = Align(16);
@@ -156,8 +157,10 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
break;
case CortexA710:
case CortexA715:
+ case CortexA720:
case CortexX2:
case CortexX3:
+ case CortexX4:
PrefFunctionAlignment = Align(16);
VScaleForTuning = 1;
PrefLoopAlignment = Align(32);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index b2ee2e7..ae2bea6 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -56,6 +56,7 @@ public:
CortexA53,
CortexA55,
CortexA510,
+ CortexA520,
CortexA57,
CortexA65,
CortexA72,
@@ -67,11 +68,13 @@ public:
CortexA78C,
CortexA710,
CortexA715,
+ CortexA720,
CortexR82,
CortexX1,
CortexX1C,
CortexX2,
CortexX3,
+ CortexX4,
ExynosM3,
Falkor,
Kryo,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c938121..cf66707 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1396,6 +1396,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, VT, Custom);
}
+ setOperationAction(ISD::FABS, MVT::v8f16, Custom);
+
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
@@ -4662,6 +4664,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+ if ((SizeInBits % SrcEltSizeInBits) != 0)
+ return false;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index aba2ebf..ae47929 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -208,6 +208,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0xd03", "cortex-a53")
.Case("0xd05", "cortex-a55")
.Case("0xd46", "cortex-a510")
+ .Case("0xd80", "cortex-a520")
.Case("0xd07", "cortex-a57")
.Case("0xd08", "cortex-a72")
.Case("0xd09", "cortex-a73")
@@ -217,10 +218,12 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0xd41", "cortex-a78")
.Case("0xd47", "cortex-a710")
.Case("0xd4d", "cortex-a715")
+ .Case("0xd81", "cortex-a720")
.Case("0xd44", "cortex-x1")
.Case("0xd4c", "cortex-x1c")
.Case("0xd48", "cortex-x2")
.Case("0xd4e", "cortex-x3")
+ .Case("0xd82", "cortex-x4")
.Case("0xd0c", "neoverse-n1")
.Case("0xd49", "neoverse-n2")
.Case("0xd40", "neoverse-v1")
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index b786685..8f0b12d 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -237,7 +237,8 @@ void SampleProfileProber::computeProbeIdForCallsites() {
if (LastProbeId >= 0xFFFF) {
std::string Msg = "Pseudo instrumentation incomplete for " +
std::string(F->getName()) + " because it's too large";
- Ctx.diagnose(DiagnosticInfoSampleProfile(M->getName().data(), Msg));
+ Ctx.diagnose(
+ DiagnosticInfoSampleProfile(M->getName().data(), Msg, DS_Warning));
return;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index d6fac88..4015672 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1433,7 +1433,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
// sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined)
if (match(Op1, m_AllOnes()) ||
(match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
- return BinaryOperator::CreateNeg(Op0);
+ return BinaryOperator::CreateNSWNeg(Op0);
// X / INT_MIN --> X == INT_MIN
if (match(Op1, m_SignMask()))
@@ -1456,7 +1456,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
Constant *NegPow2C = ConstantExpr::getNeg(cast<Constant>(Op1));
Constant *C = ConstantExpr::getExactLogBase2(NegPow2C);
Value *Ashr = Builder.CreateAShr(Op0, C, I.getName() + ".neg", true);
- return BinaryOperator::CreateNeg(Ashr);
+ return BinaryOperator::CreateNSWNeg(Ashr);
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 80c044b..c82b926 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -803,7 +803,9 @@ public:
// do globals-gc.
UseCtorComdat(UseGlobalsGC && ClWithComdat && !this->CompileKernel),
DestructorKind(DestructorKind),
- ConstructorKind(ConstructorKind) {
+ ConstructorKind(ClConstructorKind.getNumOccurrences() > 0
+ ? ClConstructorKind
+ : ConstructorKind) {
C = &(M.getContext());
int LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
@@ -1151,7 +1153,7 @@ AddressSanitizerPass::AddressSanitizerPass(
AsanCtorKind ConstructorKind)
: Options(Options), UseGlobalGC(UseGlobalGC),
UseOdrIndicator(UseOdrIndicator), DestructorKind(DestructorKind),
- ConstructorKind(ClConstructorKind) {}
+ ConstructorKind(ConstructorKind) {}
PreservedAnalyses AddressSanitizerPass::run(Module &M,
ModuleAnalysisManager &MAM) {
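The two AddressSanitizer hunks move where the ClConstructorKind command-line
option takes effect: the pass now stores the ConstructorKind supplied through
the API, and the option overrides it only when it was explicitly given on the
command line (getNumOccurrences() > 0) rather than unconditionally. A minimal
sketch of that flag-overrides-parameter pattern, using a hypothetical option
in place of the real ClConstructorKind:

    #include "llvm/Support/CommandLine.h"

    // Hypothetical flag standing in for ClConstructorKind.
    static llvm::cl::opt<int> MyKindFlag("my-kind", llvm::cl::init(0),
                                         llvm::cl::Hidden);

    // Prefer the flag only when the user passed it explicitly; otherwise keep
    // the value threaded through the API.
    static int pickKind(int KindFromAPI) {
      return MyKindFlag.getNumOccurrences() > 0 ? MyKindFlag.getValue()
                                                : KindFromAPI;
    }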
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0a10f0d6..64093a0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3599,7 +3599,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
OrigLoop->getExitBlocks(ExitBlocks);
for (BasicBlock *Exit : ExitBlocks)
for (PHINode &PN : Exit->phis())
- PSE.getSE()->forgetValue(&PN);
+ PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5b9c206..fca96491 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8564,7 +8564,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
// profitable for the vectorization, we can skip it, if the cost threshold is
// default. The cost of vectorized PHI nodes is almost always 0 + the cost of
// gathers/buildvectors.
- constexpr unsigned Limit = 4;
+ constexpr int Limit = 4;
if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
!VectorizableTree.empty() &&
all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
@@ -10261,7 +10261,7 @@ public:
/// Checks if the specified entry \p E needs to be delayed because of its
/// dependency nodes.
Value *needToDelay(const TreeEntry *E,
- ArrayRef<SmallVector<const TreeEntry *>> Deps) {
+ ArrayRef<SmallVector<const TreeEntry *>> Deps) const {
// No need to delay emission if all deps are ready.
if (all_of(Deps, [](ArrayRef<const TreeEntry *> TEs) {
return all_of(
@@ -10276,6 +10276,16 @@ public:
VecTy, PoisonValue::get(PointerType::getUnqual(VecTy->getContext())),
MaybeAlign());
}
+ /// Adds 2 input vectors (in form of tree entries) and the mask for their
+ /// shuffling.
+ void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef<int> Mask) {
+ add(E1.VectorizedValue, E2.VectorizedValue, Mask);
+ }
+ /// Adds single input vector (in form of tree entry) and the mask for its
+ /// shuffling.
+ void add(const TreeEntry &E1, ArrayRef<int> Mask) {
+ add(E1.VectorizedValue, Mask);
+ }
/// Adds 2 input vectors and the mask for their shuffling.
void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
assert(V1 && V2 && !Mask.empty() && "Expected non-empty input vectors.");
@@ -10690,7 +10700,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
Mask[I] = FrontTE->findLaneForValue(V);
}
}
- ShuffleBuilder.add(FrontTE->VectorizedValue, Mask);
+ ShuffleBuilder.add(*FrontTE, Mask);
Res = ShuffleBuilder.finalize(E->getCommonMask());
return Res;
}
@@ -10868,17 +10878,14 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
VecMask.assign(VecMask.size(), PoisonMaskElem);
copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
if (TEs.size() == 1) {
- IsUsedInExpr &= FindReusedSplat(
- VecMask,
- cast<FixedVectorType>(TEs.front()->VectorizedValue->getType())
- ->getNumElements());
- ShuffleBuilder.add(TEs.front()->VectorizedValue, VecMask);
+ IsUsedInExpr &=
+ FindReusedSplat(VecMask, TEs.front()->getVectorFactor());
+ ShuffleBuilder.add(*TEs.front(), VecMask);
IsNonPoisoned &=
isGuaranteedNotToBePoison(TEs.front()->VectorizedValue);
} else {
IsUsedInExpr = false;
- ShuffleBuilder.add(TEs.front()->VectorizedValue,
- TEs.back()->VectorizedValue, VecMask);
+ ShuffleBuilder.add(*TEs.front(), *TEs.back(), VecMask);
IsNonPoisoned &=
isGuaranteedNotToBePoison(TEs.front()->VectorizedValue) &&
isGuaranteedNotToBePoison(TEs.back()->VectorizedValue);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c55864d..0eaaa03 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -816,15 +816,28 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
break;
}
case Instruction::Trunc: {
- VPRecipeBase *Zext = R.getOperand(0)->getDefiningRecipe();
- if (!Zext || getOpcodeForRecipe(*Zext) != Instruction::ZExt)
+ VPRecipeBase *Ext = R.getOperand(0)->getDefiningRecipe();
+ if (!Ext)
break;
- VPValue *A = Zext->getOperand(0);
+ unsigned ExtOpcode = getOpcodeForRecipe(*Ext);
+ if (ExtOpcode != Instruction::ZExt && ExtOpcode != Instruction::SExt)
+ break;
+ VPValue *A = Ext->getOperand(0);
VPValue *Trunc = R.getVPSingleValue();
- Type *TruncToTy = TypeInfo.inferScalarType(Trunc);
- if (TruncToTy && TruncToTy == TypeInfo.inferScalarType(A))
+ Type *TruncTy = TypeInfo.inferScalarType(Trunc);
+ Type *ATy = TypeInfo.inferScalarType(A);
+ if (TruncTy == ATy) {
Trunc->replaceAllUsesWith(A);
-
+ } else if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
+ auto *VPC =
+ new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
+ VPC->insertBefore(&R);
+ Trunc->replaceAllUsesWith(VPC);
+ } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
+ auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
+ VPC->insertBefore(&R);
+ Trunc->replaceAllUsesWith(VPC);
+ }
#ifndef NDEBUG
// Verify that the cached type info for both A and its users is still
// accurate by comparing it to freshly computed types.