aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp111
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp53
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp33
-rw-r--r--llvm/lib/IR/ConstantFPRange.cpp14
-rw-r--r--llvm/lib/Object/XCOFFObjectFile.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp12
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp1
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp1
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp1
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp1
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp1
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp1
-rw-r--r--llvm/lib/TargetParser/Triple.cpp3
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h21
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp3
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp62
-rw-r--r--llvm/lib/Transforms/Scalar/GVN.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/InstructionNamer.cpp7
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp61
24 files changed, 261 insertions, 160 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 10df9c1..219bbc9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "WinException.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -205,6 +206,17 @@ public:
};
} // namespace
+namespace callgraph {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+enum Flags : uint8_t {
+ None = 0,
+ IsIndirectTarget = 1u << 0,
+ HasDirectCallees = 1u << 1,
+ HasIndirectCallees = 1u << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ HasIndirectCallees)
+};
+} // namespace callgraph
+
class llvm::AddrLabelMap {
MCContext &Context;
struct AddrLabelSymEntry {
@@ -1683,63 +1695,65 @@ void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
OutStreamer->pushSection();
OutStreamer->switchSection(FuncCGSection);
- // Emit format version number.
- OutStreamer->emitInt64(CallGraphSectionFormatVersion::V_0);
-
- // Emit function's self information, which is composed of:
- // 1) FunctionEntryPc
- // 2) FunctionKind: Whether the function is indirect target, and if so,
- // whether its type id is known.
- // 3) FunctionTypeId: Emit only when the function is an indirect target
- // and its type id is known.
-
- // Emit function entry pc.
const MCSymbol *FunctionSymbol = getFunctionBegin();
- OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
-
+ const Function &F = MF.getFunction();
// If this function has external linkage or has its address taken and
// it is not a callback, then anything could call it.
- const Function &F = MF.getFunction();
bool IsIndirectTarget =
!F.hasLocalLinkage() || F.hasAddressTaken(nullptr,
/*IgnoreCallbackUses=*/true,
/*IgnoreAssumeLikeCalls=*/true,
/*IgnoreLLVMUsed=*/false);
- // FIXME: FunctionKind takes a few values but emitted as a 64-bit value.
- // Can be optimized to occupy 2 bits instead.
- // Emit function kind, and type id if available.
- if (!IsIndirectTarget) {
- OutStreamer->emitInt64(
- static_cast<uint64_t>(FunctionKind::NOT_INDIRECT_TARGET));
- } else {
- if (const auto *TypeId = extractNumericCGTypeId(F)) {
- OutStreamer->emitInt64(
- static_cast<uint64_t>(FunctionKind::INDIRECT_TARGET_KNOWN_TID));
- OutStreamer->emitInt64(TypeId->getZExtValue());
- } else {
- OutStreamer->emitInt64(
- static_cast<uint64_t>(FunctionKind::INDIRECT_TARGET_UNKNOWN_TID));
- }
- }
+ const auto &DirectCallees = FuncCGInfo.DirectCallees;
+ const auto &IndirectCalleeTypeIDs = FuncCGInfo.IndirectCalleeTypeIDs;
+
+ using namespace callgraph;
+ Flags CGFlags = Flags::None;
+ if (IsIndirectTarget)
+ CGFlags |= Flags::IsIndirectTarget;
+ if (DirectCallees.size() > 0)
+ CGFlags |= Flags::HasDirectCallees;
+ if (IndirectCalleeTypeIDs.size() > 0)
+ CGFlags |= Flags::HasIndirectCallees;
+
+ // Emit function's call graph information.
+ // 1) CallGraphSectionFormatVersion
+ // 2) Flags
+ // a. LSB bit 0 is set to 1 if the function is a potential indirect
+ // target.
+ // b. LSB bit 1 is set to 1 if there are direct callees.
+ // c. LSB bit 2 is set to 1 if there are indirect callees.
+ // d. Rest of the 5 bits in Flags are reserved for any future use.
+ // 3) Function entry PC.
+ // 4) FunctionTypeID if the function is indirect target and its type id
+ // is known, otherwise it is set to 0.
+ // 5) Number of unique direct callees, if at least one exists.
+ // 6) For each unique direct callee, the callee's PC.
+ // 7) Number of unique indirect target type IDs, if at least one exists.
+ // 8) Each unique indirect target type id.
+ OutStreamer->emitInt8(CallGraphSectionFormatVersion::V_0);
+ OutStreamer->emitInt8(static_cast<uint8_t>(CGFlags));
+ OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+ const auto *TypeId = extractNumericCGTypeId(F);
+ if (IsIndirectTarget && TypeId)
+ OutStreamer->emitInt64(TypeId->getZExtValue());
+ else
+ OutStreamer->emitInt64(0);
- // Emit callsite labels, where each element is a pair of type id and
- // indirect callsite pc.
- const auto &CallSiteLabels = FuncCGInfo.CallSiteLabels;
- OutStreamer->emitInt64(CallSiteLabels.size());
- for (const auto &[TypeId, Label] : CallSiteLabels) {
- OutStreamer->emitInt64(TypeId);
- OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize());
+ if (DirectCallees.size() > 0) {
+ OutStreamer->emitULEB128IntValue(DirectCallees.size());
+ for (const auto &CalleeSymbol : DirectCallees)
+ OutStreamer->emitSymbolValue(CalleeSymbol, TM.getProgramPointerSize());
+ FuncCGInfo.DirectCallees.clear();
}
- FuncCGInfo.CallSiteLabels.clear();
-
- const auto &DirectCallees = FuncCGInfo.DirectCallees;
- OutStreamer->emitInt64(DirectCallees.size());
- for (const auto &CalleeSymbol : DirectCallees) {
- OutStreamer->emitSymbolValue(CalleeSymbol, TM.getProgramPointerSize());
+ if (IndirectCalleeTypeIDs.size() > 0) {
+ OutStreamer->emitULEB128IntValue(IndirectCalleeTypeIDs.size());
+ for (const auto &CalleeTypeId : IndirectCalleeTypeIDs)
+ OutStreamer->emitInt64(CalleeTypeId);
+ FuncCGInfo.IndirectCalleeTypeIDs.clear();
}
- FuncCGInfo.DirectCallees.clear();
-
+ // End of emitting call graph section contents.
OutStreamer->popSection();
}
@@ -1877,8 +1891,7 @@ void AsmPrinter::handleCallsiteForCallgraph(
FunctionCallGraphInfo &FuncCGInfo,
const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
const MachineInstr &MI) {
- assert(MI.isCall() &&
- "Callsite labels are meant for call instructions only.");
+ assert(MI.isCall() && "This method is meant for call instructions only.");
const MachineOperand &CalleeOperand = MI.getOperand(0);
if (CalleeOperand.isGlobal() || CalleeOperand.isSymbol()) {
// Handle direct calls.
@@ -1903,10 +1916,8 @@ void AsmPrinter::handleCallsiteForCallgraph(
// Handle indirect callsite info.
// Only indirect calls have type identifiers set.
for (ConstantInt *CalleeTypeId : CallSiteInfo->second.CalleeTypeIds) {
- MCSymbol *S = MF->getContext().createTempSymbol();
- OutStreamer->emitLabel(S);
uint64_t CalleeTypeIdVal = CalleeTypeId->getZExtValue();
- FuncCGInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S);
+ FuncCGInfo.IndirectCalleeTypeIDs.insert(CalleeTypeIdVal);
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 12d749c..e57ed24 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -569,40 +569,30 @@ void CodeViewDebug::emitCodeViewMagicVersion() {
OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
}
-static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
- switch (DWLang) {
- case dwarf::DW_LANG_C:
- case dwarf::DW_LANG_C89:
- case dwarf::DW_LANG_C99:
- case dwarf::DW_LANG_C11:
+static SourceLanguage
+MapDWARFLanguageToCVLang(dwarf::SourceLanguageName DWLName) {
+ switch (DWLName) {
+ case dwarf::DW_LNAME_C:
return SourceLanguage::C;
- case dwarf::DW_LANG_C_plus_plus:
- case dwarf::DW_LANG_C_plus_plus_03:
- case dwarf::DW_LANG_C_plus_plus_11:
- case dwarf::DW_LANG_C_plus_plus_14:
+ case dwarf::DW_LNAME_C_plus_plus:
return SourceLanguage::Cpp;
- case dwarf::DW_LANG_Fortran77:
- case dwarf::DW_LANG_Fortran90:
- case dwarf::DW_LANG_Fortran95:
- case dwarf::DW_LANG_Fortran03:
- case dwarf::DW_LANG_Fortran08:
+ case dwarf::DW_LNAME_Fortran:
return SourceLanguage::Fortran;
- case dwarf::DW_LANG_Pascal83:
+ case dwarf::DW_LNAME_Pascal:
return SourceLanguage::Pascal;
- case dwarf::DW_LANG_Cobol74:
- case dwarf::DW_LANG_Cobol85:
+ case dwarf::DW_LNAME_Cobol:
return SourceLanguage::Cobol;
- case dwarf::DW_LANG_Java:
+ case dwarf::DW_LNAME_Java:
return SourceLanguage::Java;
- case dwarf::DW_LANG_D:
+ case dwarf::DW_LNAME_D:
return SourceLanguage::D;
- case dwarf::DW_LANG_Swift:
+ case dwarf::DW_LNAME_Swift:
return SourceLanguage::Swift;
- case dwarf::DW_LANG_Rust:
+ case dwarf::DW_LNAME_Rust:
return SourceLanguage::Rust;
- case dwarf::DW_LANG_ObjC:
+ case dwarf::DW_LNAME_ObjC:
return SourceLanguage::ObjC;
- case dwarf::DW_LANG_ObjC_plus_plus:
+ case dwarf::DW_LNAME_ObjC_plus_plus:
return SourceLanguage::ObjCpp;
default:
// There's no CodeView representation for this language, and CV doesn't
@@ -612,6 +602,14 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
}
}
+static SourceLanguage MapDWARFLanguageToCVLang(dwarf::SourceLanguage DWLang) {
+ auto MaybeLName = dwarf::toDW_LNAME(DWLang);
+ if (!MaybeLName)
+ return MapDWARFLanguageToCVLang(static_cast<dwarf::SourceLanguageName>(0));
+
+ return MapDWARFLanguageToCVLang(MaybeLName->first);
+}
+
void CodeViewDebug::beginModule(Module *M) {
// If COFF debug section is not available, skip any debug info related stuff.
if (!Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
@@ -633,8 +631,13 @@ void CodeViewDebug::beginModule(Module *M) {
Node = *CUs->operands().begin();
}
const auto *CU = cast<DICompileUnit>(Node);
+ DISourceLanguageName Lang = CU->getSourceLanguage();
CurrentSourceLanguage =
- MapDWLangToCVLang(CU->getSourceLanguage().getUnversionedName());
+ Lang.hasVersionedName()
+ ? MapDWARFLanguageToCVLang(
+ static_cast<dwarf::SourceLanguageName>(Lang.getName()))
+ : MapDWARFLanguageToCVLang(
+ static_cast<dwarf::SourceLanguage>(Lang.getName()));
if (!M->getCodeViewFlag() ||
CU->getEmissionKind() == DICompileUnit::NoDebug) {
Asm = nullptr;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index d751a7f..433877f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1039,8 +1039,12 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
} else
NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
- NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
- DIUnit->getSourceLanguage().getUnversionedName());
+ if (auto Lang = DIUnit->getSourceLanguage(); Lang.hasVersionedName())
+ NewCU.addUInt(Die, dwarf::DW_AT_language_name, dwarf::DW_FORM_data2,
+ Lang.getName());
+ else
+ NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ Lang.getName());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
StringRef SysRoot = DIUnit->getSysRoot();
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5ffdc4e..b47274b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18870,27 +18870,38 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
const TargetLowering &TLI) {
- // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
- // replacing casts with a libcall. We also must be allowed to ignore -0.0
- // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
- // conversions would return +0.0.
+ // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
+ // If NoSignedZerosFPMath is enabled, this is a direct replacement.
+ // Otherwise, for strict math, we must handle edge cases:
+ // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
+ // as example, it first becomes integer 0, and is converted back to +0.0.
+ // FTRUNC on its own could produce -0.0.
+
// FIXME: We should be able to use node-level FMF here.
- // TODO: If strict math, should we use FABS (+ range check for signed cast)?
EVT VT = N->getValueType(0);
- if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
- !DAG.getTarget().Options.NoSignedZerosFPMath)
+ if (!TLI.isOperationLegal(ISD::FTRUNC, VT))
return SDValue();
// fptosi/fptoui round towards zero, so converting from FP to integer and
// back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
SDValue N0 = N->getOperand(0);
if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
- N0.getOperand(0).getValueType() == VT)
- return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
+ N0.getOperand(0).getValueType() == VT) {
+ if (DAG.getTarget().Options.NoSignedZerosFPMath)
+ return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
+ }
if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
- N0.getOperand(0).getValueType() == VT)
- return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
+ N0.getOperand(0).getValueType() == VT) {
+ if (DAG.getTarget().Options.NoSignedZerosFPMath)
+ return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
+
+ // Strict math: use FABS to handle negative inputs correctly.
+ if (TLI.isFAbsFree(VT)) {
+ SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::FTRUNC, DL, VT, Abs);
+ }
+ }
return SDValue();
}
diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp
index fba6942..2477e22 100644
--- a/llvm/lib/IR/ConstantFPRange.cpp
+++ b/llvm/lib/IR/ConstantFPRange.cpp
@@ -411,3 +411,17 @@ ConstantFPRange ConstantFPRange::abs() const {
ConstantFPRange ConstantFPRange::negate() const {
return ConstantFPRange(-Upper, -Lower, MayBeQNaN, MayBeSNaN);
}
+
+ConstantFPRange ConstantFPRange::getWithoutInf() const {
+ if (isNaNOnly())
+ return *this;
+ APFloat NewLower = Lower;
+ APFloat NewUpper = Upper;
+ if (Lower.isNegInfinity())
+ NewLower = APFloat::getLargest(getSemantics(), /*Negative=*/true);
+ if (Upper.isPosInfinity())
+ NewUpper = APFloat::getLargest(getSemantics(), /*Negative=*/false);
+ canonicalizeRange(NewLower, NewUpper);
+ return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN,
+ MayBeSNaN);
+}
diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp
index c5a2ec2..7a8c8ad 100644
--- a/llvm/lib/Object/XCOFFObjectFile.cpp
+++ b/llvm/lib/Object/XCOFFObjectFile.cpp
@@ -379,7 +379,7 @@ Expected<StringRef> XCOFFObjectFile::getSectionName(DataRefImpl Sec) const {
}
uint64_t XCOFFObjectFile::getSectionAddress(DataRefImpl Sec) const {
- // Avoid ternary due to failure to convert the ubig32_t value to a unit64_t
+ // Avoid ternary due to failure to convert the ubig32_t value to a uint64_t
// with MSVC.
if (is64Bit())
return toSection64(Sec)->VirtualAddress;
@@ -397,7 +397,7 @@ uint64_t XCOFFObjectFile::getSectionIndex(DataRefImpl Sec) const {
}
uint64_t XCOFFObjectFile::getSectionSize(DataRefImpl Sec) const {
- // Avoid ternary due to failure to convert the ubig32_t value to a unit64_t
+ // Avoid ternary due to failure to convert the ubig32_t value to a uint64_t
// with MSVC.
if (is64Bit())
return toSection64(Sec)->SectionSize;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 7392f4b..bfe2c80 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -633,6 +633,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
.Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});
+ addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
+ .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
+ .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
+ .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
+ .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});
+
bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
bool hasSMRDSmall = ST->hasScalarSubwordLoads();
bool usesTrue16 = ST->useRealTrue16Insts();
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 4d3331a..c684f9e 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -674,15 +674,9 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
createUnpackedMI(I, UnpackedOpcode, /*IsHiBits=*/true);
MachineOperand HiDstOp = Op0HOp1H->getOperand(0);
- if (I.getFlag(MachineInstr::MIFlag::NoFPExcept)) {
- Op0LOp1L->setFlag(MachineInstr::MIFlag::NoFPExcept);
- Op0HOp1H->setFlag(MachineInstr::MIFlag::NoFPExcept);
- }
- if (I.getFlag(MachineInstr::MIFlag::FmContract)) {
- Op0LOp1L->setFlag(MachineInstr::MIFlag::FmContract);
- Op0HOp1H->setFlag(MachineInstr::MIFlag::FmContract);
- }
-
+ uint32_t IFlags = I.getFlags();
+ Op0LOp1L->setFlags(IFlags);
+ Op0HOp1H->setFlags(IFlags);
LoDstOp.setIsRenamable(DstOp.isRenamable());
HiDstOp.setIsRenamable(DstOp.isRenamable());
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 2a40fb9..83c7def 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -42,7 +42,6 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
index e8c849e..28a1690 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
@@ -46,7 +46,6 @@
#include "SPIRVSubtarget.h"
#include "SPIRVTargetMachine.h"
#include "SPIRVUtils.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
diff --git a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
index 20f03b0..60d39c9 100644
--- a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
diff --git a/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp
index 278ad7c..e621bcd44 100644
--- a/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp
@@ -14,7 +14,6 @@
#include "SPIRV.h"
#include "SPIRVSubtarget.h"
#include "SPIRVUtils.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Transforms/Utils/Cloning.h"
diff --git a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
index 1811492..5b149f8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9580ade..1cfcb1f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28,7 +28,6 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index ac3626d..f021094 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -375,6 +375,8 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case MuslSF:
return "muslsf";
case MuslX32: return "muslx32";
+ case MuslWALI:
+ return "muslwali";
case Simulator: return "simulator";
case Pixel: return "pixel";
case Vertex: return "vertex";
@@ -767,6 +769,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
.StartsWith("muslf32", Triple::MuslF32)
.StartsWith("muslsf", Triple::MuslSF)
.StartsWith("muslx32", Triple::MuslX32)
+ .StartsWith("muslwali", Triple::MuslWALI)
.StartsWith("musl", Triple::Musl)
.StartsWith("msvc", Triple::MSVC)
.StartsWith("itanium", Triple::Itanium)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 59e103cd..73ec451 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -880,11 +880,13 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// zext(bool) + C -> bool ? C + 1 : C
if (match(Op0, m_ZExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1);
+ return createSelectInstWithUnknownProfile(X, InstCombiner::AddOne(Op1C),
+ Op1);
// sext(bool) + C -> bool ? C - 1 : C
if (match(Op0, m_SExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1);
+ return createSelectInstWithUnknownProfile(X, InstCombiner::SubOne(Op1C),
+ Op1);
// ~X + C --> (C-1) - X
if (match(Op0, m_Not(m_Value(X)))) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 218aaf9..7071876 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -471,18 +471,19 @@ private:
Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable,
unsigned Depth = 0);
- SelectInst *createSelectInst(Value *C, Value *S1, Value *S2,
- const Twine &NameStr = "",
- InsertPosition InsertBefore = nullptr,
- Instruction *MDFrom = nullptr) {
- SelectInst *SI =
- SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom);
- if (!MDFrom)
- setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE);
- return SI;
+public:
+ /// Create `select C, S1, S2`. Use only when the profile cannot be calculated
+ /// from existing profile metadata: if the Function has profiles, this will
+ /// set the profile of this select to "unknown".
+ SelectInst *
+ createSelectInstWithUnknownProfile(Value *C, Value *S1, Value *S2,
+ const Twine &NameStr = "",
+ InsertPosition InsertBefore = nullptr) {
+ auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE);
+ return Sel;
}
-public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index d457e0c..899a3c1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1253,7 +1253,8 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
// shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1);
- return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty));
+ return createSelectInstWithUnknownProfile(X, NewC,
+ ConstantInt::getNullValue(Ty));
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 127a506..63e24a0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -107,7 +108,10 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I,
Value *ShrAmtZ =
IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()),
ShrAmt->getName() + ".z");
- Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper);
+ // There is no existing !prof metadata we can derive the !prof metadata for
+ // this select.
+ Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper);
+ IC.Builder.Insert(Select);
Select->takeName(I);
return Select;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d56a1af..82ac903 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1737,7 +1737,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
Constant *Zero = ConstantInt::getNullValue(BO.getType());
Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
- return createSelectInst(X, TVal, FVal);
+ return createSelectInstWithUnknownProfile(X, TVal, FVal);
}
static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index e5935f4..ff5f390 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -386,22 +386,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
return OS;
}
-/// Helper to get the successor corresponding to a particular case value for
-/// a switch statement.
-static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch,
- const APInt &NextState) {
- BasicBlock *NextCase = nullptr;
- for (auto Case : Switch->cases()) {
- if (Case.getCaseValue()->getValue() == NextState) {
- NextCase = Case.getCaseSuccessor();
- break;
- }
- }
- if (!NextCase)
- NextCase = Switch->getDefaultDest();
- return NextCase;
-}
-
namespace {
/// ThreadingPath is a path in the control flow of a loop that can be threaded
/// by cloning necessary basic blocks and replacing conditional branches with
@@ -835,19 +819,32 @@ private:
TPaths = std::move(TempList);
}
+ /// Fast helper to get the successor corresponding to a particular case value
+ /// for a switch statement.
+ BasicBlock *getNextCaseSuccessor(const APInt &NextState) {
+ // Precompute the value => successor mapping
+ if (CaseValToDest.empty()) {
+ for (auto Case : Switch->cases()) {
+ APInt CaseVal = Case.getCaseValue()->getValue();
+ CaseValToDest[CaseVal] = Case.getCaseSuccessor();
+ }
+ }
+
+ auto SuccIt = CaseValToDest.find(NextState);
+ return SuccIt == CaseValToDest.end() ? Switch->getDefaultDest()
+ : SuccIt->second;
+ }
+
// Two states are equivalent if they have the same switch destination.
// Unify the states in different threading path if the states are equivalent.
void unifyTPaths() {
- llvm::SmallDenseMap<BasicBlock *, APInt> DestToState;
+ SmallDenseMap<BasicBlock *, APInt> DestToState;
for (ThreadingPath &Path : TPaths) {
APInt NextState = Path.getExitValue();
- BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState);
- auto StateIt = DestToState.find(Dest);
- if (StateIt == DestToState.end()) {
- DestToState.insert({Dest, NextState});
+ BasicBlock *Dest = getNextCaseSuccessor(NextState);
+ auto [StateIt, Inserted] = DestToState.try_emplace(Dest, NextState);
+ if (Inserted)
continue;
- }
-
if (NextState != StateIt->second) {
LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to "
<< StateIt->second << "\n");
@@ -861,6 +858,7 @@ private:
BasicBlock *SwitchBlock;
OptimizationRemarkEmitter *ORE;
std::vector<ThreadingPath> TPaths;
+ DenseMap<APInt, BasicBlock *> CaseValToDest;
LoopInfo *LI;
Loop *SwitchOuterLoop;
};
@@ -1162,6 +1160,24 @@ private:
SSAUpdate.RewriteAllUses(&DTU->getDomTree());
}
+ /// Helper to get the successor corresponding to a particular case value for
+ /// a switch statement.
+ /// TODO: Unify it with SwitchPaths->getNextCaseSuccessor(SwitchInst *Switch)
+ /// by updating cached value => successor mapping during threading.
+ static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch,
+ const APInt &NextState) {
+ BasicBlock *NextCase = nullptr;
+ for (auto Case : Switch->cases()) {
+ if (Case.getCaseValue()->getValue() == NextState) {
+ NextCase = Case.getCaseSuccessor();
+ break;
+ }
+ }
+ if (!NextCase)
+ NextCase = Switch->getDefaultDest();
+ return NextCase;
+ }
+
/// Clones a basic block, and adds it to the CFG.
///
/// This function also includes updating phi nodes in the successors of the
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 3a8ade8..42db424 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2156,6 +2156,9 @@ bool GVNPass::processLoad(LoadInst *L) {
if (!L->isUnordered())
return false;
+ if (L->getType()->isTokenLikeTy())
+ return false;
+
if (L->use_empty()) {
salvageAndRemoveInstruction(L);
return true;
diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 3ae570c..4f1ff7b 100644
--- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -20,9 +20,8 @@
using namespace llvm;
-namespace {
-void nameInstructions(Function &F) {
- for (auto &Arg : F.args()) {
+static void nameInstructions(Function &F) {
+ for (Argument &Arg : F.args()) {
if (!Arg.hasName())
Arg.setName("arg");
}
@@ -38,8 +37,6 @@ void nameInstructions(Function &F) {
}
}
-} // namespace
-
PreservedAnalyses InstructionNamerPass::run(Function &F,
FunctionAnalysisManager &FAM) {
nameInstructions(F);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cfa8d27..2388375 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2245,6 +2245,26 @@ public:
Align Alignment, const int64_t Diff, Value *Ptr0,
Value *PtrN, StridedPtrInfo &SPtrInfo) const;
+ /// Return true if an array of scalar loads can be replaced with a strided
+ /// load (with run-time stride).
+ /// \param PointerOps list of pointer arguments of loads.
+ /// \param ScalarTy type of loads.
+ /// \param CommonAlignment common alignement of loads as computed by
+ /// `computeCommonAlignment<LoadInst>`.
+ /// \param SortedIndicies is a list of indicies computed by this function such
+ /// that the sequence `PointerOps[SortedIndices[0]],
+ /// PointerOps[SortedIndicies[1]], ..., PointerOps[SortedIndices[n]]` is
+ /// ordered by the coefficient of the stride. For example, if PointerOps is
+ /// `%base + %stride, %base, %base + 2 * stride` the `SortedIndices` will be
+ /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` has to be
+ /// `0, 1, 2, 3, ...` we return empty vector for `SortedIndicies`.
+ /// \param SPtrInfo If the function return `true`, it also sets all the fields
+ /// of `SPtrInfo` necessary to generate the strided load later.
+ bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align CommonAlignment,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ StridedPtrInfo &SPtrInfo) const;
+
/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
/// \param VL list of loads.
@@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
return false;
}
+bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
+ Type *ScalarTy, Align CommonAlignment,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ StridedPtrInfo &SPtrInfo) const {
+ const unsigned Sz = PointerOps.size();
+ FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz);
+ if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) ||
+ !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment))
+ return false;
+ if (const SCEV *Stride =
+ calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) {
+ SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
+ SPtrInfo.StrideSCEV = Stride;
+ return true;
+ }
+ return false;
+}
+
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo,
@@ -6915,15 +6953,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
auto *VecTy = getWidenedType(ScalarTy, Sz);
Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
if (!IsSorted) {
- if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) {
- if (const SCEV *Stride =
- calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order);
- Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) {
- SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
- SPtrInfo.StrideSCEV = Stride;
- return LoadsState::StridedVectorize;
- }
- }
+ if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order,
+ SPtrInfo))
+ return LoadsState::StridedVectorize;
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment))
@@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis {
void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
BasicBlock *Parent = nullptr;
// Checks if the instruction has supported opcode.
- auto IsSupportedInstruction = [&](Instruction *I) {
+ auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) {
+ if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I)))
+ return false;
return I && isSupportedOpcode(I->getOpcode()) &&
(!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
};
@@ -10640,10 +10674,13 @@ class InstructionsCompatibilityAnalysis {
// will be unable to schedule anyway.
SmallDenseSet<Value *, 8> Operands;
SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
+ bool AnyUndef = false;
for (Value *V : VL) {
auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!I) {
+ AnyUndef |= isa<UndefValue>(V);
continue;
+ }
if (!DT.isReachableFromEntry(I->getParent()))
continue;
if (Candidates.empty()) {
@@ -10678,7 +10715,7 @@ class InstructionsCompatibilityAnalysis {
if (P.second.size() < BestOpcodeNum)
continue;
for (Instruction *I : P.second) {
- if (IsSupportedInstruction(I) && !Operands.contains(I)) {
+ if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) {
MainOp = I;
BestOpcodeNum = P.second.size();
break;