diff options
Diffstat (limited to 'llvm/lib')
24 files changed, 261 insertions, 160 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 10df9c1..219bbc9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -20,6 +20,7 @@ #include "WinException.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -205,6 +206,17 @@ public: }; } // namespace +namespace callgraph { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +enum Flags : uint8_t { + None = 0, + IsIndirectTarget = 1u << 0, + HasDirectCallees = 1u << 1, + HasIndirectCallees = 1u << 2, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ HasIndirectCallees) +}; +} // namespace callgraph + class llvm::AddrLabelMap { MCContext &Context; struct AddrLabelSymEntry { @@ -1683,63 +1695,65 @@ void AsmPrinter::emitCallGraphSection(const MachineFunction &MF, OutStreamer->pushSection(); OutStreamer->switchSection(FuncCGSection); - // Emit format version number. - OutStreamer->emitInt64(CallGraphSectionFormatVersion::V_0); - - // Emit function's self information, which is composed of: - // 1) FunctionEntryPc - // 2) FunctionKind: Whether the function is indirect target, and if so, - // whether its type id is known. - // 3) FunctionTypeId: Emit only when the function is an indirect target - // and its type id is known. - - // Emit function entry pc. const MCSymbol *FunctionSymbol = getFunctionBegin(); - OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); - + const Function &F = MF.getFunction(); // If this function has external linkage or has its address taken and // it is not a callback, then anything could call it. - const Function &F = MF.getFunction(); bool IsIndirectTarget = !F.hasLocalLinkage() || F.hasAddressTaken(nullptr, /*IgnoreCallbackUses=*/true, /*IgnoreAssumeLikeCalls=*/true, /*IgnoreLLVMUsed=*/false); - // FIXME: FunctionKind takes a few values but emitted as a 64-bit value. - // Can be optimized to occupy 2 bits instead. - // Emit function kind, and type id if available. - if (!IsIndirectTarget) { - OutStreamer->emitInt64( - static_cast<uint64_t>(FunctionKind::NOT_INDIRECT_TARGET)); - } else { - if (const auto *TypeId = extractNumericCGTypeId(F)) { - OutStreamer->emitInt64( - static_cast<uint64_t>(FunctionKind::INDIRECT_TARGET_KNOWN_TID)); - OutStreamer->emitInt64(TypeId->getZExtValue()); - } else { - OutStreamer->emitInt64( - static_cast<uint64_t>(FunctionKind::INDIRECT_TARGET_UNKNOWN_TID)); - } - } + const auto &DirectCallees = FuncCGInfo.DirectCallees; + const auto &IndirectCalleeTypeIDs = FuncCGInfo.IndirectCalleeTypeIDs; + + using namespace callgraph; + Flags CGFlags = Flags::None; + if (IsIndirectTarget) + CGFlags |= Flags::IsIndirectTarget; + if (DirectCallees.size() > 0) + CGFlags |= Flags::HasDirectCallees; + if (IndirectCalleeTypeIDs.size() > 0) + CGFlags |= Flags::HasIndirectCallees; + + // Emit function's call graph information. + // 1) CallGraphSectionFormatVersion + // 2) Flags + // a. LSB bit 0 is set to 1 if the function is a potential indirect + // target. + // b. LSB bit 1 is set to 1 if there are direct callees. + // c. LSB bit 2 is set to 1 if there are indirect callees. + // d. Rest of the 5 bits in Flags are reserved for any future use. + // 3) Function entry PC. + // 4) FunctionTypeID if the function is indirect target and its type id + // is known, otherwise it is set to 0. + // 5) Number of unique direct callees, if at least one exists. + // 6) For each unique direct callee, the callee's PC. + // 7) Number of unique indirect target type IDs, if at least one exists. + // 8) Each unique indirect target type id. + OutStreamer->emitInt8(CallGraphSectionFormatVersion::V_0); + OutStreamer->emitInt8(static_cast<uint8_t>(CGFlags)); + OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); + const auto *TypeId = extractNumericCGTypeId(F); + if (IsIndirectTarget && TypeId) + OutStreamer->emitInt64(TypeId->getZExtValue()); + else + OutStreamer->emitInt64(0); - // Emit callsite labels, where each element is a pair of type id and - // indirect callsite pc. - const auto &CallSiteLabels = FuncCGInfo.CallSiteLabels; - OutStreamer->emitInt64(CallSiteLabels.size()); - for (const auto &[TypeId, Label] : CallSiteLabels) { - OutStreamer->emitInt64(TypeId); - OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize()); + if (DirectCallees.size() > 0) { + OutStreamer->emitULEB128IntValue(DirectCallees.size()); + for (const auto &CalleeSymbol : DirectCallees) + OutStreamer->emitSymbolValue(CalleeSymbol, TM.getProgramPointerSize()); + FuncCGInfo.DirectCallees.clear(); } - FuncCGInfo.CallSiteLabels.clear(); - - const auto &DirectCallees = FuncCGInfo.DirectCallees; - OutStreamer->emitInt64(DirectCallees.size()); - for (const auto &CalleeSymbol : DirectCallees) { - OutStreamer->emitSymbolValue(CalleeSymbol, TM.getProgramPointerSize()); + if (IndirectCalleeTypeIDs.size() > 0) { + OutStreamer->emitULEB128IntValue(IndirectCalleeTypeIDs.size()); + for (const auto &CalleeTypeId : IndirectCalleeTypeIDs) + OutStreamer->emitInt64(CalleeTypeId); + FuncCGInfo.IndirectCalleeTypeIDs.clear(); } - FuncCGInfo.DirectCallees.clear(); - + // End of emitting call graph section contents. OutStreamer->popSection(); } @@ -1877,8 +1891,7 @@ void AsmPrinter::handleCallsiteForCallgraph( FunctionCallGraphInfo &FuncCGInfo, const MachineFunction::CallSiteInfoMap &CallSitesInfoMap, const MachineInstr &MI) { - assert(MI.isCall() && - "Callsite labels are meant for call instructions only."); + assert(MI.isCall() && "This method is meant for call instructions only."); const MachineOperand &CalleeOperand = MI.getOperand(0); if (CalleeOperand.isGlobal() || CalleeOperand.isSymbol()) { // Handle direct calls. @@ -1903,10 +1916,8 @@ void AsmPrinter::handleCallsiteForCallgraph( // Handle indirect callsite info. // Only indirect calls have type identifiers set. for (ConstantInt *CalleeTypeId : CallSiteInfo->second.CalleeTypeIds) { - MCSymbol *S = MF->getContext().createTempSymbol(); - OutStreamer->emitLabel(S); uint64_t CalleeTypeIdVal = CalleeTypeId->getZExtValue(); - FuncCGInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S); + FuncCGInfo.IndirectCalleeTypeIDs.insert(CalleeTypeIdVal); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 12d749c..e57ed24 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -569,40 +569,30 @@ void CodeViewDebug::emitCodeViewMagicVersion() { OS.emitInt32(COFF::DEBUG_SECTION_MAGIC); } -static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { - switch (DWLang) { - case dwarf::DW_LANG_C: - case dwarf::DW_LANG_C89: - case dwarf::DW_LANG_C99: - case dwarf::DW_LANG_C11: +static SourceLanguage +MapDWARFLanguageToCVLang(dwarf::SourceLanguageName DWLName) { + switch (DWLName) { + case dwarf::DW_LNAME_C: return SourceLanguage::C; - case dwarf::DW_LANG_C_plus_plus: - case dwarf::DW_LANG_C_plus_plus_03: - case dwarf::DW_LANG_C_plus_plus_11: - case dwarf::DW_LANG_C_plus_plus_14: + case dwarf::DW_LNAME_C_plus_plus: return SourceLanguage::Cpp; - case dwarf::DW_LANG_Fortran77: - case dwarf::DW_LANG_Fortran90: - case dwarf::DW_LANG_Fortran95: - case dwarf::DW_LANG_Fortran03: - case dwarf::DW_LANG_Fortran08: + case dwarf::DW_LNAME_Fortran: return SourceLanguage::Fortran; - case dwarf::DW_LANG_Pascal83: + case dwarf::DW_LNAME_Pascal: return SourceLanguage::Pascal; - case dwarf::DW_LANG_Cobol74: - case dwarf::DW_LANG_Cobol85: + case dwarf::DW_LNAME_Cobol: return SourceLanguage::Cobol; - case dwarf::DW_LANG_Java: + case dwarf::DW_LNAME_Java: return SourceLanguage::Java; - case dwarf::DW_LANG_D: + case dwarf::DW_LNAME_D: return SourceLanguage::D; - case dwarf::DW_LANG_Swift: + case dwarf::DW_LNAME_Swift: return SourceLanguage::Swift; - case dwarf::DW_LANG_Rust: + case dwarf::DW_LNAME_Rust: return SourceLanguage::Rust; - case dwarf::DW_LANG_ObjC: + case dwarf::DW_LNAME_ObjC: return SourceLanguage::ObjC; - case dwarf::DW_LANG_ObjC_plus_plus: + case dwarf::DW_LNAME_ObjC_plus_plus: return SourceLanguage::ObjCpp; default: // There's no CodeView representation for this language, and CV doesn't @@ -612,6 +602,14 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { } } +static SourceLanguage MapDWARFLanguageToCVLang(dwarf::SourceLanguage DWLang) { + auto MaybeLName = dwarf::toDW_LNAME(DWLang); + if (!MaybeLName) + return MapDWARFLanguageToCVLang(static_cast<dwarf::SourceLanguageName>(0)); + + return MapDWARFLanguageToCVLang(MaybeLName->first); +} + void CodeViewDebug::beginModule(Module *M) { // If COFF debug section is not available, skip any debug info related stuff. if (!Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) { @@ -633,8 +631,13 @@ void CodeViewDebug::beginModule(Module *M) { Node = *CUs->operands().begin(); } const auto *CU = cast<DICompileUnit>(Node); + DISourceLanguageName Lang = CU->getSourceLanguage(); CurrentSourceLanguage = - MapDWLangToCVLang(CU->getSourceLanguage().getUnversionedName()); + Lang.hasVersionedName() + ? MapDWARFLanguageToCVLang( + static_cast<dwarf::SourceLanguageName>(Lang.getName())) + : MapDWARFLanguageToCVLang( + static_cast<dwarf::SourceLanguage>(Lang.getName())); if (!M->getCodeViewFlag() || CU->getEmissionKind() == DICompileUnit::NoDebug) { Asm = nullptr; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index d751a7f..433877f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1039,8 +1039,12 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, } else NewCU.addString(Die, dwarf::DW_AT_producer, Producer); - NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit->getSourceLanguage().getUnversionedName()); + if (auto Lang = DIUnit->getSourceLanguage(); Lang.hasVersionedName()) + NewCU.addUInt(Die, dwarf::DW_AT_language_name, dwarf::DW_FORM_data2, + Lang.getName()); + else + NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + Lang.getName()); NewCU.addString(Die, dwarf::DW_AT_name, FN); StringRef SysRoot = DIUnit->getSysRoot(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5ffdc4e..b47274b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18870,27 +18870,38 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI) { - // We only do this if the target has legal ftrunc. Otherwise, we'd likely be - // replacing casts with a libcall. We also must be allowed to ignore -0.0 - // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer - // conversions would return +0.0. + // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc. + // If NoSignedZerosFPMath is enabled, this is a direct replacement. + // Otherwise, for strict math, we must handle edge cases: + // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0 + // as example, it first becomes integer 0, and is converted back to +0.0. + // FTRUNC on its own could produce -0.0. + // FIXME: We should be able to use node-level FMF here. - // TODO: If strict math, should we use FABS (+ range check for signed cast)? EVT VT = N->getValueType(0); - if (!TLI.isOperationLegal(ISD::FTRUNC, VT) || - !DAG.getTarget().Options.NoSignedZerosFPMath) + if (!TLI.isOperationLegal(ISD::FTRUNC, VT)) return SDValue(); // fptosi/fptoui round towards zero, so converting from FP to integer and // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X SDValue N0 = N->getOperand(0); if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT && - N0.getOperand(0).getValueType() == VT) - return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + N0.getOperand(0).getValueType() == VT) { + if (DAG.getTarget().Options.NoSignedZerosFPMath) + return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + } if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT && - N0.getOperand(0).getValueType() == VT) - return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + N0.getOperand(0).getValueType() == VT) { + if (DAG.getTarget().Options.NoSignedZerosFPMath) + return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + + // Strict math: use FABS to handle negative inputs correctly. + if (TLI.isFAbsFree(VT)) { + SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::FTRUNC, DL, VT, Abs); + } + } return SDValue(); } diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index fba6942..2477e22 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -411,3 +411,17 @@ ConstantFPRange ConstantFPRange::abs() const { ConstantFPRange ConstantFPRange::negate() const { return ConstantFPRange(-Upper, -Lower, MayBeQNaN, MayBeSNaN); } + +ConstantFPRange ConstantFPRange::getWithoutInf() const { + if (isNaNOnly()) + return *this; + APFloat NewLower = Lower; + APFloat NewUpper = Upper; + if (Lower.isNegInfinity()) + NewLower = APFloat::getLargest(getSemantics(), /*Negative=*/true); + if (Upper.isPosInfinity()) + NewUpper = APFloat::getLargest(getSemantics(), /*Negative=*/false); + canonicalizeRange(NewLower, NewUpper); + return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN, + MayBeSNaN); +} diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp index c5a2ec2..7a8c8ad 100644 --- a/llvm/lib/Object/XCOFFObjectFile.cpp +++ b/llvm/lib/Object/XCOFFObjectFile.cpp @@ -379,7 +379,7 @@ Expected<StringRef> XCOFFObjectFile::getSectionName(DataRefImpl Sec) const { } uint64_t XCOFFObjectFile::getSectionAddress(DataRefImpl Sec) const { - // Avoid ternary due to failure to convert the ubig32_t value to a unit64_t + // Avoid ternary due to failure to convert the ubig32_t value to a uint64_t // with MSVC. if (is64Bit()) return toSection64(Sec)->VirtualAddress; @@ -397,7 +397,7 @@ uint64_t XCOFFObjectFile::getSectionIndex(DataRefImpl Sec) const { } uint64_t XCOFFObjectFile::getSectionSize(DataRefImpl Sec) const { - // Avoid ternary due to failure to convert the ubig32_t value to a unit64_t + // Avoid ternary due to failure to convert the ubig32_t value to a uint64_t // with MSVC. if (is64Bit()) return toSection64(Sec)->SectionSize; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 7392f4b..bfe2c80 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -633,6 +633,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}}) .Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}}); + addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard) + .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}}) + .Div(S32, {{Vgpr32}, {Vgpr32, Imm}}) + .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}}) + .Div(S64, {{Vgpr64}, {Vgpr64, Imm}}); + bool hasSMRDx3 = ST->hasScalarDwordx3Loads(); bool hasSMRDSmall = ST->hasScalarSubwordLoads(); bool usesTrue16 = ST->useRealTrue16Insts(); diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index 4d3331a..c684f9e 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -674,15 +674,9 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) { createUnpackedMI(I, UnpackedOpcode, /*IsHiBits=*/true); MachineOperand HiDstOp = Op0HOp1H->getOperand(0); - if (I.getFlag(MachineInstr::MIFlag::NoFPExcept)) { - Op0LOp1L->setFlag(MachineInstr::MIFlag::NoFPExcept); - Op0HOp1H->setFlag(MachineInstr::MIFlag::NoFPExcept); - } - if (I.getFlag(MachineInstr::MIFlag::FmContract)) { - Op0LOp1L->setFlag(MachineInstr::MIFlag::FmContract); - Op0HOp1H->setFlag(MachineInstr::MIFlag::FmContract); - } - + uint32_t IFlags = I.getFlags(); + Op0LOp1L->setFlags(IFlags); + Op0HOp1H->setFlags(IFlags); LoDstOp.setIsRenamable(DstOp.isRenamable()); HiDstOp.setIsRenamable(DstOp.isRenamable()); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 2a40fb9..83c7def 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -42,7 +42,6 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ComplexDeinterleavingPass.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp index e8c849e..28a1690 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp @@ -46,7 +46,6 @@ #include "SPIRVSubtarget.h" #include "SPIRVTargetMachine.h" #include "SPIRVUtils.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" diff --git a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp index 20f03b0..60d39c9 100644 --- a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" diff --git a/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp index 278ad7c..e621bcd44 100644 --- a/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp @@ -14,7 +14,6 @@ #include "SPIRV.h" #include "SPIRVSubtarget.h" #include "SPIRVUtils.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Transforms/Utils/Cloning.h" diff --git a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp index 1811492..5b149f8 100644 --- a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9580ade..1cfcb1f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/VectorUtils.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index ac3626d..f021094 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -375,6 +375,8 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { case MuslSF: return "muslsf"; case MuslX32: return "muslx32"; + case MuslWALI: + return "muslwali"; case Simulator: return "simulator"; case Pixel: return "pixel"; case Vertex: return "vertex"; @@ -767,6 +769,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("muslf32", Triple::MuslF32) .StartsWith("muslsf", Triple::MuslSF) .StartsWith("muslx32", Triple::MuslX32) + .StartsWith("muslwali", Triple::MuslWALI) .StartsWith("musl", Triple::Musl) .StartsWith("msvc", Triple::MSVC) .StartsWith("itanium", Triple::Itanium) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 59e103cd..73ec451 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -880,11 +880,13 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { // zext(bool) + C -> bool ? C + 1 : C if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1); + return createSelectInstWithUnknownProfile(X, InstCombiner::AddOne(Op1C), + Op1); // sext(bool) + C -> bool ? C - 1 : C if (match(Op0, m_SExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1); + return createSelectInstWithUnknownProfile(X, InstCombiner::SubOne(Op1C), + Op1); // ~X + C --> (C-1) - X if (match(Op0, m_Not(m_Value(X)))) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 218aaf9..7071876 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -471,18 +471,19 @@ private: Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable, unsigned Depth = 0); - SelectInst *createSelectInst(Value *C, Value *S1, Value *S2, - const Twine &NameStr = "", - InsertPosition InsertBefore = nullptr, - Instruction *MDFrom = nullptr) { - SelectInst *SI = - SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom); - if (!MDFrom) - setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE); - return SI; +public: + /// Create `select C, S1, S2`. Use only when the profile cannot be calculated + /// from existing profile metadata: if the Function has profiles, this will + /// set the profile of this select to "unknown". + SelectInst * + createSelectInstWithUnknownProfile(Value *C, Value *S1, Value *S2, + const Twine &NameStr = "", + InsertPosition InsertBefore = nullptr) { + auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr); + setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE); + return Sel; } -public: /// Create and insert the idiom we use to indicate a block is unreachable /// without having to rewrite the CFG from within InstCombine. void CreateNonTerminatorUnreachable(Instruction *InsertAt) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index d457e0c..899a3c1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1253,7 +1253,8 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { // shl (zext i1 X), C1 --> select (X, 1 << C1, 0) if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1); - return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty)); + return createSelectInstWithUnknownProfile(X, NewC, + ConstantInt::getNullValue(Ty)); } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 127a506..63e24a0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" @@ -107,7 +108,10 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I, Value *ShrAmtZ = IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()), ShrAmt->getName() + ".z"); - Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper); + // There is no existing !prof metadata we can derive the !prof metadata for + // this select. + Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper); + IC.Builder.Insert(Select); Select->takeName(I); return Select; } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index d56a1af..82ac903 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1737,7 +1737,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { Constant *Zero = ConstantInt::getNullValue(BO.getType()); Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C); Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C); - return createSelectInst(X, TVal, FVal); + return createSelectInstWithUnknownProfile(X, TVal, FVal); } static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index e5935f4..ff5f390 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -386,22 +386,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { return OS; } -/// Helper to get the successor corresponding to a particular case value for -/// a switch statement. -static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, - const APInt &NextState) { - BasicBlock *NextCase = nullptr; - for (auto Case : Switch->cases()) { - if (Case.getCaseValue()->getValue() == NextState) { - NextCase = Case.getCaseSuccessor(); - break; - } - } - if (!NextCase) - NextCase = Switch->getDefaultDest(); - return NextCase; -} - namespace { /// ThreadingPath is a path in the control flow of a loop that can be threaded /// by cloning necessary basic blocks and replacing conditional branches with @@ -835,19 +819,32 @@ private: TPaths = std::move(TempList); } + /// Fast helper to get the successor corresponding to a particular case value + /// for a switch statement. + BasicBlock *getNextCaseSuccessor(const APInt &NextState) { + // Precompute the value => successor mapping + if (CaseValToDest.empty()) { + for (auto Case : Switch->cases()) { + APInt CaseVal = Case.getCaseValue()->getValue(); + CaseValToDest[CaseVal] = Case.getCaseSuccessor(); + } + } + + auto SuccIt = CaseValToDest.find(NextState); + return SuccIt == CaseValToDest.end() ? Switch->getDefaultDest() + : SuccIt->second; + } + // Two states are equivalent if they have the same switch destination. // Unify the states in different threading path if the states are equivalent. void unifyTPaths() { - llvm::SmallDenseMap<BasicBlock *, APInt> DestToState; + SmallDenseMap<BasicBlock *, APInt> DestToState; for (ThreadingPath &Path : TPaths) { APInt NextState = Path.getExitValue(); - BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState); - auto StateIt = DestToState.find(Dest); - if (StateIt == DestToState.end()) { - DestToState.insert({Dest, NextState}); + BasicBlock *Dest = getNextCaseSuccessor(NextState); + auto [StateIt, Inserted] = DestToState.try_emplace(Dest, NextState); + if (Inserted) continue; - } - if (NextState != StateIt->second) { LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to " << StateIt->second << "\n"); @@ -861,6 +858,7 @@ private: BasicBlock *SwitchBlock; OptimizationRemarkEmitter *ORE; std::vector<ThreadingPath> TPaths; + DenseMap<APInt, BasicBlock *> CaseValToDest; LoopInfo *LI; Loop *SwitchOuterLoop; }; @@ -1162,6 +1160,24 @@ private: SSAUpdate.RewriteAllUses(&DTU->getDomTree()); } + /// Helper to get the successor corresponding to a particular case value for + /// a switch statement. + /// TODO: Unify it with SwitchPaths->getNextCaseSuccessor(SwitchInst *Switch) + /// by updating cached value => successor mapping during threading. + static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, + const APInt &NextState) { + BasicBlock *NextCase = nullptr; + for (auto Case : Switch->cases()) { + if (Case.getCaseValue()->getValue() == NextState) { + NextCase = Case.getCaseSuccessor(); + break; + } + } + if (!NextCase) + NextCase = Switch->getDefaultDest(); + return NextCase; + } + /// Clones a basic block, and adds it to the CFG. /// /// This function also includes updating phi nodes in the successors of the diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 3a8ade8..42db424 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2156,6 +2156,9 @@ bool GVNPass::processLoad(LoadInst *L) { if (!L->isUnordered()) return false; + if (L->getType()->isTokenLikeTy()) + return false; + if (L->use_empty()) { salvageAndRemoveInstruction(L); return true; diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp index 3ae570c..4f1ff7b 100644 --- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -20,9 +20,8 @@ using namespace llvm; -namespace { -void nameInstructions(Function &F) { - for (auto &Arg : F.args()) { +static void nameInstructions(Function &F) { + for (Argument &Arg : F.args()) { if (!Arg.hasName()) Arg.setName("arg"); } @@ -38,8 +37,6 @@ void nameInstructions(Function &F) { } } -} // namespace - PreservedAnalyses InstructionNamerPass::run(Function &F, FunctionAnalysisManager &FAM) { nameInstructions(F); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cfa8d27..2388375 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2245,6 +2245,26 @@ public: Align Alignment, const int64_t Diff, Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const; + /// Return true if an array of scalar loads can be replaced with a strided + /// load (with run-time stride). + /// \param PointerOps list of pointer arguments of loads. + /// \param ScalarTy type of loads. + /// \param CommonAlignment common alignement of loads as computed by + /// `computeCommonAlignment<LoadInst>`. + /// \param SortedIndicies is a list of indicies computed by this function such + /// that the sequence `PointerOps[SortedIndices[0]], + /// PointerOps[SortedIndicies[1]], ..., PointerOps[SortedIndices[n]]` is + /// ordered by the coefficient of the stride. For example, if PointerOps is + /// `%base + %stride, %base, %base + 2 * stride` the `SortedIndices` will be + /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` has to be + /// `0, 1, 2, 3, ...` we return empty vector for `SortedIndicies`. + /// \param SPtrInfo If the function return `true`, it also sets all the fields + /// of `SPtrInfo` necessary to generate the strided load later. + bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy, + Align CommonAlignment, + SmallVectorImpl<unsigned> &SortedIndices, + StridedPtrInfo &SPtrInfo) const; + /// Checks if the given array of loads can be represented as a vectorized, /// scatter or just simple gather. /// \param VL list of loads. @@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy, return false; } +bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, + Type *ScalarTy, Align CommonAlignment, + SmallVectorImpl<unsigned> &SortedIndices, + StridedPtrInfo &SPtrInfo) const { + const unsigned Sz = PointerOps.size(); + FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz); + if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) || + !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment)) + return false; + if (const SCEV *Stride = + calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) { + SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size()); + SPtrInfo.StrideSCEV = Stride; + return true; + } + return false; +} + BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order, SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo, @@ -6915,15 +6953,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( auto *VecTy = getWidenedType(ScalarTy, Sz); Align CommonAlignment = computeCommonAlignment<LoadInst>(VL); if (!IsSorted) { - if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) { - if (const SCEV *Stride = - calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order); - Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) { - SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size()); - SPtrInfo.StrideSCEV = Stride; - return LoadsState::StridedVectorize; - } - } + if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order, + SPtrInfo)) + return LoadsState::StridedVectorize; if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) || TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment)) @@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis { void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) { BasicBlock *Parent = nullptr; // Checks if the instruction has supported opcode. - auto IsSupportedInstruction = [&](Instruction *I) { + auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) { + if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I))) + return false; return I && isSupportedOpcode(I->getOpcode()) && (!doesNotNeedToBeScheduled(I) || !R.isVectorized(I)); }; @@ -10640,10 +10674,13 @@ class InstructionsCompatibilityAnalysis { // will be unable to schedule anyway. SmallDenseSet<Value *, 8> Operands; SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates; + bool AnyUndef = false; for (Value *V : VL) { auto *I = dyn_cast<Instruction>(V); - if (!I) + if (!I) { + AnyUndef |= isa<UndefValue>(V); continue; + } if (!DT.isReachableFromEntry(I->getParent())) continue; if (Candidates.empty()) { @@ -10678,7 +10715,7 @@ class InstructionsCompatibilityAnalysis { if (P.second.size() < BestOpcodeNum) continue; for (Instruction *I : P.second) { - if (IsSupportedInstruction(I) && !Operands.contains(I)) { + if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) { MainOp = I; BestOpcodeNum = P.second.size(); break; |