Diffstat (limited to 'llvm/lib')
61 files changed, 1514 insertions, 987 deletions
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index 6885351..1794a60 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -239,10 +239,21 @@ void FlowAwareEmbedder::computeEmbeddings(const BasicBlock &BB) const {
       // If the operand is defined elsewhere, we use its embedding
       if (const auto *DefInst = dyn_cast<Instruction>(Op)) {
         auto DefIt = InstVecMap.find(DefInst);
-        assert(DefIt != InstVecMap.end() &&
-               "Instruction should have been processed before its operands");
-        ArgEmb += DefIt->second;
-        continue;
+        // FIXME (#159171): Ideally we should never miss an instruction
+        // embedding here.
+        // But when we have cyclic dependencies (e.g., phi
+        // nodes), we might miss the embedding. In such cases, we fall back to
+        // using the vocabulary embedding. This can be fixed by iterating to a
+        // fixed-point, or by using a simple solver for the set of simultaneous
+        // equations.
+        // Another case where we might miss an instruction embedding is when
+        // the operand instruction is in a different basic block that has not
+        // been processed yet. This can be fixed by processing the basic blocks
+        // in a topological order.
+        if (DefIt != InstVecMap.end())
+          ArgEmb += DefIt->second;
+        else
+          ArgEmb += Vocab[*Op];
       }
       // If the operand is not defined by an instruction, we use the vocabulary
       else {
diff --git a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 0fbf082..f31d625 100644
--- a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -43,11 +43,13 @@ static void printModuleDebugInfo(raw_ostream &O, const Module *M,
   // filenames), so just print a few useful things.
   for (DICompileUnit *CU : Finder.compile_units()) {
     O << "Compile unit: ";
-    auto Lang = dwarf::LanguageString(CU->getSourceLanguage());
+    auto Lang =
+        dwarf::LanguageString(CU->getSourceLanguage().getUnversionedName());
    if (!Lang.empty())
      O << Lang;
    else
-      O << "unknown-language(" << CU->getSourceLanguage() << ")";
+      O << "unknown-language(" << CU->getSourceLanguage().getUnversionedName()
+        << ")";
    printFile(O, CU->getFilename(), CU->getDirectory());
    O << '\n';
  }
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 63e1b14..6f6776c 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6351,19 +6351,20 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
   return getGEPExpr(GEP, IndexExprs);
 }

-APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
+APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S,
+                                               const Instruction *CtxI) {
   uint64_t BitWidth = getTypeSizeInBits(S->getType());
   auto GetShiftedByZeros = [BitWidth](uint32_t TrailingZeros) {
     return TrailingZeros >= BitWidth
                ? APInt::getZero(BitWidth)
                : APInt::getOneBitSet(BitWidth, TrailingZeros);
   };
-  auto GetGCDMultiple = [this](const SCEVNAryExpr *N) {
+  auto GetGCDMultiple = [this, CtxI](const SCEVNAryExpr *N) {
     // The result is GCD of all operands results.
-    APInt Res = getConstantMultiple(N->getOperand(0));
+    APInt Res = getConstantMultiple(N->getOperand(0), CtxI);
     for (unsigned I = 1, E = N->getNumOperands(); I < E && Res != 1; ++I)
       Res = APIntOps::GreatestCommonDivisor(
-          Res, getConstantMultiple(N->getOperand(I)));
+          Res, getConstantMultiple(N->getOperand(I), CtxI));
     return Res;
   };

@@ -6371,33 +6372,33 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
   case scConstant:
     return cast<SCEVConstant>(S)->getAPInt();
   case scPtrToInt:
-    return getConstantMultiple(cast<SCEVPtrToIntExpr>(S)->getOperand());
+    return getConstantMultiple(cast<SCEVPtrToIntExpr>(S)->getOperand(), CtxI);
   case scUDivExpr:
   case scVScale:
     return APInt(BitWidth, 1);
   case scTruncate: {
     // Only multiples that are a power of 2 will hold after truncation.
     const SCEVTruncateExpr *T = cast<SCEVTruncateExpr>(S);
-    uint32_t TZ = getMinTrailingZeros(T->getOperand());
+    uint32_t TZ = getMinTrailingZeros(T->getOperand(), CtxI);
     return GetShiftedByZeros(TZ);
   }
   case scZeroExtend: {
     const SCEVZeroExtendExpr *Z = cast<SCEVZeroExtendExpr>(S);
-    return getConstantMultiple(Z->getOperand()).zext(BitWidth);
+    return getConstantMultiple(Z->getOperand(), CtxI).zext(BitWidth);
   }
   case scSignExtend: {
     // Only multiples that are a power of 2 will hold after sext.
     const SCEVSignExtendExpr *E = cast<SCEVSignExtendExpr>(S);
-    uint32_t TZ = getMinTrailingZeros(E->getOperand());
+    uint32_t TZ = getMinTrailingZeros(E->getOperand(), CtxI);
     return GetShiftedByZeros(TZ);
   }
   case scMulExpr: {
     const SCEVMulExpr *M = cast<SCEVMulExpr>(S);
     if (M->hasNoUnsignedWrap()) {
       // The result is the product of all operand results.
-      APInt Res = getConstantMultiple(M->getOperand(0));
+      APInt Res = getConstantMultiple(M->getOperand(0), CtxI);
       for (const SCEV *Operand : M->operands().drop_front())
-        Res = Res * getConstantMultiple(Operand);
+        Res = Res * getConstantMultiple(Operand, CtxI);
       return Res;
     }

@@ -6405,7 +6406,7 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
     // sum of trailing zeros for all its operands.
     uint32_t TZ = 0;
     for (const SCEV *Operand : M->operands())
-      TZ += getMinTrailingZeros(Operand);
+      TZ += getMinTrailingZeros(Operand, CtxI);
     return GetShiftedByZeros(TZ);
   }
   case scAddExpr:
@@ -6414,9 +6415,9 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
     if (N->hasNoUnsignedWrap())
       return GetGCDMultiple(N);
     // Find the trailing bits, which is the minimum of its operands.
-    uint32_t TZ = getMinTrailingZeros(N->getOperand(0));
+    uint32_t TZ = getMinTrailingZeros(N->getOperand(0), CtxI);
     for (const SCEV *Operand : N->operands().drop_front())
-      TZ = std::min(TZ, getMinTrailingZeros(Operand));
+      TZ = std::min(TZ, getMinTrailingZeros(Operand, CtxI));
     return GetShiftedByZeros(TZ);
   }
   case scUMaxExpr:
@@ -6429,7 +6430,7 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
     // ask ValueTracking for known bits
     const SCEVUnknown *U = cast<SCEVUnknown>(S);
     unsigned Known =
-        computeKnownBits(U->getValue(), getDataLayout(), &AC, nullptr, &DT)
+        computeKnownBits(U->getValue(), getDataLayout(), &AC, CtxI, &DT)
             .countMinTrailingZeros();
     return GetShiftedByZeros(Known);
   }
@@ -6439,12 +6440,18 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
   llvm_unreachable("Unknown SCEV kind!");
 }

-APInt ScalarEvolution::getConstantMultiple(const SCEV *S) {
+APInt ScalarEvolution::getConstantMultiple(const SCEV *S,
+                                           const Instruction *CtxI) {
+  // Skip looking up and updating the cache if there is a context instruction,
+  // as the result will only be valid in the specified context.
+  if (CtxI)
+    return getConstantMultipleImpl(S, CtxI);
+
   auto I = ConstantMultipleCache.find(S);
   if (I != ConstantMultipleCache.end())
     return I->second;

-  APInt Result = getConstantMultipleImpl(S);
+  APInt Result = getConstantMultipleImpl(S, CtxI);
   auto InsertPair = ConstantMultipleCache.insert({S, Result});
   assert(InsertPair.second && "Should insert a new key");
   return InsertPair.first->second;
@@ -6455,8 +6462,9 @@ APInt ScalarEvolution::getNonZeroConstantMultiple(const SCEV *S) {
   return Multiple == 0 ? APInt(Multiple.getBitWidth(), 1) : Multiple;
 }

-uint32_t ScalarEvolution::getMinTrailingZeros(const SCEV *S) {
-  return std::min(getConstantMultiple(S).countTrailingZeros(),
+uint32_t ScalarEvolution::getMinTrailingZeros(const SCEV *S,
+                                              const Instruction *CtxI) {
+  return std::min(getConstantMultiple(S, CtxI).countTrailingZeros(),
                   (unsigned)getTypeSizeInBits(S->getType()));
 }

@@ -10243,8 +10251,7 @@ const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const {
 static const SCEV *
 SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
                              SmallVectorImpl<const SCEVPredicate *> *Predicates,
-
-                             ScalarEvolution &SE) {
+                             ScalarEvolution &SE, const Loop *L) {
   uint32_t BW = A.getBitWidth();
   assert(BW == SE.getTypeSizeInBits(B->getType()));
   assert(A != 0 && "A must be non-zero.");
@@ -10260,7 +10267,12 @@ SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
   //
   // B is divisible by D if and only if the multiplicity of prime factor 2 for B
   // is not less than multiplicity of this prime factor for D.
-  if (SE.getMinTrailingZeros(B) < Mult2) {
+  unsigned MinTZ = SE.getMinTrailingZeros(B);
+  // Try again with the terminator of the loop predecessor for a
+  // context-specific result, if MinTZ is too small.
+  if (MinTZ < Mult2 && L->getLoopPredecessor())
+    MinTZ = SE.getMinTrailingZeros(B, L->getLoopPredecessor()->getTerminator());
+  if (MinTZ < Mult2) {
     // Check if we can prove there's no remainder using URem.
     const SCEV *URem =
         SE.getURemExpr(B, SE.getConstant(APInt::getOneBitSet(BW, Mult2)));
@@ -10708,7 +10720,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::howFarToZero(const SCEV *V,
     return getCouldNotCompute();
   const SCEV *E = SolveLinEquationWithOverflow(
       StepC->getAPInt(), getNegativeSCEV(Start),
-      AllowPredicates ? &Predicates : nullptr, *this);
+      AllowPredicates ? &Predicates : nullptr, *this, L);

   const SCEV *M = E;
   if (E != getCouldNotCompute()) {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 897e679..5589966 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -5861,11 +5861,11 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
 #undef VISIT_MD_FIELDS

   Result = DICompileUnit::getDistinct(
-      Context, language.Val, file.Val, producer.Val, isOptimized.Val, flags.Val,
-      runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val,
-      retainedTypes.Val, globals.Val, imports.Val, macros.Val, dwoId.Val,
-      splitDebugInlining.Val, debugInfoForProfiling.Val, nameTableKind.Val,
-      rangesBaseAddress.Val, sysroot.Val, sdk.Val);
+      Context, DISourceLanguageName(language.Val), file.Val, producer.Val,
+      isOptimized.Val, flags.Val, runtimeVersion.Val, splitDebugFilename.Val,
+      emissionKind.Val, enums.Val, retainedTypes.Val, globals.Val, imports.Val,
+      macros.Val, dwoId.Val, splitDebugInlining.Val, debugInfoForProfiling.Val,
+      nameTableKind.Val, rangesBaseAddress.Val, sysroot.Val, sdk.Val);
   return false;
 }
diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp
index 8b24044..969047a 100644
--- a/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -472,6 +472,137 @@ StringRef llvm::dwarf::LanguageDescription(dwarf::SourceLanguageName lname) {
   return "Unknown";
 }

+StringRef llvm::dwarf::LanguageDescription(dwarf::SourceLanguageName Name,
+                                           uint32_t Version) {
+  switch (Name) {
+  // YYYY
+  case DW_LNAME_Ada: {
+    if (Version <= 1983)
+      return "Ada 83";
+    if (Version <= 1995)
+      return "Ada 95";
+    if (Version <= 2005)
+      return "Ada 2005";
+    if (Version <= 2012)
+      return "Ada 2012";
+  } break;
+
+  case DW_LNAME_Cobol: {
+    if (Version <= 1974)
+      return "COBOL-74";
+    if (Version <= 1985)
+      return "COBOL-85";
+  } break;
+
+  case DW_LNAME_Fortran: {
+    if (Version <= 1977)
+      return "FORTRAN 77";
+    if (Version <= 1990)
+      return "FORTRAN 90";
+    if (Version <= 1995)
+      return "Fortran 95";
+    if (Version <= 2003)
+      return "Fortran 2003";
+    if (Version <= 2008)
+      return "Fortran 2008";
+    if (Version <= 2018)
+      return "Fortran 2018";
+  } break;
+
+  // YYYYMM
+  case DW_LNAME_C: {
+    if (Version == 0)
+      break;
+    if (Version <= 198912)
+      return "C89";
+    if (Version <= 199901)
+      return "C99";
+    if (Version <= 201112)
+      return "C11";
+    if (Version <= 201710)
+      return "C17";
+  } break;
+
+  case DW_LNAME_C_plus_plus: {
+    if (Version == 0)
+      break;
+    if (Version <= 199711)
+      return "C++98";
+    if (Version <= 200310)
+      return "C++03";
+    if (Version <= 201103)
+      return "C++11";
+    if (Version <= 201402)
+      return "C++14";
+    if (Version <= 201703)
+      return "C++17";
+    if (Version <= 202002)
+      return "C++20";
+  } break;
+
+  case DW_LNAME_ObjC_plus_plus:
+  case DW_LNAME_ObjC:
+  case DW_LNAME_Move:
+  case DW_LNAME_SYCL:
+  case DW_LNAME_BLISS:
+  case DW_LNAME_Crystal:
+  case DW_LNAME_D:
+  case DW_LNAME_Dylan:
+  case DW_LNAME_Go:
+  case DW_LNAME_Haskell:
+  case DW_LNAME_HLSL:
+  case DW_LNAME_Java:
+  case DW_LNAME_Julia:
+  case DW_LNAME_Kotlin:
+  case DW_LNAME_Modula2:
+  case DW_LNAME_Modula3:
+  case DW_LNAME_OCaml:
+  case DW_LNAME_OpenCL_C:
+  case DW_LNAME_Pascal:
+  case DW_LNAME_PLI:
+  case DW_LNAME_Python:
+  case DW_LNAME_RenderScript:
+  case DW_LNAME_Rust:
+  case DW_LNAME_Swift:
+  case DW_LNAME_UPC:
+  case DW_LNAME_Zig:
+  case DW_LNAME_Assembly:
+  case DW_LNAME_C_sharp:
+  case DW_LNAME_Mojo:
+  case DW_LNAME_GLSL:
+  case DW_LNAME_GLSL_ES:
+  case DW_LNAME_OpenCL_CPP:
+  case DW_LNAME_CPP_for_OpenCL:
+  case DW_LNAME_Ruby:
+  case DW_LNAME_Hylo:
+  case DW_LNAME_Metal:
+    break;
+  }
+
+  // Fallback to un-versioned name.
+  return LanguageDescription(Name);
+}
+
+llvm::StringRef llvm::dwarf::SourceLanguageNameString(SourceLanguageName Lang) {
+  switch (Lang) {
+#define HANDLE_DW_LNAME(ID, NAME, DESC, LOWER_BOUND)                           \
+  case DW_LNAME_##NAME:                                                        \
+    return "DW_LNAME_" #NAME;
+#include "llvm/BinaryFormat/Dwarf.def"
+  }
+
+  return {};
+}
+
+unsigned
+llvm::dwarf::getSourceLanguageName(StringRef SourceLanguageNameString) {
+  return StringSwitch<unsigned>(SourceLanguageNameString)
+#define HANDLE_DW_LNAME(ID, NAME, DESC, LOWER_BOUND)                           \
+  .Case("DW_LNAME_" #NAME, DW_LNAME_##NAME)
+#include "llvm/BinaryFormat/Dwarf.def"
+      .Default(0);
+}
+
 StringRef llvm::dwarf::CaseString(unsigned Case) {
   switch (Case) {
   case DW_ID_case_sensitive:
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 22c7fa5..a4d1b83 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1866,11 +1866,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     // Ignore Record[0], which indicates whether this compile unit is
     // distinct. It's always distinct.
     IsDistinct = true;
+
     auto *CU = DICompileUnit::getDistinct(
-        Context, Record[1], getMDOrNull(Record[2]), getMDString(Record[3]),
-        Record[4], getMDString(Record[5]), Record[6], getMDString(Record[7]),
-        Record[8], getMDOrNull(Record[9]), getMDOrNull(Record[10]),
-        getMDOrNull(Record[12]), getMDOrNull(Record[13]),
+        Context, DISourceLanguageName(Record[1]), getMDOrNull(Record[2]),
+        getMDString(Record[3]), Record[4], getMDString(Record[5]), Record[6],
+        getMDString(Record[7]), Record[8], getMDOrNull(Record[9]),
+        getMDOrNull(Record[10]), getMDOrNull(Record[12]),
+        getMDOrNull(Record[13]),
         Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]),
         Record.size() <= 14 ? 0 : Record[14],
         Record.size() <= 16 ? true : Record[16],
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 6d86809..7ed140d 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2107,7 +2107,8 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
                                              unsigned Abbrev) {
   assert(N->isDistinct() && "Expected distinct compile units");
   Record.push_back(/* IsDistinct */ true);
-  Record.push_back(N->getSourceLanguage());
+
+  Record.push_back(N->getSourceLanguage().getUnversionedName());
   Record.push_back(VE.getMetadataOrNullID(N->getFile()));
   Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
   Record.push_back(N->isOptimized());
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index c5d6e40..12d749c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -633,8 +633,8 @@ void CodeViewDebug::beginModule(Module *M) {
     Node = *CUs->operands().begin();
   }
   const auto *CU = cast<DICompileUnit>(Node);
-
-  CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage());
+  CurrentSourceLanguage =
+      MapDWLangToCVLang(CU->getSourceLanguage().getUnversionedName());
   if (!M->getCodeViewFlag() ||
       CU->getEmissionKind() == DICompileUnit::NoDebug) {
     Asm = nullptr;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 09d5f9c..d751a7f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1040,7 +1040,8 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
   NewCU.addString(Die, dwarf::DW_AT_producer, Producer);

   NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
-                DIUnit->getSourceLanguage());
+                DIUnit->getSourceLanguage().getUnversionedName());
+
   NewCU.addString(Die, dwarf::DW_AT_name, FN);
   StringRef SysRoot = DIUnit->getSysRoot();
   if (!SysRoot.empty())
@@ -2930,10 +2931,9 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
   case dwarf::DW_TAG_union_type:
   case dwarf::DW_TAG_enumeration_type:
     return dwarf::PubIndexEntryDescriptor(
-        dwarf::GIEK_TYPE,
-        dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage())
-            ? dwarf::GIEL_EXTERNAL
-            : dwarf::GIEL_STATIC);
+        dwarf::GIEK_TYPE, dwarf::isCPlusPlus(CU->getSourceLanguage())
+                              ? dwarf::GIEL_EXTERNAL
+                              : dwarf::GIEL_STATIC);
   case dwarf::DW_TAG_typedef:
   case dwarf::DW_TAG_base_type:
   case dwarf::DW_TAG_subrange_type:
@@ -3926,7 +3926,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
   TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);

   NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
-                CU.getLanguage());
+                CU.getSourceLanguage());

   uint64_t Signature = makeTypeSignature(Identifier);
   NewTU.setTypeSignature(Signature);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 3cfe7cc..aa078f3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -100,7 +100,7 @@ DwarfUnit::~DwarfUnit() {
 }

 int64_t DwarfUnit::getDefaultLowerBound() const {
-  switch (getLanguage()) {
+  switch (getSourceLanguage()) {
   default:
     break;

@@ -704,12 +704,17 @@ void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
   addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty)));
 }

+llvm::dwarf::SourceLanguage DwarfUnit::getSourceLanguage() const {
+  return static_cast<llvm::dwarf::SourceLanguage>(
+      getLanguage().getUnversionedName());
+}
+
 std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
   if (!Context)
     return "";

   // FIXME: Decide whether to implement this for non-C++ languages.
-  if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage()))
+  if (!dwarf::isCPlusPlus(getSourceLanguage()))
     return "";

   std::string CS;
@@ -940,7 +945,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {

   // Add prototype flag if we're dealing with a C language and the function has
   // been prototyped.
-  if (isPrototyped && dwarf::isC((dwarf::SourceLanguage)getLanguage()))
+  if (isPrototyped && dwarf::isC(getSourceLanguage()))
     addFlag(Buffer, dwarf::DW_AT_prototyped);

   // Add a DW_AT_calling_convention if this has an explicit convention.
@@ -1448,7 +1453,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,

   // Add the prototype if we have a prototype and we have a C like
   // language.
-  if (SP->isPrototyped() && dwarf::isC((dwarf::SourceLanguage)getLanguage()))
+  if (SP->isPrototyped() && dwarf::isC(getSourceLanguage()))
     addFlag(SPDie, dwarf::DW_AT_prototyped);

   if (SP->isObjCDirect())
@@ -1700,8 +1705,7 @@ DIE *DwarfUnit::getIndexTyDie() {
   addString(*IndexTyDie, dwarf::DW_AT_name, Name);
   addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, std::nullopt, sizeof(int64_t));
   addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
-          dwarf::getArrayIndexTypeEncoding(
-              (dwarf::SourceLanguage)getLanguage()));
+          dwarf::getArrayIndexTypeEncoding(getSourceLanguage()));
   DD->addAccelType(*this, CUNode->getNameTableKind(), Name, *IndexTyDie,
                    /*Flags*/ 0);
   return IndexTyDie;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index bb00ec3..9288d7e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/Target/TargetMachine.h"
 #include <optional>
 #include <string>
@@ -107,7 +108,7 @@ public:
     return LabelBegin;
   }
   MCSymbol *getEndLabel() const { return EndLabel; }
-  uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
+  llvm::dwarf::SourceLanguage getSourceLanguage() const;
   const DICompileUnit *getCUNode() const { return CUNode; }
   DwarfDebug &getDwarfDebug() const { return *DD; }

@@ -358,6 +359,10 @@ protected:
   }

 private:
+  DISourceLanguageName getLanguage() const {
+    return CUNode->getSourceLanguage();
+  }
+
   /// A helper to add a wide integer constant to a DIE using a block
   /// form.
   void addIntAsBlock(DIE &Die, dwarf::Attribute Attribute, const APInt &Val);
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 851d445..507b2d6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1843,7 +1843,8 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
     return selectCast(I, ISD::SINT_TO_FP);

   case Instruction::IntToPtr: // Deliberate fall-through.
-  case Instruction::PtrToInt: {
+  case Instruction::PtrToInt:
+  case Instruction::PtrToAddr: {
     EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
     EVT DstVT = TLI.getValueType(DL, I->getType());
     if (DstVT.bitsGT(SrcVT))
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 87d5453..3b5f83f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3416,7 +3416,7 @@ void DAGTypeLegalizer::SplitVecRes_PARTIAL_REDUCE_MLA(SDNode *N, SDValue &Lo,
   SDValue Input2 = N->getOperand(2);

   SDValue AccLo, AccHi;
-  std::tie(AccLo, AccHi) = DAG.SplitVector(Acc, DL);
+  GetSplitVector(Acc, AccLo, AccHi);
   unsigned Opcode = N->getOpcode();

   // If the input types don't need splitting, just accumulate into the
@@ -3429,8 +3429,8 @@ void DAGTypeLegalizer::SplitVecRes_PARTIAL_REDUCE_MLA(SDNode *N, SDValue &Lo,

   SDValue Input1Lo, Input1Hi;
   SDValue Input2Lo, Input2Hi;
-  std::tie(Input1Lo, Input1Hi) = DAG.SplitVector(Input1, DL);
-  std::tie(Input2Lo, Input2Hi) = DAG.SplitVector(Input2, DL);
+  GetSplitVector(Input1, Input1Lo, Input1Hi);
+  GetSplitVector(Input2, Input2Lo, Input2Hi);
   EVT ResultVT = AccLo.getValueType();

   Lo = DAG.getNode(Opcode, DL, ResultVT, AccLo, Input1Lo, Input2Lo);
@@ -4761,8 +4761,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_PARTIAL_REDUCE_MLA(SDNode *N) {
   SDLoc DL(N);

   SDValue Input1Lo, Input1Hi, Input2Lo, Input2Hi;
-  std::tie(Input1Lo, Input1Hi) = DAG.SplitVector(N->getOperand(1), DL);
-  std::tie(Input2Lo, Input2Hi) = DAG.SplitVector(N->getOperand(2), DL);
+  GetSplitVector(N->getOperand(1), Input1Lo, Input1Hi);
+  GetSplitVector(N->getOperand(2), Input2Lo, Input2Hi);
   unsigned Opcode = N->getOpcode();
   EVT ResultVT = Acc.getValueType();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c35f29d..175753f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -571,7 +571,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   SwiftError->setFunction(mf);
   const Function &Fn = mf.getFunction();

-  bool InstrRef = mf.shouldUseDebugInstrRef();
+  bool InstrRef = mf.useDebugInstrRef();

   FuncInfo->set(MF->getFunction(), *MF, CurDAG);
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
index 707f0c3..132a280 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
@@ -24,15 +24,7 @@ namespace llvm {
 namespace hlsl {
 namespace rootsig {

-char GenericRSMetadataError::ID;
-char InvalidRSMetadataFormat::ID;
-char InvalidRSMetadataValue::ID;
-char TableSamplerMixinError::ID;
-char ShaderRegisterOverflowError::ID;
-char OffsetOverflowError::ID;
-char OffsetAppendAfterOverflow::ID;
-
-template <typename T> char RootSignatureValidationError<T>::ID;
+char RootSignatureValidationError::ID;

 static std::optional<uint32_t> extractMdIntValue(MDNode *Node,
                                                  unsigned int OpId) {
@@ -57,20 +49,6 @@ static std::optional<StringRef> extractMdStringValue(MDNode *Node,
   return NodeText->getString();
 }

-template <typename T, typename = std::enable_if_t<
-                          std::is_enum_v<T> &&
-                          std::is_same_v<std::underlying_type_t<T>, uint32_t>>>
-static Expected<T>
-extractEnumValue(MDNode *Node, unsigned int OpId, StringRef ErrText,
-                 llvm::function_ref<bool(uint32_t)> VerifyFn) {
-  if (std::optional<uint32_t> Val = extractMdIntValue(Node, OpId)) {
-    if (!VerifyFn(*Val))
-      return make_error<RootSignatureValidationError<uint32_t>>(ErrText, *Val);
-    return static_cast<T>(*Val);
-  }
-  return make_error<InvalidRSMetadataValue>("ShaderVisibility");
-}
-
 namespace {

 // We use the OverloadVisit with std::visit to ensure the compiler catches if a
@@ -81,8 +59,52 @@ template <class... Ts> struct OverloadedVisit : Ts... {
 };
 template <class... Ts> OverloadedVisit(Ts...) -> OverloadedVisit<Ts...>;

+struct FmtRange {
+  dxil::ResourceClass Type;
+  uint32_t Register;
+  uint32_t Space;
+
+  FmtRange(const mcdxbc::DescriptorRange &Range)
+      : Type(Range.RangeType), Register(Range.BaseShaderRegister),
+        Space(Range.RegisterSpace) {}
+};
+
+raw_ostream &operator<<(llvm::raw_ostream &OS, const FmtRange &Range) {
+  OS << getResourceClassName(Range.Type) << "(register=" << Range.Register
+     << ", space=" << Range.Space << ")";
+  return OS;
+}
+
+struct FmtMDNode {
+  const MDNode *Node;
+
+  FmtMDNode(const MDNode *Node) : Node(Node) {}
+};
+
+raw_ostream &operator<<(llvm::raw_ostream &OS, FmtMDNode Fmt) {
+  Fmt.Node->printTree(OS);
+  return OS;
+}
+
+static Error makeRSError(const Twine &Msg) {
+  return make_error<RootSignatureValidationError>(Msg);
+}
 } // namespace

+template <typename T, typename = std::enable_if_t<
+                          std::is_enum_v<T> &&
+                          std::is_same_v<std::underlying_type_t<T>, uint32_t>>>
+static Expected<T>
+extractEnumValue(MDNode *Node, unsigned int OpId, StringRef ErrText,
+                 llvm::function_ref<bool(uint32_t)> VerifyFn) {
+  if (std::optional<uint32_t> Val = extractMdIntValue(Node, OpId)) {
+    if (!VerifyFn(*Val))
+      return makeRSError(formatv("Invalid value for {0}: {1}", ErrText, Val));
+    return static_cast<T>(*Val);
+  }
+  return makeRSError(formatv("Invalid value for {0}:", ErrText));
+}
+
 MDNode *MetadataBuilder::BuildRootSignature() {
   const auto Visitor = OverloadedVisit{
       [this](const dxbc::RootFlags &Flags) -> MDNode * {
@@ -226,12 +248,12 @@ MDNode *MetadataBuilder::BuildStaticSampler(const StaticSampler &Sampler) {
 Error MetadataParser::parseRootFlags(mcdxbc::RootSignatureDesc &RSD,
                                      MDNode *RootFlagNode) {
   if (RootFlagNode->getNumOperands() != 2)
-    return make_error<InvalidRSMetadataFormat>("RootFlag Element");
+    return makeRSError("Invalid format for RootFlags Element");

   if (std::optional<uint32_t> Val = extractMdIntValue(RootFlagNode, 1))
     RSD.Flags = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("RootFlag");
+    return makeRSError("Invalid value for RootFlag");

   return Error::success();
 }
@@ -239,7 +261,7 @@ Error MetadataParser::parseRootFlags(mcdxbc::RootSignatureDesc &RSD,
 Error MetadataParser::parseRootConstants(mcdxbc::RootSignatureDesc &RSD,
                                          MDNode *RootConstantNode) {
   if (RootConstantNode->getNumOperands() != 5)
-    return make_error<InvalidRSMetadataFormat>("RootConstants Element");
+    return makeRSError("Invalid format for RootConstants Element");

   Expected<dxbc::ShaderVisibility> Visibility =
       extractEnumValue<dxbc::ShaderVisibility>(RootConstantNode, 1,
@@ -252,17 +274,17 @@ Error MetadataParser::parseRootConstants(mcdxbc::RootSignatureDesc &RSD,
   if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 2))
     Constants.ShaderRegister = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("ShaderRegister");
+    return makeRSError("Invalid value for ShaderRegister");

   if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 3))
     Constants.RegisterSpace = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("RegisterSpace");
+    return makeRSError("Invalid value for RegisterSpace");

   if (std::optional<uint32_t> Val = extractMdIntValue(RootConstantNode, 4))
     Constants.Num32BitValues = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("Num32BitValues");
+    return makeRSError("Invalid value for Num32BitValues");

   RSD.ParametersContainer.addParameter(dxbc::RootParameterType::Constants32Bit,
                                        *Visibility, Constants);
@@ -279,7 +301,7 @@ Error MetadataParser::parseRootDescriptors(
          "parseRootDescriptors should only be called with RootDescriptor "
          "element kind.");
   if (RootDescriptorNode->getNumOperands() != 5)
-    return make_error<InvalidRSMetadataFormat>("Root Descriptor Element");
+    return makeRSError("Invalid format for Root Descriptor Element");

   dxbc::RootParameterType Type;
   switch (ElementKind) {
@@ -308,23 +330,17 @@ Error MetadataParser::parseRootDescriptors(
   if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 2))
     Descriptor.ShaderRegister = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("ShaderRegister");
+    return makeRSError("Invalid value for ShaderRegister");

   if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 3))
     Descriptor.RegisterSpace = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("RegisterSpace");
-
-  if (RSD.Version == 1) {
-    RSD.ParametersContainer.addParameter(Type, *Visibility, Descriptor);
-    return Error::success();
-  }
-  assert(RSD.Version > 1);
+    return makeRSError("Invalid value for RegisterSpace");

   if (std::optional<uint32_t> Val = extractMdIntValue(RootDescriptorNode, 4))
     Descriptor.Flags = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("Root Descriptor Flags");
+    return makeRSError("Invalid value for Root Descriptor Flags");

   RSD.ParametersContainer.addParameter(Type, *Visibility, Descriptor);
   return Error::success();
@@ -333,7 +349,7 @@ Error MetadataParser::parseRootDescriptors(
 Error MetadataParser::parseDescriptorRange(mcdxbc::DescriptorTable &Table,
                                            MDNode *RangeDescriptorNode) {
   if (RangeDescriptorNode->getNumOperands() != 6)
-    return make_error<InvalidRSMetadataFormat>("Descriptor Range");
+    return makeRSError("Invalid format for Descriptor Range");

   mcdxbc::DescriptorRange Range;

@@ -341,7 +357,7 @@ Error MetadataParser::parseDescriptorRange(mcdxbc::DescriptorTable &Table,
       extractMdStringValue(RangeDescriptorNode, 0);

   if (!ElementText.has_value())
-    return make_error<InvalidRSMetadataFormat>("Descriptor Range");
+    return makeRSError("Invalid format for Descriptor Range");

   if (*ElementText == "CBV")
     Range.RangeType = dxil::ResourceClass::CBuffer;
@@ -352,35 +368,34 @@ Error MetadataParser::parseDescriptorRange(mcdxbc::DescriptorTable &Table,
   else if (*ElementText == "Sampler")
     Range.RangeType = dxil::ResourceClass::Sampler;
   else
-    return make_error<GenericRSMetadataError>("Invalid Descriptor Range type.",
-                                              RangeDescriptorNode);
+    return makeRSError(formatv("Invalid Descriptor Range type.\n{0}",
+                               FmtMDNode{RangeDescriptorNode}));

   if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 1))
     Range.NumDescriptors = *Val;
   else
-    return make_error<GenericRSMetadataError>("Number of Descriptor in Range",
-                                              RangeDescriptorNode);
+    return makeRSError(formatv("Invalid number of Descriptor in Range.\n{0}",
+                               FmtMDNode{RangeDescriptorNode}));

   if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 2))
     Range.BaseShaderRegister = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("BaseShaderRegister");
+    return makeRSError("Invalid value for BaseShaderRegister");

   if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 3))
     Range.RegisterSpace = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("RegisterSpace");
+    return makeRSError("Invalid value for RegisterSpace");

   if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 4))
     Range.OffsetInDescriptorsFromTableStart = *Val;
   else
-    return make_error<InvalidRSMetadataValue>(
-        "OffsetInDescriptorsFromTableStart");
+    return makeRSError("Invalid value for OffsetInDescriptorsFromTableStart");

   if (std::optional<uint32_t> Val = extractMdIntValue(RangeDescriptorNode, 5))
     Range.Flags = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("Descriptor Range Flags");
+    return makeRSError("Invalid value for Descriptor Range Flags");

   Table.Ranges.push_back(Range);
   return Error::success();
@@ -390,7 +405,7 @@ Error MetadataParser::parseDescriptorTable(mcdxbc::RootSignatureDesc &RSD,
                                            MDNode *DescriptorTableNode) {
   const unsigned int NumOperands = DescriptorTableNode->getNumOperands();
   if (NumOperands < 2)
-    return make_error<InvalidRSMetadataFormat>("Descriptor Table");
+    return makeRSError("Invalid format for Descriptor Table");

   Expected<dxbc::ShaderVisibility> Visibility =
       extractEnumValue<dxbc::ShaderVisibility>(DescriptorTableNode, 1,
@@ -404,8 +419,8 @@ Error MetadataParser::parseDescriptorTable(mcdxbc::RootSignatureDesc &RSD,
   for (unsigned int I = 2; I < NumOperands; I++) {
     MDNode *Element = dyn_cast<MDNode>(DescriptorTableNode->getOperand(I));
     if (Element == nullptr)
-      return make_error<GenericRSMetadataError>(
-          "Missing Root Element Metadata Node.", DescriptorTableNode);
+      return makeRSError(formatv("Missing Root Element Metadata Node.\n{0}",
+                                 FmtMDNode{DescriptorTableNode}));

     if (auto Err = parseDescriptorRange(Table, Element))
       return Err;
@@ -419,7 +434,7 @@ Error MetadataParser::parseDescriptorTable(mcdxbc::RootSignatureDesc &RSD,
 Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD,
                                          MDNode *StaticSamplerNode) {
   if (StaticSamplerNode->getNumOperands() != 15)
-    return make_error<InvalidRSMetadataFormat>("Static Sampler");
+    return makeRSError("Invalid format for Static Sampler");

   mcdxbc::StaticSampler Sampler;

@@ -453,12 +468,12 @@ Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD,
   if (std::optional<float> Val = extractMdFloatValue(StaticSamplerNode, 5))
     Sampler.MipLODBias = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("MipLODBias");
+    return makeRSError("Invalid value for MipLODBias");

   if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 6))
     Sampler.MaxAnisotropy = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("MaxAnisotropy");
+    return makeRSError("Invalid value for MaxAnisotropy");

   Expected<dxbc::ComparisonFunc> ComparisonFunc =
       extractEnumValue<dxbc::ComparisonFunc>(
@@ -477,22 +492,22 @@ Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD,
   if (std::optional<float> Val = extractMdFloatValue(StaticSamplerNode, 9))
     Sampler.MinLOD = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("MinLOD");
+    return makeRSError("Invalid value for MinLOD");

   if (std::optional<float> Val = extractMdFloatValue(StaticSamplerNode, 10))
     Sampler.MaxLOD = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("MaxLOD");
+    return makeRSError("Invalid value for MaxLOD");

   if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 11))
     Sampler.ShaderRegister = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("ShaderRegister");
+    return makeRSError("Invalid value for ShaderRegister");

   if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 12))
     Sampler.RegisterSpace = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("RegisterSpace");
+    return makeRSError("Invalid value for RegisterSpace");

   Expected<dxbc::ShaderVisibility> Visibility =
       extractEnumValue<dxbc::ShaderVisibility>(StaticSamplerNode, 13,
@@ -502,16 +517,10 @@ Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD,
     return Error(std::move(E));
   Sampler.ShaderVisibility = *Visibility;

-  if (RSD.Version < 3) {
-    RSD.StaticSamplers.push_back(Sampler);
-    return Error::success();
-  }
-  assert(RSD.Version >= 3);
-
   if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 14))
     Sampler.Flags = *Val;
   else
-    return make_error<InvalidRSMetadataValue>("Static Sampler Flags");
+    return makeRSError("Invalid value for Static Sampler Flags");

   RSD.StaticSamplers.push_back(Sampler);
   return Error::success();
@@ -521,7 +530,7 @@ Error MetadataParser::parseRootSignatureElement(mcdxbc::RootSignatureDesc &RSD,
                                                 MDNode *Element) {
   std::optional<StringRef> ElementText = extractMdStringValue(Element, 0);
   if (!ElementText.has_value())
-    return make_error<InvalidRSMetadataFormat>("Root Element");
+    return makeRSError("Invalid format for Root Element");

   RootSignatureElementKind ElementKind =
       StringSwitch<RootSignatureElementKind>(*ElementText)
@@ -549,8 +558,8 @@ Error MetadataParser::parseRootSignatureElement(mcdxbc::RootSignatureDesc &RSD,
   case RootSignatureElementKind::StaticSamplers:
     return parseStaticSampler(RSD, Element);
   case RootSignatureElementKind::Error:
-    return make_error<GenericRSMetadataError>("Invalid Root Signature Element",
-                                              Element);
+    return makeRSError(
+        formatv("Invalid Root Signature Element\n{0}", FmtMDNode{Element}));
   }

   llvm_unreachable("Unhandled RootSignatureElementKind enum.");
@@ -563,7 +572,10 @@ validateDescriptorTableSamplerMixin(const mcdxbc::DescriptorTable &Table,
   for (const mcdxbc::DescriptorRange &Range : Table.Ranges) {
     if (Range.RangeType == dxil::ResourceClass::Sampler &&
         CurrRC != dxil::ResourceClass::Sampler)
-      return make_error<TableSamplerMixinError>(CurrRC, Location);
+      return makeRSError(
+          formatv("Samplers cannot be mixed with other resource types in a "
+                  "descriptor table, {0}(location={1})",
+                  getResourceClassName(CurrRC), Location));
     CurrRC = Range.RangeType;
   }
   return Error::success();
@@ -583,8 +595,8 @@ validateDescriptorTableRegisterOverflow(const mcdxbc::DescriptorTable &Table,
         Range.BaseShaderRegister, Range.NumDescriptors);

     if (!verifyNoOverflowedOffset(RangeBound))
-      return make_error<ShaderRegisterOverflowError>(
-          Range.RangeType, Range.BaseShaderRegister, Range.RegisterSpace);
+      return makeRSError(
+          formatv("Overflow for shader register range: {0}", FmtRange{Range}));

     bool IsAppending =
         Range.OffsetInDescriptorsFromTableStart == DescriptorTableOffsetAppend;
@@ -592,15 +604,16 @@ validateDescriptorTableRegisterOverflow(const mcdxbc::DescriptorTable &Table,
       Offset = Range.OffsetInDescriptorsFromTableStart;

     if (IsPrevUnbound && IsAppending)
-      return make_error<OffsetAppendAfterOverflow>(
-          Range.RangeType, Range.BaseShaderRegister, Range.RegisterSpace);
+      return makeRSError(
+          formatv("Range {0} cannot be appended after an unbounded range",
+                  FmtRange{Range}));

     const uint64_t OffsetBound =
         llvm::hlsl::rootsig::computeRangeBound(Offset, Range.NumDescriptors);

     if (!verifyNoOverflowedOffset(OffsetBound))
-      return make_error<OffsetOverflowError>(
-          Range.RangeType, Range.BaseShaderRegister, Range.RegisterSpace);
+      return makeRSError(formatv("Offset overflow for descriptor range: {0}.",
+                                 FmtRange{Range}));

     Offset = OffsetBound + 1;
     IsPrevUnbound =
@@ -614,17 +627,15 @@ Error MetadataParser::validateRootSignature(
     const mcdxbc::RootSignatureDesc &RSD) {
   Error DeferredErrs = Error::success();
   if (!hlsl::rootsig::verifyVersion(RSD.Version)) {
-    DeferredErrs =
-        joinErrors(std::move(DeferredErrs),
-                   make_error<RootSignatureValidationError<uint32_t>>(
-                       "Version", RSD.Version));
+    DeferredErrs = joinErrors(
+        std::move(DeferredErrs),
+        makeRSError(formatv("Invalid value for Version: {0}", RSD.Version)));
   }

   if (!hlsl::rootsig::verifyRootFlag(RSD.Flags)) {
-    DeferredErrs =
-        joinErrors(std::move(DeferredErrs),
-                   make_error<RootSignatureValidationError<uint32_t>>(
-                       "RootFlags", RSD.Flags));
+    DeferredErrs = joinErrors(
+        std::move(DeferredErrs),
+        makeRSError(formatv("Invalid value for RootFlags: {0}", RSD.Flags)));
   }

   for (const mcdxbc::RootParameterInfo &Info : RSD.ParametersContainer) {
@@ -639,28 +650,26 @@ Error MetadataParser::validateRootSignature(
       const mcdxbc::RootDescriptor &Descriptor =
           RSD.ParametersContainer.getRootDescriptor(Info.Location);
       if (!hlsl::rootsig::verifyRegisterValue(Descriptor.ShaderRegister))
-        DeferredErrs =
-            joinErrors(std::move(DeferredErrs),
-                       make_error<RootSignatureValidationError<uint32_t>>(
-                           "ShaderRegister", Descriptor.ShaderRegister));
+        DeferredErrs = joinErrors(
+            std::move(DeferredErrs),
+            makeRSError(formatv("Invalid value for ShaderRegister: {0}",
+                                Descriptor.ShaderRegister)));

       if (!hlsl::rootsig::verifyRegisterSpace(Descriptor.RegisterSpace))
-        DeferredErrs =
-            joinErrors(std::move(DeferredErrs),
-                       make_error<RootSignatureValidationError<uint32_t>>(
-                           "RegisterSpace", Descriptor.RegisterSpace));
-
-      if (RSD.Version > 1) {
-        bool IsValidFlag =
-            dxbc::isValidRootDesciptorFlags(Descriptor.Flags) &&
-            hlsl::rootsig::verifyRootDescriptorFlag(
-                RSD.Version, dxbc::RootDescriptorFlags(Descriptor.Flags));
-        if (!IsValidFlag)
-          DeferredErrs =
-              joinErrors(std::move(DeferredErrs),
-                         make_error<RootSignatureValidationError<uint32_t>>(
-                             "RootDescriptorFlag", Descriptor.Flags));
-      }
+        DeferredErrs = joinErrors(
+            std::move(DeferredErrs),
+            makeRSError(formatv("Invalid value for RegisterSpace: {0}",
+                                Descriptor.RegisterSpace)));
+
+      bool IsValidFlag =
+          dxbc::isValidRootDesciptorFlags(Descriptor.Flags) &&
+          hlsl::rootsig::verifyRootDescriptorFlag(
+              RSD.Version, dxbc::RootDescriptorFlags(Descriptor.Flags));
+      if (!IsValidFlag)
+        DeferredErrs = joinErrors(
+            std::move(DeferredErrs),
+            makeRSError(formatv("Invalid value for RootDescriptorFlag: {0}",
+                                Descriptor.Flags)));
       break;
     }
     case dxbc::RootParameterType::DescriptorTable: {
@@ -668,26 +677,26 @@ Error MetadataParser::validateRootSignature(
           RSD.ParametersContainer.getDescriptorTable(Info.Location);
       for (const mcdxbc::DescriptorRange &Range : Table) {
         if (!hlsl::rootsig::verifyRegisterSpace(Range.RegisterSpace))
-          DeferredErrs =
-              joinErrors(std::move(DeferredErrs),
-                         make_error<RootSignatureValidationError<uint32_t>>(
-                             "RegisterSpace", Range.RegisterSpace));
+          DeferredErrs = joinErrors(
+              std::move(DeferredErrs),
+              makeRSError(formatv("Invalid value for RegisterSpace: {0}",
+                                  Range.RegisterSpace)));

         if (!hlsl::rootsig::verifyNumDescriptors(Range.NumDescriptors))
-          DeferredErrs =
-              joinErrors(std::move(DeferredErrs),
-                         make_error<RootSignatureValidationError<uint32_t>>(
-                             "NumDescriptors", Range.NumDescriptors));
+          DeferredErrs = joinErrors(
+              std::move(DeferredErrs),
+              makeRSError(formatv("Invalid value for NumDescriptors: {0}",
+                                  Range.NumDescriptors)));

         bool IsValidFlag =
             dxbc::isValidDescriptorRangeFlags(Range.Flags) &&
             hlsl::rootsig::verifyDescriptorRangeFlag(
                 RSD.Version, Range.RangeType,
                 dxbc::DescriptorRangeFlags(Range.Flags));
         if (!IsValidFlag)
-          DeferredErrs =
-              joinErrors(std::move(DeferredErrs),
-                         make_error<RootSignatureValidationError<uint32_t>>(
-                             "DescriptorFlag", Range.Flags));
+          DeferredErrs = joinErrors(
+              std::move(DeferredErrs),
+              makeRSError(formatv("Invalid value for DescriptorFlag: {0}",
+                                  Range.Flags)));

         if (Error Err =
                 validateDescriptorTableSamplerMixin(Table, Info.Location))
@@ -705,46 +714,49 @@ Error MetadataParser::validateRootSignature(

   for (const mcdxbc::StaticSampler &Sampler : RSD.StaticSamplers) {
     if (!hlsl::rootsig::verifyMipLODBias(Sampler.MipLODBias))
-      DeferredErrs = joinErrors(std::move(DeferredErrs),
-                                make_error<RootSignatureValidationError<float>>(
-                                    "MipLODBias", Sampler.MipLODBias));
+      DeferredErrs =
+          joinErrors(std::move(DeferredErrs),
+                     makeRSError(formatv("Invalid value for MipLODBias: {0:e}",
+                                         Sampler.MipLODBias)));

     if (!hlsl::rootsig::verifyMaxAnisotropy(Sampler.MaxAnisotropy))
       DeferredErrs =
           joinErrors(std::move(DeferredErrs),
-                     make_error<RootSignatureValidationError<uint32_t>>(
-                         "MaxAnisotropy", Sampler.MaxAnisotropy));
+                     makeRSError(formatv("Invalid value for MaxAnisotropy: {0}",
+                                         Sampler.MaxAnisotropy)));

     if (!hlsl::rootsig::verifyLOD(Sampler.MinLOD))
-      DeferredErrs = joinErrors(std::move(DeferredErrs),
-                                make_error<RootSignatureValidationError<float>>(
-                                    "MinLOD", Sampler.MinLOD));
+      DeferredErrs =
+          joinErrors(std::move(DeferredErrs),
+                     makeRSError(formatv("Invalid value for MinLOD: {0}",
+                                         Sampler.MinLOD)));

     if (!hlsl::rootsig::verifyLOD(Sampler.MaxLOD))
-      DeferredErrs = joinErrors(std::move(DeferredErrs),
-                                make_error<RootSignatureValidationError<float>>(
-                                    "MaxLOD", Sampler.MaxLOD));
-
-    if (!hlsl::rootsig::verifyRegisterValue(Sampler.ShaderRegister))
       DeferredErrs =
           joinErrors(std::move(DeferredErrs),
-                     make_error<RootSignatureValidationError<uint32_t>>(
-                         "ShaderRegister", Sampler.ShaderRegister));
+                     makeRSError(formatv("Invalid value for MaxLOD: {0}",
+                                         Sampler.MaxLOD)));
+
+    if (!hlsl::rootsig::verifyRegisterValue(Sampler.ShaderRegister))
+      DeferredErrs = joinErrors(
+          std::move(DeferredErrs),
+          makeRSError(formatv("Invalid value for ShaderRegister: {0}",
+                              Sampler.ShaderRegister)));

     if (!hlsl::rootsig::verifyRegisterSpace(Sampler.RegisterSpace))
       DeferredErrs =
           joinErrors(std::move(DeferredErrs),
-                     make_error<RootSignatureValidationError<uint32_t>>(
-                         "RegisterSpace", Sampler.RegisterSpace));
+                     makeRSError(formatv("Invalid value for RegisterSpace: {0}",
+                                         Sampler.RegisterSpace)));

     bool IsValidFlag =
         dxbc::isValidStaticSamplerFlags(Sampler.Flags) &&
         hlsl::rootsig::verifyStaticSamplerFlags(
            RSD.Version, dxbc::StaticSamplerFlags(Sampler.Flags));
     if (!IsValidFlag)
-      DeferredErrs =
-          joinErrors(std::move(DeferredErrs),
-                     make_error<RootSignatureValidationError<uint32_t>>(
-                         "Static Sampler Flag", Sampler.Flags));
+      DeferredErrs = joinErrors(
+          std::move(DeferredErrs),
+          makeRSError(formatv("Invalid value for Static Sampler Flag: {0}",
+                              Sampler.Flags)));
   }

   return DeferredErrs;
@@ -758,9 +770,9 @@ MetadataParser::ParseRootSignature(uint32_t Version) {
   for (const auto &Operand : Root->operands()) {
     MDNode *Element = dyn_cast<MDNode>(Operand);
     if (Element == nullptr)
-      return joinErrors(std::move(DeferredErrs),
-                        make_error<GenericRSMetadataError>(
-                            "Missing Root Element Metadata Node.", nullptr));
+      return joinErrors(
+          std::move(DeferredErrs),
+          makeRSError(formatv("Missing Root Element Metadata Node.")));

     if (auto Err = parseRootSignatureElement(RSD, Element))
       DeferredErrs = joinErrors(std::move(DeferredErrs), std::move(Err));
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
index 30408df..1735751 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
@@ -41,8 +41,6 @@ bool verifyRootDescriptorFlag(uint32_t Version,
   if (Version == 1)
     return Flags == FlagT::DataVolatile;

-  assert((Version <= 3) && "Provided invalid root signature version");
-
   // The data-specific flags are mutually exclusive.
   FlagT DataFlags = FlagT::DataVolatile | FlagT::DataStatic |
                     FlagT::DataStaticWhileSetAtExecute;
@@ -118,8 +116,6 @@ bool verifyStaticSamplerFlags(uint32_t Version,
   if (Version <= 2)
     return Flags == dxbc::StaticSamplerFlags::None;

-  assert(Version == 3 && "Provided invalid root signature version");
-
   dxbc::StaticSamplerFlags Mask =
       dxbc::StaticSamplerFlags::NonNormalizedCoordinates |
       dxbc::StaticSamplerFlags::UintBorderColor |
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 245129f..ae086bcd 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2369,8 +2369,12 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
                                AsmWriterContext &WriterCtx) {
   Out << "!DICompileUnit(";
   MDFieldPrinter Printer(Out, WriterCtx);
-  Printer.printDwarfEnum("language", N->getSourceLanguage(),
-                         dwarf::LanguageString, /* ShouldSkipZero */ false);
+
+  Printer.printDwarfEnum("language",
+                         N->getSourceLanguage().getUnversionedName(),
+                         dwarf::LanguageString,
+                         /* ShouldSkipZero */ false);
+
   Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
   Printer.printString("producer", N->getProducer());
   Printer.printBool("isOptimized", N->isOptimized());
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index 1344df9..1ae20a9f 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -131,17 +131,13 @@ static DIScope *getNonCompileUnitScope(DIScope *N) {
 }

 DICompileUnit *DIBuilder::createCompileUnit(
-    unsigned Lang, DIFile *File, StringRef Producer, bool isOptimized,
-    StringRef Flags, unsigned RunTimeVer, StringRef SplitName,
+    DISourceLanguageName Lang, DIFile *File, StringRef Producer,
+    bool isOptimized, StringRef Flags, unsigned RunTimeVer, StringRef SplitName,
     DICompileUnit::DebugEmissionKind Kind, uint64_t DWOId,
     bool SplitDebugInlining, bool DebugInfoForProfiling,
     DICompileUnit::DebugNameTableKind NameTableKind, bool RangesBaseAddress,
     StringRef SysRoot, StringRef SDK) {
-  assert(((Lang <= dwarf::DW_LANG_Metal && Lang >= dwarf::DW_LANG_C89) ||
-          (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
-         "Invalid Language tag");
-
   assert(!CUNode && "Can only make one compile unit per DIBuilder instance");
   CUNode = DICompileUnit::getDistinct(
       VMContext, Lang, File, Producer, isOptimized, Flags, RunTimeVer,
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index f9ded50..9601a8a 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -1078,7 +1078,7 @@ LLVMMetadataRef LLVMDIBuilderCreateCompileUnit(
   auto File = unwrapDI<DIFile>(FileRef);

   return wrap(unwrap(Builder)->createCompileUnit(
-      map_from_llvmDWARFsourcelanguage(Lang), File,
+      DISourceLanguageName(map_from_llvmDWARFsourcelanguage(Lang)), File,
       StringRef(Producer, ProducerLen), isOptimized,
       StringRef(Flags, FlagsLen), RuntimeVer,
       StringRef(SplitName, SplitNameLen),
       static_cast<DICompileUnit::DebugEmissionKind>(Kind), DWOId,
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 77d044b..e30df88 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1184,9 +1184,10 @@ DIFile *DIFile::getImpl(LLVMContext &Context, MDString *Filename,
   DEFINE_GETIMPL_STORE(DIFile, (CS, Source), Ops);
 }

 DICompileUnit::DICompileUnit(LLVMContext &C, StorageType Storage,
-                             unsigned SourceLanguage, bool IsOptimized,
-                             unsigned RuntimeVersion, unsigned EmissionKind,
-                             uint64_t DWOId, bool SplitDebugInlining,
+                             DISourceLanguageName SourceLanguage,
+                             bool IsOptimized, unsigned RuntimeVersion,
+                             unsigned EmissionKind, uint64_t DWOId,
+                             bool SplitDebugInlining,
                              bool DebugInfoForProfiling, unsigned NameTableKind,
                              bool RangesBaseAddress, ArrayRef<Metadata *> Ops)
     : DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
@@ -1199,7 +1200,7 @@ DICompileUnit::DICompileUnit(LLVMContext &C, StorageType Storage,
 }

 DICompileUnit *DICompileUnit::getImpl(
-    LLVMContext &Context, unsigned SourceLanguage, Metadata *File,
+    LLVMContext &Context, DISourceLanguageName SourceLanguage, Metadata *File,
     MDString *Producer, bool IsOptimized, MDString *Flags,
     unsigned RuntimeVersion, MDString *SplitDebugFilename,
     unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 71a8a38..c9ff86b 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5398,8 +5398,10 @@ void Verifier::visitCapturesMetadata(Instruction &I, const MDNode *Captures) {

 void Verifier::visitAllocTokenMetadata(Instruction &I, MDNode *MD) {
   Check(isa<CallBase>(I), "!alloc_token should only exist on calls", &I);
-  Check(MD->getNumOperands() == 1, "!alloc_token must have 1 operand", MD);
+  Check(MD->getNumOperands() == 2, "!alloc_token must have 2 operands", MD);
   Check(isa<MDString>(MD->getOperand(0)), "expected string", MD);
+  Check(mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(1)),
+        "expected integer constant", MD);
 }

 /// verifyInstruction - Verify that an instruction is well formed.
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 20dcde8..53cf004 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1111,6 +1111,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
       Result.forwardSwitchCondToPhi(Enable);
     } else if (ParamName == "switch-range-to-icmp") {
       Result.convertSwitchRangeToICmp(Enable);
+    } else if (ParamName == "switch-to-arithmetic") {
+      Result.convertSwitchToArithmetic(Enable);
     } else if (ParamName == "switch-to-lookup") {
       Result.convertSwitchToLookupTable(Enable);
     } else if (ParamName == "keep-loops") {
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 119caea..fea0d25 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -781,6 +781,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,

   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                   .convertSwitchRangeToICmp(true)
+                                  .convertSwitchToArithmetic(true)
                                   .hoistCommonInsts(true)
                                   .sinkCommonInsts(true)));
   FPM.addPass(InstCombinePass());
@@ -1377,6 +1378,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                   .forwardSwitchCondToPhi(true)
                                   .convertSwitchRangeToICmp(true)
+                                  .convertSwitchToArithmetic(true)
                                   .convertSwitchToLookupTable(true)
                                   .needCanonicalLoops(false)
                                   .hoistCommonInsts(true)
@@ -1603,6 +1605,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   OptimizePM.addPass(
       SimplifyCFGPass(SimplifyCFGOptions()
                           .convertSwitchRangeToICmp(true)
+                          .convertSwitchToArithmetic(true)
                           .speculateUnpredictables(true)
                           .hoistLoadsStoresWithCondFaulting(true)));

@@ -2187,6 +2190,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
   // Delete basic blocks, which optimization passes may have killed.
   LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                       .convertSwitchRangeToICmp(true)
+                                      .convertSwitchToArithmetic(true)
                                       .hoistCommonInsts(true)
                                       .speculateUnpredictables(true)));
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index c5c0d64..1b16525 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -687,8 +687,9 @@ FUNCTION_PASS_WITH_PARAMS(
     parseSimplifyCFGOptions,
     "no-speculate-blocks;speculate-blocks;no-simplify-cond-branch;"
     "simplify-cond-branch;no-forward-switch-cond;forward-switch-cond;"
-    "no-switch-range-to-icmp;switch-range-to-icmp;no-switch-to-lookup;"
-    "switch-to-lookup;no-keep-loops;keep-loops;no-hoist-common-insts;"
+    "no-switch-range-to-icmp;switch-range-to-icmp;no-switch-to-arithmetic;"
+    "switch-to-arithmetic;no-switch-to-lookup;switch-to-lookup;"
+    "no-keep-loops;keep-loops;no-hoist-common-insts;"
     "hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;"
     "hoist-loads-stores-with-cond-faulting;no-sink-common-insts;"
     "sink-common-insts;no-speculate-unpredictables;speculate-unpredictables;"
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 6ad8d7d..80fd485 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include <algorithm>
 #include <limits>
+#include <memory>
 #include <stdio.h>
 #include <string>
 #include <system_error>
@@ -29,55 +30,77 @@

 namespace llvm {

-Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
-                                       bool UseGlobs) {
+Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern,
+                                            unsigned LineNumber) {
   if (Pattern.empty())
     return createStringError(errc::invalid_argument,
-                             Twine("Supplied ") +
-                                 (UseGlobs ? "glob" : "regex") + " was blank");
-
-  if (!UseGlobs) {
-    // Replace * with .*
-    auto Regexp = Pattern.str();
-    for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
-         pos += strlen(".*")) {
-      Regexp.replace(pos, strlen("*"), ".*");
-    }
+                             "Supplied regex was blank");

-    Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
+  // Replace * with .*
+  auto Regexp = Pattern.str();
+  for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
+       pos += strlen(".*")) {
+    Regexp.replace(pos, strlen("*"), ".*");
+  }

-    // Check that the regexp is valid.
-    Regex CheckRE(Regexp);
-    std::string REError;
-    if (!CheckRE.isValid(REError))
-      return createStringError(errc::invalid_argument, REError);
+  Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();

-    auto Rg =
-        std::make_unique<Matcher::Reg>(Pattern, LineNumber, std::move(CheckRE));
-    RegExes.emplace_back(std::move(Rg));
+  // Check that the regexp is valid.
+  Regex CheckRE(Regexp);
+  std::string REError;
+  if (!CheckRE.isValid(REError))
+    return createStringError(errc::invalid_argument, REError);

-    return Error::success();
-  }
+  RegExes.emplace_back(Pattern, LineNumber, std::move(CheckRE));
+  return Error::success();
+}

-  auto Glob = std::make_unique<Matcher::Glob>(Pattern, LineNumber);
-  // We must be sure to use the string in `Glob` rather than the provided
-  // reference which could be destroyed before match() is called
-  if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024)
-                     .moveInto(Glob->Pattern))
+void SpecialCaseList::RegexMatcher::match(
+    StringRef Query,
+    llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
+  for (const auto &R : reverse(RegExes))
+    if (R.Rg.match(Query))
+      Cb(R.Name, R.LineNo);
+}
+
+Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern,
+                                           unsigned LineNumber) {
+  if (Pattern.empty())
+    return createStringError(errc::invalid_argument, "Supplied glob was blank");
+
+  auto Res = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024);
+  if (auto Err = Res.takeError())
     return Err;

-  Globs.push_back(std::move(Glob));
+  Globs.emplace_back(Pattern, LineNumber, std::move(Res.get()));
   return Error::success();
 }

+void SpecialCaseList::GlobMatcher::match(
+    StringRef Query,
+    llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
+  for (const auto &G : reverse(Globs))
+    if (G.Pattern.match(Query))
+      Cb(G.Name, G.LineNo);
+}
+
+SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
+    : RemoveDotSlash(RemoveDotSlash) {
+  if (UseGlobs)
+    M.emplace<GlobMatcher>();
+  else
+    M.emplace<RegexMatcher>();
+}
+
 void SpecialCaseList::Matcher::match(
     StringRef Query,
     llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
-  for (const auto &Glob : reverse(Globs))
-    if (Glob->Pattern.match(Query))
-      Cb(Glob->Name, Glob->LineNo);
-  for (const auto &Regex : reverse(RegExes))
-    if (Regex->Rg.match(Query))
-      Cb(Regex->Name, Regex->LineNo);
+  if (RemoveDotSlash)
+    Query = llvm::sys::path::remove_leading_dotslash(Query);
+  return std::visit([&](auto &V) { return V.match(Query, Cb); }, M);
+}
+
+Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) {
+  return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
 }

 // TODO: Refactor this to return Expected<...>
@@ -136,10 +159,11 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
 Expected<SpecialCaseList::Section *>
 SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
                             unsigned LineNo, bool UseGlobs) {
-  Sections.emplace_back(SectionStr, FileNo);
+  Sections.emplace_back(SectionStr, FileNo, UseGlobs);
   auto &Section = Sections.back();
-  if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo, UseGlobs)) {
+  SectionStr = SectionStr.copy(StrAlloc);
+  if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo)) {
     return createStringError(errc::invalid_argument,
                              "malformed section at line " + Twine(LineNo) +
                                  ": '" + SectionStr +
@@ -164,12 +188,18 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
   // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
   bool UseGlobs = Version > 1;

+  bool RemoveDotSlash = Version > 2;
+
   Section *CurrentSection;
-  if (auto Err = addSection("*", FileIdx, 1).moveInto(CurrentSection)) {
+  if (auto Err = addSection("*", FileIdx, 1, true).moveInto(CurrentSection)) {
     Error = toString(std::move(Err));
     return false;
   }

+  // This is the current list of prefixes for all existing users matching file
+  // paths. We may need parameterization in the constructor in the future.
+  constexpr StringRef PathPrefixes[] = {"src", "!src", "mainfile", "source"};
+
   for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
        !LineIt.is_at_eof(); LineIt++) {
     unsigned LineNo = LineIt.line_number();
@@ -204,8 +234,11 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
     }

     auto [Pattern, Category] = Postfix.split("=");
-    auto &Entry = CurrentSection->Entries[Prefix][Category];
-    if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
+    auto [It, _] = CurrentSection->Entries[Prefix].try_emplace(
+        Category, UseGlobs,
+        RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix));
+    Pattern = Pattern.copy(StrAlloc);
+    if (auto Err = It->second.insert(Pattern, LineNo)) {
       Error = (Twine("malformed ") + (UseGlobs ? "glob" : "regex") +
               " in line " + Twine(LineNo) + ": '" + Pattern +
               "': " + toString(std::move(Err)))
@@ -262,4 +295,17 @@ unsigned SpecialCaseList::Section::getLastMatch(StringRef Prefix,
   return LastLine;
 }

+StringRef SpecialCaseList::Section::getLongestMatch(StringRef Prefix,
+                                                    StringRef Query,
+                                                    StringRef Category) const {
+  StringRef LongestRule;
+  if (const Matcher *M = findMatcher(Prefix, Category)) {
+    M->match(Query, [&](StringRef Rule, unsigned) {
+      if (LongestRule.size() < Rule.size())
+        LongestRule = Rule;
+    });
+  }
+  return LongestRule;
+}
+
 } // namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 65b752e..9438917 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -816,8 +816,8 @@ def : BTI<"jc", 0b110>;
 // TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===// -class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg> { +class TLBICommon<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg> { string Name = name; bits<14> Encoding; let Encoding{13-11} = op1; @@ -830,131 +830,150 @@ class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }]; } -def TLBITable : GenericTable { - let FilterClass = "TLBIEntry"; - let CppTypeName = "TLBI"; - let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; - - let PrimaryKey = ["Encoding"]; - let PrimaryKeyName = "lookupTLBIByEncoding"; +class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg> + : TLBICommon<name, op1, crn, crm, op2, needsreg>; + +class TLBIPEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg> + : TLBICommon<name, op1, crn, crm, op2, needsreg>; + +multiclass TLBITableBase { + def NAME # Table : GenericTable { + let FilterClass = NAME # "Entry"; + let CppTypeName = NAME; + let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; + let PrimaryKey = ["Encoding"]; + let PrimaryKeyName = "lookup" # NAME # "ByEncoding"; + } + def lookup # NAME # ByName : SearchIndex { + let Table = !cast<GenericTable>(NAME # "Table"); + let Key = ["Name"]; + } } -def lookupTLBIByName : SearchIndex { - let Table = TLBITable; - let Key = ["Name"]; -} +defm TLBI : TLBITableBase; +defm TLBIP : TLBITableBase; -multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm, +multiclass TLBI<string name, bit hasTLBIP, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2, bit needsreg = 1> { def : TLBIEntry<name, op1, crn, crm, op2, needsreg>; def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> { let Encoding{7} = 1; let ExtraRequires = ["AArch64::FeatureXS"]; } + if !eq(hasTLBIP, true) then { + def : TLBIPEntry<name, op1, crn, crm, op2, needsreg>; + def : TLBIPEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> { + let Encoding{7} = 1; + let ExtraRequires = ["AArch64::FeatureXS"]; + } + } } -defm : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>; -defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>; -defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>; -defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>; -defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>; -defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>; -defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>; -defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>; -defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>; -defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>; -defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>; -defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>; -defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>; -defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>; -defm : 
TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>; -defm : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>; -defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>; -defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>; -defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>; -defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>; +// hasTLBIP op1 CRn CRm op2 needsreg +defm : TLBI<"IPAS2E1IS", 1, 0b100, 0b1000, 0b0000, 0b001>; +defm : TLBI<"IPAS2LE1IS", 1, 0b100, 0b1000, 0b0000, 0b101>; +defm : TLBI<"VMALLE1IS", 0, 0b000, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"ALLE2IS", 0, 0b100, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"ALLE3IS", 0, 0b110, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"VAE1IS", 1, 0b000, 0b1000, 0b0011, 0b001>; +defm : TLBI<"VAE2IS", 1, 0b100, 0b1000, 0b0011, 0b001>; +defm : TLBI<"VAE3IS", 1, 0b110, 0b1000, 0b0011, 0b001>; +defm : TLBI<"ASIDE1IS", 0, 0b000, 0b1000, 0b0011, 0b010>; +defm : TLBI<"VAAE1IS", 1, 0b000, 0b1000, 0b0011, 0b011>; +defm : TLBI<"ALLE1IS", 0, 0b100, 0b1000, 0b0011, 0b100, 0>; +defm : TLBI<"VALE1IS", 1, 0b000, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VALE2IS", 1, 0b100, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VALE3IS", 1, 0b110, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VMALLS12E1IS", 0, 0b100, 0b1000, 0b0011, 0b110, 0>; +defm : TLBI<"VAALE1IS", 1, 0b000, 0b1000, 0b0011, 0b111>; +defm : TLBI<"IPAS2E1", 1, 0b100, 0b1000, 0b0100, 0b001>; +defm : TLBI<"IPAS2LE1", 1, 0b100, 0b1000, 0b0100, 0b101>; +defm : TLBI<"VMALLE1", 0, 0b000, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"ALLE2", 0, 0b100, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"ALLE3", 0, 0b110, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"VAE1", 1, 0b000, 0b1000, 0b0111, 0b001>; +defm : TLBI<"VAE2", 1, 0b100, 0b1000, 0b0111, 0b001>; +defm : TLBI<"VAE3", 1, 0b110, 0b1000, 0b0111, 0b001>; +defm : TLBI<"ASIDE1", 0, 0b000, 0b1000, 0b0111, 0b010>; +defm : TLBI<"VAAE1", 1, 0b000, 0b1000, 0b0111, 0b011>; +defm : TLBI<"ALLE1", 0, 0b100, 0b1000, 0b0111, 0b100, 0>; +defm : TLBI<"VALE1", 1, 0b000, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VALE2", 1, 0b100, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VALE3", 1, 0b110, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VMALLS12E1", 0, 0b100, 0b1000, 0b0111, 0b110, 0>; +defm : TLBI<"VAALE1", 1, 0b000, 0b1000, 0b0111, 0b111>; // Armv8.4-A Translation Lookaside Buffer Instructions (TLBI) let Requires = ["AArch64::FeatureTLB_RMI"] in { // Armv8.4-A Outer Sharable TLB Maintenance instructions: -// op1 CRn CRm op2 -defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>; -defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>; -defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>; -defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>; -defm : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>; -defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>; -defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>; -defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>; -defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>; -defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>; -defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>; -defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>; -defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>; +// hasTLBIP op1 CRn CRm op2 needsreg +defm : TLBI<"VMALLE1OS", 
0, 0b000, 0b1000, 0b0001, 0b000, 0>; +defm : TLBI<"VAE1OS", 1, 0b000, 0b1000, 0b0001, 0b001>; +defm : TLBI<"ASIDE1OS", 0, 0b000, 0b1000, 0b0001, 0b010>; +defm : TLBI<"VAAE1OS", 1, 0b000, 0b1000, 0b0001, 0b011>; +defm : TLBI<"VALE1OS", 1, 0b000, 0b1000, 0b0001, 0b101>; +defm : TLBI<"VAALE1OS", 1, 0b000, 0b1000, 0b0001, 0b111>; +defm : TLBI<"IPAS2E1OS", 1, 0b100, 0b1000, 0b0100, 0b000>; +defm : TLBI<"IPAS2LE1OS", 1, 0b100, 0b1000, 0b0100, 0b100>; +defm : TLBI<"VAE2OS", 1, 0b100, 0b1000, 0b0001, 0b001>; +defm : TLBI<"VALE2OS", 1, 0b100, 0b1000, 0b0001, 0b101>; +defm : TLBI<"VMALLS12E1OS", 0, 0b100, 0b1000, 0b0001, 0b110, 0>; +defm : TLBI<"VAE3OS", 1, 0b110, 0b1000, 0b0001, 0b001>; +defm : TLBI<"VALE3OS", 1, 0b110, 0b1000, 0b0001, 0b101>; +defm : TLBI<"ALLE2OS", 0, 0b100, 0b1000, 0b0001, 0b000, 0>; +defm : TLBI<"ALLE1OS", 0, 0b100, 0b1000, 0b0001, 0b100, 0>; +defm : TLBI<"ALLE3OS", 0, 0b110, 0b1000, 0b0001, 0b000, 0>; // Armv8.4-A TLB Range Maintenance instructions: -// op1 CRn CRm op2 -defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>; -defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>; -defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>; -defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>; -defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>; -defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>; -defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>; -defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>; -defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>; -defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>; -defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>; -defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>; -defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>; -defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>; -defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>; +// hasTLBIP op1 CRn CRm op2 needsreg +defm : TLBI<"RVAE1", 1, 0b000, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVAAE1", 1, 0b000, 0b1000, 0b0110, 0b011>; +defm : TLBI<"RVALE1", 1, 0b000, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAALE1", 1, 0b000, 0b1000, 0b0110, 0b111>; +defm : TLBI<"RVAE1IS", 1, 0b000, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVAAE1IS", 1, 0b000, 0b1000, 0b0010, 0b011>; +defm : TLBI<"RVALE1IS", 1, 0b000, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAALE1IS", 1, 0b000, 0b1000, 0b0010, 0b111>; +defm : TLBI<"RVAE1OS", 1, 0b000, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVAAE1OS", 1, 0b000, 0b1000, 0b0101, 0b011>; +defm : TLBI<"RVALE1OS", 1, 0b000, 0b1000, 0b0101, 0b101>; +defm : TLBI<"RVAALE1OS", 1, 0b000, 0b1000, 0b0101, 0b111>; +defm : TLBI<"RIPAS2E1IS", 1, 0b100, 0b1000, 0b0000, 0b010>; +defm : TLBI<"RIPAS2LE1IS", 1, 0b100, 0b1000, 0b0000, 0b110>; +defm : TLBI<"RIPAS2E1", 1, 
0b100, 0b1000, 0b0100, 0b010>; +defm : TLBI<"RIPAS2LE1", 1, 0b100, 0b1000, 0b0100, 0b110>; +defm : TLBI<"RIPAS2E1OS", 1, 0b100, 0b1000, 0b0100, 0b011>; +defm : TLBI<"RIPAS2LE1OS", 1, 0b100, 0b1000, 0b0100, 0b111>; +defm : TLBI<"RVAE2", 1, 0b100, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVALE2", 1, 0b100, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAE2IS", 1, 0b100, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVALE2IS", 1, 0b100, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAE2OS", 1, 0b100, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVALE2OS", 1, 0b100, 0b1000, 0b0101, 0b101>; +defm : TLBI<"RVAE3", 1, 0b110, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVALE3", 1, 0b110, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAE3IS", 1, 0b110, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVALE3IS", 1, 0b110, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAE3OS", 1, 0b110, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVALE3OS", 1, 0b110, 0b1000, 0b0101, 0b101>; } //FeatureTLB_RMI // Armv9-A Realm Management Extension TLBI Instructions let Requires = ["AArch64::FeatureRME"] in { -defm : TLBI<"RPAOS", 0b110, 0b1000, 0b0100, 0b011>; -defm : TLBI<"RPALOS", 0b110, 0b1000, 0b0100, 0b111>; -defm : TLBI<"PAALLOS", 0b110, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"PAALL", 0b110, 0b1000, 0b0111, 0b100, 0>; +defm : TLBI<"RPAOS", 0, 0b110, 0b1000, 0b0100, 0b011>; +defm : TLBI<"RPALOS", 0, 0b110, 0b1000, 0b0100, 0b111>; +defm : TLBI<"PAALLOS", 0, 0b110, 0b1000, 0b0001, 0b100, 0>; +defm : TLBI<"PAALL", 0, 0b110, 0b1000, 0b0111, 0b100, 0>; } // Armv9.5-A TLBI VMALL for Dirty State let Requires = ["AArch64::FeatureTLBIW"] in { -// op1, CRn, CRm, op2, needsreg -defm : TLBI<"VMALLWS2E1", 0b100, 0b1000, 0b0110, 0b010, 0>; -defm : TLBI<"VMALLWS2E1IS", 0b100, 0b1000, 0b0010, 0b010, 0>; -defm : TLBI<"VMALLWS2E1OS", 0b100, 0b1000, 0b0101, 0b010, 0>; +// op1, CRn, CRm, op2, needsreg +defm : TLBI<"VMALLWS2E1", 0, 0b100, 0b1000, 0b0110, 0b010, 0>; +defm : TLBI<"VMALLWS2E1IS", 0, 0b100, 0b1000, 0b0010, 0b010, 0>; +defm : TLBI<"VMALLWS2E1OS", 0, 0b100, 0b1000, 0b0101, 0b010, 0>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 3641e22..2c3870c 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -4020,23 +4020,23 @@ bool AArch64AsmParser::parseSyspAlias(StringRef Name, SMLoc NameLoc, if (HasnXSQualifier) { Op = Op.drop_back(3); } - const AArch64TLBI::TLBI *TLBIorig = AArch64TLBI::lookupTLBIByName(Op); - if (!TLBIorig) + const AArch64TLBIP::TLBIP *TLBIPorig = AArch64TLBIP::lookupTLBIPByName(Op); + if (!TLBIPorig) return TokError("invalid operand for TLBIP instruction"); - const AArch64TLBI::TLBI TLBI( - TLBIorig->Name, TLBIorig->Encoding | (HasnXSQualifier ? (1 << 7) : 0), - TLBIorig->NeedsReg, + const AArch64TLBIP::TLBIP TLBIP( + TLBIPorig->Name, TLBIPorig->Encoding | (HasnXSQualifier ? (1 << 7) : 0), + TLBIPorig->NeedsReg, HasnXSQualifier - ? TLBIorig->FeaturesRequired | FeatureBitset({AArch64::FeatureXS}) - : TLBIorig->FeaturesRequired); - if (!TLBI.haveFeatures(getSTI().getFeatureBits())) { + ? TLBIPorig->FeaturesRequired | FeatureBitset({AArch64::FeatureXS}) + : TLBIPorig->FeaturesRequired); + if (!TLBIP.haveFeatures(getSTI().getFeatureBits())) { std::string Name = - std::string(TLBI.Name) + (HasnXSQualifier ? "nXS" : ""); + std::string(TLBIP.Name) + (HasnXSQualifier ? 
"nXS" : ""); std::string Str("TLBIP " + Name + " requires: "); - setRequiredFeatureString(TLBI.getRequiredFeatures(), Str); + setRequiredFeatureString(TLBIP.getRequiredFeatures(), Str); return TokError(Str); } - createSysAlias(TLBI.Encoding, Operands, S); + createSysAlias(TLBIP.Encoding, Operands, S); } Lex(); // Eat operand. diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 2552ee3..35bd244 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1066,12 +1066,13 @@ bool AArch64InstPrinter::printSyspAlias(const MCInst *MI, Encoding &= ~(1 << 7); } - const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding); - if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits())) + const AArch64TLBIP::TLBIP *TLBIP = + AArch64TLBIP::lookupTLBIPByEncoding(Encoding); + if (!TLBIP || !TLBIP->haveFeatures(STI.getFeatureBits())) return false; Ins = "tlbip\t"; - Name = std::string(TLBI->Name); + Name = std::string(TLBIP->Name); if (CnVal == 9) Name += "nXS"; } else diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 7767028..d6cb0e8 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -186,6 +186,13 @@ namespace llvm { } namespace llvm { +namespace AArch64TLBIP { +#define GET_TLBIPTable_IMPL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64TLBIP +} // namespace llvm + +namespace llvm { namespace AArch64SVCR { #define GET_SVCRsList_IMPL #include "AArch64GenSystemOperands.inc" diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index a4ee963..fea33ef 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -795,6 +795,14 @@ namespace AArch64TLBI { #include "AArch64GenSystemOperands.inc" } +namespace AArch64TLBIP { +struct TLBIP : SysAliasReg { + using SysAliasReg::SysAliasReg; +}; +#define GET_TLBIPTable_DECL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64TLBIP + namespace AArch64II { /// Target Operand Flag enum. enum TOF { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index ef58004..9907c88f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1288,16 +1288,17 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA, return std::min(MaxVirtReg + MaxPhysReg, 256u); } -// TODO: Migrate to range merge of amdgpu-agpr-alloc. 
-struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { - using Base = StateWrapper<BooleanState, AbstractAttribute>; - AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : Base(IRP) {} +struct AAAMDGPUMinAGPRAlloc + : public StateWrapper<DecIntegerState<>, AbstractAttribute> { + using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>; + AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {} - static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP, - Attributor &A) { + static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP, + Attributor &A) { if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) - return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A); - llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position"); + return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A); + llvm_unreachable( + "AAAMDGPUMinAGPRAlloc is only valid for function position"); } void initialize(Attributor &A) override { @@ -1310,25 +1311,33 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { } const std::string getAsStr(Attributor *A) const override { - return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr"; + std::string Str = "amdgpu-agpr-alloc="; + raw_string_ostream OS(Str); + OS << getAssumed(); + return OS.str(); } void trackStatistics() const override {} ChangeStatus updateImpl(Attributor &A) override { - // TODO: Use AACallEdges, but then we need a way to inspect asm edges. + DecIntegerState<> Maximum; - auto CheckForNoAGPRs = [&](Instruction &I) { + // Check for cases which require allocation of AGPRs. The only cases where + // AGPRs are required are if there are direct references to AGPRs, so inline + // assembly and special intrinsics. + auto CheckForMinAGPRAllocs = [&](Instruction &I) { const auto &CB = cast<CallBase>(I); const Value *CalleeOp = CB.getCalledOperand(); - const Function *Callee = dyn_cast<Function>(CalleeOp); - if (!Callee) { - if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) - return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0; - return false; + + if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) { + // Technically, the inline asm could be invoking a call to an unknown + // external function that requires AGPRs, but ignore that. 
+ unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB); + Maximum.takeAssumedMaximum(NumRegs); + return true; } - switch (Callee->getIntrinsicID()) { + switch (CB.getIntrinsicID()) { case Intrinsic::not_intrinsic: break; case Intrinsic::write_register: @@ -1340,7 +1349,10 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { ->getOperand(0)); auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmPhysRegName(RegName->getString()); - return Kind != 'a'; + if (Kind == 'a') + Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u)); + + return true; } default: // Some intrinsics may use AGPRs, but if we have a choice, we are not @@ -1349,32 +1361,50 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { } // TODO: Handle callsite attributes - const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>( - *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); - return CalleeInfo && CalleeInfo->isValidState() && - CalleeInfo->getAssumed(); + auto *CBEdges = A.getAAFor<AACallEdges>( + *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED); + if (!CBEdges || CBEdges->hasUnknownCallee()) { + Maximum.indicatePessimisticFixpoint(); + return false; + } + + for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) { + const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>( + *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED); + if (!CalleeInfo || !CalleeInfo->isValidState()) { + Maximum.indicatePessimisticFixpoint(); + return false; + } + + Maximum.takeAssumedMaximum(CalleeInfo->getAssumed()); + } + + return true; }; bool UsedAssumedInformation = false; - if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this, + if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this, UsedAssumedInformation)) return indicatePessimisticFixpoint(); - return ChangeStatus::UNCHANGED; + + return clampStateAndIndicateChange(getState(), Maximum); } ChangeStatus manifest(Attributor &A) override { - if (!getAssumed()) - return ChangeStatus::UNCHANGED; LLVMContext &Ctx = getAssociatedFunction()->getContext(); - return A.manifestAttrs(getIRPosition(), - {Attribute::get(Ctx, "amdgpu-agpr-alloc", "0")}); + SmallString<4> Buffer; + raw_svector_ostream OS(Buffer); + OS << getAssumed(); + + return A.manifestAttrs( + getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())}); } - StringRef getName() const override { return "AAAMDGPUNoAGPR"; } + StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; } const char *getIdAddr() const override { return &ID; } /// This function should return true if the type of the \p AA is - /// AAAMDGPUNoAGPRs + /// AAAMDGPUMinAGPRAllocs static bool classof(const AbstractAttribute *AA) { return (AA->getIdAddr() == &ID); } @@ -1382,7 +1412,7 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { static const char ID; }; -const char AAAMDGPUNoAGPR::ID = 0; +const char AAAMDGPUMinAGPRAlloc::ID = 0; /// An abstract attribute to propagate the function attribute /// "amdgpu-cluster-dims" from kernel entry functions to device functions. 
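
The new AAAMDGPUMinAGPRAlloc attribute replaces the boolean "never touches AGPRs" state with a decreasing-integer state: a function's assumed value is the maximum AGPR demand over all of its call-like instructions, and an unknown callee collapses the state to the pessimistic bound of 256 registers. The sketch below is a simplified, self-contained model of that propagation; the struct and function names are illustrative stand-ins for the Attributor machinery (DecIntegerState, AACallEdges) used in the real pass.

#include <algorithm>
#include <optional>
#include <vector>

struct FuncInfo;

// One call-like instruction: either a direct AGPR requirement (inline asm,
// read/write_register of an 'a' register) or a set of possible callees.
struct CallSite {
  std::optional<unsigned> DirectAGPRs;
  std::vector<const FuncInfo *> Callees;
  bool UnknownCallee = false;
};

struct FuncInfo {
  unsigned AssumedAGPRs = 0; // 256 is the pessimistic fixpoint
  std::vector<CallSite> Calls;
};

// Meet step of the propagation: the function's demand is the maximum over
// all call sites; any unknown callee forces the pessimistic bound.
unsigned computeMinAGPRAlloc(const FuncInfo &F) {
  unsigned Maximum = 0;
  for (const CallSite &CS : F.Calls) {
    if (CS.DirectAGPRs) {
      Maximum = std::max(Maximum, std::min(*CS.DirectAGPRs, 256u));
      continue;
    }
    if (CS.UnknownCallee)
      return 256;
    for (const FuncInfo *Callee : CS.Callees)
      Maximum = std::max(Maximum, Callee->AssumedAGPRs);
  }
  return Maximum; // manifested as "amdgpu-agpr-alloc=<Maximum>"
}
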
@@ -1550,10 +1580,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, DenseSet<const char *> Allowed( {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, - &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, - &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID, - &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, - &AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID}); + &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, + &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID, + &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID, + &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID, + &AAAMDGPUClusterDims::ID}); AttributorConfig AC(CGUpdater); AC.IsClosedWorldModule = Options.IsClosedWorld; @@ -1595,7 +1626,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F)); if (ST.hasGFX90AInsts()) - A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F)); + A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F)); for (auto &I : instructions(F)) { Value *Ptr = nullptr; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index e4d328a..b8b419d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1112,8 +1112,7 @@ void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { {N->getOperand(0), N->getOperand(1), CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); } else { - unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO - : AMDGPU::S_USUBO_PSEUDO; + unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO; CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {N->getOperand(0), N->getOperand(1)}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index fedb694..89c16da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -482,12 +482,13 @@ void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const { } sort(StackIntervals, [](const LiveInterval *A, const LiveInterval *B) { + // The ordering has to be strictly weak. /// Sort heaviest intervals first to prioritize their unspilling - if (A->weight() > B->weight()) - return true; + if (A->weight() != B->weight()) + return A->weight() > B->weight(); - if (A->getSize() > B->getSize()) - return true; + if (A->getSize() != B->getSize()) + return A->getSize() > B->getSize(); // Tie breaker by number to avoid need for stable sort return A->reg().stackSlotIndex() < B->reg().stackSlotIndex(); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1a686a9..730be69 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6073,9 +6073,6 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineOperand &Src0 = MI.getOperand(2); MachineOperand &Src1 = MI.getOperand(3); MachineOperand &Src2 = MI.getOperand(4); - unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) - ? 
AMDGPU::S_ADDC_U32 - : AMDGPU::S_SUBB_U32; if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) { Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0) @@ -6124,11 +6121,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addImm(0); } - // clang-format off - BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()) - .add(Src0) - .add(Src1); - // clang-format on + unsigned Opc = MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO + ? AMDGPU::S_ADDC_U32 + : AMDGPU::S_SUBB_U32; + + BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1); unsigned SelOpc = ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; @@ -16571,6 +16568,53 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N, } } + // Eliminate setcc by using carryout from add/sub instruction + + // LHS = ADD i64 RHS, Z LHSlo = UADDO i32 RHSlo, Zlo + // setcc LHS ult RHS -> LHSHi = UADDO_CARRY i32 RHShi, Zhi + // similarly for subtraction + + // LHS = ADD i64 Y, 1 LHSlo = UADDO i32 Ylo, 1 + // setcc LHS eq 0 -> LHSHi = UADDO_CARRY i32 Yhi, 0 + + if (VT == MVT::i64 && ((CC == ISD::SETULT && + sd_match(LHS, m_Add(m_Specific(RHS), m_Value()))) || + (CC == ISD::SETUGT && + sd_match(LHS, m_Sub(m_Specific(RHS), m_Value()))) || + (CC == ISD::SETEQ && CRHS && CRHS->isZero() && + sd_match(LHS, m_Add(m_Value(), m_One()))))) { + bool IsAdd = LHS.getOpcode() == ISD::ADD; + + SDValue Op0 = LHS.getOperand(0); + SDValue Op1 = LHS.getOperand(1); + + SDValue Op0Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Op0); + SDValue Op1Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Op1); + + SDValue Op0Hi = getHiHalf64(Op0, DAG); + SDValue Op1Hi = getHiHalf64(Op1, DAG); + + SDValue NodeLo = + DAG.getNode(IsAdd ? ISD::UADDO : ISD::USUBO, SL, + DAG.getVTList(MVT::i32, MVT::i1), {Op0Lo, Op1Lo}); + + SDValue CarryInHi = NodeLo.getValue(1); + SDValue NodeHi = DAG.getNode(IsAdd ? 
ISD::UADDO_CARRY : ISD::USUBO_CARRY, + SL, DAG.getVTList(MVT::i32, MVT::i1), + {Op0Hi, Op1Hi, CarryInHi}); + + SDValue ResultLo = NodeLo.getValue(0); + SDValue ResultHi = NodeHi.getValue(0); + + SDValue JoinedResult = + DAG.getBuildVector(MVT::v2i32, SL, {ResultLo, ResultHi}); + + SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, JoinedResult); + SDValue Overflow = NodeHi.getValue(1); + DCI.CombineTo(LHS.getNode(), Result); + return Overflow; + } + if (VT != MVT::f32 && VT != MVT::f64 && (!Subtarget->has16BitInsts() || VT != MVT::f16)) return SDValue(); diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index bc1a3a7..82c43ff 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -1507,7 +1507,7 @@ void DXILBitcodeWriter::writeDICompileUnit(const DICompileUnit *N, SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); - Record.push_back(N->getSourceLanguage()); + Record.push_back(N->getSourceLanguage().getUnversionedName()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawProducer())); Record.push_back(N->isOptimized()); diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp index bb8cec0..4e06939 100644 --- a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp @@ -88,7 +88,7 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) { // reinterpret_cast<InitCallback *>(*start)(); // } // -// void call_init_array_callbacks() { +// void call_fini_array_callbacks() { // size_t fini_array_size = __fini_array_end - __fini_array_start; // for (size_t i = fini_array_size; i > 0; --i) // reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])(); @@ -153,7 +153,7 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) { "start"); } IRB.CreateCondBr( - IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGT, BeginVal, + IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, BeginVal, EndVal), LoopBB, ExitBB); IRB.SetInsertPoint(LoopBB); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp index ecfb5fe..eb41588 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -334,7 +334,7 @@ static bool isLegalElementTypeForRVV(Type *EltTy, if (EltTy->isIntegerTy(64)) return Subtarget.hasVInstructionsI64(); if (EltTy->isHalfTy()) - return Subtarget.hasVInstructionsF16(); + return Subtarget.hasVInstructionsF16Minimal(); if (EltTy->isBFloatTy()) return Subtarget.hasVInstructionsBF16Minimal(); if (EltTy->isFloatTy()) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 8d9b777..a29b7dd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -788,32 +788,32 @@ multiclass ShxAdd_UWPat<int i, Instruction shxadd_uw> { } multiclass Sh1Add_UWPat<Instruction sh1add_uw> { - def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), (i64 0x1FFFFFFFF)), + (XLenVT GPR:$rs2)), (sh1add_uw GPR:$rs1, GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. 
- def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x1FFFFFFFE)), + (XLenVT GPR:$rs2)), (sh1add_uw (XLenVT (SRLI GPR:$rs1, 1)), GPR:$rs2)>; } multiclass Sh2Add_UWPat<Instruction sh2add_uw> { - def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), (i64 0x3FFFFFFFF)), + (XLenVT GPR:$rs2)), (sh2add_uw GPR:$rs1, GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. - def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x3FFFFFFFC)), + (XLenVT GPR:$rs2)), (sh2add_uw (XLenVT (SRLI GPR:$rs1, 2)), GPR:$rs2)>; } multiclass Sh3Add_UWPat<Instruction sh3add_uw> { - def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 3)), (i64 0x7FFFFFFFF)), + (XLenVT GPR:$rs2)), (sh3add_uw GPR:$rs1, GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. - def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x7FFFFFFF8)), + (XLenVT GPR:$rs2)), (sh3add_uw (XLenVT (SRLI GPR:$rs1, 3)), GPR:$rs2)>; } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 82e768d..6605a5c 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -238,7 +238,7 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList> } class GPRRegisterClass<dag regList> - : RISCVRegisterClass<[XLenVT, XLenFVT, i32, i16], 32, regList> { + : RISCVRegisterClass<[XLenVT, XLenFVT], 32, regList> { let RegInfos = XLenRI; } diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp index 776208b..35a2ee1 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp @@ -284,6 +284,17 @@ void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address, } break; } + case SPIRV::OpPredicatedLoadINTEL: + case SPIRV::OpPredicatedStoreINTEL: { + const unsigned NumOps = MI->getNumOperands(); + if (NumOps > NumFixedOps) { + OS << ' '; + printSymbolicOperand<OperandCategory::MemoryOperandOperand>( + MI, NumOps - 1, OS); + break; + } + break; + } default: printRemainingVariableOps(MI, NumFixedOps, OS); break; diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 0e0c454..dbe8e18 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -2419,6 +2419,27 @@ static bool generatePipeInst(const SPIRV::IncomingCall *Call, return buildPipeInst(Call, Opcode, Scope, MIRBuilder, GR); } +static bool generatePredicatedLoadStoreInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; + unsigned Opcode = + SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode; + + bool IsSet = Opcode != SPIRV::OpPredicatedStoreINTEL; + unsigned ArgSz = Call->Arguments.size(); + SmallVector<uint32_t, 1> ImmArgs; + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + // Memory operand is optional and is literal. 
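+  // For example, __spirv_PredicatedLoadINTEL(ptr, predicate, default_value)
+  // passes three arguments and needs no literal, while a fourth argument is
+  // the MemoryAccess literal that must be forwarded as an immediate.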
+ if (ArgSz > 3) + ImmArgs.push_back( + getConstFromIntrinsic(Call->Arguments[/*Literal index*/ 3], MRI)); + + Register TypeReg = GR->getSPIRVTypeID(Call->ReturnType); + return buildOpFromWrapper(MIRBuilder, Opcode, Call, + IsSet ? TypeReg : Register(0), ImmArgs); +} + static bool buildNDRange(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) { @@ -3019,6 +3040,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall, return generate2DBlockIOINTELInst(Call.get(), MIRBuilder, GR); case SPIRV::Pipe: return generatePipeInst(Call.get(), MIRBuilder, GR); + case SPIRV::PredicatedLoadStore: + return generatePredicatedLoadStoreInst(Call.get(), MIRBuilder, GR); } return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 2a8deb6..3b8764a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -70,6 +70,7 @@ def BindlessINTEL : BuiltinGroup; def TernaryBitwiseINTEL : BuiltinGroup; def Block2DLoadStore : BuiltinGroup; def Pipe : BuiltinGroup; +def PredicatedLoadStore : BuiltinGroup; //===----------------------------------------------------------------------===// // Class defining a demangled builtin record. The information in the record @@ -752,6 +753,10 @@ defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockLoadTransformINTEL", OpenC defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockPrefetchINTEL", OpenCL_std, Block2DLoadStore, 9, 9, OpSubgroup2DBlockPrefetchINTEL>; defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockStoreINTEL", OpenCL_std, Block2DLoadStore, 10, 10, OpSubgroup2DBlockStoreINTEL>; +// SPV_INTEL_predicated_io builtin records +defm : DemangledNativeBuiltin<"__spirv_PredicatedLoadINTEL", OpenCL_std, PredicatedLoadStore, 3, 4, OpPredicatedLoadINTEL>; +defm : DemangledNativeBuiltin<"__spirv_PredicatedStoreINTEL", OpenCL_std, PredicatedLoadStore, 3, 4, OpPredicatedStoreINTEL>; + //===----------------------------------------------------------------------===// // Class defining a work/sub group builtin that should be translated into a // SPIR-V instruction using the defined properties. 
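
The SPV_INTEL_predicated_io builtins registered above lower to the two new opcodes OpPredicatedLoadINTEL and OpPredicatedStoreINTEL. Their scalar semantics are simple: the load yields the pointee when the predicate is set and the supplied default value otherwise, and the store is suppressed entirely when the predicate is clear. A C++ model of that behavior (illustrative only; the real instructions operate on SPIR-V pointers and may carry an optional memory-operand literal):

// Model of OpPredicatedLoadINTEL: no memory access happens on the false path,
// so a masked-off lane cannot fault; the default value is returned instead.
template <typename T>
T predicatedLoad(const T *Ptr, bool Predicate, T DefaultValue) {
  return Predicate ? *Ptr : DefaultValue;
}

// Model of OpPredicatedStoreINTEL: the write is dropped when the predicate
// is false.
template <typename T>
void predicatedStore(T *Ptr, T Object, bool Predicate) {
  if (Predicate)
    *Ptr = Object;
}
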
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 85ea9e1..5f3ed86 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -151,7 +151,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>> {"SPV_KHR_bfloat16", SPIRV::Extension::Extension::SPV_KHR_bfloat16}, {"SPV_EXT_relaxed_printf_string_address_space", SPIRV::Extension::Extension:: - SPV_EXT_relaxed_printf_string_address_space}}; + SPV_EXT_relaxed_printf_string_address_space}, + {"SPV_INTEL_predicated_io", + SPIRV::Extension::Extension::SPV_INTEL_predicated_io}}; bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName, StringRef ArgValue, diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp index 275463e..318ef06 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp @@ -112,7 +112,8 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { FilePaths.emplace_back(); sys::path::append(FilePaths.back(), File->getDirectory(), File->getFilename()); - LLVMSourceLanguages.push_back(CompileUnit->getSourceLanguage()); + LLVMSourceLanguages.push_back( + CompileUnit->getSourceLanguage().getUnversionedName()); } } const NamedMDNode *ModuleFlags = M->getNamedMetadata("llvm.module.flags"); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 1723bfb..a61351e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -987,3 +987,9 @@ def OpSubgroup2DBlockPrefetchINTEL: Op<6234, (outs), (ins ID:$element_size, ID:$ def OpSubgroup2DBlockStoreINTEL: Op<6235, (outs), (ins ID:$element_size, ID:$block_width, ID:$block_height, ID:$block_count, ID:$src_ptr, ID:$dst_base_ptr, ID:$memory_width, ID:$memory_height, ID:$memory_pitch, ID:$coord), "OpSubgroup2DBlockStoreINTEL $element_size $block_width $block_height $block_count $src_ptr $dst_base_ptr $memory_width $memory_height $memory_pitch $coord">; + +// SPV_INTEL_predicated_io +def OpPredicatedLoadINTEL: Op<6528, (outs ID:$res), (ins TYPE:$resType, ID:$ptr, ID:$predicate, ID:$default_value, variable_ops), + "$res = OpPredicatedLoadINTEL $resType $ptr $predicate $default_value">; +def OpPredicatedStoreINTEL: Op<6529, (outs), (ins ID:$ptr, ID:$object, ID:$predicate, variable_ops), + "OpPredicatedStoreINTEL $ptr $object $predicate">; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index dc717a6..5144fb1 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -2035,6 +2035,17 @@ void addInstrRequirements(const MachineInstr &MI, // TODO: Add UntypedPointersKHR when implemented. 
break; } + case SPIRV::OpPredicatedLoadINTEL: + case SPIRV::OpPredicatedStoreINTEL: { + if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_predicated_io)) + report_fatal_error( + "OpPredicated[Load/Store]INTEL instructions require " + "the following SPIR-V extension: SPV_INTEL_predicated_io", + false); + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_predicated_io); + Reqs.addCapability(SPIRV::Capability::PredicatedIOINTEL); + break; + } default: break; diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 6a32dba..2625642 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -385,6 +385,7 @@ defm SPV_INTEL_int4 : ExtensionOperand<123, [EnvOpenCL]>; defm SPV_KHR_float_controls2 : ExtensionOperand<124, [EnvVulkan, EnvOpenCL]>; defm SPV_INTEL_tensor_float32_conversion : ExtensionOperand<125, [EnvOpenCL]>; defm SPV_KHR_bfloat16 : ExtensionOperand<126, [EnvVulkan, EnvOpenCL]>; +defm SPV_INTEL_predicated_io : ExtensionOperand<127, [EnvOpenCL]>; //===----------------------------------------------------------------------===// // Multiclass used to define Capabilities enum values and at the same time @@ -594,6 +595,7 @@ defm SubgroupMatrixMultiplyAccumulateINTEL : CapabilityOperand<6236, 0, 0, [SPV_ defm Subgroup2DBlockIOINTEL : CapabilityOperand<6228, 0, 0, [SPV_INTEL_2d_block_io], []>; defm Subgroup2DBlockTransformINTEL : CapabilityOperand<6229, 0, 0, [SPV_INTEL_2d_block_io], [Subgroup2DBlockIOINTEL]>; defm Subgroup2DBlockTransposeINTEL : CapabilityOperand<6230, 0, 0, [SPV_INTEL_2d_block_io], [Subgroup2DBlockIOINTEL]>; +defm PredicatedIOINTEL : CapabilityOperand<6257, 0, 0, [SPV_INTEL_predicated_io], []>; defm Int4TypeINTEL : CapabilityOperand<5112, 0, 0, [SPV_INTEL_int4], []>; defm Int4CooperativeMatrixINTEL : CapabilityOperand<5114, 0, 0, [SPV_INTEL_int4], [Int4TypeINTEL, CooperativeMatrixKHR]>; defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tensor_float32_conversion], []>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 6bb064a..526420b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -441,7 +441,9 @@ void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) { llvm::SmallSet<StringRef, 4> SeenLanguages; for (size_t I = 0, E = Debug->getNumOperands(); I < E; ++I) { const auto *CU = cast<DICompileUnit>(Debug->getOperand(I)); - StringRef Language = dwarf::LanguageString(CU->getSourceLanguage()); + StringRef Language = + dwarf::LanguageString(CU->getSourceLanguage().getUnversionedName()); + Language.consume_front("DW_LANG_"); if (SeenLanguages.insert(Language).second) Languages.emplace_back(Language.str(), ""); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 1306026..49af78b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1445,6 +1445,49 @@ def : Pat<(v16i8 (wasm_narrow_u (v8i16 V128:$left), (v8i16 V128:$right))), def : Pat<(v8i16 (wasm_narrow_u (v4i32 V128:$left), (v4i32 V128:$right))), (NARROW_U_I16x8 $left, $right)>; +// Recognize a saturating truncation and convert into the corresponding +// narrow_TYPE_s or narrow_TYPE_u instruction. 
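+//
+// For the i8 lanes, for instance, the signed form matches
+//   wasm_narrow_u(and(smin(smax(x, splat(-128)), splat(127)), splat(0xFF)), ...)
+// i.e. a signed clamp to [-128, 127] followed by masking away the bits a
+// sign extension would set, which is what i8x16.narrow_i16x8_s computes on
+// the clamped inputs. Both smin(smax(...)) and smax(smin(...)) operand
+// orders are matched below.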
+multiclass SignedSaturatingTruncate<ValueType input, ValueType output, + Instruction narrow, int minval, + int maxval, int mask> { + def : Pat< + (output (wasm_narrow_u + (and (smin (smax (input V128:$a), (splat_vector (i32 minval))), + (splat_vector (i32 maxval))), (splat_vector (i32 mask))), + (and (smin (smax (input V128:$b), (splat_vector (i32 minval))), + (splat_vector (i32 maxval))), (splat_vector (i32 mask))) + )), + (narrow V128:$a, V128:$b) + >; + + def : Pat< + (output (wasm_narrow_u + (and (smax (smin (input V128:$a), (splat_vector (i32 maxval))), + (splat_vector (i32 minval))), (splat_vector (i32 mask))), + (and (smax (smin (input V128:$b), (splat_vector (i32 maxval))), + (splat_vector (i32 minval))), (splat_vector (i32 mask))) + )), + (narrow V128:$a, V128:$b) + >; +} + +defm : SignedSaturatingTruncate<v8i16, v16i8, NARROW_S_I8x16, -128, 127, 0xFF>; +defm : SignedSaturatingTruncate<v4i32, v8i16, NARROW_S_I16x8, -32768, 32767, 0xFFFF>; + +multiclass UnsignedSaturatingTruncate<ValueType input, ValueType output, + Instruction narrow, int maxval> { + def : Pat< + (output (wasm_narrow_u + (umin (input V128:$a), (splat_vector (i32 maxval))), + (umin (input V128:$b), (splat_vector (i32 maxval))) + )), + (narrow V128:$a, V128:$b) + >; +} + +defm : UnsignedSaturatingTruncate<v8i16, v16i8, NARROW_U_I8x16, 0xFF>; +defm : UnsignedSaturatingTruncate<v4i32, v8i16, NARROW_U_I16x8, 0xFFFF>; + // Bitcasts are nops // Matching bitcast t1 to t1 causes strange errors, so avoid repeating types foreach t1 = AllVecs in diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 931a10b..9580ade 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3659,11 +3659,8 @@ bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { if (VT.isVector()) return false; - // 64-bit shifts on 32-bit targets produce really bad bloated code. - if (VT == MVT::i64 && !Subtarget.is64Bit()) - return false; - - return true; + unsigned MaxWidth = Subtarget.is64Bit() ? 
64 : 32; + return VT.getScalarSizeInBits() <= MaxWidth; } TargetLowering::ShiftLegalizationStrategy diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 805bdb4..bbbac45 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -28,8 +28,12 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ProfDataUtils.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -39,6 +43,10 @@ using namespace PatternMatch; #define DEBUG_TYPE "aggressive-instcombine" +namespace llvm { +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} + STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded"); STATISTIC(NumGuardedRotates, "Number of guarded rotates transformed into funnel shifts"); @@ -599,6 +607,14 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) { auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0)); auto Select = B.CreateSelect(Cmp, B.CreateZExt(ZeroTableElem, XType), Cttz); + // The true branch of select handles the cttz(0) case, which is rare. + if (!ProfcheckDisableMetadataFixes) { + if (Instruction *SelectI = dyn_cast<Instruction>(Select)) + SelectI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(SelectI->getContext()).createUnlikelyBranchWeights()); + } + // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target // it should be handled as: `cttz(x) & (typeSize - 1)`. diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 0accb22..c89af68 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -689,10 +689,14 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape, DISubprogram *DIS = F.getSubprogram(); // If there is no DISubprogram for F, it implies the function is compiled // without debug info. So we also don't generate debug info for the frame. - if (!DIS || !DIS->getUnit() || - !dwarf::isCPlusPlus( - (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()) || - DIS->getUnit()->getEmissionKind() != DICompileUnit::DebugEmissionKind::FullDebug) + + if (!DIS || !DIS->getUnit()) + return; + + if (!dwarf::isCPlusPlus(static_cast<llvm::dwarf::SourceLanguage>( + DIS->getUnit()->getSourceLanguage().getUnversionedName())) || + DIS->getUnit()->getEmissionKind() != + DICompileUnit::DebugEmissionKind::FullDebug) return; assert(Shape.ABI == coro::ABI::Switch && diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index aa030294..127a506 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -60,6 +60,58 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, return true; } +/// Let N = 2 * M. 
+/// Given an N-bit integer representing a pack of two M-bit integers, +/// we can select one of the packed integers by right-shifting by either +/// zero or M (which is the most straightforward to check if M is a power +/// of 2), and then isolating the lower M bits. In this case, we can +/// represent the shift as a select on whether the shr amount is nonzero. +static Value *simplifyShiftSelectingPackedElement(Instruction *I, + const APInt &DemandedMask, + InstCombinerImpl &IC, + unsigned Depth) { + assert(I->getOpcode() == Instruction::LShr && + "Only lshr instruction supported"); + + uint64_t ShlAmt; + Value *Upper, *Lower; + if (!match(I->getOperand(0), + m_OneUse(m_c_DisjointOr( + m_OneUse(m_Shl(m_Value(Upper), m_ConstantInt(ShlAmt))), + m_Value(Lower))))) + return nullptr; + + if (!isPowerOf2_64(ShlAmt)) + return nullptr; + + const uint64_t DemandedBitWidth = DemandedMask.getActiveBits(); + if (DemandedBitWidth > ShlAmt) + return nullptr; + + // Check that upper demanded bits are not lost from lshift. + if (Upper->getType()->getScalarSizeInBits() < ShlAmt + DemandedBitWidth) + return nullptr; + + KnownBits KnownLowerBits = IC.computeKnownBits(Lower, I, Depth); + if (!KnownLowerBits.getMaxValue().isIntN(ShlAmt)) + return nullptr; + + Value *ShrAmt = I->getOperand(1); + KnownBits KnownShrBits = IC.computeKnownBits(ShrAmt, I, Depth); + + // Verify that ShrAmt is either exactly ShlAmt (which is a power of 2) or + // zero. + if (~KnownShrBits.Zero != ShlAmt) + return nullptr; + + Value *ShrAmtZ = + IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()), + ShrAmt->getName() + ".z"); + Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper); + Select->takeName(I); + return Select; +} + /// Returns the bitwidth of the given scalar or pointer type. For vector types, /// returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { @@ -798,9 +850,13 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I, Known >>= ShiftAmt; if (ShiftAmt) Known.Zero.setHighBits(ShiftAmt); // high bits known zero. - } else { - llvm::computeKnownBits(I, Known, Q, Depth); + break; } + if (Value *V = + simplifyShiftSelectingPackedElement(I, DemandedMask, *this, Depth)) + return V; + + llvm::computeKnownBits(I, Known, Q, Depth); break; } case Instruction::AShr: { diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp index 782d5a1..40720ae 100644 --- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp +++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp @@ -69,19 +69,30 @@ enum class TokenMode : unsigned { /// Token ID based on allocated type hash. TypeHash = 2, + + /// Token ID based on allocated type hash, where the top half ID-space is + /// reserved for types that contain pointers and the bottom half for types + /// that do not contain pointers. 
+ TypeHashPointerSplit = 3, }; //===--- Command-line options ---------------------------------------------===// -cl::opt<TokenMode> - ClMode("alloc-token-mode", cl::Hidden, cl::desc("Token assignment mode"), - cl::init(TokenMode::TypeHash), - cl::values(clEnumValN(TokenMode::Increment, "increment", - "Incrementally increasing token ID"), - clEnumValN(TokenMode::Random, "random", - "Statically-assigned random token ID"), - clEnumValN(TokenMode::TypeHash, "typehash", - "Token ID based on allocated type hash"))); +cl::opt<TokenMode> ClMode( + "alloc-token-mode", cl::Hidden, cl::desc("Token assignment mode"), + cl::init(TokenMode::TypeHashPointerSplit), + cl::values( + clEnumValN(TokenMode::Increment, "increment", + "Incrementally increasing token ID"), + clEnumValN(TokenMode::Random, "random", + "Statically-assigned random token ID"), + clEnumValN(TokenMode::TypeHash, "typehash", + "Token ID based on allocated type hash"), + clEnumValN( + TokenMode::TypeHashPointerSplit, "typehashpointersplit", + "Token ID based on allocated type hash, where the top half " + "ID-space is reserved for types that contain pointers and the " + "bottom half for types that do not contain pointers. "))); cl::opt<std::string> ClFuncPrefix("alloc-token-prefix", cl::desc("The allocation function prefix"), @@ -127,16 +138,23 @@ STATISTIC(NumAllocationsInstrumented, "Allocations instrumented"); /// Returns the !alloc_token metadata if available. /// -/// Expected format is: !{<type-name>} +/// Expected format is: !{<type-name>, <contains-pointer>} MDNode *getAllocTokenMetadata(const CallBase &CB) { MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token); if (!Ret) return nullptr; - assert(Ret->getNumOperands() == 1 && "bad !alloc_token"); + assert(Ret->getNumOperands() == 2 && "bad !alloc_token"); assert(isa<MDString>(Ret->getOperand(0))); + assert(isa<ConstantAsMetadata>(Ret->getOperand(1))); return Ret; } +bool containsPointer(const MDNode *MD) { + ConstantAsMetadata *C = cast<ConstantAsMetadata>(MD->getOperand(1)); + auto *CI = cast<ConstantInt>(C->getValue()); + return CI->getValue().getBoolValue(); +} + class ModeBase { public: explicit ModeBase(const IntegerType &TokenTy, uint64_t MaxTokens) @@ -188,12 +206,20 @@ public: using ModeBase::ModeBase; uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) { + const auto [N, H] = getHash(CB, ORE); + return N ? boundedToken(H) : H; + } + +protected: + std::pair<MDNode *, uint64_t> getHash(const CallBase &CB, + OptimizationRemarkEmitter &ORE) { if (MDNode *N = getAllocTokenMetadata(CB)) { MDString *S = cast<MDString>(N->getOperand(0)); - return boundedToken(getStableSipHash(S->getString())); + return {N, getStableSipHash(S->getString())}; } + // Fallback. remarkNoMetadata(CB, ORE); - return ClFallbackToken; + return {nullptr, ClFallbackToken}; } /// Remark that there was no precise type information. @@ -210,6 +236,29 @@ public: } }; +/// Implementation for TokenMode::TypeHashPointerSplit. +class TypeHashPointerSplitMode : public TypeHashMode { +public: + using TypeHashMode::TypeHashMode; + + uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) { + if (MaxTokens == 1) + return 0; + const uint64_t HalfTokens = MaxTokens / 2; + const auto [N, H] = getHash(CB, ORE); + if (!N) { + // Pick the fallback token (ClFallbackToken), which by default is 0, + // meaning it'll fall into the pointer-less bucket. Override by setting + // -alloc-token-fallback if that is the wrong choice. 
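+      // For reference, with MaxTokens = 8 (so HalfTokens = 4) the mapping
+      // below yields:
+      //   hash 13, type without pointers -> 13 % 4     = token 1
+      //   hash 13, type with pointers    -> 13 % 4 + 4 = token 5
+      //   no metadata                    -> ClFallbackToken (0 by default)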
+ return H; + } + uint64_t Hash = H % HalfTokens; // base hash + if (containsPointer(N)) + Hash += HalfTokens; + return Hash; + } +}; + // Apply opt overrides. AllocTokenOptions transformOptionsFromCl(AllocTokenOptions Opts) { if (!Opts.MaxTokens.has_value()) @@ -236,6 +285,9 @@ public: case TokenMode::TypeHash: Mode.emplace<TypeHashMode>(*IntPtrTy, *Options.MaxTokens); break; + case TokenMode::TypeHashPointerSplit: + Mode.emplace<TypeHashPointerSplitMode>(*IntPtrTy, *Options.MaxTokens); + break; } } @@ -275,7 +327,9 @@ private: // Cache for replacement functions. DenseMap<std::pair<LibFunc, uint64_t>, FunctionCallee> TokenAllocFunctions; // Selected mode. - std::variant<IncrementMode, RandomMode, TypeHashMode> Mode; + std::variant<IncrementMode, RandomMode, TypeHashMode, + TypeHashPointerSplitMode> + Mode; }; bool AllocToken::instrumentFunction(Function &F) { diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index b9b5b58..638952a 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -699,6 +699,7 @@ uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) { case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::PtrToInt: + case Instruction::PtrToAddr: case Instruction::IntToPtr: case Instruction::AddrSpaceCast: case Instruction::BitCast: diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index d6b7633..3c1a8ba 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -2066,6 +2066,7 @@ NewGVN::performSymbolicEvaluation(Instruction *I, case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::PtrToInt: + case Instruction::PtrToAddr: case Instruction::IntToPtr: case Instruction::Select: case Instruction::ExtractElement: diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 60e5df0..7ffccf7 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -355,6 +355,8 @@ void SimplifyCFGPass::printPipeline( OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;"; OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-") << "switch-range-to-icmp;"; + OS << (Options.ConvertSwitchToArithmetic ? "" : "no-") + << "switch-to-arithmetic;"; OS << (Options.ConvertSwitchToLookupTable ? "" : "no-") << "switch-to-lookup;"; OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;"; diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index 5a09b73..2923633 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -19,6 +19,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -162,8 +163,8 @@ bool llvm::applyDebugifyMetadata( unsigned NextLine = 1; unsigned NextVar = 1; auto File = DIB.createFile(M.getName(), "/"); - auto CU = DIB.createCompileUnit(dwarf::DW_LANG_C, File, "debugify", - /*isOptimized=*/true, "", 0); + auto CU = DIB.createCompileUnit(DISourceLanguageName(dwarf::DW_LANG_C), File, + "debugify", /*isOptimized=*/true, "", 0); // Visit each instruction. 
for (Function &F : Functions) { diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index 7cc9ff8..0c8d6fa 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -45,12 +45,6 @@ STATISTIC(NumInstrsHoisted, "Number of instructions hoisted into loop preheader"); STATISTIC(NumInstrsDuplicated, "Number of instructions cloned into loop preheader"); -STATISTIC(NumRotated, "Number of loops rotated"); - -static cl::opt<bool> - MultiRotate("loop-rotate-multi", cl::init(false), cl::Hidden, - cl::desc("Allow loop rotation multiple times in order to reach " - "a better latch exit")); // Probability that a rotated loop has zero trip count / is never entered. static constexpr uint32_t ZeroTripCountWeights[] = {1, 127}; @@ -206,50 +200,6 @@ static bool profitableToRotateLoopExitingLatch(Loop *L) { return false; } -// Check that latch exit is deoptimizing (which means - very unlikely to happen) -// and there is another exit from the loop which is non-deoptimizing. -// If we rotate latch to that exit our loop has a better chance of being fully -// canonical. -// -// It can give false positives in some rare cases. -static bool canRotateDeoptimizingLatchExit(Loop *L) { - BasicBlock *Latch = L->getLoopLatch(); - assert(Latch && "need latch"); - BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator()); - // Need normal exiting latch. - if (!BI || !BI->isConditional()) - return false; - - BasicBlock *Exit = BI->getSuccessor(1); - if (L->contains(Exit)) - Exit = BI->getSuccessor(0); - - // Latch exit is non-deoptimizing, no need to rotate. - if (!Exit->getPostdominatingDeoptimizeCall()) - return false; - - SmallVector<BasicBlock *, 4> Exits; - L->getUniqueExitBlocks(Exits); - if (!Exits.empty()) { - // There is at least one non-deoptimizing exit. - // - // Note, that BasicBlock::getPostdominatingDeoptimizeCall is not exact, - // as it can conservatively return false for deoptimizing exits with - // complex enough control flow down to deoptimize call. - // - // That means here we can report success for a case where - // all exits are deoptimizing but one of them has complex enough - // control flow (e.g. with loops). - // - // That should be a very rare case and false positives for this function - // have compile-time effect only. - return any_of(Exits, [](const BasicBlock *BB) { - return !BB->getPostdominatingDeoptimizeCall(); - }); - } - return false; -} - static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI, bool HasConditionalPreHeader, bool SuccsSwapped) { @@ -387,506 +337,489 @@ static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI, /// rotation. LoopRotate should be repeatable and converge to a canonical /// form. This property is satisfied because simplifying the loop latch can only /// happen once across multiple invocations of the LoopRotate pass. -/// -/// If -loop-rotate-multi is enabled we can do multiple rotations in one go -/// so to reach a suitable (non-deoptimizing) exit. bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop has only one block then there is not much to rotate. 
if (L->getBlocks().size() == 1)
     return false;
 
   bool Rotated = false;
-  do {
-    BasicBlock *OrigHeader = L->getHeader();
-    BasicBlock *OrigLatch = L->getLoopLatch();
-
-    BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
-    if (!BI || BI->isUnconditional())
-      return Rotated;
-
-    // If the loop header is not one of the loop exiting blocks then
-    // either this loop is already rotated or it is not
-    // suitable for loop rotation transformations.
-    if (!L->isLoopExiting(OrigHeader))
+  BasicBlock *OrigHeader = L->getHeader();
+  BasicBlock *OrigLatch = L->getLoopLatch();
+
+  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+  if (!BI || BI->isUnconditional())
+    return Rotated;
+
+  // If the loop header is not one of the loop exiting blocks then
+  // either this loop is already rotated or it is not
+  // suitable for loop rotation transformations.
+  if (!L->isLoopExiting(OrigHeader))
+    return Rotated;
+
+  // If the loop latch already contains a branch that leaves the loop then the
+  // loop is already rotated.
+  if (!OrigLatch)
+    return Rotated;
+
+  // Rotate if the loop latch was just simplified, if rotation makes the loop
+  // exit count computable, or if we think it will be profitable.
+  if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
+      !profitableToRotateLoopExitingLatch(L))
+    return Rotated;
+
+  // Check size of original header and reject loop if it is very big or we can't
+  // duplicate blocks inside it.
+  {
+    SmallPtrSet<const Value *, 32> EphValues;
+    CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
+    CodeMetrics Metrics;
+    Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO);
+    if (Metrics.notDuplicatable) {
+      LLVM_DEBUG(
+          dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
+                 << " instructions: ";
+          L->dump());
       return Rotated;
-
-    // If the loop latch already contains a branch that leaves the loop then the
-    // loop is already rotated.
-    if (!OrigLatch)
+    }
+    if (Metrics.Convergence != ConvergenceKind::None) {
+      LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
+                           "instructions: ";
+                 L->dump());
       return Rotated;
-
-    // Rotate if either the loop latch does *not* exit the loop, or if the loop
-    // latch was just simplified. Or if we think it will be profitable.
-    if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
-        !profitableToRotateLoopExitingLatch(L) &&
-        !canRotateDeoptimizingLatchExit(L))
+    }
+    if (!Metrics.NumInsts.isValid()) {
+      LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions"
+                           " with invalid cost: ";
+                 L->dump());
       return Rotated;
-
-    // Check size of original header and reject loop if it is very big or we can't
-    // duplicate blocks inside it.
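For orientation while reading the rewritten body of rotateLoop: the transformation itself is unchanged by this patch; rotation still turns a header-exiting loop into a latch-exiting one guarded by a copy of the header test. A source-level sketch (mine, not code from the patch; the trip-count test stands in for an arbitrary header condition):

    // Before rotation: the header both tests the condition and exits.
    void beforeRotation(int n) {
      int i = 0;
      while (i < n) // header: test and exit
        ++i;        // body and latch
    }

    // After rotation: the test is cloned into the preheader as a guard,
    // and the latch now carries the exit test.
    void afterRotation(int n) {
      int i = 0;
      if (i < n) {       // guard: cloned header test
        do {
          ++i;           // body
        } while (i < n); // latch exits the loop
      }
    }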
- { - SmallPtrSet<const Value *, 32> EphValues; - CodeMetrics::collectEphemeralValues(L, AC, EphValues); - - CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO); - if (Metrics.notDuplicatable) { - LLVM_DEBUG( - dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" - << " instructions: "; - L->dump()); - return Rotated; - } - if (Metrics.Convergence != ConvergenceKind::None) { - LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent " - "instructions: "; - L->dump()); - return Rotated; - } - if (!Metrics.NumInsts.isValid()) { - LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions" - " with invalid cost: "; - L->dump()); - return Rotated; - } - if (Metrics.NumInsts > MaxHeaderSize) { - LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains " - << Metrics.NumInsts - << " instructions, which is more than the threshold (" - << MaxHeaderSize << " instructions): "; - L->dump()); - ++NumNotRotatedDueToHeaderSize; - return Rotated; - } - - // When preparing for LTO, avoid rotating loops with calls that could be - // inlined during the LTO stage. - if (PrepareForLTO && Metrics.NumInlineCandidates > 0) - return Rotated; } - - // Now, this loop is suitable for rotation. - BasicBlock *OrigPreheader = L->getLoopPreheader(); - - // If the loop could not be converted to canonical form, it must have an - // indirectbr in it, just give up. - if (!OrigPreheader || !L->hasDedicatedExits()) + if (Metrics.NumInsts > MaxHeaderSize) { + LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains " + << Metrics.NumInsts + << " instructions, which is more than the threshold (" + << MaxHeaderSize << " instructions): "; + L->dump()); + ++NumNotRotatedDueToHeaderSize; return Rotated; - - // Anything ScalarEvolution may know about this loop or the PHI nodes - // in its header will soon be invalidated. We should also invalidate - // all outer loops because insertion and deletion of blocks that happens - // during the rotation may violate invariants related to backedge taken - // infos in them. - if (SE) { - SE->forgetTopmostLoop(L); - // We may hoist some instructions out of loop. In case if they were cached - // as "loop variant" or "loop computable", these caches must be dropped. - // We also may fold basic blocks, so cached block dispositions also need - // to be dropped. - SE->forgetBlockAndLoopDispositions(); } - LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - - // Find new Loop header. NewHeader is a Header's one and only successor - // that is inside loop. Header's other successor is outside the - // loop. Otherwise loop is not suitable for rotation. - BasicBlock *Exit = BI->getSuccessor(0); - BasicBlock *NewHeader = BI->getSuccessor(1); - bool BISuccsSwapped = L->contains(Exit); - if (BISuccsSwapped) - std::swap(Exit, NewHeader); - assert(NewHeader && "Unable to determine new loop header"); - assert(L->contains(NewHeader) && !L->contains(Exit) && - "Unable to determine loop header and exit blocks"); - - // This code assumes that the new header has exactly one predecessor. - // Remove any single-entry PHI nodes in it. - assert(NewHeader->getSinglePredecessor() && - "New header doesn't have one pred!"); - FoldSingleEntryPHINodes(NewHeader); - - // Begin by walking OrigHeader and populating ValueMap with an entry for - // each Instruction. 
- BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); - ValueToValueMapTy ValueMap, ValueMapMSSA; - - // For PHI nodes, the value available in OldPreHeader is just the - // incoming value from OldPreHeader. - for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) - InsertNewValueIntoMap(ValueMap, PN, - PN->getIncomingValueForBlock(OrigPreheader)); - - // For the rest of the instructions, either hoist to the OrigPreheader if - // possible or create a clone in the OldPreHeader if not. - Instruction *LoopEntryBranch = OrigPreheader->getTerminator(); - - // Record all debug records preceding LoopEntryBranch to avoid - // duplication. - using DbgHash = - std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>; - auto makeHash = [](const DbgVariableRecord *D) -> DbgHash { - auto VarLocOps = D->location_ops(); - return {{hash_combine_range(VarLocOps), D->getVariable()}, - D->getExpression()}; - }; - - SmallDenseSet<DbgHash, 8> DbgRecords; - // Build DbgVariableRecord hashes for DbgVariableRecords attached to the - // terminator. - for (const DbgVariableRecord &DVR : - filterDbgVars(OrigPreheader->getTerminator()->getDbgRecordRange())) - DbgRecords.insert(makeHash(&DVR)); - - // Remember the local noalias scope declarations in the header. After the - // rotation, they must be duplicated and the scope must be cloned. This - // avoids unwanted interaction across iterations. - SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions; - for (Instruction &I : *OrigHeader) - if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) - NoAliasDeclInstructions.push_back(Decl); - - Module *M = OrigHeader->getModule(); - - // Track the next DbgRecord to clone. If we have a sequence where an - // instruction is hoisted instead of being cloned: - // DbgRecord blah - // %foo = add i32 0, 0 - // DbgRecord xyzzy - // %bar = call i32 @foobar() - // where %foo is hoisted, then the DbgRecord "blah" will be seen twice, once - // attached to %foo, then when %foo his hoisted it will "fall down" onto the - // function call: - // DbgRecord blah - // DbgRecord xyzzy - // %bar = call i32 @foobar() - // causing it to appear attached to the call too. - // - // To avoid this, cloneDebugInfoFrom takes an optional "start cloning from - // here" position to account for this behaviour. We point it at any - // DbgRecords on the next instruction, here labelled xyzzy, before we hoist - // %foo. Later, we only only clone DbgRecords from that position (xyzzy) - // onwards, which avoids cloning DbgRecord "blah" multiple times. (Stored as - // a range because it gives us a natural way of testing whether - // there were DbgRecords on the next instruction before we hoisted things). - iterator_range<DbgRecord::self_iterator> NextDbgInsts = - (I != E) ? I->getDbgRecordRange() : DbgMarker::getEmptyDbgRecordRange(); - - while (I != E) { - Instruction *Inst = &*I++; - - // If the instruction's operands are invariant and it doesn't read or write - // memory, then it is safe to hoist. Doing this doesn't change the order of - // execution in the preheader, but does prevent the instruction from - // executing in each iteration of the loop. This means it is safe to hoist - // something that might trap, but isn't safe to hoist something that reads - // memory (without proving that the loop doesn't write). 
-    if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
-        !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
-        !isa<AllocaInst>(Inst) &&
-        // It is not safe to hoist the value of these instructions in
-        // coroutines, as the addresses of otherwise eligible variables (e.g.
-        // thread-local variables and errno) may change if the coroutine is
-        // resumed in a different thread.Therefore, we disable this
-        // optimization for correctness. However, this may block other correct
-        // optimizations.
-        // FIXME: This should be reverted once we have a better model for
-        // memory access in coroutines.
-        !Inst->getFunction()->isPresplitCoroutine()) {
-
-      if (!NextDbgInsts.empty()) {
-        auto DbgValueRange =
-            LoopEntryBranch->cloneDebugInfoFrom(Inst, NextDbgInsts.begin());
-        RemapDbgRecordRange(M, DbgValueRange, ValueMap,
-                            RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-        // Erase anything we've seen before.
-        for (DbgVariableRecord &DVR :
-             make_early_inc_range(filterDbgVars(DbgValueRange)))
-          if (DbgRecords.count(makeHash(&DVR)))
-            DVR.eraseFromParent();
-      }
-
-      NextDbgInsts = I->getDbgRecordRange();
-
-      Inst->moveBefore(LoopEntryBranch->getIterator());
+    // When preparing for LTO, avoid rotating loops with calls that could be
+    // inlined during the LTO stage.
+    if (PrepareForLTO && Metrics.NumInlineCandidates > 0)
+      return Rotated;
+  }
 
-      ++NumInstrsHoisted;
-      continue;
-    }
+  // Now, this loop is suitable for rotation.
+  BasicBlock *OrigPreheader = L->getLoopPreheader();
+
+  // If the loop could not be converted to canonical form, it must have an
+  // indirectbr in it, just give up.
+  if (!OrigPreheader || !L->hasDedicatedExits())
+    return Rotated;
+
+  // Anything ScalarEvolution may know about this loop or the PHI nodes
+  // in its header will soon be invalidated. We should also invalidate
+  // all outer loops because the insertion and deletion of blocks that happens
+  // during the rotation may violate invariants related to backedge-taken
+  // info in them.
+  if (SE) {
+    SE->forgetTopmostLoop(L);
+    // We may hoist some instructions out of the loop. If they were cached
+    // as "loop variant" or "loop computable", these caches must be dropped.
+    // We also may fold basic blocks, so cached block dispositions also need
+    // to be dropped.
+    SE->forgetBlockAndLoopDispositions();
+  }
 
-      // Otherwise, create a duplicate of the instruction.
-      Instruction *C = Inst->clone();
-      if (const DebugLoc &DL = C->getDebugLoc())
-        mapAtomInstance(DL, ValueMap);
+  LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
 
-      C->insertBefore(LoopEntryBranch->getIterator());
+  // Find the new loop header. NewHeader is the Header's one and only successor
+  // that is inside the loop. The Header's other successor is outside the
+  // loop. Otherwise the loop is not suitable for rotation.
+  BasicBlock *Exit = BI->getSuccessor(0);
+  BasicBlock *NewHeader = BI->getSuccessor(1);
+  bool BISuccsSwapped = L->contains(Exit);
+  if (BISuccsSwapped)
+    std::swap(Exit, NewHeader);
+  assert(NewHeader && "Unable to determine new loop header");
+  assert(L->contains(NewHeader) && !L->contains(Exit) &&
+         "Unable to determine loop header and exit blocks");
+
+  // This code assumes that the new header has exactly one predecessor.
+  // Remove any single-entry PHI nodes in it.
+  assert(NewHeader->getSinglePredecessor() &&
+         "New header doesn't have one pred!");
+  FoldSingleEntryPHINodes(NewHeader);
+
+  // Begin by walking OrigHeader and populating ValueMap with an entry for
+  // each Instruction.
+  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+  ValueToValueMapTy ValueMap, ValueMapMSSA;
+
+  // For PHI nodes, the value available in OldPreHeader is just the
+  // incoming value from OldPreHeader.
+  for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    InsertNewValueIntoMap(ValueMap, PN,
+                          PN->getIncomingValueForBlock(OrigPreheader));
+
+  // For the rest of the instructions, either hoist to the OrigPreheader if
+  // possible or create a clone in the OldPreHeader if not.
+  Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
+
+  // Record all debug records preceding LoopEntryBranch to avoid
+  // duplication.
+  using DbgHash =
+      std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>;
+  auto makeHash = [](const DbgVariableRecord *D) -> DbgHash {
+    auto VarLocOps = D->location_ops();
+    return {{hash_combine_range(VarLocOps), D->getVariable()},
+            D->getExpression()};
+  };
 
+  SmallDenseSet<DbgHash, 8> DbgRecords;
+  // Build DbgVariableRecord hashes for DbgVariableRecords attached to the
+  // terminator.
+  for (const DbgVariableRecord &DVR :
+       filterDbgVars(OrigPreheader->getTerminator()->getDbgRecordRange()))
+    DbgRecords.insert(makeHash(&DVR));
+
+  // Remember the local noalias scope declarations in the header. After the
+  // rotation, they must be duplicated and the scope must be cloned. This
+  // avoids unwanted interaction across iterations.
+  SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions;
+  for (Instruction &I : *OrigHeader)
+    if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+      NoAliasDeclInstructions.push_back(Decl);
+
+  Module *M = OrigHeader->getModule();
+
+  // Track the next DbgRecord to clone. If we have a sequence where an
+  // instruction is hoisted instead of being cloned:
+  //    DbgRecord blah
+  //    %foo = add i32 0, 0
+  //    DbgRecord xyzzy
+  //    %bar = call i32 @foobar()
+  // where %foo is hoisted, then the DbgRecord "blah" will be seen twice, once
+  // attached to %foo, then when %foo is hoisted it will "fall down" onto the
+  // function call:
+  //    DbgRecord blah
+  //    DbgRecord xyzzy
+  //    %bar = call i32 @foobar()
+  // causing it to appear attached to the call too.
+  //
+  // To avoid this, cloneDebugInfoFrom takes an optional "start cloning from
+  // here" position to account for this behaviour. We point it at any
+  // DbgRecords on the next instruction, here labelled xyzzy, before we hoist
+  // %foo. Later, we only clone DbgRecords from that position (xyzzy)
+  // onwards, which avoids cloning DbgRecord "blah" multiple times. (Stored as
+  // a range because it gives us a natural way of testing whether
+  // there were DbgRecords on the next instruction before we hoisted things).
+  iterator_range<DbgRecord::self_iterator> NextDbgInsts =
+      (I != E) ? I->getDbgRecordRange() : DbgMarker::getEmptyDbgRecordRange();
+
+  while (I != E) {
+    Instruction *Inst = &*I++;
+
+    // If the instruction's operands are invariant and it doesn't read or write
+    // memory, then it is safe to hoist. Doing this doesn't change the order of
+    // execution in the preheader, but does prevent the instruction from
+    // executing in each iteration of the loop.
This means it is safe to hoist
+    // something that might trap, but isn't safe to hoist something that reads
+    // memory (without proving that the loop doesn't write).
+    if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
+        !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
+        !isa<AllocaInst>(Inst) &&
+        // It is not safe to hoist the value of these instructions in
+        // coroutines, as the addresses of otherwise eligible variables (e.g.
+        // thread-local variables and errno) may change if the coroutine is
+        // resumed in a different thread. Therefore, we disable this
+        // optimization for correctness. However, this may block other correct
+        // optimizations.
+        // FIXME: This should be reverted once we have a better model for
+        // memory access in coroutines.
+        !Inst->getFunction()->isPresplitCoroutine()) {
 
       if (!NextDbgInsts.empty()) {
-        auto DbgValueRange =
-            LoopEntryBranch->cloneDebugInfoFrom(Inst, NextDbgInsts.begin());
-        RemapDbgRecordRange(M, DbgValueRange, ValueMap,
+        auto DbgValueRange =
+            LoopEntryBranch->cloneDebugInfoFrom(Inst, NextDbgInsts.begin());
+        RemapDbgRecordRange(M, DbgValueRange, ValueMap,
                             RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-        NextDbgInsts = DbgMarker::getEmptyDbgRecordRange();
         // Erase anything we've seen before.
         for (DbgVariableRecord &DVR :
              make_early_inc_range(filterDbgVars(DbgValueRange)))
           if (DbgRecords.count(makeHash(&DVR)))
             DVR.eraseFromParent();
       }
 
+      NextDbgInsts = I->getDbgRecordRange();
 
-      Inst->moveBefore(LoopEntryBranch->getIterator());
+      Inst->moveBefore(LoopEntryBranch->getIterator());
 
+      ++NumInstrsHoisted;
+      continue;
+    }
D' U1' { D U2 D'' U1'' } - // D: original llvm.experimental.noalias.scope.decl - // D', U1': duplicate with replaced scopes - // D'', U1'': different duplicate with replaced scopes - // This ensures a safe fallback to 'may_alias' introduced by the rotate, - // as U1'' and U1' scopes will not be compatible wrt to the local restrict - - // Clone the llvm.experimental.noalias.decl again for the NewHeader. - BasicBlock::iterator NewHeaderInsertionPoint = - NewHeader->getFirstNonPHIIt(); - for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) { - LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:" - << *NAD << "\n"); - Instruction *NewNAD = NAD->clone(); - NewNAD->insertBefore(*NewHeader, NewHeaderInsertionPoint); - } + Inst->moveBefore(LoopEntryBranch->getIterator()); - // Scopes must now be duplicated, once for OrigHeader and once for - // OrigPreHeader'. - { - auto &Context = NewHeader->getContext(); - - SmallVector<MDNode *, 8> NoAliasDeclScopes; - for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) - NoAliasDeclScopes.push_back(NAD->getScopeList()); - - LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n"); - cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context, - "h.rot"); - LLVM_DEBUG(OrigHeader->dump()); - - // Keep the compile time impact low by only adapting the inserted block - // of instructions in the OrigPreHeader. This might result in slightly - // more aliasing between these instructions and those that were already - // present, but it will be much faster when the original PreHeader is - // large. - LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n"); - auto *FirstDecl = - cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]); - auto *LastInst = &OrigPreheader->back(); - cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst, - Context, "pre.rot"); - LLVM_DEBUG(OrigPreheader->dump()); - - LLVM_DEBUG(dbgs() << " Updated NewHeader:\n"); - LLVM_DEBUG(NewHeader->dump()); - } + ++NumInstrsHoisted; + continue; } - // Along with all the other instructions, we just cloned OrigHeader's - // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's - // successors by duplicating their incoming values for OrigHeader. - for (BasicBlock *SuccBB : successors(OrigHeader)) - for (BasicBlock::iterator BI = SuccBB->begin(); - PHINode *PN = dyn_cast<PHINode>(BI); ++BI) - PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); - - // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove - // OrigPreHeader's old terminator (the original branch into the loop), and - // remove the corresponding incoming values from the PHI nodes in OrigHeader. - LoopEntryBranch->eraseFromParent(); - OrigPreheader->flushTerminatorDbgRecords(); - - // Update MemorySSA before the rewrite call below changes the 1:1 - // instruction:cloned_instruction_or_value mapping. - if (MSSAU) { - InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader); - MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, - ValueMapMSSA); - } + // Otherwise, create a duplicate of the instruction. 
+    Instruction *C = Inst->clone();
+    if (const DebugLoc &DL = C->getDebugLoc())
+      mapAtomInstance(DL, ValueMap);
 
-    C->insertBefore(LoopEntryBranch->getIterator());
+    C->insertBefore(LoopEntryBranch->getIterator());
 
-    ++NumInstrsDuplicated;
+    ++NumInstrsDuplicated;
 
       if (!NextDbgInsts.empty()) {
-        auto Range = C->cloneDebugInfoFrom(Inst, NextDbgInsts.begin());
-        RemapDbgRecordRange(M, Range, ValueMap,
+      auto Range = C->cloneDebugInfoFrom(Inst, NextDbgInsts.begin());
+      RemapDbgRecordRange(M, Range, ValueMap,
                          RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-        NextDbgInsts = DbgMarker::getEmptyDbgRecordRange();
+      NextDbgInsts = DbgMarker::getEmptyDbgRecordRange();
       // Erase anything we've seen before.
       for (DbgVariableRecord &DVR :
-             make_early_inc_range(filterDbgVars(Range)))
+      for (DbgVariableRecord &DVR : make_early_inc_range(filterDbgVars(Range)))
         if (DbgRecords.count(makeHash(&DVR)))
           DVR.eraseFromParent();
     }
 
-      // Eagerly remap the operands of the instruction.
-      RemapInstruction(C, ValueMap,
-                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-
-      // With the operands remapped, see if the instruction constant folds or is
-      // otherwise simplifyable. This commonly occurs because the entry from PHI
-      // nodes allows icmps and other instructions to fold.
-      Value *V = simplifyInstruction(C, SQ);
-      if (V && LI->replacementPreservesLCSSAForm(C, V)) {
-        // If so, then delete the temporary instruction and stick the folded value
-        // in the map.
-        InsertNewValueIntoMap(ValueMap, Inst, V);
-        if (!C->mayHaveSideEffects()) {
-          C->eraseFromParent();
-          C = nullptr;
+    // Eagerly remap the operands of the instruction.
+    RemapInstruction(C, ValueMap,
+                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+    // With the operands remapped, see if the instruction constant folds or is
+    // otherwise simplifiable. This commonly occurs because the entry from PHI
+    // nodes allows icmps and other instructions to fold.
+    Value *V = simplifyInstruction(C, SQ);
+    if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+      // If so, then delete the temporary instruction and stick the folded value
+      // in the map.
+      InsertNewValueIntoMap(ValueMap, Inst, V);
+      if (!C->mayHaveSideEffects()) {
+        C->eraseFromParent();
+        C = nullptr;
         }
-      } else {
-        InsertNewValueIntoMap(ValueMap, Inst, C);
-      }
-      if (C) {
-        // Otherwise, stick the new instruction into the new block!
-        C->setName(Inst->getName());
-
-        if (auto *II = dyn_cast<AssumeInst>(C))
-          AC->registerAssumption(II);
-        // MemorySSA cares whether the cloned instruction was inserted or not, and
-        // not whether it can be remapped to a simplified value.
-        if (MSSAU)
-          InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
-      }
-    }
+    } else {
+      InsertNewValueIntoMap(ValueMap, Inst, C);
+    }
+    if (C) {
+      // Otherwise, stick the new instruction into the new block!
+      C->setName(Inst->getName());
+
+      if (auto *II = dyn_cast<AssumeInst>(C))
+        AC->registerAssumption(II);
+      // MemorySSA cares whether the cloned instruction was inserted or not, and
+      // not whether it can be remapped to a simplified value.
+      if (MSSAU)
+        InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
+    }
+  }
 
-    if (!NoAliasDeclInstructions.empty()) {
-      // There are noalias scope declarations:
-      // (general):
-      //   Original:    OrigPre              { OrigHeader NewHeader ... Latch }
-      //   after:      (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader }
-      //
-      // with D: llvm.experimental.noalias.scope.decl,
-      //      U: !noalias or !alias.scope depending on D
-      //       ... { D U1 U2 }   can transform into:
-      // (0) : ... { D U1 U2 }        // no relevant rotation for this part
-      // (1) : ... D' { U1 U2 D }     // D is part of OrigHeader
-      // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader
-      //
-      // We now want to transform:
-      // (1) -> : ... D' { D U1 U2 D'' }
-      // (2) -> : ...
D' U1' { D U2 D'' U1'' }
-      // D: original llvm.experimental.noalias.scope.decl
-      // D', U1': duplicate with replaced scopes
-      // D'', U1'': different duplicate with replaced scopes
-      // This ensures a safe fallback to 'may_alias' introduced by the rotate,
-      // as U1'' and U1' scopes will not be compatible wrt to the local restrict
-
-      // Clone the llvm.experimental.noalias.decl again for the NewHeader.
-      BasicBlock::iterator NewHeaderInsertionPoint =
-          NewHeader->getFirstNonPHIIt();
-      for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) {
-        LLVM_DEBUG(dbgs() << "  Cloning llvm.experimental.noalias.scope.decl:"
-                          << *NAD << "\n");
-        Instruction *NewNAD = NAD->clone();
-        NewNAD->insertBefore(*NewHeader, NewHeaderInsertionPoint);
-      }
 
-      // Scopes must now be duplicated, once for OrigHeader and once for
-      // OrigPreHeader'.
-      {
-        auto &Context = NewHeader->getContext();
-
-        SmallVector<MDNode *, 8> NoAliasDeclScopes;
-        for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions)
-          NoAliasDeclScopes.push_back(NAD->getScopeList());
-
-        LLVM_DEBUG(dbgs() << "  Updating OrigHeader scopes\n");
-        cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context,
-                                   "h.rot");
-        LLVM_DEBUG(OrigHeader->dump());
-
-        // Keep the compile time impact low by only adapting the inserted block
-        // of instructions in the OrigPreHeader. This might result in slightly
-        // more aliasing between these instructions and those that were already
-        // present, but it will be much faster when the original PreHeader is
-        // large.
-        LLVM_DEBUG(dbgs() << "  Updating part of OrigPreheader scopes\n");
-        auto *FirstDecl =
-            cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]);
-        auto *LastInst = &OrigPreheader->back();
-        cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst,
-                                   Context, "pre.rot");
-        LLVM_DEBUG(OrigPreheader->dump());
-
-        LLVM_DEBUG(dbgs() << "  Updated NewHeader:\n");
-        LLVM_DEBUG(NewHeader->dump());
-      }
+  if (!NoAliasDeclInstructions.empty()) {
+    // There are noalias scope declarations:
+    // (general):
+    //   Original:    OrigPre              { OrigHeader NewHeader ... Latch }
+    //   after:      (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader }
+    //
+    // with D: llvm.experimental.noalias.scope.decl,
+    //      U: !noalias or !alias.scope depending on D
+    //       ... { D U1 U2 }   can transform into:
+    // (0) : ... { D U1 U2 }        // no relevant rotation for this part
+    // (1) : ... D' { U1 U2 D }     // D is part of OrigHeader
+    // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader
+    //
+    // We now want to transform:
+    // (1) -> : ... D' { D U1 U2 D'' }
+    // (2) -> : ... D' U1' { D U2 D'' U1'' }
+    // D: original llvm.experimental.noalias.scope.decl
+    // D', U1': duplicate with replaced scopes
+    // D'', U1'': different duplicate with replaced scopes
+    // This ensures a safe fallback to 'may_alias' introduced by the rotate,
+    // as U1'' and U1' scopes will not be compatible w.r.t. the local restrict.
+
+    // Clone the llvm.experimental.noalias.scope.decl again for the NewHeader.
+    BasicBlock::iterator NewHeaderInsertionPoint =
+        NewHeader->getFirstNonPHIIt();
+    for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) {
+      LLVM_DEBUG(dbgs() << "  Cloning llvm.experimental.noalias.scope.decl:"
+                        << *NAD << "\n");
+      Instruction *NewNAD = NAD->clone();
+      NewNAD->insertBefore(*NewHeader, NewHeaderInsertionPoint);
+    }
 
-      // Update MSSA too, if available.
-      if (MSSAU)
-        MSSAU->removeEdge(OrigPreheader, Exit);
+    // Scopes must now be duplicated, once for OrigHeader and once for
+    // OrigPreHeader'.
+    {
+      auto &Context = NewHeader->getContext();
+
+      SmallVector<MDNode *, 8> NoAliasDeclScopes;
+      for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions)
+        NoAliasDeclScopes.push_back(NAD->getScopeList());
+
+      LLVM_DEBUG(dbgs() << "  Updating OrigHeader scopes\n");
+      cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context,
+                                 "h.rot");
+      LLVM_DEBUG(OrigHeader->dump());
+
+      // Keep the compile time impact low by only adapting the inserted block
+      // of instructions in the OrigPreHeader. This might result in slightly
+      // more aliasing between these instructions and those that were already
+      // present, but it will be much faster when the original PreHeader is
+      // large.
+      LLVM_DEBUG(dbgs() << "  Updating part of OrigPreheader scopes\n");
+      auto *FirstDecl =
+          cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]);
+      auto *LastInst = &OrigPreheader->back();
+      cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst,
+                                 Context, "pre.rot");
+      LLVM_DEBUG(OrigPreheader->dump());
+
+      LLVM_DEBUG(dbgs() << "  Updated NewHeader:\n");
+      LLVM_DEBUG(NewHeader->dump());
     }
   }
 
-    assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
-    assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+  // Along with all the other instructions, we just cloned OrigHeader's
+  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+  // successors by duplicating their incoming values for OrigHeader.
+  for (BasicBlock *SuccBB : successors(OrigHeader))
+    for (BasicBlock::iterator BI = SuccBB->begin();
+         PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
+
+  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
+  // OrigPreHeader's old terminator (the original branch into the loop), and
+  // remove the corresponding incoming values from the PHI nodes in OrigHeader.
+  LoopEntryBranch->eraseFromParent();
+  OrigPreheader->flushTerminatorDbgRecords();
+
+  // Update MemorySSA before the rewrite call below changes the 1:1
+  // instruction:cloned_instruction_or_value mapping.
+  if (MSSAU) {
+    InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader);
+    MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
+                                        ValueMapMSSA);
+  }
 
-    if (MSSAU && VerifyMemorySSA)
-      MSSAU->getMemorySSA()->verifyMemorySSA();
+  SmallVector<PHINode *, 2> InsertedPHIs;
+  // If there were any uses of instructions in the duplicated block outside the
+  // loop, update them, inserting PHI nodes as required.
+  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, SE,
+                                  &InsertedPHIs);
+
+  // Attach debug records to the new phis if they use a value that
+  // previously had debug metadata attached. This keeps the debug info
+  // up-to-date in the loop body.
+  if (!InsertedPHIs.empty())
+    insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
+
+  // NewHeader is now the header of the loop.
+  L->moveToHeader(NewHeader);
+  assert(L->getHeader() == NewHeader && "Latch block is our new header");
+
+  // Inform DT about changes to the CFG.
+  if (DT) {
+    // The OrigPreheader branches to the NewHeader and Exit now. Then, inform
+    // the DT about the edge to the OrigHeader that got removed.
+    SmallVector<DominatorTree::UpdateType, 3> Updates = {
+        {DominatorTree::Insert, OrigPreheader, Exit},
+        {DominatorTree::Insert, OrigPreheader, NewHeader},
+        {DominatorTree::Delete, OrigPreheader, OrigHeader}};
 
-    ++NumRotated;
+    if (MSSAU) {
+      MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
+      if (VerifyMemorySSA)
+        MSSAU->getMemorySSA()->verifyMemorySSA();
+    } else {
+      DT->applyUpdates(Updates);
+    }
+  }
 
-    Rotated = true;
-    SimplifiedLatch = false;
+  // At this point, we've finished our major CFG changes. As part of cloning
+  // the loop into the preheader we've simplified instructions and the
+  // duplicated conditional branch may now be branching on a constant. If it is
+  // branching on a constant and if that constant means that we enter the loop,
+  // then we fold away the cond branch to an uncond branch. This simplifies the
+  // loop in cases important for nested loops, and it also means we don't have
+  // to split as many edges.
+  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+  const Value *Cond = PHBI->getCondition();
+  const bool HasConditionalPreHeader =
+      !isa<ConstantInt>(Cond) ||
+      PHBI->getSuccessor(cast<ConstantInt>(Cond)->isZero()) != NewHeader;
+
+  updateBranchWeights(*PHBI, *BI, HasConditionalPreHeader, BISuccsSwapped);
 
+  if (HasConditionalPreHeader) {
+    // The conditional branch can't be folded, handle the general case.
+    // Split edges as necessary to preserve LoopSimplify form.
+
+    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+    // thus is not a preheader anymore.
+    // Split the edge to form a real preheader.
+    BasicBlock *NewPH = SplitCriticalEdge(
+        OrigPreheader, NewHeader,
+        CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
+    NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+    // Preserve canonical loop form, which means that 'Exit' should have only
+    // one predecessor. Note that Exit could be an exit block for multiple
+    // nested loops, causing both of the edges to now be critical and need to
+    // be split.
+    SmallVector<BasicBlock *, 4> ExitPreds(predecessors(Exit));
+    bool SplitLatchEdge = false;
+    for (BasicBlock *ExitPred : ExitPreds) {
+      // We only need to split loop exit edges.
+      Loop *PredLoop = LI->getLoopFor(ExitPred);
+      if (!PredLoop || PredLoop->contains(Exit) ||
+          isa<IndirectBrInst>(ExitPred->getTerminator()))
+        continue;
+      SplitLatchEdge |= L->getLoopLatch() == ExitPred;
+      BasicBlock *ExitSplit = SplitCriticalEdge(
+          ExitPred, Exit,
+          CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
+      ExitSplit->moveBefore(Exit);
+    }
+    assert(SplitLatchEdge &&
+           "Despite splitting all preds, failed to split latch exit?");
+    (void)SplitLatchEdge;
+  } else {
+    // We can fold the conditional branch in the preheader; this makes things
+    // simpler. The first step is to remove the extra edge to the Exit block.
+    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+    BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI->getIterator());
+    NewBI->setDebugLoc(PHBI->getDebugLoc());
+    PHBI->eraseFromParent();
+
+    // With our CFG finalized, update DomTree if it is available.
+    if (DT)
+      DT->deleteEdge(OrigPreheader, Exit);
+
+    // Update MSSA too, if available.
+    if (MSSAU)
+      MSSAU->removeEdge(OrigPreheader, Exit);
+  }
 
-    assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
-    assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
 
-    if (MSSAU && VerifyMemorySSA)
-      MSSAU->getMemorySSA()->verifyMemorySSA();
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
 
-    // Now that the CFG and DomTree are in a consistent state again, try to merge
-    // the OrigHeader block into OrigLatch.  This will succeed if they are
-    // connected by an unconditional branch.  This is just a cleanup so the
-    // emitted code isn't too gross in this common case.
-    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
-    BasicBlock *PredBB = OrigHeader->getUniquePredecessor();
-    bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
-    if (DidMerge)
-      RemoveRedundantDbgInstrs(PredBB);
+  // Now that the CFG and DomTree are in a consistent state again, try to merge
+  // the OrigHeader block into OrigLatch. This will succeed if they are
+  // connected by an unconditional branch. This is just a cleanup so the
+  // emitted code isn't too gross in this common case.
+  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+  BasicBlock *PredBB = OrigHeader->getUniquePredecessor();
+  bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
+  if (DidMerge)
+    RemoveRedundantDbgInstrs(PredBB);
 
-    if (MSSAU && VerifyMemorySSA)
-      MSSAU->getMemorySSA()->verifyMemorySSA();
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
 
-    LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
 
-    ++NumRotated;
+  LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
 
-    Rotated = true;
-    SimplifiedLatch = false;
-
-    // Check that new latch is a deoptimizing exit and then repeat rotation if possible.
-    // Deoptimizing latch exit is not a generally typical case, so we just loop over.
-    // TODO: if it becomes a performance bottleneck extend rotation algorithm
-    // to handle multiple rotations in one go.
-  } while (MultiRotate && canRotateDeoptimizingLatchExit(L));
-
   return true;
 }
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b8cfe3a..155fcc5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6642,6 +6642,9 @@ public:
   /// Return true if the replacement is a lookup table.
   bool isLookupTable();
 
+  /// Return true if the replacement is a bit map.
+  bool isBitMap();
+
 private:
   // Depending on the switch, there are different alternatives.
   enum {
@@ -6932,6 +6935,8 @@ Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
 
 bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
 
+bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
+
 static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
   // 40% is the default density for building a jump table in optsize/minsize
   // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
@@ -7097,7 +7102,8 @@ static void reuseTableCompare(
 /// lookup tables.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
                                  DomTreeUpdater *DTU, const DataLayout &DL,
-                                 const TargetTransformInfo &TTI) {
+                                 const TargetTransformInfo &TTI,
+                                 bool ConvertSwitchToLookupTable) {
   assert(SI->getNumCases() > 1 && "Degenerate switch?");
 
   BasicBlock *BB = SI->getParent();
@@ -7262,6 +7268,8 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
 
   bool AnyLookupTables = any_of(
       PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
+  bool AnyBitMaps = any_of(PhiToReplacementMap,
+                           [](auto &KV) { return KV.second.isBitMap(); });
 
   // A few conditions prevent the generation of lookup tables:
   // 1. The target does not support lookup tables.
@@ -7274,6 +7282,12 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
        Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
     return false;
 
+  // In the early optimization pipeline, disable formation of lookup tables,
+  // bit maps and mask checks, as they may inhibit further optimization.
+  if (!ConvertSwitchToLookupTable &&
+      (AnyLookupTables || AnyBitMaps || NeedMask))
+    return false;
+
   Builder.SetInsertPoint(SI);
   // TableIndex is the switch condition - TableIndexOffset if we don't
   // use the condition directly
@@ -7929,14 +7943,13 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
     return requestResimplify();
 
-  // The conversion from switch to lookup tables results in difficult-to-analyze
-  // code and makes pruning branches much harder. This is a problem if the
-  // switch expression itself can still be restricted as a result of inlining or
-  // CVP. Therefore, only apply this transformation during late stages of the
-  // optimisation pipeline.
-  if (Options.ConvertSwitchToLookupTable &&
-      simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
-    return requestResimplify();
+  // The conversion of switches to arithmetic or lookup tables is disabled in
+  // the early optimization pipeline, as it may lose information or make the
+  // resulting code harder to analyze.
+  if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
+    if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
+                             Options.ConvertSwitchToLookupTable))
+      return requestResimplify();
 
   if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
     return requestResimplify();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3a9770c..600ff8a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3141,7 +3141,7 @@ static bool isUsedByLoadStoreAddress(const VPUser *V) {
 
   while (!WorkList.empty()) {
     auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());
-    if (!Cur || !Seen.insert(Cur).second)
+    if (!Cur || !Seen.insert(Cur).second || isa<VPBlendRecipe>(Cur))
       continue;
 
     for (VPUser *U : Cur->users()) {
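A closing note on the SimplifyCFG hunks above: the replacements built by simplifySwitchLookup differ in how analyzable the result stays, which is what the new gating is about. A source-level sketch of the shapes involved (hypothetical functions, my own illustration rather than code from the patch):

    // A dense switch over constant results ...
    int viaSwitch(unsigned X) {
      switch (X) {
      case 0: return 1;
      case 1: return 3;
      case 2: return 5;
      default: return 0;
      }
    }

    // ... can become a lookup table load (the isLookupTable() case) ...
    int viaTable(unsigned X) {
      static const int Table[3] = {1, 3, 5};
      return X < 3 ? Table[X] : 0;
    }

    // ... or, when the results are a linear function of the case value,
    // plain arithmetic that needs no table at all.
    int viaArithmetic(unsigned X) {
      return X < 3 ? int(2 * X + 1) : 0;
    }

This is why the gating distinguishes the two pipelines: with only ConvertSwitchToArithmetic set, simplifySwitchLookup may still fold a switch into arithmetic, which later passes can continue to reason about, while lookup tables, bit maps, and mask checks (AnyLookupTables, AnyBitMaps, NeedMask) stay reserved for the late pipeline, where ConvertSwitchToLookupTable is set. The matching pass parameter is printed as switch-to-arithmetic in the printPipeline hunk further up.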