Diffstat (limited to 'llvm/lib')
156 files changed, 3209 insertions, 830 deletions
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp index f2dc25f..26a5602 100644 --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -75,7 +75,7 @@ AAResults::AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {}  AAResults::AAResults(AAResults &&Arg)      : TLI(Arg.TLI), AAs(std::move(Arg.AAs)), AADeps(std::move(Arg.AADeps)) {} -AAResults::~AAResults() {} +AAResults::~AAResults() = default;  bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,                             FunctionAnalysisManager::Invalidator &Inv) { diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c9baeda..a31f17b 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2424,10 +2424,10 @@ ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp(  // We're trying to construct a SCEV of type `Type' with `Ops' as operands and  // `OldFlags' as can't-wrap behavior.  Infer a more aggressive set of  // can't-overflow flags for the operation if possible. -static SCEV::NoWrapFlags -StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, -                      const ArrayRef<const SCEV *> Ops, -                      SCEV::NoWrapFlags Flags) { +static SCEV::NoWrapFlags StrengthenNoWrapFlags(ScalarEvolution *SE, +                                               SCEVTypes Type, +                                               ArrayRef<const SCEV *> Ops, +                                               SCEV::NoWrapFlags Flags) {    using namespace std::placeholders;    using OBO = OverflowingBinaryOperator; @@ -2540,7 +2540,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,    unsigned Idx = isa<SCEVConstant>(Ops[0]) ? 1 : 0;    // Delay expensive flag strengthening until necessary. -  auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) { +  auto ComputeFlags = [this, OrigFlags](ArrayRef<const SCEV *> Ops) {      return StrengthenNoWrapFlags(this, scAddExpr, Ops, OrigFlags);    }; @@ -3125,7 +3125,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,      return Folded;    // Delay expensive flag strengthening until necessary. -  auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) { +  auto ComputeFlags = [this, OrigFlags](ArrayRef<const SCEV *> Ops) {      return StrengthenNoWrapFlags(this, scMulExpr, Ops, OrigFlags);    }; @@ -15510,6 +15510,78 @@ static const SCEV *getNextSCEVDivisibleByDivisor(const SCEV *Expr,    return SE.getConstant(*ExprVal + DivisorVal - Rem);  } +static bool collectDivisibilityInformation( +    ICmpInst::Predicate Predicate, const SCEV *LHS, const SCEV *RHS, +    DenseMap<const SCEV *, const SCEV *> &DivInfo, +    DenseMap<const SCEV *, APInt> &Multiples, ScalarEvolution &SE) { +  // If we have LHS == 0, check if LHS is computing a property of some unknown +  // SCEV %v which we can rewrite %v to express explicitly. +  if (Predicate != CmpInst::ICMP_EQ || !match(RHS, m_scev_Zero())) +    return false; +  // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to +  // explicitly express that. 
+  const SCEVUnknown *URemLHS = nullptr; +  const SCEV *URemRHS = nullptr; +  if (!match(LHS, m_scev_URem(m_SCEVUnknown(URemLHS), m_SCEV(URemRHS), SE))) +    return false; + +  const SCEV *Multiple = +      SE.getMulExpr(SE.getUDivExpr(URemLHS, URemRHS), URemRHS); +  DivInfo[URemLHS] = Multiple; +  if (auto *C = dyn_cast<SCEVConstant>(URemRHS)) +    Multiples[URemLHS] = C->getAPInt(); +  return true; +} + +// Check if the condition is a divisibility guard (A % B == 0). +static bool isDivisibilityGuard(const SCEV *LHS, const SCEV *RHS, +                                ScalarEvolution &SE) { +  const SCEV *X, *Y; +  return match(LHS, m_scev_URem(m_SCEV(X), m_SCEV(Y), SE)) && RHS->isZero(); +} + +// Apply divisibility by \p Divisor on MinMaxExpr with constant values, +// recursively. This is done by aligning up/down the constant value to the +// Divisor. +static const SCEV *applyDivisibilityOnMinMaxExpr(const SCEV *MinMaxExpr, +                                                 APInt Divisor, +                                                 ScalarEvolution &SE) { +  // Return true if \p Expr is a MinMax SCEV expression with a non-negative +  // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS +  // the non-constant operand and in \p LHS the constant operand. +  auto IsMinMaxSCEVWithNonNegativeConstant = +      [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS, +          const SCEV *&RHS) { +        if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) { +          if (MinMax->getNumOperands() != 2) +            return false; +          if (auto *C = dyn_cast<SCEVConstant>(MinMax->getOperand(0))) { +            if (C->getAPInt().isNegative()) +              return false; +            SCTy = MinMax->getSCEVType(); +            LHS = MinMax->getOperand(0); +            RHS = MinMax->getOperand(1); +            return true; +          } +        } +        return false; +      }; + +  const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr; +  SCEVTypes SCTy; +  if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS, +                                           MinMaxRHS)) +    return MinMaxExpr; +  auto IsMin = isa<SCEVSMinExpr>(MinMaxExpr) || isa<SCEVUMinExpr>(MinMaxExpr); +  assert(SE.isKnownNonNegative(MinMaxLHS) && "Expected non-negative operand!"); +  auto *DivisibleExpr = +      IsMin ? getPreviousSCEVDivisibleByDivisor(MinMaxLHS, Divisor, SE) +            : getNextSCEVDivisibleByDivisor(MinMaxLHS, Divisor, SE); +  SmallVector<const SCEV *> Ops = { +      applyDivisibilityOnMinMaxExpr(MinMaxRHS, Divisor, SE), DivisibleExpr}; +  return SE.getMinMaxExpr(SCTy, Ops); +} +  void ScalarEvolution::LoopGuards::collectFromBlock(      ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,      const BasicBlock *Block, const BasicBlock *Pred, @@ -15520,19 +15592,13 @@ void ScalarEvolution::LoopGuards::collectFromBlock(    SmallVector<const SCEV *> ExprsToRewrite;    auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,                                const SCEV *RHS, -                              DenseMap<const SCEV *, const SCEV *> -                                  &RewriteMap) { +                              DenseMap<const SCEV *, const SCEV *> &RewriteMap, +                              const LoopGuards &DivGuards) {      // WARNING: It is generally unsound to apply any wrap flags to the proposed      // replacement SCEV which isn't directly implied by the structure of that      // SCEV.  
In particular, using contextual facts to imply flags is *NOT*      // legal.  See the scoping rules for flags in the header to understand why. -    // If LHS is a constant, apply information to the other expression. -    if (isa<SCEVConstant>(LHS)) { -      std::swap(LHS, RHS); -      Predicate = CmpInst::getSwappedPredicate(Predicate); -    } -      // Check for a condition of the form (-C1 + X < C2).  InstCombine will      // create this form when combining two checks of the form (X u< C2 + C1) and      // (X >=u C1). @@ -15565,67 +15631,6 @@ void ScalarEvolution::LoopGuards::collectFromBlock(      if (MatchRangeCheckIdiom())        return; -    // Return true if \p Expr is a MinMax SCEV expression with a non-negative -    // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS -    // the non-constant operand and in \p LHS the constant operand. -    auto IsMinMaxSCEVWithNonNegativeConstant = -        [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS, -            const SCEV *&RHS) { -          const APInt *C; -          SCTy = Expr->getSCEVType(); -          return match(Expr, m_scev_MinMax(m_SCEV(LHS), m_SCEV(RHS))) && -                 match(LHS, m_scev_APInt(C)) && C->isNonNegative(); -        }; - -    // Apply divisibilty by \p Divisor on MinMaxExpr with constant values, -    // recursively. This is done by aligning up/down the constant value to the -    // Divisor. -    std::function<const SCEV *(const SCEV *, const SCEV *)> -        ApplyDivisibiltyOnMinMaxExpr = [&](const SCEV *MinMaxExpr, -                                           const SCEV *Divisor) { -          auto *ConstDivisor = dyn_cast<SCEVConstant>(Divisor); -          if (!ConstDivisor) -            return MinMaxExpr; -          const APInt &DivisorVal = ConstDivisor->getAPInt(); - -          const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr; -          SCEVTypes SCTy; -          if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS, -                                                   MinMaxRHS)) -            return MinMaxExpr; -          auto IsMin = -              isa<SCEVSMinExpr>(MinMaxExpr) || isa<SCEVUMinExpr>(MinMaxExpr); -          assert(SE.isKnownNonNegative(MinMaxLHS) && -                 "Expected non-negative operand!"); -          auto *DivisibleExpr = -              IsMin -                  ? getPreviousSCEVDivisibleByDivisor(MinMaxLHS, DivisorVal, SE) -                  : getNextSCEVDivisibleByDivisor(MinMaxLHS, DivisorVal, SE); -          SmallVector<const SCEV *> Ops = { -              ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr}; -          return SE.getMinMaxExpr(SCTy, Ops); -        }; - -    // If we have LHS == 0, check if LHS is computing a property of some unknown -    // SCEV %v which we can rewrite %v to express explicitly. -    if (Predicate == CmpInst::ICMP_EQ && match(RHS, m_scev_Zero())) { -      // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to -      // explicitly express that. -      const SCEVUnknown *URemLHS = nullptr; -      const SCEV *URemRHS = nullptr; -      if (match(LHS, -                m_scev_URem(m_SCEVUnknown(URemLHS), m_SCEV(URemRHS), SE))) { -        auto I = RewriteMap.find(URemLHS); -        const SCEV *RewrittenLHS = I != RewriteMap.end() ? 
I->second : URemLHS; -        RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS); -        const auto *Multiple = -            SE.getMulExpr(SE.getUDivExpr(RewrittenLHS, URemRHS), URemRHS); -        RewriteMap[URemLHS] = Multiple; -        ExprsToRewrite.push_back(URemLHS); -        return; -      } -    } -      // Do not apply information for constants or if RHS contains an AddRec.      if (isa<SCEVConstant>(LHS) || SE.containsAddRecurrence(RHS))        return; @@ -15655,7 +15660,9 @@ void ScalarEvolution::LoopGuards::collectFromBlock(      };      const SCEV *RewrittenLHS = GetMaybeRewritten(LHS); -    const APInt &DividesBy = SE.getConstantMultiple(RewrittenLHS); +    // Apply divisibility information when computing the constant multiple. +    const APInt &DividesBy = +        SE.getConstantMultiple(DivGuards.rewrite(RewrittenLHS));      // Collect rewrites for LHS and its transitive operands based on the      // condition. @@ -15840,8 +15847,11 @@ void ScalarEvolution::LoopGuards::collectFromBlock(    // Now apply the information from the collected conditions to    // Guards.RewriteMap. Conditions are processed in reverse order, so the -  // earliest conditions is processed first. This ensures the SCEVs with the +  // earliest condition is processed first, except guards with divisibility +  // information, which are moved to the back. This ensures the SCEVs with the    // shortest dependency chains are constructed first. +  SmallVector<std::tuple<CmpInst::Predicate, const SCEV *, const SCEV *>> +      GuardsToProcess;    for (auto [Term, EnterIfTrue] : reverse(Terms)) {      SmallVector<Value *, 8> Worklist;      SmallPtrSet<Value *, 8> Visited; @@ -15856,7 +15866,14 @@ void ScalarEvolution::LoopGuards::collectFromBlock(              EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate();          const auto *LHS = SE.getSCEV(Cmp->getOperand(0));          const auto *RHS = SE.getSCEV(Cmp->getOperand(1)); -        CollectCondition(Predicate, LHS, RHS, Guards.RewriteMap); +        // If LHS is a constant, apply information to the other expression. +        // TODO: If LHS is not a constant, check if using CompareSCEVComplexity +        // can improve results. +        if (isa<SCEVConstant>(LHS)) { +          std::swap(LHS, RHS); +          Predicate = CmpInst::getSwappedPredicate(Predicate); +        } +        GuardsToProcess.emplace_back(Predicate, LHS, RHS);          continue;        } @@ -15869,6 +15886,31 @@ void ScalarEvolution::LoopGuards::collectFromBlock(      }    } +  // Process divisibility guards in reverse order to populate DivGuards early. +  DenseMap<const SCEV *, APInt> Multiples; +  LoopGuards DivGuards(SE); +  for (const auto &[Predicate, LHS, RHS] : GuardsToProcess) { +    if (!isDivisibilityGuard(LHS, RHS, SE)) +      continue; +    collectDivisibilityInformation(Predicate, LHS, RHS, DivGuards.RewriteMap, +                                   Multiples, SE); +  } + +  for (const auto &[Predicate, LHS, RHS] : GuardsToProcess) +    CollectCondition(Predicate, LHS, RHS, Guards.RewriteMap, DivGuards); + +  // Apply divisibility information last. This ensures it is applied to the +  // outermost expression after other rewrites for the given value. 
+  for (const auto &[K, Divisor] : Multiples) { +    const SCEV *DivisorSCEV = SE.getConstant(Divisor); +    Guards.RewriteMap[K] = +        SE.getMulExpr(SE.getUDivExpr(applyDivisibilityOnMinMaxExpr( +                                         Guards.rewrite(K), Divisor, SE), +                                     DivisorSCEV), +                      DivisorSCEV); +    ExprsToRewrite.push_back(K); +  } +    // Let the rewriter preserve NUW/NSW flags if the unsigned/signed ranges of    // the replacement expressions are contained in the ranges of the replaced    // expressions. diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index c47a1c1..0426ac7 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1353,9 +1353,9 @@ TargetTransformInfo::getInlineCallPenalty(const Function *F,    return TTIImpl->getInlineCallPenalty(F, Call, DefaultCallPenalty);  } -bool TargetTransformInfo::areTypesABICompatible( -    const Function *Caller, const Function *Callee, -    const ArrayRef<Type *> &Types) const { +bool TargetTransformInfo::areTypesABICompatible(const Function *Caller, +                                                const Function *Callee, +                                                ArrayRef<Type *> Types) const {    return TTIImpl->areTypesABICompatible(Caller, Callee, Types);  } diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp index 55fa2df..a6c7e6a 100644 --- a/llvm/lib/BinaryFormat/Dwarf.cpp +++ b/llvm/lib/BinaryFormat/Dwarf.cpp @@ -1076,10 +1076,3 @@ StringRef (*const llvm::dwarf::EnumTraits<LineNumberOps>::StringFn)(unsigned) =      LNStandardString;  StringRef (*const llvm::dwarf::EnumTraits<Index>::StringFn)(unsigned) =      IndexString; - -constexpr char llvm::dwarf::EnumTraits<Attribute>::Type[]; -constexpr char llvm::dwarf::EnumTraits<Form>::Type[]; -constexpr char llvm::dwarf::EnumTraits<Index>::Type[]; -constexpr char llvm::dwarf::EnumTraits<Tag>::Type[]; -constexpr char llvm::dwarf::EnumTraits<LineNumberOps>::Type[]; -constexpr char llvm::dwarf::EnumTraits<LocationAtom>::Type[]; diff --git a/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp b/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp index 3de3dcc..80b421d 100644 --- a/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp +++ b/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp @@ -209,12 +209,12 @@ template <> struct CustomMappingTraits<MapDocNode> {    static void inputOne(IO &IO, StringRef Key, MapDocNode &M) {      ScalarDocNode KeyObj = M.getDocument()->getNode();      KeyObj.fromString(Key, ""); -    IO.mapRequired(Key.str().c_str(), M.getMap()[KeyObj]); +    IO.mapRequired(Key, M.getMap()[KeyObj]);    }    static void output(IO &IO, MapDocNode &M) {      for (auto I : M.getMap()) { -      IO.mapRequired(I.first.toString().c_str(), I.second); +      IO.mapRequired(I.first.toString(), I.second);      }    }  }; diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp index 571c5b3..003c850 100644 --- a/llvm/lib/CAS/ActionCaches.cpp +++ b/llvm/lib/CAS/ActionCaches.cpp @@ -13,7 +13,11 @@  #include "BuiltinCAS.h"  #include "llvm/ADT/TrieRawHashMap.h"  #include "llvm/CAS/ActionCache.h" +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/Support/BLAKE3.h" +#include "llvm/Support/Errc.h"  #define DEBUG_TYPE "cas-action-caches" @@ -47,12 +51,54 @@ public:    Expected<std::optional<CASID>> 
getImpl(ArrayRef<uint8_t> ActionKey,                                           bool CanBeDistributed) const final; +  Error validate() const final { +    return createStringError("InMemoryActionCache doesn't support validate()"); +  } +  private:    using DataT = CacheEntry<sizeof(HashType)>;    using InMemoryCacheT = ThreadSafeTrieRawHashMap<DataT, sizeof(HashType)>;    InMemoryCacheT Cache;  }; + +/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB. +class OnDiskActionCache final : public ActionCache { +public: +  Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result, +                bool CanBeDistributed) final; +  Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey, +                                         bool CanBeDistributed) const final; + +  static Expected<std::unique_ptr<OnDiskActionCache>> create(StringRef Path); + +  Error validate() const final; + +private: +  static StringRef getHashName() { return "BLAKE3"; } + +  OnDiskActionCache(std::unique_ptr<ondisk::OnDiskKeyValueDB> DB); + +  std::unique_ptr<ondisk::OnDiskKeyValueDB> DB; +  using DataT = CacheEntry<sizeof(HashType)>; +}; + +/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide +/// access to its ActionCache. +class UnifiedOnDiskActionCache final : public ActionCache { +public: +  Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result, +                bool CanBeDistributed) final; +  Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey, +                                         bool CanBeDistributed) const final; + +  UnifiedOnDiskActionCache(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB); + +  Error validate() const final; + +private: +  std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB; +};  } // end namespace  static Error createResultCachePoisonedError(ArrayRef<uint8_t> KeyHash, @@ -99,3 +145,123 @@ std::unique_ptr<ActionCache> createInMemoryActionCache() {  }  } // namespace llvm::cas + +OnDiskActionCache::OnDiskActionCache( +    std::unique_ptr<ondisk::OnDiskKeyValueDB> DB) +    : ActionCache(builtin::BuiltinCASContext::getDefaultContext()), +      DB(std::move(DB)) {} + +Expected<std::unique_ptr<OnDiskActionCache>> +OnDiskActionCache::create(StringRef AbsPath) { +  std::unique_ptr<ondisk::OnDiskKeyValueDB> DB; +  if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(), +                                               sizeof(HashType), getHashName(), +                                               sizeof(DataT)) +                    .moveInto(DB)) +    return std::move(E); +  return std::unique_ptr<OnDiskActionCache>( +      new OnDiskActionCache(std::move(DB))); +} + +Expected<std::optional<CASID>> +OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key, +                           bool /*CanBeDistributed*/) const { +  std::optional<ArrayRef<char>> Val; +  if (Error E = DB->get(Key).moveInto(Val)) +    return std::move(E); +  if (!Val) +    return std::nullopt; +  return CASID::create(&getContext(), toStringRef(*Val)); +} + +Error OnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result, +                                 bool /*CanBeDistributed*/) { +  auto ResultHash = Result.getHash(); +  ArrayRef Expected((const char *)ResultHash.data(), ResultHash.size()); +  ArrayRef<char> Observed; +  if (Error E = DB->put(Key, Expected).moveInto(Observed)) +    return E; + +  if (Expected == Observed) +    return Error::success(); + +  return createResultCachePoisonedError( +      Key, getContext(), 
Result, +      ArrayRef((const uint8_t *)Observed.data(), Observed.size())); +} + +Error OnDiskActionCache::validate() const { +  // FIXME: without the matching CAS there is nothing we can check about the +  // cached values. The hash size is already validated by the DB validator. +  return DB->validate(nullptr); +} + +UnifiedOnDiskActionCache::UnifiedOnDiskActionCache( +    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) +    : ActionCache(builtin::BuiltinCASContext::getDefaultContext()), +      UniDB(std::move(UniDB)) {} + +Expected<std::optional<CASID>> +UnifiedOnDiskActionCache::getImpl(ArrayRef<uint8_t> Key, +                                  bool /*CanBeDistributed*/) const { +  std::optional<ArrayRef<char>> Val; +  if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val)) +    return std::move(E); +  if (!Val) +    return std::nullopt; +  auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val); +  return CASID::create(&getContext(), +                       toStringRef(UniDB->getGraphDB().getDigest(ID))); +} + +Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, +                                        const CASID &Result, +                                        bool /*CanBeDistributed*/) { +  auto Expected = UniDB->getGraphDB().getReference(Result.getHash()); +  if (LLVM_UNLIKELY(!Expected)) +    return Expected.takeError(); + +  auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected); +  std::optional<ArrayRef<char>> Observed; +  if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed)) +    return E; + +  auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed); +  if (*Expected == ObservedID) +    return Error::success(); + +  return createResultCachePoisonedError( +      Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID)); +} + +Error UnifiedOnDiskActionCache::validate() const { +  auto ValidateRef = [](FileOffset Offset, ArrayRef<char> Value) -> Error { +    auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value); +    auto formatError = [&](Twine Msg) { +      return createStringError( +          llvm::errc::illegal_byte_sequence, +          "bad record at 0x" + +              utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + +              Msg.str()); +    }; +    if (ID.getOpaqueData() == 0) +      return formatError("zero is not a valid ref"); +    return Error::success(); +  }; +  return UniDB->getKeyValueDB().validate(ValidateRef); +} + +Expected<std::unique_ptr<ActionCache>> +cas::createOnDiskActionCache(StringRef Path) { +#if LLVM_ENABLE_ONDISK_CAS +  return OnDiskActionCache::create(Path); +#else +  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); +#endif +} + +std::unique_ptr<ActionCache> +cas::builtin::createActionCacheFromUnifiedOnDiskCache( +    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) { +  return std::make_unique<UnifiedOnDiskActionCache>(std::move(UniDB)); +} diff --git a/llvm/lib/CAS/BuiltinCAS.cpp b/llvm/lib/CAS/BuiltinCAS.cpp index 73646ad..e9bc6d8 100644 --- a/llvm/lib/CAS/BuiltinCAS.cpp +++ b/llvm/lib/CAS/BuiltinCAS.cpp @@ -9,6 +9,7 @@  #include "BuiltinCAS.h"  #include "llvm/ADT/StringExtras.h"  #include "llvm/CAS/BuiltinObjectHasher.h" +#include "llvm/CAS/UnifiedOnDiskCache.h"  #include "llvm/Support/Process.h"  using namespace llvm; @@ -68,7 +69,7 @@ Expected<ObjectRef> BuiltinCAS::store(ArrayRef<ObjectRef> Refs,                     Refs, Data);  } -Error BuiltinCAS::validate(const CASID &ID) { 
+Error BuiltinCAS::validateObject(const CASID &ID) {    auto Ref = getReference(ID);    if (!Ref)      return createUnknownObjectError(ID); @@ -92,3 +93,14 @@ Error BuiltinCAS::validate(const CASID &ID) {    return Error::success();  } + +Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>> +cas::builtin::createBuiltinUnifiedOnDiskCache(StringRef Path) { +#if LLVM_ENABLE_ONDISK_CAS +  return ondisk::UnifiedOnDiskCache::open(Path, /*SizeLimit=*/std::nullopt, +                                          BuiltinCASContext::getHashName(), +                                          sizeof(HashType)); +#else +  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); +#endif +} diff --git a/llvm/lib/CAS/BuiltinCAS.h b/llvm/lib/CAS/BuiltinCAS.h index 3b5374d..4d2de66 100644 --- a/llvm/lib/CAS/BuiltinCAS.h +++ b/llvm/lib/CAS/BuiltinCAS.h @@ -1,4 +1,4 @@ -//===- BuiltinCAS.h ---------------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===//  //  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.  // See https://llvm.org/LICENSE.txt for license information. @@ -15,6 +15,9 @@  namespace llvm::cas {  class ActionCache; +namespace ondisk { +class UnifiedOnDiskCache; +} // namespace ondisk  namespace builtin {  /// Common base class for builtin CAS implementations using the same CASContext. @@ -65,9 +68,27 @@ public:                               "corrupt storage");    } -  Error validate(const CASID &ID) final; +  Error validateObject(const CASID &ID) final;  }; +/// Create a \p UnifiedOnDiskCache instance that uses \p BLAKE3 hashing. +Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>> +createBuiltinUnifiedOnDiskCache(StringRef Path); + +/// \param UniDB A \p UnifiedOnDiskCache instance from \p +/// createBuiltinUnifiedOnDiskCache. +std::unique_ptr<ObjectStore> createObjectStoreFromUnifiedOnDiskCache( +    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB); + +/// \param UniDB A \p UnifiedOnDiskCache instance from \p +/// createBuiltinUnifiedOnDiskCache. +std::unique_ptr<ActionCache> createActionCacheFromUnifiedOnDiskCache( +    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB); + +// FIXME: Proxy not portable. Maybe also error-prone? +constexpr StringLiteral DefaultDirProxy = "/^llvm::cas::builtin::default"; +constexpr StringLiteral DefaultDir = "llvm.cas.builtin.default"; +  } // end namespace builtin  } // end namespace llvm::cas diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp new file mode 100644 index 0000000..f3f6fa0 --- /dev/null +++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/BuiltinUnifiedCASDatabases.h" +#include "BuiltinCAS.h" +#include "llvm/CAS/ActionCache.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" + +using namespace llvm; +using namespace llvm::cas; + +Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>> +cas::createOnDiskUnifiedCASDatabases(StringRef Path) { +  std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB; +  if (Error E = builtin::createBuiltinUnifiedOnDiskCache(Path).moveInto(UniDB)) +    return std::move(E); +  auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB); +  auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB)); +  return std::make_pair(std::move(CAS), std::move(AC)); +} + +Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded( +    StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation, +    std::optional<StringRef> LLVMCasBinary) { +#if LLVM_ENABLE_ONDISK_CAS +  return ondisk::UnifiedOnDiskCache::validateIfNeeded( +      Path, builtin::BuiltinCASContext::getHashName(), +      sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation, +      LLVMCasBinary); +#else +  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); +#endif +} diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index a2f8c49..aad77dc 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -2,15 +2,18 @@ add_llvm_component_library(LLVMCAS    ActionCache.cpp    ActionCaches.cpp    BuiltinCAS.cpp +  BuiltinUnifiedCASDatabases.cpp    DatabaseFile.cpp    InMemoryCAS.cpp    MappedFileRegionArena.cpp    ObjectStore.cpp +  OnDiskCAS.cpp    OnDiskCommon.cpp    OnDiskDataAllocator.cpp    OnDiskGraphDB.cpp    OnDiskKeyValueDB.cpp    OnDiskTrieRawHashMap.cpp +  UnifiedOnDiskCache.cpp    ADDITIONAL_HEADER_DIRS    ${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp index c63ee70d..2d4eedd 100644 --- a/llvm/lib/CAS/InMemoryCAS.cpp +++ b/llvm/lib/CAS/InMemoryCAS.cpp @@ -233,6 +233,12 @@ public:      return cast<InMemoryObject>(asInMemoryObject(Node)).getData();    } +  void print(raw_ostream &OS) const final; + +  Error validate(bool CheckHash) const final { +    return createStringError("InMemoryCAS doesn't support validate()"); +  } +    InMemoryCAS() = default;  private: @@ -271,6 +277,8 @@ ArrayRef<const InMemoryObject *> InMemoryObject::getRefs() const {    return cast<InMemoryInlineObject>(this)->getRefsImpl();  } +void InMemoryCAS::print(raw_ostream &OS) const {} +  Expected<ObjectRef>  InMemoryCAS::storeFromNullTerminatedRegion(ArrayRef<uint8_t> ComputedHash,                                             sys::fs::mapped_file_region Map) { diff --git a/llvm/lib/CAS/ObjectStore.cpp b/llvm/lib/CAS/ObjectStore.cpp index e0be50b..3110577 100644 --- a/llvm/lib/CAS/ObjectStore.cpp +++ b/llvm/lib/CAS/ObjectStore.cpp @@ -1,4 +1,4 @@ -//===- ObjectStore.cpp ------------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===//  //  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.  // See https://llvm.org/LICENSE.txt for license information. 
@@ -12,7 +12,7 @@  #include "llvm/Support/Errc.h"  #include "llvm/Support/FileSystem.h"  #include "llvm/Support/MemoryBuffer.h" -#include <optional> +#include <deque>  using namespace llvm;  using namespace llvm::cas; @@ -21,6 +21,7 @@ void CASContext::anchor() {}  void ObjectStore::anchor() {}  LLVM_DUMP_METHOD void CASID::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); }  LLVM_DUMP_METHOD void ObjectRef::dump() const { print(dbgs()); }  LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(dbgs()); } @@ -141,7 +142,7 @@ Error ObjectStore::validateTree(ObjectRef Root) {      auto [I, Inserted] = ValidatedRefs.insert(Ref);      if (!Inserted)        continue; // already validated. -    if (Error E = validate(getID(Ref))) +    if (Error E = validateObject(getID(Ref)))        return E;      Expected<ObjectHandle> Obj = load(Ref);      if (!Obj) @@ -155,6 +156,92 @@ Error ObjectStore::validateTree(ObjectRef Root) {    return Error::success();  } +Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream, +                                              ObjectRef Other) { +  // Copy the full CAS tree from upstream with depth-first ordering to ensure +  // all the child nodes are available in the downstream CAS before inserting +  // the current object. This uses a similar algorithm to +  // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema +  // so it can be used to import from any other ObjectStore regardless of the +  // CAS schema. + +  // There is no work to do if importing from self. +  if (this == &Upstream) +    return Other; + +  /// Keeps track of the state of visitation for the current node and all of +  /// its parents. An UpstreamCursor holds information only from the upstream +  /// CAS. +  struct UpstreamCursor { +    ObjectRef Ref; +    ObjectHandle Node; +    size_t RefsCount; +    std::deque<ObjectRef> Refs; +  }; +  SmallVector<UpstreamCursor, 16> CursorStack; +  /// PrimaryRefStack holds the ObjectRefs of the current CAS, for nodes either +  /// just stored in the CAS or already existing in the current CAS. +  SmallVector<ObjectRef, 128> PrimaryRefStack; +  /// A map from upstream ObjectRef to current ObjectRef. +  llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects; + +  auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) { +    unsigned NumRefs = Upstream.getNumRefs(Node); +    std::deque<ObjectRef> Refs; +    for (unsigned I = 0; I < NumRefs; ++I) +      Refs.push_back(Upstream.readRef(Node, I)); + +    CursorStack.push_back({Ref, Node, NumRefs, std::move(Refs)}); +  }; + +  auto UpstreamHandle = Upstream.load(Other); +  if (!UpstreamHandle) +    return UpstreamHandle.takeError(); +  enqueueNode(Other, *UpstreamHandle); + +  while (!CursorStack.empty()) { +    UpstreamCursor &Cur = CursorStack.back(); +    if (Cur.Refs.empty()) { +      // Copy the node data into the primary store. +      // The bottom of \p PrimaryRefStack contains the ObjectRef for the +      // current node. +      assert(PrimaryRefStack.size() >= Cur.RefsCount); +      auto Refs = ArrayRef(PrimaryRefStack) +                      .slice(PrimaryRefStack.size() - Cur.RefsCount); +      auto NewNode = store(Refs, Upstream.getData(Cur.Node)); +      if (!NewNode) +        return NewNode.takeError(); + +      // Remove the current node and its IDs from the stack. 
+      PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.RefsCount); +      CursorStack.pop_back(); + +      PrimaryRefStack.push_back(*NewNode); +      CreatedObjects.try_emplace(Cur.Ref, *NewNode); +      continue; +    } + +    // Check if the node exists already. +    auto CurrentID = Cur.Refs.front(); +    Cur.Refs.pop_front(); +    auto Ref = CreatedObjects.find(CurrentID); +    if (Ref != CreatedObjects.end()) { +      // If exists already, just need to enqueue the primary node. +      PrimaryRefStack.push_back(Ref->second); +      continue; +    } + +    // Load child. +    auto PrimaryID = Upstream.load(CurrentID); +    if (LLVM_UNLIKELY(!PrimaryID)) +      return PrimaryID.takeError(); + +    enqueueNode(CurrentID, *PrimaryID); +  } + +  assert(PrimaryRefStack.size() == 1); +  return PrimaryRefStack.front(); +} +  std::unique_ptr<MemoryBuffer>  ObjectProxy::getMemoryBuffer(StringRef Name,                               bool RequiresNullTerminator) const { diff --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp new file mode 100644 index 0000000..7d29f44 --- /dev/null +++ b/llvm/lib/CAS/OnDiskCAS.cpp @@ -0,0 +1,211 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BuiltinCAS.h" +#include "llvm/CAS/BuiltinCASContext.h" +#include "llvm/CAS/BuiltinObjectHasher.h" +#include "llvm/CAS/OnDiskGraphDB.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Error.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::builtin; + +namespace { + +class OnDiskCAS : public BuiltinCAS { +public: +  Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash, +                                ArrayRef<ObjectRef> Refs, +                                ArrayRef<char> Data) final; + +  Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) final; + +  CASID getID(ObjectRef Ref) const final; + +  std::optional<ObjectRef> getReference(const CASID &ID) const final; + +  Expected<bool> isMaterialized(ObjectRef Ref) const final; + +  ArrayRef<char> getDataConst(ObjectHandle Node) const final; + +  void print(raw_ostream &OS) const final; +  Error validate(bool CheckHash) const final; + +  static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path); + +  OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) +      : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {} + +private: +  ObjectHandle convertHandle(ondisk::ObjectHandle Node) const { +    return makeObjectHandle(Node.getOpaqueData()); +  } + +  ondisk::ObjectHandle convertHandle(ObjectHandle Node) const { +    return ondisk::ObjectHandle(Node.getInternalRef(*this)); +  } + +  ObjectRef convertRef(ondisk::ObjectID Ref) const { +    return makeObjectRef(Ref.getOpaqueData()); +  } + +  ondisk::ObjectID convertRef(ObjectRef Ref) const { +    return ondisk::ObjectID::fromOpaqueData(Ref.getInternalRef(*this)); +  } + +  size_t getNumRefs(ObjectHandle Node) const final { +    auto RefsRange = DB->getObjectRefs(convertHandle(Node)); +    return std::distance(RefsRange.begin(), RefsRange.end()); +  } + +  ObjectRef readRef(ObjectHandle Node, size_t I) const final { +    auto RefsRange = 
DB->getObjectRefs(convertHandle(Node)); +    return convertRef(RefsRange.begin()[I]); +  } + +  Error forEachRef(ObjectHandle Node, +                   function_ref<Error(ObjectRef)> Callback) const final; + +  Error setSizeLimit(std::optional<uint64_t> SizeLimit) final; +  Expected<std::optional<uint64_t>> getStorageSize() const final; +  Error pruneStorageData() final; + +  OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB) +      : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {} + +  std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB; +  std::shared_ptr<ondisk::UnifiedOnDiskCache> UnifiedDB; +  ondisk::OnDiskGraphDB *DB; +}; + +} // end anonymous namespace + +void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); } +Error OnDiskCAS::validate(bool CheckHash) const { +  auto Hasher = [](ArrayRef<ArrayRef<uint8_t>> Refs, ArrayRef<char> Data, +                   SmallVectorImpl<uint8_t> &Result) { +    auto Hash = BuiltinObjectHasher<llvm::cas::builtin::HasherT>::hashObject( +        Refs, Data); +    Result.assign(Hash.begin(), Hash.end()); +  }; + +  if (auto E = DB->validate(CheckHash, Hasher)) +    return E; + +  return Error::success(); +} + +CASID OnDiskCAS::getID(ObjectRef Ref) const { +  ArrayRef<uint8_t> Hash = DB->getDigest(convertRef(Ref)); +  return CASID::create(&getContext(), toStringRef(Hash)); +} + +std::optional<ObjectRef> OnDiskCAS::getReference(const CASID &ID) const { +  std::optional<ondisk::ObjectID> ObjID = +      DB->getExistingReference(ID.getHash()); +  if (!ObjID) +    return std::nullopt; +  return convertRef(*ObjID); +} + +Expected<bool> OnDiskCAS::isMaterialized(ObjectRef ExternalRef) const { +  return DB->isMaterialized(convertRef(ExternalRef)); +} + +ArrayRef<char> OnDiskCAS::getDataConst(ObjectHandle Node) const { +  return DB->getObjectData(convertHandle(Node)); +} + +Expected<std::optional<ObjectHandle>> +OnDiskCAS::loadIfExists(ObjectRef ExternalRef) { +  Expected<std::optional<ondisk::ObjectHandle>> ObjHnd = +      DB->load(convertRef(ExternalRef)); +  if (!ObjHnd) +    return ObjHnd.takeError(); +  if (!*ObjHnd) +    return std::nullopt; +  return convertHandle(**ObjHnd); +} + +Expected<ObjectRef> OnDiskCAS::storeImpl(ArrayRef<uint8_t> ComputedHash, +                                         ArrayRef<ObjectRef> Refs, +                                         ArrayRef<char> Data) { +  SmallVector<ondisk::ObjectID, 64> IDs; +  IDs.reserve(Refs.size()); +  for (ObjectRef Ref : Refs) { +    IDs.push_back(convertRef(Ref)); +  } + +  auto StoredID = DB->getReference(ComputedHash); +  if (LLVM_UNLIKELY(!StoredID)) +    return StoredID.takeError(); +  if (Error E = DB->store(*StoredID, IDs, Data)) +    return std::move(E); +  return convertRef(*StoredID); +} + +Error OnDiskCAS::forEachRef(ObjectHandle Node, +                            function_ref<Error(ObjectRef)> Callback) const { +  auto RefsRange = DB->getObjectRefs(convertHandle(Node)); +  for (ondisk::ObjectID Ref : RefsRange) { +    if (Error E = Callback(convertRef(Ref))) +      return E; +  } +  return Error::success(); +} + +Error OnDiskCAS::setSizeLimit(std::optional<uint64_t> SizeLimit) { +  UnifiedDB->setSizeLimit(SizeLimit); +  return Error::success(); +} + +Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const { +  return UnifiedDB->getStorageSize(); +} + +Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); } + +Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) { +  Expected<std::unique_ptr<ondisk::OnDiskGraphDB>> DB = +      
ondisk::OnDiskGraphDB::open(AbsPath, BuiltinCASContext::getHashName(), +                                  sizeof(HashType)); +  if (!DB) +    return DB.takeError(); +  return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(*DB))); +} + +bool cas::isOnDiskCASEnabled() { +#if LLVM_ENABLE_ONDISK_CAS +  return true; +#else +  return false; +#endif +} + +Expected<std::unique_ptr<ObjectStore>> cas::createOnDiskCAS(const Twine &Path) { +#if LLVM_ENABLE_ONDISK_CAS +  // FIXME: An absolute path isn't really good enough. Should open a directory +  // and use openat() for files underneath. +  SmallString<256> AbsPath; +  Path.toVector(AbsPath); +  sys::fs::make_absolute(AbsPath); + +  return OnDiskCAS::open(AbsPath); +#else +  return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled"); +#endif /* LLVM_ENABLE_ONDISK_CAS */ +} + +std::unique_ptr<ObjectStore> +cas::builtin::createObjectStoreFromUnifiedOnDiskCache( +    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) { +  return std::make_unique<OnDiskCAS>(std::move(UniDB)); +} diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 64cbe9d..245b6fb 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -893,6 +893,10 @@ int64_t DataRecordHandle::getDataRelOffset() const {  }  Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { +  if (UpstreamDB) { +    if (auto E = UpstreamDB->validate(Deep, Hasher)) +      return E; +  }    return Index.validate([&](FileOffset Offset,                              OnDiskTrieRawHashMap::ConstValueProxy Record)                              -> Error { @@ -1202,11 +1206,8 @@ OnDiskGraphDB::load(ObjectID ExternalRef) {      return I.takeError();    TrieRecord::Data Object = I->Ref.load(); -  if (Object.SK == TrieRecord::StorageKind::Unknown) { -    if (!UpstreamDB) -      return std::nullopt; +  if (Object.SK == TrieRecord::StorageKind::Unknown)      return faultInFromUpstream(ExternalRef); -  }    if (Object.SK == TrieRecord::StorageKind::DataPool)      return ObjectHandle::fromFileOffset(Object.Offset); @@ -1286,8 +1287,10 @@ OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,    TrieRecord::Data Object = I->Ref.load();    if (Object.SK != TrieRecord::StorageKind::Unknown)      return ObjectPresence::InPrimaryDB; +    if (!CheckUpstream || !UpstreamDB)      return ObjectPresence::Missing; +    std::optional<ObjectID> UpstreamID =        UpstreamDB->getExistingReference(getDigest(*I));    return UpstreamID.has_value() ? 
ObjectPresence::OnlyInUpstreamDB @@ -1549,9 +1552,10 @@ unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const {    return std::max(IndexPercent, DataPercent);  } -Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open( -    StringRef AbsPath, StringRef HashName, unsigned HashByteSize, -    std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) { +Expected<std::unique_ptr<OnDiskGraphDB>> +OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName, +                    unsigned HashByteSize, OnDiskGraphDB *UpstreamDB, +                    FaultInPolicy Policy) {    if (std::error_code EC = sys::fs::create_directories(AbsPath))      return createFileError(AbsPath, EC); @@ -1604,18 +1608,15 @@ Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(                               "unexpected user header in '" + DataPoolPath +                                   "'"); -  return std::unique_ptr<OnDiskGraphDB>( -      new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool), -                        std::move(UpstreamDB), Policy)); +  return std::unique_ptr<OnDiskGraphDB>(new OnDiskGraphDB( +      AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));  }  OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,                               OnDiskDataAllocator DataPool, -                             std::unique_ptr<OnDiskGraphDB> UpstreamDB, -                             FaultInPolicy Policy) +                             OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy)      : Index(std::move(Index)), DataPool(std::move(DataPool)), -      RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)), -      FIPolicy(Policy) { +      RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {    /// Lifetime for "big" objects not in DataPool.    ///    /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something @@ -1638,7 +1639,6 @@ Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,    // against the process dying during importing and leaving the database with an    // incomplete tree. Note that if the upstream has missing nodes then the tree    // will be copied with missing nodes as well, it won't be considered an error. -    struct UpstreamCursor {      ObjectHandle Node;      size_t RefsCount; @@ -1720,7 +1720,6 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,    // Copy the node data into the primary store.    // FIXME: Use hard-link or cloning if the file-system supports it and data is    // stored into a separate file. 
-    auto Data = UpstreamDB->getObjectData(UpstreamNode);    auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);    SmallVector<ObjectID, 64> Refs; @@ -1737,7 +1736,8 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,  Expected<std::optional<ObjectHandle>>  OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) { -  assert(UpstreamDB); +  if (!UpstreamDB) +    return std::nullopt;    auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));    if (LLVM_UNLIKELY(!UpstreamID)) diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp index 2186071..15656cb 100644 --- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -20,6 +20,7 @@  #include "llvm/CAS/OnDiskKeyValueDB.h"  #include "OnDiskCommon.h"  #include "llvm/ADT/StringExtras.h" +#include "llvm/CAS/UnifiedOnDiskCache.h"  #include "llvm/Support/Alignment.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Errc.h" @@ -53,15 +54,21 @@ Expected<std::optional<ArrayRef<char>>>  OnDiskKeyValueDB::get(ArrayRef<uint8_t> Key) {    // Check the result cache.    OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key); -  if (!ActionP) +  if (ActionP) { +    assert(isAddrAligned(Align(8), ActionP->Data.data())); +    return ActionP->Data; +  } +  if (!UnifiedCache || !UnifiedCache->UpstreamKVDB)      return std::nullopt; -  assert(isAddrAligned(Align(8), ActionP->Data.data())); -  return ActionP->Data; + +  // Try to fault in from upstream. +  return UnifiedCache->faultInFromUpstreamKV(Key);  }  Expected<std::unique_ptr<OnDiskKeyValueDB>>  OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, -                       StringRef ValueName, size_t ValueSize) { +                       StringRef ValueName, size_t ValueSize, +                       UnifiedOnDiskCache *Cache) {    if (std::error_code EC = sys::fs::create_directories(Path))      return createFileError(Path, EC); @@ -87,10 +94,14 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,      return std::move(E);    return std::unique_ptr<OnDiskKeyValueDB>( -      new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache))); +      new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache), Cache));  }  Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { +  if (UnifiedCache && UnifiedCache->UpstreamKVDB) { +    if (auto E = UnifiedCache->UpstreamKVDB->validate(CheckValue)) +      return E; +  }    return Cache.validate(        [&](FileOffset Offset,            OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp new file mode 100644 index 0000000..ae9d818 --- /dev/null +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -0,0 +1,613 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one +/// directory while also restricting storage growth with a scheme of chaining +/// the two most recent directories (primary & upstream), where the primary +/// "faults-in" data from the upstream one. 
When the primary (most recent) +/// directory exceeds its intended limit, a new empty directory becomes the +/// primary one. +/// +/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open +/// receives) there are directories named like this: +/// +/// 'v<version>.<x>' +/// 'v<version>.<x+1>' +/// 'v<version>.<x+2>' +/// ... +/// +/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and +/// the part after the dot is an increasing integer. The primary directory is +/// the one with the highest integer and the upstream one is the directory +/// before it. For example, if the sub-directories contained are: +/// +/// 'v1.5', 'v1.6', 'v1.7', 'v1.8' +/// +/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are +/// unused directories that can be safely deleted at any time and by any +/// process. +/// +/// Contained within the top-level directory is a file named "lock" which is +/// used for processes to take shared or exclusive locks for the contents of the +/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock +/// for the top-level directory; when it closes, if the primary sub-directory +/// exceeded its limit, it attempts to get an exclusive lock in order to create +/// a new empty primary directory; if it can't get the exclusive lock it gives +/// up and lets the next \p UnifiedOnDiskCache instance that closes try +/// again. +/// +/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a +/// directory, by any process, the storage size in that directory will keep +/// growing unrestricted. But the major benefit is that garbage-collection can +/// be triggered on a directory concurrently, at any time and by any process, +/// without affecting any active readers/writers in the same process or other +/// processes. +/// +/// The \c UnifiedOnDiskCache also provides validation and recovery on top of +/// the underlying on-disk storage. The low-level storage is designed to remain +/// coherent across regular process crashes, but may be invalid after power loss +/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows +/// validating the contents once per boot and can recover by marking invalid +/// data for garbage collection. +/// +/// The data recovery described above requires exclusive access to the CAS, and +/// it is an error to attempt recovery if the CAS is open in any process/thread. +/// In order to maximize backwards compatibility with tools that do not perform +/// validation before opening the CAS, we do not attempt to get exclusive access +/// until recovery is actually performed, meaning as long as the data is valid +/// it will not conflict with concurrent use. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/UnifiedOnDiskCache.h" +#include "BuiltinCAS.h" +#include "OnDiskCommon.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CAS/ActionCache.h" +#include "llvm/CAS/OnDiskGraphDB.h" +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include <optional> + +#if __has_include(<sys/sysctl.h>) +#include <sys/sysctl.h> +#endif + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +/// FIXME: When the version of \p DBDirPrefix is bumped up we need to figure out +/// how to handle the leftover sub-directories of the previous version, within +/// the \p UnifiedOnDiskCache::collectGarbage function. +static constexpr StringLiteral DBDirPrefix = "v1."; + +static constexpr StringLiteral ValidationFilename = "v1.validation"; +static constexpr StringLiteral CorruptPrefix = "corrupt."; + +ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef<char> Value) { +  // little endian encoded. +  assert(Value.size() == sizeof(uint64_t)); +  return ObjectID::fromOpaqueData(support::endian::read64le(Value.data())); +} + +UnifiedOnDiskCache::ValueBytes +UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) { +  // little endian encoded. +  UnifiedOnDiskCache::ValueBytes ValBytes; +  static_assert(ValBytes.size() == sizeof(ID.getOpaqueData())); +  support::endian::write64le(ValBytes.data(), ID.getOpaqueData()); +  return ValBytes; +} + +Expected<std::optional<ArrayRef<char>>> +UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef<uint8_t> Key) { +  assert(UpstreamGraphDB); +  assert(UpstreamKVDB); + +  std::optional<ArrayRef<char>> UpstreamValue; +  if (Error E = UpstreamKVDB->get(Key).moveInto(UpstreamValue)) +    return std::move(E); +  if (!UpstreamValue) +    return std::nullopt; + +  // The value is the \p ObjectID in the context of the upstream +  // \p OnDiskGraphDB instance. Translate it to the context of the primary +  // \p OnDiskGraphDB instance. +  ObjectID UpstreamID = getObjectIDFromValue(*UpstreamValue); +  auto PrimaryID = +      PrimaryGraphDB->getReference(UpstreamGraphDB->getDigest(UpstreamID)); +  if (LLVM_UNLIKELY(!PrimaryID)) +    return PrimaryID.takeError(); +  return PrimaryKVDB->put(Key, getValueFromObjectID(*PrimaryID)); +} + +/// \returns all the 'v<version>.<x>' names of sub-directories, sorted in +/// ascending order of the integer after the dot. Corrupt directories, if +/// included, will come first. 
+static Expected<SmallVector<std::string, 4>> +getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) { +  struct DBDir { +    uint64_t Order; +    std::string Name; +  }; +  SmallVector<DBDir> FoundDBDirs; + +  std::error_code EC; +  for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE; +       DirI.increment(EC)) { +    if (DirI->type() != sys::fs::file_type::directory_file) +      continue; +    StringRef SubDir = sys::path::filename(DirI->path()); +    if (IncludeCorrupt && SubDir.starts_with(CorruptPrefix)) { +      FoundDBDirs.push_back({0, std::string(SubDir)}); +      continue; +    } +    if (!SubDir.starts_with(DBDirPrefix)) +      continue; +    uint64_t Order; +    if (SubDir.substr(DBDirPrefix.size()).getAsInteger(10, Order)) +      return createStringError(inconvertibleErrorCode(), +                               "unexpected directory " + DirI->path()); +    FoundDBDirs.push_back({Order, std::string(SubDir)}); +  } +  if (EC) +    return createFileError(Path, EC); + +  // Sort by the integer suffix; '<' keeps this a strict weak ordering as +  // llvm::sort requires. +  llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool { +    return LHS.Order < RHS.Order; +  }); + +  SmallVector<std::string, 4> DBDirs; +  for (DBDir &Dir : FoundDBDirs) +    DBDirs.push_back(std::move(Dir.Name)); +  return DBDirs; +} + +static Expected<SmallVector<std::string, 4>> getAllGarbageDirs(StringRef Path) { +  auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true); +  if (!DBDirs) +    return DBDirs.takeError(); + +  // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure +  // out how to handle the leftover sub-directories of the previous version. + +  for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) { +    StringRef Back(DBDirs->back()); +    if (Back.starts_with(CorruptPrefix)) +      break; +    DBDirs->pop_back(); +  } +  return *DBDirs; +} + +/// Given a sub-directory named 'v<version>.<x>', outputs the +/// 'v<version>.<x+1>' name. 
+static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) { +  assert(DBDir.starts_with(DBDirPrefix)); +  uint64_t Count; +  bool Failed = DBDir.substr(DBDirPrefix.size()).getAsInteger(10, Count); +  assert(!Failed); +  (void)Failed; +  OS << DBDirPrefix << Count + 1; +} + +static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath, +                                  bool CheckHash) { +  SmallVector<StringRef> Args{LLVMCasBinary, "-cas", RootPath, "-validate"}; +  if (CheckHash) +    Args.push_back("-check-hash"); + +  llvm::SmallString<128> StdErrPath; +  int StdErrFD = -1; +  if (std::error_code EC = sys::fs::createTemporaryFile( +          "llvm-cas-validate-stderr", "txt", StdErrFD, StdErrPath, +          llvm::sys::fs::OF_Text)) +    return createStringError(EC, "failed to create temporary file"); +  FileRemover OutputRemover(StdErrPath.c_str()); + +  std::optional<llvm::StringRef> Redirects[] = { +      {""}, // stdin = /dev/null +      {""}, // stdout = /dev/null +      StdErrPath.str(), +  }; + +  std::string ErrMsg; +  int Result = +      sys::ExecuteAndWait(LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects, +                          /*SecondsToWait=*/120, /*MemoryLimit=*/0, &ErrMsg); + +  if (Result == -1) +    return createStringError("failed to exec " + join(Args, " ") + ": " + +                             ErrMsg); +  if (Result != 0) { +    llvm::SmallString<64> Err("cas contents invalid"); +    if (!ErrMsg.empty()) { +      Err += ": "; +      Err += ErrMsg; +    } +    auto StdErrBuf = MemoryBuffer::getFile(StdErrPath.c_str()); +    if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) { +      Err += ": "; +      Err += (*StdErrBuf)->getBuffer(); +    } +    return createStringError(Err); +  } +  return Error::success(); +} + +static Error validateInProcess(StringRef RootPath, StringRef HashName, +                               unsigned HashByteSize, bool CheckHash) { +  std::shared_ptr<UnifiedOnDiskCache> UniDB; +  if (Error E = UnifiedOnDiskCache::open(RootPath, std::nullopt, HashName, +                                         HashByteSize) +                    .moveInto(UniDB)) +    return E; +  auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB); +  if (Error E = CAS->validate(CheckHash)) +    return E; +  auto Cache = builtin::createActionCacheFromUnifiedOnDiskCache(UniDB); +  if (Error E = Cache->validate()) +    return E; +  return Error::success(); +} + +static Expected<uint64_t> getBootTime() { +#if __has_include(<sys/sysctl.h>) && defined(KERN_BOOTTIME) +  struct timeval TV; +  size_t TVLen = sizeof(TV); +  int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME}; +  if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0) +    return createStringError(llvm::errnoAsErrorCode(), +                             "failed to get boottime"); +  if (TVLen != sizeof(TV)) +    return createStringError("sysctl kern.boottime unexpected format"); +  return TV.tv_sec; +#elif defined(__linux__) +  // Use the mtime for /proc, which is recreated during system boot. +  // We could also read /proc/stat and search for 'btime'. 
+  sys::fs::file_status Status; +  if (std::error_code EC = sys::fs::status("/proc", Status)) +    return createFileError("/proc", EC); +  return Status.getLastModificationTime().time_since_epoch().count(); +#else +  llvm::report_fatal_error("getBootTime unimplemented"); +#endif +} + +Expected<ValidationResult> UnifiedOnDiskCache::validateIfNeeded( +    StringRef RootPath, StringRef HashName, unsigned HashByteSize, +    bool CheckHash, bool AllowRecovery, bool ForceValidation, +    std::optional<StringRef> LLVMCasBinaryPath) { +  if (std::error_code EC = sys::fs::create_directories(RootPath)) +    return createFileError(RootPath, EC); + +  SmallString<256> PathBuf(RootPath); +  sys::path::append(PathBuf, ValidationFilename); +  int FD = -1; +  if (std::error_code EC = sys::fs::openFileForReadWrite( +          PathBuf, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) +    return createFileError(PathBuf, EC); +  assert(FD != -1); + +  sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); +  auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(File); }); + +  if (std::error_code EC = lockFileThreadSafe(FD, sys::fs::LockKind::Exclusive)) +    return createFileError(PathBuf, EC); +  auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(FD); }); + +  SmallString<8> Bytes; +  if (Error E = sys::fs::readNativeFileToEOF(File, Bytes)) +    return createFileError(PathBuf, std::move(E)); + +  uint64_t ValidationBootTime = 0; +  if (!Bytes.empty() && +      StringRef(Bytes).trim().getAsInteger(10, ValidationBootTime)) +    return createFileError(PathBuf, errc::illegal_byte_sequence, +                           "expected integer"); + +  static uint64_t BootTime = 0; +  if (BootTime == 0) +    if (Error E = getBootTime().moveInto(BootTime)) +      return std::move(E); + +  std::string LogValidationError; + +  if (ValidationBootTime == BootTime && !ForceValidation) +    return ValidationResult::Skipped; + +  // Validate! +  bool NeedsRecovery = false; +  if (Error E = +          LLVMCasBinaryPath +              ? 
validateOutOfProcess(*LLVMCasBinaryPath, RootPath, CheckHash)
+              : validateInProcess(RootPath, HashName, HashByteSize,
+                                  CheckHash)) {
+    if (AllowRecovery) {
+      consumeError(std::move(E));
+      NeedsRecovery = true;
+    } else {
+      return std::move(E);
+    }
+  }
+
+  if (NeedsRecovery) {
+    sys::path::remove_filename(PathBuf);
+    sys::path::append(PathBuf, "lock");
+
+    int LockFD = -1;
+    if (std::error_code EC = sys::fs::openFileForReadWrite(
+            PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+      return createFileError(PathBuf, EC);
+    sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+    auto CloseLock = make_scope_exit([&]() { sys::fs::closeFile(LockFile); });
+    if (std::error_code EC = tryLockFileThreadSafe(LockFD)) {
+      if (EC == std::errc::no_lock_available)
+        return createFileError(
+            PathBuf, EC,
+            "CAS validation requires exclusive access but CAS was in use");
+      return createFileError(PathBuf, EC);
+    }
+    auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+    auto DBDirs = getAllDBDirs(RootPath);
+    if (!DBDirs)
+      return DBDirs.takeError();
+
+    for (StringRef DBDir : *DBDirs) {
+      sys::path::remove_filename(PathBuf);
+      sys::path::append(PathBuf, DBDir);
+      std::error_code EC;
+      int Attempt = 0, MaxAttempts = 100;
+      SmallString<128> GCPath;
+      for (; Attempt < MaxAttempts; ++Attempt) {
+        GCPath.assign(RootPath);
+        sys::path::append(GCPath, CorruptPrefix + std::to_string(Attempt) +
+                                      "." + DBDir);
+        EC = sys::fs::rename(PathBuf, GCPath);
+        // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST.
+        if (EC != errc::directory_not_empty && EC != errc::file_exists)
+          break;
+      }
+      if (Attempt == MaxAttempts)
+        return createStringError(
+            EC, "rename " + PathBuf +
+                    " failed: too many CAS directories awaiting pruning");
+      if (EC)
+        return createStringError(EC, "rename " + PathBuf + " to " + GCPath +
+                                         " failed: " + EC.message());
+    }
+  }
+
+  if (ValidationBootTime != BootTime) {
+    // Fix the filename in case we have an error to report.
+    sys::path::remove_filename(PathBuf);
+    sys::path::append(PathBuf, ValidationFilename);
+    if (std::error_code EC = sys::fs::resize_file(FD, 0))
+      return createFileError(PathBuf, EC);
+    raw_fd_ostream OS(FD, /*shouldClose=*/false);
+    OS.seek(0); // resize does not reset the stream position
+    OS << BootTime << '\n';
+    if (OS.has_error())
+      return createFileError(PathBuf, OS.error());
+  }
+
+  return NeedsRecovery ? ValidationResult::Recovered
+                       : ValidationResult::Valid;
+}
+
+Expected<std::unique_ptr<UnifiedOnDiskCache>>
+UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit,
+                         StringRef HashName, unsigned HashByteSize,
+                         OnDiskGraphDB::FaultInPolicy FaultInPolicy) {
+  if (std::error_code EC = sys::fs::create_directories(RootPath))
+    return createFileError(RootPath, EC);
+
+  SmallString<256> PathBuf(RootPath);
+  sys::path::append(PathBuf, "lock");
+  int LockFD = -1;
+  if (std::error_code EC = sys::fs::openFileForReadWrite(
+          PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+    return createFileError(PathBuf, EC);
+  assert(LockFD != -1);
+  // Lock the directory with a shared lock, which prevents other processes from
+  // creating a new chain (essentially, while a \p UnifiedOnDiskCache instance
+  // holds a shared lock, the storage for the primary directory can grow
+  // unrestricted).
+  if (std::error_code EC =
+          lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared))
+    return createFileError(PathBuf, EC);
+
+  auto DBDirs = getAllDBDirs(RootPath);
+  if (!DBDirs)
+    return DBDirs.takeError();
+  if (DBDirs->empty())
+    DBDirs->push_back((Twine(DBDirPrefix) + "1").str());
+
+  assert(!DBDirs->empty());
+
+  // If there is only one directory, open the databases on it. If there are 2
+  // or more directories, get the two most recent ones and chain them, with the
+  // most recent being the primary. The remaining directories are unused data
+  // that can be garbage-collected.
+  auto UniDB = std::unique_ptr<UnifiedOnDiskCache>(new UnifiedOnDiskCache());
+  std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
+  std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
+  if (DBDirs->size() > 1) {
+    StringRef UpstreamDir = *(DBDirs->end() - 2);
+    PathBuf = RootPath;
+    sys::path::append(PathBuf, UpstreamDir);
+    if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+                                      /*UpstreamDB=*/nullptr, FaultInPolicy)
+                      .moveInto(UpstreamGraphDB))
+      return std::move(E);
+    if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+                                         /*ValueName=*/"objectid",
+                                         /*ValueSize=*/sizeof(uint64_t))
+                      .moveInto(UpstreamKVDB))
+      return std::move(E);
+  }
+
+  StringRef PrimaryDir = *(DBDirs->end() - 1);
+  PathBuf = RootPath;
+  sys::path::append(PathBuf, PrimaryDir);
+  std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
+  if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+                                    UpstreamGraphDB.get(), FaultInPolicy)
+                    .moveInto(PrimaryGraphDB))
+    return std::move(E);
+  std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
+  // \p UnifiedOnDiskCache does manual chaining for key-value requests,
+  // including an extra translation step of the value during fault-in.
+  if (Error E =
+          OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+                                 /*ValueName=*/"objectid",
+                                 /*ValueSize=*/sizeof(uint64_t), UniDB.get())
+              .moveInto(PrimaryKVDB))
+    return std::move(E);
+
+  UniDB->RootPath = RootPath;
+  UniDB->SizeLimit = SizeLimit.value_or(0);
+  UniDB->LockFD = LockFD;
+  UniDB->NeedsGarbageCollection = DBDirs->size() > 2;
+  UniDB->PrimaryDBDir = PrimaryDir;
+  UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB);
+  UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB);
+  UniDB->UpstreamKVDB = std::move(UpstreamKVDB);
+  UniDB->PrimaryKVDB = std::move(PrimaryKVDB);
+
+  return std::move(UniDB);
+}
+
+void UnifiedOnDiskCache::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+  this->SizeLimit = SizeLimit.value_or(0);
+}
+
+uint64_t UnifiedOnDiskCache::getStorageSize() const {
+  uint64_t TotalSize = getPrimaryStorageSize();
+  if (UpstreamGraphDB)
+    TotalSize += UpstreamGraphDB->getStorageSize();
+  if (UpstreamKVDB)
+    TotalSize += UpstreamKVDB->getStorageSize();
+  return TotalSize;
+}
+
+uint64_t UnifiedOnDiskCache::getPrimaryStorageSize() const {
+  return PrimaryGraphDB->getStorageSize() + PrimaryKVDB->getStorageSize();
+}
+
+bool UnifiedOnDiskCache::hasExceededSizeLimit() const {
+  uint64_t CurSizeLimit = SizeLimit;
+  if (!CurSizeLimit)
+    return false;
+
+  // If utilization of the hard storage limit is beyond 85%, declare the size
+  // limit exceeded and request a clean-up.
+  unsigned CurrentPercent =
+      std::max(PrimaryGraphDB->getHardStorageLimitUtilization(),
+               PrimaryKVDB->getHardStorageLimitUtilization());
+  if (CurrentPercent > 85)
+    return true;
+
+  // We allow each of the directories in the chain to reach up to half the
+  // intended size limit. Check whether the primary directory has exceeded half
+  // the limit or not, in order to decide whether we need to start a new chain.
+  //
+  // We could check the size limit against the sum of the sizes of both the
+  // primary and upstream directories, but then if the upstream is
+  // significantly larger than the intended limit, it would trigger a new chain
+  // to be created before the primary has reached its own limit. Essentially,
+  // in such a situation we prefer reclaiming the storage later in order to get
+  // more consistent cache-hit behavior.
+  return (CurSizeLimit / 2) < getPrimaryStorageSize();
+}
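The half-limit policy above reads as: with a configured limit L, a new chain is started once the primary directory alone exceeds L/2, independent of the upstream directory's size. A standalone sketch of that predicate with worked numbers (the function name is illustrative, not the patch's API):

    // Sketch of the half-limit check: each directory in the chain may grow to
    // half the configured limit before a new primary directory is started.
    #include <cassert>
    #include <cstdint>

    static bool exceededHalfLimit(uint64_t SizeLimit, uint64_t PrimarySize) {
      if (!SizeLimit)
        return false; // no limit configured
      return (SizeLimit / 2) < PrimarySize;
    }

    int main() {
      assert(!exceededHalfLimit(100, 50)); // exactly half: keep the chain
      assert(exceededHalfLimit(100, 51));  // past half: start a new primary
      assert(!exceededHalfLimit(0, 1000)); // limit of zero means "no limit"
      return 0;
    }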
+
+Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
+  if (LockFD == -1)
+    return Error::success(); // Already closed.
+  auto CloseLock = make_scope_exit([&]() {
+    assert(LockFD >= 0);
+    sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+    sys::fs::closeFile(LockFile);
+    LockFD = -1;
+  });
+
+  bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false;
+  UpstreamKVDB.reset();
+  PrimaryKVDB.reset();
+  UpstreamGraphDB.reset();
+  PrimaryGraphDB.reset();
+  if (std::error_code EC = unlockFileThreadSafe(LockFD))
+    return createFileError(RootPath, EC);
+
+  if (!ExceededSizeLimit)
+    return Error::success();
+
+  // The primary directory exceeded its intended size limit. Try to get an
+  // exclusive lock in order to create a new primary directory for the next
+  // time this \p UnifiedOnDiskCache path is opened.
+
+  if (std::error_code EC = tryLockFileThreadSafe(
+          LockFD, std::chrono::milliseconds(0), sys::fs::LockKind::Exclusive)) {
+    if (EC == errc::no_lock_available)
+      return Error::success(); // Couldn't get an exclusive lock; give up.
+    return createFileError(RootPath, EC);
+  }
+  auto UnlockFile = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+  // We managed to get an exclusive lock, which means there are no other open
+  // \p UnifiedOnDiskCache instances for the same path, so we can safely start
+  // a new primary directory. To do that, we just have to create a new empty
+  // directory with the next consecutive index; since this is an atomic
+  // operation, the top-level directory is left in a consistent state even if
+  // the process dies during this code path.
+
+  SmallString<256> PathBuf(RootPath);
+  raw_svector_ostream OS(PathBuf);
+  OS << sys::path::get_separator();
+  getNextDBDirName(PrimaryDBDir, OS);
+  if (std::error_code EC = sys::fs::create_directory(PathBuf))
+    return createFileError(PathBuf, EC);
+
+  NeedsGarbageCollection = true;
+  return Error::success();
+}
+
+UnifiedOnDiskCache::UnifiedOnDiskCache() = default;
+
+UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); }
+
+Error UnifiedOnDiskCache::collectGarbage(StringRef Path) {
+  auto DBDirs = getAllGarbageDirs(Path);
+  if (!DBDirs)
+    return DBDirs.takeError();
+
+  SmallString<256> PathBuf(Path);
+  for (StringRef UnusedSubDir : *DBDirs) {
+    sys::path::append(PathBuf, UnusedSubDir);
+    if (std::error_code EC = sys::fs::remove_directories(PathBuf))
+      return createFileError(PathBuf, EC);
+    sys::path::remove_filename(PathBuf);
+  }
+  return Error::success();
+}
+
+Error UnifiedOnDiskCache::collectGarbage() { return collectGarbage(RootPath); }
diff --git a/llvm/lib/CGData/OutlinedHashTreeRecord.cpp b/llvm/lib/CGData/OutlinedHashTreeRecord.cpp
index cc76063..2b6e2f0 100644
--- a/llvm/lib/CGData/OutlinedHashTreeRecord.cpp
+++ b/llvm/lib/CGData/OutlinedHashTreeRecord.cpp
@@ -37,7 +37,7 @@ template <> struct MappingTraits<HashNodeStable> {
 template <> struct CustomMappingTraits<IdHashNodeStableMapTy> {
   static void inputOne(IO &io, StringRef Key, IdHashNodeStableMapTy &V) {
     HashNodeStable NodeStable;
-    io.mapRequired(Key.str().c_str(), NodeStable);
+    io.mapRequired(Key, NodeStable);
     unsigned Id;
     if (Key.getAsInteger(0, Id)) {
       io.setError("Id not an integer");
@@ -48,7 +48,7 @@ template <> struct CustomMappingTraits<IdHashNodeStableMapTy> {
   static void output(IO &io, IdHashNodeStableMapTy &V) {
     for (auto Iter = V.begin(); Iter != V.end(); ++Iter)
-      io.mapRequired(utostr(Iter->first).c_str(), Iter->second);
+      io.mapRequired(utostr(Iter->first), Iter->second);
   }
 };
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index e5c85d5..1ea30d8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -745,11 +745,6 @@ void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const {
   Asm->emitInt32(QualifiedNameHash);
 }
-constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[];
-constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[];
-constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[];
-constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[];
-
 #ifndef NDEBUG
 void AppleAccelTableWriter::Header::print(raw_ostream &OS) const {
   OS << "Magic: " << format("0x%x", Magic) << "\n"
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 171fb83..98cdada 100644
---
a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -112,8 +112,7 @@ void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) {  /// to the first intersecting scope range if one exists.  static std::optional<ArrayRef<InsnRange>::iterator>  intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, -           const ArrayRef<InsnRange> &Ranges, -           const InstructionOrdering &Ordering) { +           ArrayRef<InsnRange> Ranges, const InstructionOrdering &Ordering) {    for (auto RangesI = Ranges.begin(), RangesE = Ranges.end();         RangesI != RangesE; ++RangesI) {      if (EndMI && Ordering.isBefore(EndMI, RangesI->first)) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 8ea1326..0309e22 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -368,7 +368,7 @@ class CodeGenPrepare {    std::unique_ptr<DominatorTree> DT;  public: -  CodeGenPrepare(){}; +  CodeGenPrepare() = default;    CodeGenPrepare(const TargetMachine *TM) : TM(TM){};    /// If encounter huge function, we need to limit the build time.    bool IsHugeFunc = false; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 9ace7d6..ec4d13f 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -589,8 +589,8 @@ bool CombinerHelper::matchCombineShuffleVector(    return true;  } -void CombinerHelper::applyCombineShuffleVector( -    MachineInstr &MI, const ArrayRef<Register> Ops) const { +void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, +                                               ArrayRef<Register> Ops) const {    Register DstReg = MI.getOperand(0).getReg();    Builder.setInsertPt(*MI.getParent(), MI);    Register NewDstReg = MRI.cloneVirtualRegister(DstReg); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 4b4df98..637acd6 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -109,8 +109,10 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,    if (auto *CI = dyn_cast<ConstantInt>(NumericConstant)) {      if (CI->getBitWidth() > 64)        MIB.addCImm(CI); -    else +    else if (CI->getBitWidth() == 1)        MIB.addImm(CI->getZExtValue()); +    else +      MIB.addImm(CI->getSExtValue());    } else if (auto *CFP = dyn_cast<ConstantFP>(NumericConstant)) {      MIB.addFPImm(CFP);    } else if (isa<ConstantPointerNull>(NumericConstant)) { diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index bb9c76f..8c6d219 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -363,8 +363,9 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {    case MachineOperand::MO_RegisterMask:    case MachineOperand::MO_RegisterLiveOut: {      // Shallow compare of the two RegMasks -    const uint32_t *RegMask = getRegMask(); -    const uint32_t *OtherRegMask = Other.getRegMask(); +    const uint32_t *RegMask = isRegMask() ? getRegMask() : getRegLiveOut(); +    const uint32_t *OtherRegMask = +        isRegMask() ? 
Other.getRegMask() : Other.getRegLiveOut();      if (RegMask == OtherRegMask)        return true; @@ -434,7 +435,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) {      if (const MachineFunction *MF = getMFIfAvailable(MO)) {        const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();        unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); -      const uint32_t *RegMask = MO.getRegMask(); +      const uint32_t *RegMask = +          MO.isRegMask() ? MO.getRegMask() : MO.getRegLiveOut();        std::vector<stable_hash> RegMaskHashes(RegMask, RegMask + RegMaskSize);        return hash_combine(MO.getType(), MO.getTargetFlags(),                            stable_hash_combine(RegMaskHashes)); diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 3ed1045..f18c051 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -334,7 +334,7 @@ public:      LiveIntervals &LIS;    }; -  MachineSchedulerImpl() {} +  MachineSchedulerImpl() = default;    // Migration only    void setLegacyPass(MachineFunctionPass *P) { this->P = P; }    void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; } @@ -358,7 +358,7 @@ public:      MachineLoopInfo &MLI;      AAResults &AA;    }; -  PostMachineSchedulerImpl() {} +  PostMachineSchedulerImpl() = default;    // Migration only    void setLegacyPass(MachineFunctionPass *P) { this->P = P; }    void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; } diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index 9d56696..6da708d 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -136,7 +136,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {            const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();            unsigned RegMaskSize =                MachineOperand::getRegMaskSize(TRI->getNumRegs()); -          const uint32_t *RegMask = MO.getRegMask(); +          const uint32_t *RegMask = +              MO.isRegMask() ? MO.getRegMask() : MO.getRegLiveOut();            std::vector<llvm::stable_hash> RegMaskHashes(RegMask,                                                         RegMask + RegMaskSize);            return stable_hash_combine(MO.getType(), MO.getTargetFlags(), diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 697b779..ec6ffd4 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -206,7 +206,7 @@ private:      bool Error = false;              ///< Could not allocate.      explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {} -    explicit LiveReg() {} +    explicit LiveReg() = default;      unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); }    }; diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index e17a214b..38f6deb 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -378,7 +378,7 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {  public:    // For legacy pass only. 
-  RegisterCoalescer() {} +  RegisterCoalescer() = default;    RegisterCoalescer &operator=(RegisterCoalescer &&Other) = default;    RegisterCoalescer(LiveIntervals *LIS, SlotIndexes *SI, diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bdd6bf0..46c4bb8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9374,7 +9374,7 @@ static unsigned bigEndianByteAt(unsigned BW, unsigned i) {  // Check if the bytes offsets we are looking at match with either big or  // little endian value loaded. Return true for big endian, false for little  // endian, and std::nullopt if match failed. -static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, +static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,                                         int64_t FirstOffset) {    // The endian can be decided only when it is 2 bytes at least.    unsigned Width = ByteOffsets.size(); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index bb10cf6..d84c3fb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -733,6 +733,8 @@ MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) {    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {      if (CI->getBitWidth() > 64)        return MachineOperand::CreateCImm(CI); +    if (CI->getBitWidth() == 1) +      return MachineOperand::CreateImm(CI->getZExtValue());      return MachineOperand::CreateImm(CI->getSExtValue());    }    if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a522650..fa0c899 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8958,9 +8958,8 @@ bool SelectionDAGBuilder::canTailCall(const CallBase &CB) const {    // Avoid emitting tail calls in functions with the disable-tail-calls    // attribute.    const Function *Caller = CB.getParent()->getParent(); -  if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == -          "true" && -      !isMustTailCall) +  if (!isMustTailCall && +      Caller->getFnAttribute("disable-tail-calls").getValueAsBool())      return false;    // We can't tail call inside a function with a swifterror argument. Lowering diff --git a/llvm/lib/CodeGenTypes/LowLevelType.cpp b/llvm/lib/CodeGenTypes/LowLevelType.cpp index 4785f26..92b7fad 100644 --- a/llvm/lib/CodeGenTypes/LowLevelType.cpp +++ b/llvm/lib/CodeGenTypes/LowLevelType.cpp @@ -54,9 +54,3 @@ LLVM_DUMP_METHOD void LLT::dump() const {    dbgs() << '\n';  }  #endif - -const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorScalableFieldInfo; diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h index 84757ae..970abdc 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h @@ -28,7 +28,7 @@ using MacroOffset2UnitMapTy = DenseMap<uint64_t, DwarfUnit *>;  /// Base class for all Dwarf units(Compile unit/Type table unit).  
class DwarfUnit : public OutputSections {  public: -  virtual ~DwarfUnit() {} +  virtual ~DwarfUnit() = default;    DwarfUnit(LinkingGlobalData &GlobalData, unsigned ID,              StringRef ClangModuleName)        : OutputSections(GlobalData), ID(ID), ClangModuleName(ClangModuleName), diff --git a/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h b/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h index f67536e..8ccb4a5 100644 --- a/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h +++ b/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h @@ -22,7 +22,7 @@ class StringEntryToDwarfStringPoolEntryMap {  public:    StringEntryToDwarfStringPoolEntryMap(LinkingGlobalData &GlobalData)        : GlobalData(GlobalData) {} -  ~StringEntryToDwarfStringPoolEntryMap() {} +  ~StringEntryToDwarfStringPoolEntryMap() = default;    /// Create DwarfStringPoolEntry for specified StringEntry if necessary.    /// Initialize DwarfStringPoolEntry with initial values. diff --git a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp index 6c23ba8..23ab534 100644 --- a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp +++ b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -102,7 +102,8 @@ std::optional<CVType> LazyRandomTypeCollection::tryGetType(TypeIndex Index) {      return std::nullopt;    } -  assert(contains(Index)); +  if (!contains(Index)) +    return std::nullopt;    return Records[Index.toArrayIndex()].Type;  } diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 7e606c6a..4e7db82 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -27,7 +27,7 @@  namespace llvm {  namespace orc { -MemoryMapper::~MemoryMapper() {} +MemoryMapper::~MemoryMapper() = default;  InProcessMemoryMapper::InProcessMemoryMapper(size_t PageSize)      : PageSize(PageSize) {} diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 0e5926f..fff9a81 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -528,7 +528,7 @@ void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,    Value *Version = Builder.getInt32(OMP_KERNEL_ARG_VERSION);    Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);    auto Int32Ty = Type::getInt32Ty(Builder.getContext()); -  constexpr const size_t MaxDim = 3; +  constexpr size_t MaxDim = 3;    Value *ZeroArray = Constant::getNullValue(ArrayType::get(Int32Ty, MaxDim));    Value *Flags = Builder.getInt64(KernelArgs.HasNoWait); diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index b838e36..58b7ddd 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -730,7 +730,7 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,        // (arm|aarch64).neon.bfdot.*'.        Intrinsic::ID ID =            StringSwitch<Intrinsic::ID>(Name) -              .Cases("v2f32.v8i8", "v4f32.v16i8", +              .Cases({"v2f32.v8i8", "v4f32.v16i8"},                       IsArm ? 
(Intrinsic::ID)Intrinsic::arm_neon_bfdot                             : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)                .Default(Intrinsic::not_intrinsic); @@ -1456,7 +1456,7 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,        if (F->arg_size() == 1) {          Intrinsic::ID IID =              StringSwitch<Intrinsic::ID>(Name) -                .Cases("brev32", "brev64", Intrinsic::bitreverse) +                .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)                  .Case("clz.i", Intrinsic::ctlz)                  .Case("popc.i", Intrinsic::ctpop)                  .Default(Intrinsic::not_intrinsic); @@ -1504,6 +1504,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,        else if (Name.consume_front("fabs."))          // nvvm.fabs.{f,ftz.f,d}          Expand = Name == "f" || Name == "ftz.f" || Name == "d"; +      else if (Name.consume_front("ex2.approx.")) +        // nvvm.ex2.approx.{f,ftz.f,d,f16x2} +        Expand = +            Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";        else if (Name.consume_front("max.") || Name.consume_front("min."))          // nvvm.{min,max}.{i,ii,ui,ull}          Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" || @@ -2550,6 +2554,11 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,      Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz                                                 : Intrinsic::nvvm_fabs;      Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0)); +  } else if (Name.consume_front("ex2.approx.")) { +    // nvvm.ex2.approx.{f,ftz.f,d,f16x2} +    Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz +                                                : Intrinsic::nvvm_ex2_approx; +    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));    } else if (Name.starts_with("atomic.load.add.f32.p") ||               Name.starts_with("atomic.load.add.f64.p")) {      Value *Ptr = CI->getArgOperand(0); diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h index 51fb40b..e3e8d89 100644 --- a/llvm/lib/IR/ConstantsContext.h +++ b/llvm/lib/IR/ConstantsContext.h @@ -535,7 +535,7 @@ struct ConstantPtrAuthKeyType {    unsigned getHash() const { return hash_combine_range(Operands); } -  using TypeClass = typename ConstantInfo<ConstantPtrAuth>::TypeClass; +  using TypeClass = ConstantInfo<ConstantPtrAuth>::TypeClass;    ConstantPtrAuth *create(TypeClass *Ty) const {      return new ConstantPtrAuth(Operands[0], cast<ConstantInt>(Operands[1]), diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index 62fd62c..3394754 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -34,8 +34,6 @@ static cl::opt<bool> ImportConstantsWithRefs(      "import-constants-with-refs", cl::init(true), cl::Hidden,      cl::desc("Import constant global variables with references")); -constexpr uint32_t FunctionSummary::ParamAccess::RangeWidth; -  FunctionSummary FunctionSummary::ExternalNode =      FunctionSummary::makeDummyFunctionSummary(          SmallVector<FunctionSummary::EdgeTy, 0>()); @@ -88,8 +86,6 @@ std::pair<unsigned, unsigned> FunctionSummary::specialRefCounts() const {    return {RORefCnt, WORefCnt};  } -constexpr uint64_t ModuleSummaryIndex::BitcodeSummaryVersion; -  uint64_t ModuleSummaryIndex::getFlags() const {    uint64_t Flags = 0;    // Flags & 0x4 is reserved. DO NOT REUSE. 
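Several call sites in this commit (AutoUpgrade, ELFAsmParser, MasmParser, WindowsMachineFlag, TGLexer, RemarkFormat, AArch64BuildAttributes) migrate StringSwitch::Cases from the variadic form to the overload taking an initializer list of keys. A minimal standalone sketch of the new form, mirroring the RemarkFormat change below (the enum and function names here are illustrative):

    // Sketch: StringSwitch::Cases now takes the keys as one initializer list,
    // keeping the shared result value visually separate from the keys.
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    enum class Format { YAML, Bitstream, Unknown };

    static Format parseFormat(llvm::StringRef Str) {
      return llvm::StringSwitch<Format>(Str)
          .Cases({"", "yaml"}, Format::YAML) // several keys, one value
          .Case("bitstream", Format::Bitstream)
          .Default(Format::Unknown);
    }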
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index b775cbb..95d61a9 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -148,18 +148,10 @@ void Value::destroyValueName() {  }  bool Value::hasNUses(unsigned N) const { -  if (!UseList) -    return N == 0; - -  // TODO: Disallow for ConstantData and remove !UseList check?    return hasNItems(use_begin(), use_end(), N);  }  bool Value::hasNUsesOrMore(unsigned N) const { -  // TODO: Disallow for ConstantData and remove !UseList check? -  if (!UseList) -    return N == 0; -    return hasNItemsOrMore(use_begin(), use_end(), N);  } diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index b618222..23be42f 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1076,63 +1076,59 @@ Expected<ArrayRef<SymbolResolution>>  LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,                  ArrayRef<SymbolResolution> Res) {    llvm::TimeTraceScope timeScope("LTO add thin LTO"); +  const auto BMID = BM.getModuleIdentifier();    ArrayRef<SymbolResolution> ResTmp = Res;    for (const InputFile::Symbol &Sym : Syms) {      assert(!ResTmp.empty());      const SymbolResolution &R = ResTmp.consume_front(); -    if (!Sym.getIRName().empty()) { +    if (!Sym.getIRName().empty() && R.Prevailing) {        auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(            GlobalValue::getGlobalIdentifier(Sym.getIRName(),                                             GlobalValue::ExternalLinkage, "")); -      if (R.Prevailing) -        ThinLTO.setPrevailingModuleForGUID(GUID, BM.getModuleIdentifier()); +      ThinLTO.setPrevailingModuleForGUID(GUID, BMID);      }    } -  if (Error Err = -          BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(), -                         [&](GlobalValue::GUID GUID) { -                           return ThinLTO.isPrevailingModuleForGUID( -                               GUID, BM.getModuleIdentifier()); -                         })) +  if (Error Err = BM.readSummary( +          ThinLTO.CombinedIndex, BMID, [&](GlobalValue::GUID GUID) { +            return ThinLTO.isPrevailingModuleForGUID(GUID, BMID); +          }))      return Err; -  LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n"); +  LLVM_DEBUG(dbgs() << "Module " << BMID << "\n");    for (const InputFile::Symbol &Sym : Syms) {      assert(!Res.empty());      const SymbolResolution &R = Res.consume_front(); -    if (!Sym.getIRName().empty()) { +    if (!Sym.getIRName().empty() && +        (R.Prevailing || R.FinalDefinitionInLinkageUnit)) {        auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(            GlobalValue::getGlobalIdentifier(Sym.getIRName(),                                             GlobalValue::ExternalLinkage, ""));        if (R.Prevailing) { -        assert( -            ThinLTO.isPrevailingModuleForGUID(GUID, BM.getModuleIdentifier())); +        assert(ThinLTO.isPrevailingModuleForGUID(GUID, BMID));          // For linker redefined symbols (via --wrap or --defsym) we want to          // switch the linkage to `weak` to prevent IPOs from happening.          // Find the summary in the module for this very GV and record the new          // linkage so that we can switch it when we import the GV.          
if (R.LinkerRedefined) -          if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( -                  GUID, BM.getModuleIdentifier())) +          if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(GUID, BMID))              S->setLinkage(GlobalValue::WeakAnyLinkage);        }        // If the linker resolved the symbol to a local definition then mark it        // as local in the summary for the module we are adding.        if (R.FinalDefinitionInLinkageUnit) { -        if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( -                GUID, BM.getModuleIdentifier())) { +        if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(GUID, BMID)) {            S->setDSOLocal(true);          }        }      }    } -  if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second) +  if (!ThinLTO.ModuleMap.insert({BMID, BM}).second)      return make_error<StringError>(          "Expected at most one ThinLTO module per bitcode file",          inconvertibleErrorCode()); @@ -1143,10 +1139,10 @@ LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,      // This is a fuzzy name matching where only modules with name containing the      // specified switch values are going to be compiled.      for (const std::string &Name : Conf.ThinLTOModulesToCompile) { -      if (BM.getModuleIdentifier().contains(Name)) { -        ThinLTO.ModulesToCompile->insert({BM.getModuleIdentifier(), BM}); -        LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BM.getModuleIdentifier() -                          << " to compile\n"); +      if (BMID.contains(Name)) { +        ThinLTO.ModulesToCompile->insert({BMID, BM}); +        LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BMID << " to compile\n"); +        break;        }      }    } diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp index 71bd397..a3eaaa7 100644 --- a/llvm/lib/MC/GOFFObjectWriter.cpp +++ b/llvm/lib/MC/GOFFObjectWriter.cpp @@ -520,7 +520,7 @@ GOFFObjectWriter::GOFFObjectWriter(      std::unique_ptr<MCGOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)      : TargetObjectWriter(std::move(MOTW)), OS(OS) {} -GOFFObjectWriter::~GOFFObjectWriter() {} +GOFFObjectWriter::~GOFFObjectWriter() = default;  uint64_t GOFFObjectWriter::writeObject() {    uint64_t Size = GOFFWriter(OS, *Asm).writeObject(); diff --git a/llvm/lib/MC/MCDXContainerWriter.cpp b/llvm/lib/MC/MCDXContainerWriter.cpp index 5eda039..ebed411 100644 --- a/llvm/lib/MC/MCDXContainerWriter.cpp +++ b/llvm/lib/MC/MCDXContainerWriter.cpp @@ -16,7 +16,7 @@  using namespace llvm; -MCDXContainerTargetWriter::~MCDXContainerTargetWriter() {} +MCDXContainerTargetWriter::~MCDXContainerTargetWriter() = default;  uint64_t DXContainerObjectWriter::writeObject() {    auto &Asm = *this->Asm; diff --git a/llvm/lib/MC/MCGOFFStreamer.cpp b/llvm/lib/MC/MCGOFFStreamer.cpp index 8b228db..ad6397b 100644 --- a/llvm/lib/MC/MCGOFFStreamer.cpp +++ b/llvm/lib/MC/MCGOFFStreamer.cpp @@ -20,7 +20,7 @@  using namespace llvm; -MCGOFFStreamer::~MCGOFFStreamer() {} +MCGOFFStreamer::~MCGOFFStreamer() = default;  GOFFObjectWriter &MCGOFFStreamer::getWriter() {    return static_cast<GOFFObjectWriter &>(getAssembler().getWriter()); diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 1a3752f..911d92c 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -695,15 +695,15 @@ bool ELFAsmParser::parseDirectivePrevious(StringRef DirName, SMLoc) {  static MCSymbolAttr MCAttrForString(StringRef 
Type) {    return StringSwitch<MCSymbolAttr>(Type) -          .Cases("STT_FUNC", "function", MCSA_ELF_TypeFunction) -          .Cases("STT_OBJECT", "object", MCSA_ELF_TypeObject) -          .Cases("STT_TLS", "tls_object", MCSA_ELF_TypeTLS) -          .Cases("STT_COMMON", "common", MCSA_ELF_TypeCommon) -          .Cases("STT_NOTYPE", "notype", MCSA_ELF_TypeNoType) -          .Cases("STT_GNU_IFUNC", "gnu_indirect_function", -                 MCSA_ELF_TypeIndFunction) -          .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject) -          .Default(MCSA_Invalid); +      .Cases({"STT_FUNC", "function"}, MCSA_ELF_TypeFunction) +      .Cases({"STT_OBJECT", "object"}, MCSA_ELF_TypeObject) +      .Cases({"STT_TLS", "tls_object"}, MCSA_ELF_TypeTLS) +      .Cases({"STT_COMMON", "common"}, MCSA_ELF_TypeCommon) +      .Cases({"STT_NOTYPE", "notype"}, MCSA_ELF_TypeNoType) +      .Cases({"STT_GNU_IFUNC", "gnu_indirect_function"}, +             MCSA_ELF_TypeIndFunction) +      .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject) +      .Default(MCSA_Invalid);  }  /// parseDirectiveELFType diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 3462954..3a85770 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -5323,10 +5323,10 @@ void MasmParser::initializeDirectiveKindMap() {  bool MasmParser::isMacroLikeDirective() {    if (getLexer().is(AsmToken::Identifier)) {      bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier()) -                           .CasesLower("repeat", "rept", true) +                           .CasesLower({"repeat", "rept"}, true)                             .CaseLower("while", true) -                           .CasesLower("for", "irp", true) -                           .CasesLower("forc", "irpc", true) +                           .CasesLower({"for", "irp"}, true) +                           .CasesLower({"forc", "irpc"}, true)                             .Default(false);      if (IsMacroLike)        return true; diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.h b/llvm/lib/ObjCopy/COFF/COFFWriter.h index 66d7f01..3ee0e06 100644 --- a/llvm/lib/ObjCopy/COFF/COFFWriter.h +++ b/llvm/lib/ObjCopy/COFF/COFFWriter.h @@ -50,7 +50,7 @@ class COFFWriter {    Expected<uint32_t> virtualAddressToFileAddress(uint32_t RVA);  public: -  virtual ~COFFWriter() {} +  virtual ~COFFWriter() = default;    Error write();    COFFWriter(Object &Obj, raw_ostream &Out) diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h index 4f6473f..2783ef27 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.h +++ b/llvm/lib/ObjCopy/ELF/ELFObject.h @@ -134,7 +134,7 @@ private:    using Elf_Sym = typename ELFT::Sym;  public: -  ~ELFSectionWriter() override {} +  ~ELFSectionWriter() override = default;    Error visit(const SymbolTableSection &Sec) override;    Error visit(const RelocationSection &Sec) override;    Error visit(const GnuDebugLinkSection &Sec) override; @@ -180,7 +180,7 @@ public:  class BinarySectionWriter : public SectionWriter {  public: -  ~BinarySectionWriter() override {} +  ~BinarySectionWriter() override = default;    Error visit(const SymbolTableSection &Sec) override;    Error visit(const RelocationSection &Sec) override; @@ -346,7 +346,7 @@ private:    size_t totalSize() const;  public: -  ~ELFWriter() override {} +  ~ELFWriter() override = default;    bool WriteSectionHeaders;    // For --only-keep-debug, select an alternative section/segment layout @@ -367,7 +367,7 @@ private:    
uint64_t TotalSize = 0;  public: -  ~BinaryWriter() override {} +  ~BinaryWriter() override = default;    Error finalize() override;    Error write() override;    BinaryWriter(Object &Obj, raw_ostream &Out, const CommonConfig &Config) @@ -784,7 +784,7 @@ private:    SymbolTableSection *Symbols = nullptr;  public: -  ~SectionIndexSection() override {} +  ~SectionIndexSection() override = default;    void addIndex(uint32_t Index) {      assert(Size > 0);      Indexes.push_back(Index); diff --git a/llvm/lib/ObjCopy/MachO/MachOReader.h b/llvm/lib/ObjCopy/MachO/MachOReader.h index e315e6fd..940ba4c 100644 --- a/llvm/lib/ObjCopy/MachO/MachOReader.h +++ b/llvm/lib/ObjCopy/MachO/MachOReader.h @@ -23,7 +23,7 @@ namespace macho {  // raw binaries and regular MachO object files.  class Reader {  public: -  virtual ~Reader(){}; +  virtual ~Reader() = default;    virtual Expected<std::unique_ptr<Object>> create() const = 0;  }; diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h index 8620548..47639ad 100644 --- a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h +++ b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h @@ -20,7 +20,7 @@ namespace xcoff {  class XCOFFWriter {  public: -  virtual ~XCOFFWriter() {} +  virtual ~XCOFFWriter() = default;    XCOFFWriter(Object &Obj, raw_ostream &Out) : Obj(Obj), Out(Out) {}    Error write(); diff --git a/llvm/lib/Object/WindowsMachineFlag.cpp b/llvm/lib/Object/WindowsMachineFlag.cpp index caf357e8..14c14f6 100644 --- a/llvm/lib/Object/WindowsMachineFlag.cpp +++ b/llvm/lib/Object/WindowsMachineFlag.cpp @@ -23,8 +23,8 @@ using namespace llvm;  COFF::MachineTypes llvm::getMachineType(StringRef S) {    // Flags must be a superset of Microsoft lib.exe /machine flags.    return StringSwitch<COFF::MachineTypes>(S.lower()) -      .Cases("x64", "amd64", COFF::IMAGE_FILE_MACHINE_AMD64) -      .Cases("x86", "i386", COFF::IMAGE_FILE_MACHINE_I386) +      .Cases({"x64", "amd64"}, COFF::IMAGE_FILE_MACHINE_AMD64) +      .Cases({"x86", "i386"}, COFF::IMAGE_FILE_MACHINE_I386)        .Case("arm", COFF::IMAGE_FILE_MACHINE_ARMNT)        .Case("arm64", COFF::IMAGE_FILE_MACHINE_ARM64)        .Case("arm64ec", COFF::IMAGE_FILE_MACHINE_ARM64EC) diff --git a/llvm/lib/ObjectYAML/GOFFYAML.cpp b/llvm/lib/ObjectYAML/GOFFYAML.cpp index 60bc1f7..ecd7fb6 100644 --- a/llvm/lib/ObjectYAML/GOFFYAML.cpp +++ b/llvm/lib/ObjectYAML/GOFFYAML.cpp @@ -15,7 +15,7 @@  namespace llvm {  namespace GOFFYAML { -Object::Object() {} +Object::Object() = default;  } // namespace GOFFYAML diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index bd03ac0..3f41618 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -228,7 +228,7 @@ static cl::opt<bool> EnableLoopHeaderDuplication(  static cl::opt<bool>      EnableDFAJumpThreading("enable-dfa-jump-thread",                             cl::desc("Enable DFA jump threading"), -                           cl::init(false), cl::Hidden); +                           cl::init(true), cl::Hidden);  static cl::opt<bool>      EnableHotColdSplit("hot-cold-split", diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 7290a86..6b7e980 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -537,7 +537,7 @@ void IRChangedPrinter::handleAfter(StringRef PassID, std::string &Name,    Out << "*** IR Dump After " << PassID << " on " << Name << " ***\n" << After;  } 
-IRChangedTester::~IRChangedTester() {} +IRChangedTester::~IRChangedTester() = default;  void IRChangedTester::registerCallbacks(PassInstrumentationCallbacks &PIC) {    if (TestChanged != "") @@ -1566,7 +1566,7 @@ void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {      TextChangeReporter<IRDataT<EmptyData>>::registerRequiredCallbacks(PIC);  } -TimeProfilingPassesHandler::TimeProfilingPassesHandler() {} +TimeProfilingPassesHandler::TimeProfilingPassesHandler() = default;  void TimeProfilingPassesHandler::registerCallbacks(      PassInstrumentationCallbacks &PIC) { diff --git a/llvm/lib/Remarks/RemarkFormat.cpp b/llvm/lib/Remarks/RemarkFormat.cpp index 1c52e35..f9fd4af 100644 --- a/llvm/lib/Remarks/RemarkFormat.cpp +++ b/llvm/lib/Remarks/RemarkFormat.cpp @@ -19,7 +19,7 @@ using namespace llvm::remarks;  Expected<Format> llvm::remarks::parseFormat(StringRef FormatStr) {    auto Result = StringSwitch<Format>(FormatStr) -                    .Cases("", "yaml", Format::YAML) +                    .Cases({"", "yaml"}, Format::YAML)                      .Case("bitstream", Format::Bitstream)                      .Default(Format::Unknown); diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp index fb6ff62..6f5d072 100644 --- a/llvm/lib/SandboxIR/Context.cpp +++ b/llvm/lib/SandboxIR/Context.cpp @@ -637,7 +637,7 @@ Context::Context(LLVMContext &LLVMCtx)      : LLVMCtx(LLVMCtx), IRTracker(*this),        LLVMIRBuilder(LLVMCtx, ConstantFolder()) {} -Context::~Context() {} +Context::~Context() = default;  void Context::clear() {    // TODO: Ideally we should clear only function-scope objects, and keep global diff --git a/llvm/lib/Support/AArch64BuildAttributes.cpp b/llvm/lib/Support/AArch64BuildAttributes.cpp index 4a6b2fd..be4d1f1 100644 --- a/llvm/lib/Support/AArch64BuildAttributes.cpp +++ b/llvm/lib/Support/AArch64BuildAttributes.cpp @@ -67,8 +67,8 @@ StringRef AArch64BuildAttributes::getTypeStr(unsigned Type) {  }  SubsectionType AArch64BuildAttributes::getTypeID(StringRef Type) {    return StringSwitch<SubsectionType>(Type) -      .Cases("uleb128", "ULEB128", ULEB128) -      .Cases("ntbs", "NTBS", NTBS) +      .Cases({"uleb128", "ULEB128"}, ULEB128) +      .Cases({"ntbs", "NTBS"}, NTBS)        .Default(TYPE_NOT_FOUND);  }  StringRef AArch64BuildAttributes::getSubsectionTypeUnknownError() { diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index e21cf8e..e2645fa 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -269,12 +269,6 @@ bool APFloatBase::isRepresentableBy(const fltSemantics &A,           A.precision <= B.precision;  } -constexpr RoundingMode APFloatBase::rmNearestTiesToEven; -constexpr RoundingMode APFloatBase::rmTowardPositive; -constexpr RoundingMode APFloatBase::rmTowardNegative; -constexpr RoundingMode APFloatBase::rmTowardZero; -constexpr RoundingMode APFloatBase::rmNearestTiesToAway; -  /* A tight upper bound on number of parts required to hold the value     pow(5, power) is diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp index 1914f4c..d859abd 100644 --- a/llvm/lib/Support/BalancedPartitioning.cpp +++ b/llvm/lib/Support/BalancedPartitioning.cpp @@ -231,7 +231,7 @@ unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,    }    // Compute move gains -  typedef std::pair<float, BPFunctionNode *> GainPair; +  using GainPair = std::pair<float, BPFunctionNode *>;    std::vector<GainPair> Gains;    for (auto &N : Nodes) 
{      bool FromLeftToRight = (N.Bucket == LeftBucket); diff --git a/llvm/lib/Support/BranchProbability.cpp b/llvm/lib/Support/BranchProbability.cpp index ea42f34..143e58a 100644 --- a/llvm/lib/Support/BranchProbability.cpp +++ b/llvm/lib/Support/BranchProbability.cpp @@ -20,8 +20,6 @@  using namespace llvm; -constexpr uint32_t BranchProbability::D; -  raw_ostream &BranchProbability::print(raw_ostream &OS) const {    if (isUnknown())      return OS << "?%"; diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 9491ec0..dab8bee 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -382,7 +382,7 @@ public:      RegisteredSubCommands.erase(sub);    } -  iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator> +  iterator_range<SmallPtrSet<SubCommand *, 4>::iterator>    getRegisteredSubcommands() {      return make_range(RegisteredSubCommands.begin(),                        RegisteredSubCommands.end()); @@ -2343,10 +2343,10 @@ namespace {  class HelpPrinter {  protected:    const bool ShowHidden; -  typedef SmallVector<std::pair<const char *, Option *>, 128> -      StrOptionPairVector; -  typedef SmallVector<std::pair<const char *, SubCommand *>, 128> -      StrSubCommandPairVector; +  using StrOptionPairVector = +      SmallVector<std::pair<const char *, Option *>, 128>; +  using StrSubCommandPairVector = +      SmallVector<std::pair<const char *, SubCommand *>, 128>;    // Print the options. Opts is assumed to be alphabetically sorted.    virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) {      for (const auto &Opt : Opts) @@ -2830,7 +2830,7 @@ StringMap<Option *> &cl::getRegisteredOptions(SubCommand &Sub) {    return Sub.OptionsMap;  } -iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator> +iterator_range<SmallPtrSet<SubCommand *, 4>::iterator>  cl::getRegisteredSubcommands() {    return GlobalParser->getRegisteredSubcommands();  } diff --git a/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/llvm/lib/Support/DAGDeltaAlgorithm.cpp index 98153647..3bfae14 100644 --- a/llvm/lib/Support/DAGDeltaAlgorithm.cpp +++ b/llvm/lib/Support/DAGDeltaAlgorithm.cpp @@ -47,16 +47,16 @@ class DAGDeltaAlgorithmImpl {    friend class DeltaActiveSetHelper;  public: -  typedef DAGDeltaAlgorithm::change_ty change_ty; -  typedef DAGDeltaAlgorithm::changeset_ty changeset_ty; -  typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty; -  typedef DAGDeltaAlgorithm::edge_ty edge_ty; +  using change_ty = DAGDeltaAlgorithm::change_ty; +  using changeset_ty = DAGDeltaAlgorithm::changeset_ty; +  using changesetlist_ty = DAGDeltaAlgorithm::changesetlist_ty; +  using edge_ty = DAGDeltaAlgorithm::edge_ty;  private: -  typedef std::vector<change_ty>::iterator pred_iterator_ty; -  typedef std::vector<change_ty>::iterator succ_iterator_ty; -  typedef std::set<change_ty>::iterator pred_closure_iterator_ty; -  typedef std::set<change_ty>::iterator succ_closure_iterator_ty; +  using pred_iterator_ty = std::vector<change_ty>::iterator; +  using succ_iterator_ty = std::vector<change_ty>::iterator; +  using pred_closure_iterator_ty = std::set<change_ty>::iterator; +  using succ_closure_iterator_ty = std::set<change_ty>::iterator;    DAGDeltaAlgorithm &DDA; diff --git a/llvm/lib/Support/DynamicLibrary.cpp b/llvm/lib/Support/DynamicLibrary.cpp index f1c15c0..61566d3 100644 --- a/llvm/lib/Support/DynamicLibrary.cpp +++ b/llvm/lib/Support/DynamicLibrary.cpp @@ -23,7 +23,7 @@ using namespace llvm::sys;  // All methods for HandleSet should 
be used holding SymbolsMutex.  class DynamicLibrary::HandleSet { -  typedef std::vector<void *> HandleList; +  using HandleList = std::vector<void *>;    HandleList Handles;    void *Process = &Invalid; diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp index b6a2f8a..2e8fba8 100644 --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -17,11 +17,6 @@  using namespace llvm; -// MSVC emits references to this into the translation units which reference it. -#ifndef _MSC_VER -constexpr size_t StringRef::npos; -#endif -  // strncasecmp() is not available on non-POSIX systems, so define an  // alternative function here.  static int ascii_strncasecmp(StringRef LHS, StringRef RHS) { diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp index 9d45096..b08f508 100644 --- a/llvm/lib/Support/Timer.cpp +++ b/llvm/lib/Support/Timer.cpp @@ -207,7 +207,7 @@ void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {  namespace { -typedef StringMap<Timer> Name2TimerMap; +using Name2TimerMap = StringMap<Timer>;  class Name2PairMap {    StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map; diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp index 6f8e091..8f0d24e 100644 --- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp +++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp @@ -251,10 +251,10 @@ constexpr const char *const HangulSyllables[][3] = {  // Unicode 15.0  // 3.12 Conjoining Jamo Behavior Common constants -constexpr const char32_t SBase = 0xAC00; -constexpr const uint32_t LCount = 19; -constexpr const uint32_t VCount = 21; -constexpr const uint32_t TCount = 28; +constexpr char32_t SBase = 0xAC00; +constexpr uint32_t LCount = 19; +constexpr uint32_t VCount = 21; +constexpr uint32_t TCount = 28;  static std::size_t findSyllable(StringRef Name, bool Strict,                                  char &PreviousInName, int &Pos, int Column) { diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index 07b9989..d6f27fb 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -61,17 +61,6 @@  using namespace llvm; -constexpr raw_ostream::Colors raw_ostream::BLACK; -constexpr raw_ostream::Colors raw_ostream::RED; -constexpr raw_ostream::Colors raw_ostream::GREEN; -constexpr raw_ostream::Colors raw_ostream::YELLOW; -constexpr raw_ostream::Colors raw_ostream::BLUE; -constexpr raw_ostream::Colors raw_ostream::MAGENTA; -constexpr raw_ostream::Colors raw_ostream::CYAN; -constexpr raw_ostream::Colors raw_ostream::WHITE; -constexpr raw_ostream::Colors raw_ostream::SAVEDCOLOR; -constexpr raw_ostream::Colors raw_ostream::RESET; -  raw_ostream::~raw_ostream() {    // raw_ostream's subclasses should take care to flush the buffer    // in their destructors. 
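The deletions above (raw_ostream's color constants, StringRef::npos, APFloatBase's rounding modes, BranchProbability::D, LLT's bit-field infos, the ModuleSummaryIndex constants) all follow from the same C++17 rule: a constexpr static data member is implicitly inline, so the out-of-line definition that C++14 required for odr-use is redundant and deprecated. A minimal sketch of the rule (illustrative names, not from the patch):

    // Sketch: since C++17 a constexpr static data member is implicitly inline,
    // so taking its address (an odr-use) needs no separate definition.
    struct Table {
      static constexpr unsigned Size = 128; // implicitly inline in C++17
    };

    // C++14 additionally required, in exactly one .cpp file:
    //   constexpr unsigned Table::Size;
    // C++17 makes that out-of-line definition redundant (and deprecated).
    const unsigned *SizePtr = &Table::Size; // OK without the definition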
diff --git a/llvm/lib/Support/raw_socket_stream.cpp b/llvm/lib/Support/raw_socket_stream.cpp index 3b510d3..f716317 100644 --- a/llvm/lib/Support/raw_socket_stream.cpp +++ b/llvm/lib/Support/raw_socket_stream.cpp @@ -332,7 +332,7 @@ ListeningSocket::~ListeningSocket() {  raw_socket_stream::raw_socket_stream(int SocketFD)      : raw_fd_stream(SocketFD, true) {} -raw_socket_stream::~raw_socket_stream() {} +raw_socket_stream::~raw_socket_stream() = default;  Expected<std::unique_ptr<raw_socket_stream>>  raw_socket_stream::createConnectedUnix(StringRef SocketPath) { diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index 30eae6e..e8e6469 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -682,8 +682,10 @@ tgtok::TokKind TGLexer::LexExclaim() {            .Case("instances", tgtok::XInstances)            .Case("substr", tgtok::XSubstr)            .Case("find", tgtok::XFind) -          .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated. -          .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated. +          .Cases({"setdagop", "setop"}, +                 tgtok::XSetDagOp) // !setop is deprecated. +          .Cases({"getdagop", "getop"}, +                 tgtok::XGetDagOp) // !getop is deprecated.            .Case("setdagopname", tgtok::XSetDagOpName)            .Case("getdagopname", tgtok::XGetDagOpName)            .Case("getdagarg", tgtok::XGetDagArg) diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp index 1169f26..97298f9 100644 --- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp @@ -655,16 +655,10 @@ Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {    BasicBlock *BB = BasicBlock::Create(M->getContext(), "", GuestExit);    IRBuilder<> B(BB); -  // Load the global symbol as a pointer to the check function. -  Value *GuardFn; -  if (cfguard_module_flag == 2 && !F->hasFnAttribute("guard_nocf")) -    GuardFn = GuardFnCFGlobal; -  else -    GuardFn = GuardFnGlobal; -  LoadInst *GuardCheckLoad = B.CreateLoad(PtrTy, GuardFn); - -  // Create new call instruction. The CFGuard check should always be a call, -  // even if the original CallBase is an Invoke or CallBr instruction. +  // Create new call instruction. The call check should always be a call, +  // even if the original CallBase is an Invoke or CallBr instruction. +  // This is treated as a direct call, so do not use GuardFnCFGlobal. 
+  LoadInst *GuardCheckLoad = B.CreateLoad(PtrTy, GuardFnGlobal);    Function *Thunk = buildExitThunk(F->getFunctionType(), F->getAttributes());    CallInst *GuardCheck = B.CreateCall(        GuardFnType, GuardCheckLoad, {F, Thunk}); diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index 7e03b97..45b7120 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -370,6 +370,22 @@ SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {            {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};  } +SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations( +    SVEFrameSizes const &SVE) { +  StackOffset AfterZPRs = SVE.ZPR.LocalsSize; +  StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize; +  StackOffset AfterPPRs = {}; +  if (SVELayout == SVEStackLayout::Split) { +    BeforePPRs = SVE.PPR.CalleeSavesSize; +    // If there are no ZPR CSRs, place all local allocations after the ZPRs. +    if (SVE.ZPR.CalleeSavesSize) +      AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize; +    else +      AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals. +  } +  return {BeforePPRs, AfterPPRs, AfterZPRs}; +} +  struct SVEPartitions {    struct {      MachineBasicBlock::iterator Begin, End; @@ -687,16 +703,19 @@ void AArch64PrologueEmitter::emitPrologue() {    // All of the remaining stack allocations are for locals.    determineLocalsStackSize(NumBytes, PrologueSaveSize); +  auto [PPR, ZPR] = getSVEStackFrameSizes(); +  SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR}); +    MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;    if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { +    assert(!SVEAllocs.AfterPPRs && +           "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");      // If we're doing SVE saves first, we need to immediately allocate space      // for fixed objects, then space for the SVE callee saves.      //      // Windows unwind requires that the scalable size is a multiple of 16;      // that's handled when the callee-saved size is computed. -    auto SaveSize = -        StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) + -        StackOffset::getFixed(FixedObject); +    auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);      allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},                         /*FollowupAllocs=*/true);      NumBytes -= FixedObject; @@ -764,12 +783,11 @@ void AArch64PrologueEmitter::emitPrologue() {    if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))      emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding); -  auto [PPR, ZPR] = getSVEStackFrameSizes(); -  StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;    StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes); +  SVEAllocs.AfterZPRs += NonSVELocalsSize; +    StackOffset CFAOffset =        StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize; -    MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;    // Allocate space for the callee saves and PPR locals (if any).    
if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) { @@ -780,31 +798,23 @@ void AArch64PrologueEmitter::emitPrologue() {      if (EmitAsyncCFI)        emitCalleeSavedSVELocations(AfterSVESavesI); -    StackOffset AllocateBeforePPRs = SVECalleeSavesSize; -    StackOffset AllocateAfterPPRs = PPR.LocalsSize; -    if (SVELayout == SVEStackLayout::Split) { -      AllocateBeforePPRs = PPR.CalleeSavesSize; -      AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize; -    } -    allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs, +    allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,                         EmitAsyncCFI && !HasFP, CFAOffset, -                       MFI.hasVarSizedObjects() || AllocateAfterPPRs || -                           ZPR.LocalsSize || NonSVELocalsSize); -    CFAOffset += AllocateBeforePPRs; +                       MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs || +                           SVEAllocs.AfterZPRs); +    CFAOffset += SVEAllocs.BeforePPRs;      assert(PPRRange.End == ZPRRange.Begin &&             "Expected ZPR callee saves after PPR locals"); -    allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs, +    allocateStackSpace(PPRRange.End, RealignmentPadding, SVEAllocs.AfterPPRs,                         EmitAsyncCFI && !HasFP, CFAOffset, -                       MFI.hasVarSizedObjects() || ZPR.LocalsSize || -                           NonSVELocalsSize); -    CFAOffset += AllocateAfterPPRs; +                       MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs); +    CFAOffset += SVEAllocs.AfterPPRs;    } else {      assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord); -    // Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been -    // allocated (and separate PPR locals are not supported, all SVE locals, -    // both PPR and ZPR, are within the ZPR locals area). -    assert(!PPR.LocalsSize && "Unexpected PPR locals!"); -    CFAOffset += SVECalleeSavesSize; +    // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have +    // already been allocated. PPR locals (included in AfterPPRs) are not +    // supported (note: this is asserted above). +    CFAOffset += SVEAllocs.BeforePPRs;    }    // Allocate space for the rest of the frame including ZPR locals. Align the @@ -815,9 +825,9 @@ void AArch64PrologueEmitter::emitPrologue() {      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the      // correct value here, as NumBytes also includes padding bytes, which      // shouldn't be counted here. -    allocateStackSpace( -        AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize, -        EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects()); +    allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs, +                       EmitAsyncCFI && !HasFP, CFAOffset, +                       MFI.hasVarSizedObjects());    }    // If we need a base pointer, set it up here. It's whatever the value of the @@ -1472,27 +1482,26 @@ void AArch64EpilogueEmitter::emitEpilogue() {    assert(NumBytes >= 0 && "Negative stack allocation size!?");    StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; -  StackOffset SVEStackSize = -      SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize; +  SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});    MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin; -  MachineBasicBlock::iterator RestoreEnd = PPRRange.End;    // Deallocate the SVE area.    
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { -    StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize; +    assert(!SVEAllocs.AfterPPRs && +           "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");      // If the callee-save area is before FP, restoring the FP implicitly -    // deallocates non-callee-save SVE allocations.  Otherwise, deallocate them +    // deallocates non-callee-save SVE allocations. Otherwise, deallocate them      // explicitly.      if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {        emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP, -                      SVELocalsSize, TII, MachineInstr::FrameDestroy, false, -                      NeedsWinCFI, &HasWinCFI); +                      SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI);      }      // Deallocate callee-save SVE registers. -    emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, -                    SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false, -                    NeedsWinCFI, &HasWinCFI); +    emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, +                    SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy, +                    false, NeedsWinCFI, &HasWinCFI);    } else if (AFI->hasSVEStackSize()) {      // If we have stack realignment or variable-sized objects we must use the FP      // to restore SVE callee saves (as there is an unknown amount of @@ -1524,46 +1533,33 @@ void AArch64EpilogueEmitter::emitEpilogue() {        emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,                        -SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);      } else if (BaseForSVEDealloc == AArch64::SP) { -      auto CFAOffset = -          SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize); - -      if (SVECalleeSavesSize) { -        // Deallocate the non-SVE locals first before we can deallocate (and -        // restore callee saves) from the SVE area. -        auto NonSVELocals = StackOffset::getFixed(NumBytes); -        emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, -                        NonSVELocals, TII, MachineInstr::FrameDestroy, false, -                        NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset); -        CFAOffset -= NonSVELocals; -        NumBytes = 0; -      } - -      if (ZPR.LocalsSize) { -        emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, -                        ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false, -                        NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset); -        CFAOffset -= ZPR.LocalsSize; +      auto NonSVELocals = StackOffset::getFixed(NumBytes); +      auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) + +                       SVEAllocs.totalSize(); + +      if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) { +        // Deallocate non-SVE locals now. This is needed to reach the SVE callee +        // saves, but may also allow combining stack hazard bumps for split SVE. 
+        SVEAllocs.AfterZPRs += NonSVELocals; +        NumBytes -= NonSVELocals.getFixed();        } - -      StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize; -      if (SVELayout == SVEStackLayout::Split && -          (PPR.LocalsSize || ZPR.CalleeSavesSize)) { -        assert(PPRRange.Begin == ZPRRange.End && -               "Expected PPR restores after ZPR"); -        emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP, -                        PPR.LocalsSize + ZPR.CalleeSavesSize, TII, -                        MachineInstr::FrameDestroy, false, NeedsWinCFI, -                        &HasWinCFI, EmitCFI && !HasFP, CFAOffset); -        CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize; -        SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize; -      } - -      // If split SVE is on, this dealloc PPRs, otherwise, deallocs ZPRs + PPRs: -      if (SVECalleeSavesToDealloc) -        emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, -                        SVECalleeSavesToDealloc, TII, -                        MachineInstr::FrameDestroy, false, NeedsWinCFI, -                        &HasWinCFI, EmitCFI && !HasFP, CFAOffset); +      // To deallocate the SVE stack, adjust by the allocations in reverse. +      emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, +                      SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, +                      CFAOffset); +      CFAOffset -= SVEAllocs.AfterZPRs; +      assert(PPRRange.Begin == ZPRRange.End && +             "Expected PPR restores after ZPR"); +      emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP, +                      SVEAllocs.AfterPPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, +                      CFAOffset); +      CFAOffset -= SVEAllocs.AfterPPRs; +      emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, +                      SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy, +                      false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, +                      CFAOffset);     }      if (EmitCFI) diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h index bccadda..6e0e283 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h @@ -33,6 +33,11 @@ struct SVEFrameSizes {    } PPR, ZPR;  }; +struct SVEStackAllocations { +  StackOffset BeforePPRs, AfterPPRs, AfterZPRs; +  StackOffset totalSize() const { return BeforePPRs + AfterPPRs + AfterZPRs; } +}; +  class AArch64PrologueEpilogueCommon {  public:    AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB, @@ -66,6 +71,7 @@ protected:    bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;    SVEFrameSizes getSVEStackFrameSizes() const; +  SVEStackAllocations getSVEStackAllocations(SVEFrameSizes const &);    MachineFunction &MF;    MachineBasicBlock &MBB; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 47c1ac4..5b5565a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -308,9 +308,9 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,    return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;  }
-bool AArch64TTIImpl::areTypesABICompatible( -    const Function *Caller, const Function *Callee, -    const ArrayRef<Type *> &Types) const { +bool AArch64TTIImpl::areTypesABICompatible(const Function *Caller, +                                           const Function *Callee, +                                           ArrayRef<Type *> Types) const {    if (!BaseT::areTypesABICompatible(Caller, Callee, Types))      return false; @@ -2227,7 +2227,7 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,    return std::nullopt;  } -template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc> +template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>  static std::optional<Instruction *>  instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,                                    bool MergeIntoAddendOp) { @@ -6657,10 +6657,15 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(            Ops.push_back(&Ext->getOperandUse(0));          Ops.push_back(&Op); -        if (isa<SExtInst>(Ext)) +        if (isa<SExtInst>(Ext)) {            NumSExts++; -        else +        } else {            NumZExts++; +          // A zext(a) is also a sext(zext(a)), if we take more than 2 steps. +          if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 < +              I->getType()->getScalarSizeInBits()) +            NumSExts++; +        }          continue;        } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index fe2e849..b39546a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -84,7 +84,7 @@ public:                             const Function *Callee) const override;    bool areTypesABICompatible(const Function *Caller, const Function *Callee, -                             const ArrayRef<Type *> &Types) const override; +                             ArrayRef<Type *> Types) const override;    unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,                                  unsigned DefaultCallPenalty) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index cd8b249..67042b7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -69,7 +69,7 @@ FunctionPass *createAMDGPUPreloadKernArgPrologLegacyPass();  ModulePass *createAMDGPUPreloadKernelArgumentsLegacyPass(const TargetMachine *);  struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { -  AMDGPUSimplifyLibCallsPass() {} +  AMDGPUSimplifyLibCallsPass() = default;    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);  }; @@ -371,13 +371,13 @@ public:  class AMDGPUAnnotateUniformValuesPass      : public PassInfoMixin<AMDGPUAnnotateUniformValuesPass> {  public: -  AMDGPUAnnotateUniformValuesPass() {} +  AMDGPUAnnotateUniformValuesPass() = default;    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);  };  class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {  public: -  SIModeRegisterPass() {} +  SIModeRegisterPass() = default;    PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);  }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index 1064e57..dad94b8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -96,7 +96,7 @@ inline raw_ostream 
&operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {  }  struct KernArgPreloadDescriptor : public ArgDescriptor { -  KernArgPreloadDescriptor() {} +  KernArgPreloadDescriptor() = default;    SmallVector<MCRegister> Regs;  }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 9907c88f..8669978 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1555,7 +1555,7 @@ private:    AMDGPU::ClusterDimsAttr Attr; -  static constexpr const char AttrName[] = "amdgpu-cluster-dims"; +  static constexpr char AttrName[] = "amdgpu-cluster-dims";  };  AAAMDGPUClusterDims & diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h index cf2ab825..a3be0f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h @@ -48,7 +48,7 @@ private:    FuncInfoMap FIM;  public: -  AMDGPUPerfHintAnalysis() {} +  AMDGPUPerfHintAnalysis() = default;    // OldPM    bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 103cdec..1e5885a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -202,7 +202,7 @@ bool PredicateMapping::match(const MachineInstr &MI,    return true;  } -SetOfRulesForOpcode::SetOfRulesForOpcode() {} +SetOfRulesForOpcode::SetOfRulesForOpcode() = default;  SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)      : FastTypes(FastTypes) {} @@ -913,6 +913,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,    addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}}); +  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}}); +    addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)        .Uni(S64, {{Sgpr64}, {}}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index b28c50e..b87b54f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -816,7 +816,7 @@ parseAMDGPUAtomicOptimizerStrategy(StringRef Params) {    Params.consume_front("strategy=");    auto Result = StringSwitch<std::optional<ScanOptions>>(Params)                      .Case("dpp", ScanOptions::DPP) -                    .Cases("iterative", "", ScanOptions::Iterative) +                    .Cases({"iterative", ""}, ScanOptions::Iterative)                      .Case("none", ScanOptions::None)                      .Default(std::nullopt);    if (Result) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 733c5d5..fe81a5e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(    return NewRetBlock;  } +static BasicBlock * +createDummyReturnBlock(Function &F, +                       SmallVector<BasicBlock *, 4> &ReturningBlocks) { +  BasicBlock *DummyReturnBB = +      BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F); +  Type *RetTy = F.getReturnType(); +  Value *RetVal = RetTy->isVoidTy() ? 
nullptr : PoisonValue::get(RetTy); +  ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB); +  ReturningBlocks.push_back(DummyReturnBB); +  return DummyReturnBB; +} + +/// Handle conditional branch instructions (-> 2 targets) and callbr +/// instructions with N targets. +static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI, +                          BasicBlock *DummyReturnBB, +                          std::vector<DominatorTree::UpdateType> &Updates) { +  SmallVector<BasicBlock *, 2> Successors(successors(BB)); + +  // Create a new transition block to hold the conditional branch. +  BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock"); + +  Updates.reserve(Updates.size() + 2 * Successors.size() + 2); + +  // 'Successors' become successors of TransitionBB instead of BB, +  // and TransitionBB becomes a single successor of BB. +  Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB); +  for (BasicBlock *Successor : Successors) { +    Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor); +    Updates.emplace_back(DominatorTree::Delete, BB, Successor); +  } + +  // Create a branch that will always branch to the transition block and +  // references DummyReturnBB. +  BB->getTerminator()->eraseFromParent(); +  BranchInst::Create(TransitionBB, DummyReturnBB, +                     ConstantInt::getTrue(F.getContext()), BB); +  Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); +} +  bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,                                              const PostDominatorTree &PDT,                                              const UniformityInfo &UA) { -  assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator."); -    if (PDT.root_size() == 0 ||        (PDT.root_size() == 1 && -       !isa<BranchInst>(PDT.getRoot()->getTerminator()))) +       !isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))      return false;    // Loop over all of the blocks in a function, tracking all of the blocks that @@ -222,46 +260,28 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,        if (HasDivergentExitBlock)          UnreachableBlocks.push_back(BB);      } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { - -      ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext()); -      if (DummyReturnBB == nullptr) { -        DummyReturnBB = BasicBlock::Create(F.getContext(), -                                           "DummyReturnBlock", &F); -        Type *RetTy = F.getReturnType(); -        Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy); -        ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB); -        ReturningBlocks.push_back(DummyReturnBB); -      } +      if (!DummyReturnBB) +        DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);        if (BI->isUnconditional()) {          BasicBlock *LoopHeaderBB = BI->getSuccessor(0);          BI->eraseFromParent(); // Delete the unconditional branch.          // Add a new conditional branch with a dummy edge to the return block. -        BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB); -        Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); -      } else { // Conditional branch. -        SmallVector<BasicBlock *, 2> Successors(successors(BB)); - -        // Create a new transition block to hold the conditional branch. 
-        BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock"); - -        Updates.reserve(Updates.size() + 2 * Successors.size() + 2); - -        // 'Successors' become successors of TransitionBB instead of BB, -        // and TransitionBB becomes a single successor of BB. -        Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB); -        for (BasicBlock *Successor : Successors) { -          Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor); -          Updates.emplace_back(DominatorTree::Delete, BB, Successor); -        } - -        // Create a branch that will always branch to the transition block and -        // references DummyReturnBB. -        BB->getTerminator()->eraseFromParent(); -        BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB); +        BranchInst::Create(LoopHeaderBB, DummyReturnBB, +                           ConstantInt::getTrue(F.getContext()), BB);          Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); +      } else { +        handleNBranch(F, BB, BI, DummyReturnBB, Updates);        }        Changed = true; +    } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) { +      if (!DummyReturnBB) +        DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks); + +      handleNBranch(F, BB, CBI, DummyReturnBB, Updates); +      Changed = true; +    } else { +      llvm_unreachable("unsupported block terminator");      }    } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp index 61c5dcd..ded2f5a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp @@ -54,7 +54,7 @@ public:    bool CullSGPRHazardsAtMemWait;    unsigned CullSGPRHazardsMemWaitThreshold; -  AMDGPUWaitSGPRHazards() {} +  AMDGPUWaitSGPRHazards() = default;    // Return the numeric ID 0-127 for a given SGPR.    static std::optional<unsigned> sgprNumber(Register Reg, diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 975781f..f357981 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -183,7 +183,7 @@ class ScheduleMetrics {    unsigned BubbleCycles;  public: -  ScheduleMetrics() {} +  ScheduleMetrics() = default;    ScheduleMetrics(unsigned L, unsigned BC)        : ScheduleLength(L), BubbleCycles(BC) {}    unsigned getLength() const { return ScheduleLength; } @@ -217,7 +217,7 @@ class RegionPressureMap {    bool IsLiveOut;  public: -  RegionPressureMap() {} +  RegionPressureMap() = default;    RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)        : DAG(GCNDAG), IsLiveOut(LiveOut) {}    // Build the Instr->LiveReg and RegionIdx->Instr maps diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d9f76c9..45f5919 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6153,7 +6153,7 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,    // information.    
if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&        MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) { -    constexpr const AMDGPU::OpName OpNames[] = { +    constexpr AMDGPU::OpName OpNames[] = {          AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};      for (auto [I, OpName] : enumerate(OpNames)) { @@ -6215,8 +6215,8 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,  bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(      const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,      const MachineOperand *MO) const { -  constexpr const unsigned NumOps = 3; -  constexpr const AMDGPU::OpName OpNames[NumOps * 2] = { +  constexpr unsigned NumOps = 3; +  constexpr AMDGPU::OpName OpNames[NumOps * 2] = {        AMDGPU::OpName::src0,           AMDGPU::OpName::src1,        AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,        AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers}; diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp index 77dc4a7..b2a8204 100644 --- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp @@ -88,6 +88,16 @@ bool BPFAsmPrinter::doFinalization(Module &M) {      }    } +  for (GlobalObject &GO : M.global_objects()) { +    if (!GO.hasExternalWeakLinkage()) +      continue; + +    if (!SawTrapCall && GO.getName() == BPF_TRAP) { +      GO.eraseFromParent(); +      break; +    } +  } +    return AsmPrinter::doFinalization(M);  } @@ -160,6 +170,20 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,  }  void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) { +  if (MI->isCall()) { +    for (const MachineOperand &Op : MI->operands()) { +      if (Op.isGlobal()) { +        if (const GlobalValue *GV = Op.getGlobal()) +          if (GV->getName() == BPF_TRAP) +            SawTrapCall = true; +      } else if (Op.isSymbol()) { +        if (const MCSymbol *Sym = Op.getMCSymbol()) +          if (Sym->getName() == BPF_TRAP) +            SawTrapCall = true; +      } +    } +  } +    BPF_MC::verifyInstructionPredicates(MI->getOpcode(),                                        getSubtargetInfo().getFeatureBits()); diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.h b/llvm/lib/Target/BPF/BPFAsmPrinter.h index 90ef207..75a1d7e 100644 --- a/llvm/lib/Target/BPF/BPFAsmPrinter.h +++ b/llvm/lib/Target/BPF/BPFAsmPrinter.h @@ -39,6 +39,7 @@ public:  private:    BTFDebug *BTF;    TargetMachine &TM; +  bool SawTrapCall = false;    const BPFTargetMachine &getBTM() const;  }; diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp index ab4ee55..08f196b 100644 --- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp +++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp @@ -884,13 +884,13 @@ CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,                                 .Case("{t4}", CSKY::R20)                                 .Case("{t5}", CSKY::R21)                                 .Case("{t6}", CSKY::R22) -                               .Cases("{t7}", "{fp}", CSKY::R23) -                               .Cases("{t8}", "{top}", CSKY::R24) -                               .Cases("{t9}", "{bsp}", CSKY::R25) +                               .Cases({"{t7}", "{fp}"}, CSKY::R23) +                               .Cases({"{t8}", "{top}"}, CSKY::R24) +                               .Cases({"{t9}", "{bsp}"}, CSKY::R25)                                 
.Case("{r26}", CSKY::R26)                                 .Case("{r27}", CSKY::R27)                                 .Cases({"{gb}", "{rgb}", "{rdb}"}, CSKY::R28) -                               .Cases("{tb}", "{rtb}", CSKY::R29) +                               .Cases({"{tb}", "{rtb}"}, CSKY::R29)                                 .Case("{svbr}", CSKY::R30)                                 .Case("{tls}", CSKY::R31)                                 .Default(CSKY::NoRegister); @@ -907,38 +907,38 @@ CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,    // use the ABI names in register constraint lists.    if (Subtarget.useHardFloat()) {      unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) -                        .Cases("{fr0}", "{vr0}", CSKY::F0_32) -                        .Cases("{fr1}", "{vr1}", CSKY::F1_32) -                        .Cases("{fr2}", "{vr2}", CSKY::F2_32) -                        .Cases("{fr3}", "{vr3}", CSKY::F3_32) -                        .Cases("{fr4}", "{vr4}", CSKY::F4_32) -                        .Cases("{fr5}", "{vr5}", CSKY::F5_32) -                        .Cases("{fr6}", "{vr6}", CSKY::F6_32) -                        .Cases("{fr7}", "{vr7}", CSKY::F7_32) -                        .Cases("{fr8}", "{vr8}", CSKY::F8_32) -                        .Cases("{fr9}", "{vr9}", CSKY::F9_32) -                        .Cases("{fr10}", "{vr10}", CSKY::F10_32) -                        .Cases("{fr11}", "{vr11}", CSKY::F11_32) -                        .Cases("{fr12}", "{vr12}", CSKY::F12_32) -                        .Cases("{fr13}", "{vr13}", CSKY::F13_32) -                        .Cases("{fr14}", "{vr14}", CSKY::F14_32) -                        .Cases("{fr15}", "{vr15}", CSKY::F15_32) -                        .Cases("{fr16}", "{vr16}", CSKY::F16_32) -                        .Cases("{fr17}", "{vr17}", CSKY::F17_32) -                        .Cases("{fr18}", "{vr18}", CSKY::F18_32) -                        .Cases("{fr19}", "{vr19}", CSKY::F19_32) -                        .Cases("{fr20}", "{vr20}", CSKY::F20_32) -                        .Cases("{fr21}", "{vr21}", CSKY::F21_32) -                        .Cases("{fr22}", "{vr22}", CSKY::F22_32) -                        .Cases("{fr23}", "{vr23}", CSKY::F23_32) -                        .Cases("{fr24}", "{vr24}", CSKY::F24_32) -                        .Cases("{fr25}", "{vr25}", CSKY::F25_32) -                        .Cases("{fr26}", "{vr26}", CSKY::F26_32) -                        .Cases("{fr27}", "{vr27}", CSKY::F27_32) -                        .Cases("{fr28}", "{vr28}", CSKY::F28_32) -                        .Cases("{fr29}", "{vr29}", CSKY::F29_32) -                        .Cases("{fr30}", "{vr30}", CSKY::F30_32) -                        .Cases("{fr31}", "{vr31}", CSKY::F31_32) +                        .Cases({"{fr0}", "{vr0}"}, CSKY::F0_32) +                        .Cases({"{fr1}", "{vr1}"}, CSKY::F1_32) +                        .Cases({"{fr2}", "{vr2}"}, CSKY::F2_32) +                        .Cases({"{fr3}", "{vr3}"}, CSKY::F3_32) +                        .Cases({"{fr4}", "{vr4}"}, CSKY::F4_32) +                        .Cases({"{fr5}", "{vr5}"}, CSKY::F5_32) +                        .Cases({"{fr6}", "{vr6}"}, CSKY::F6_32) +                        .Cases({"{fr7}", "{vr7}"}, CSKY::F7_32) +                        .Cases({"{fr8}", "{vr8}"}, CSKY::F8_32) +                        .Cases({"{fr9}", "{vr9}"}, CSKY::F9_32) +                        .Cases({"{fr10}", "{vr10}"}, CSKY::F10_32) +                        
.Cases({"{fr11}", "{vr11}"}, CSKY::F11_32) +                        .Cases({"{fr12}", "{vr12}"}, CSKY::F12_32) +                        .Cases({"{fr13}", "{vr13}"}, CSKY::F13_32) +                        .Cases({"{fr14}", "{vr14}"}, CSKY::F14_32) +                        .Cases({"{fr15}", "{vr15}"}, CSKY::F15_32) +                        .Cases({"{fr16}", "{vr16}"}, CSKY::F16_32) +                        .Cases({"{fr17}", "{vr17}"}, CSKY::F17_32) +                        .Cases({"{fr18}", "{vr18}"}, CSKY::F18_32) +                        .Cases({"{fr19}", "{vr19}"}, CSKY::F19_32) +                        .Cases({"{fr20}", "{vr20}"}, CSKY::F20_32) +                        .Cases({"{fr21}", "{vr21}"}, CSKY::F21_32) +                        .Cases({"{fr22}", "{vr22}"}, CSKY::F22_32) +                        .Cases({"{fr23}", "{vr23}"}, CSKY::F23_32) +                        .Cases({"{fr24}", "{vr24}"}, CSKY::F24_32) +                        .Cases({"{fr25}", "{vr25}"}, CSKY::F25_32) +                        .Cases({"{fr26}", "{vr26}"}, CSKY::F26_32) +                        .Cases({"{fr27}", "{vr27}"}, CSKY::F27_32) +                        .Cases({"{fr28}", "{vr28}"}, CSKY::F28_32) +                        .Cases({"{fr29}", "{vr29}"}, CSKY::F29_32) +                        .Cases({"{fr30}", "{vr30}"}, CSKY::F30_32) +                        .Cases({"{fr31}", "{vr31}"}, CSKY::F31_32)                          .Default(CSKY::NoRegister);      if (FReg != CSKY::NoRegister) {        assert(CSKY::F0_32 <= FReg && FReg <= CSKY::F31_32 && "Unknown fp-reg"); diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index 8ace2d2..eb4c884 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -194,9 +194,10 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {          dxbc::PSV::v2::ResourceBindInfo BindInfo;          BindInfo.Type = Type;          BindInfo.LowerBound = Binding.LowerBound; -        assert(Binding.Size == UINT32_MAX || -               (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX && -                   "Resource range is too large"); +        assert( +            (Binding.Size == UINT32_MAX || +             (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX) && +            "Resource range is too large");          BindInfo.UpperBound = (Binding.Size == UINT32_MAX)                                    ? 
UINT32_MAX                                    : Binding.LowerBound + Binding.Size - 1; diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td index f4e36fa7..e661c94 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td @@ -26,6 +26,7 @@ def tc_20a4bbec : InstrItinClass;  def tc_227864f7 : InstrItinClass;  def tc_257f6f7c : InstrItinClass;  def tc_26a377fe : InstrItinClass; +def tc_2a698a03 : InstrItinClass;  def tc_2b4c548e : InstrItinClass;  def tc_2c745bb8 : InstrItinClass;  def tc_2d4051cd : InstrItinClass; @@ -52,6 +53,7 @@ def tc_561aaa58 : InstrItinClass;  def tc_56c4f9fe : InstrItinClass;  def tc_56e64202 : InstrItinClass;  def tc_58d21193 : InstrItinClass; +def tc_57a4709c : InstrItinClass;  def tc_5bf8afbb : InstrItinClass;  def tc_5cdf8c84 : InstrItinClass;  def tc_61bf7c03 : InstrItinClass; @@ -220,6 +222,11 @@ class DepHVXItinV55 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -356,6 +363,11 @@ class DepHVXItinV55 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -812,6 +824,11 @@ class DepHVXItinV60 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -948,6 +965,11 @@ class DepHVXItinV60 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -1404,6 +1426,11 @@ class DepHVXItinV62 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -1540,6 +1567,11 @@ class DepHVXItinV62 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], 
[7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -1996,6 +2028,11 @@ class DepHVXItinV65 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -2132,6 +2169,11 @@ class DepHVXItinV65 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -2588,6 +2630,11 @@ class DepHVXItinV66 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -2724,6 +2771,11 @@ class DepHVXItinV66 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -3180,6 +3232,11 @@ class DepHVXItinV67 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -3316,6 +3373,11 @@ class DepHVXItinV67 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 
2], @@ -3772,6 +3834,11 @@ class DepHVXItinV68 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -3908,6 +3975,11 @@ class DepHVXItinV68 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -4364,6 +4436,11 @@ class DepHVXItinV69 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -4500,6 +4577,11 @@ class DepHVXItinV69 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -4956,6 +5038,11 @@ class DepHVXItinV71 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -5092,6 +5179,11 @@ class DepHVXItinV71 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -5548,6 +5640,11 @@ class DepHVXItinV73 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         
InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -5684,6 +5781,11 @@ class DepHVXItinV73 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -6140,6 +6242,11 @@ class DepHVXItinV75 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -6276,6 +6383,11 @@ class DepHVXItinV75 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -6732,6 +6844,11 @@ class DepHVXItinV79 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -6868,6 +6985,11 @@ class DepHVXItinV79 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], @@ -7324,6 +7446,11 @@ class DepHVXItinV81 {         InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],        [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, +    InstrItinData <tc_2a698a03, /*SLOT0123,VSorVP*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5], +      [HVX_FWD, HVX_FWD]>, +      InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/        [InstrStage<1, [SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], @@ -7460,6 +7587,11 @@ class DepHVXItinV81 {         InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],        [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, +    InstrItinData <tc_57a4709c, /*SLOT0123,VA*/ +      [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, +       InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 2], +      [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, +      
InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/        [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,         InstrStage<1, [CVI_XLANE]>], [9, 2], diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td index f8f1c2a..b188134 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -29939,6 +29939,58 @@ let opNewValue = 0;  let isCVI = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vabs_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vabs($Vu32.hf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_qf16_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vabs($Vu32.qf16)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_qf32_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vabs($Vu32.qf32)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vabs($Vu32.sf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vabs_sf : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32), @@ -31302,6 +31354,21 @@ let isPseudo = 1;  let isCodeGenOnly = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_valign4 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), +"$Vd32 = valign4($Vu32,$Vv32,$Rt8)", +tc_57a4709c, TypeCVI_VA>, Enc_a30110, Requires<[UseHVXV81]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b0; +let Inst{31-24} = 0b00011000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_valignb : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), @@ -32583,6 +32650,32 @@ let isCVI = 1;  let hasHvxTmp = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vconv_bf_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32), +"$Vd32.bf = $Vuu32.qf32", +tc_2a698a03, TypeCVI_VS>, Enc_a33d04, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_f8_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.f8 = $Vu32.qf16", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vconv_h_hf : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32), @@ -32596,6 
+32689,19 @@ let opNewValue = 0;  let isCVI = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vconv_h_hf_rnd : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.h = $Vu32.hf:rnd", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vconv_hf_h : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32), @@ -32635,6 +32741,71 @@ let opNewValue = 0;  let isCVI = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vconv_qf16_f8 : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32), +"$Vdd32.qf16 = $Vu32.f8", +tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = $Vu32.hf", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf16_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = $Vu32.qf16", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf32_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = $Vu32.qf32", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = $Vu32.sf", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vconv_sf_qf32 : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32), @@ -33720,6 +33891,122 @@ let isHVXALU2SRC = 1;  let DecoderNamespace = "EXT_mmvec";  let Constraints = "$Qx4 = $Qx4in";  } +def V6_veqhf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000111; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_veqhf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqhf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), 
+"$Qx4 |= vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b010111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqhf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.eq($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b100111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqsf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000011; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_veqsf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b000011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqsf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 |= vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b010011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_veqsf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.eq($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-2} = 0b100011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +}  def V6_veqw : HInst<  (outs HvxQR:$Qd4),  (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -34538,6 +34825,58 @@ let Inst{31-24} = 0b00011110;  let isCVI = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vilog2_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.hf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vilog2_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.qf16)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vilog2_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.qf32)", +tc_2a698a03, TypeCVI_VS>, 
Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vilog2_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vilog2($Vu32.sf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vinsertwr : HInst<  (outs HvxVR:$Vx32),  (ins HvxVR:$Vx32in, IntRegs:$Rt32), @@ -37170,6 +37509,58 @@ let isCVI = 1;  let isHVXALU = 1;  let DecoderNamespace = "EXT_mmvec";  } +def V6_vneg_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vneg($Vu32.hf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vneg_qf16_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf16 = vneg($Vu32.qf16)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vneg_qf32_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vneg($Vu32.qf32)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vneg_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.qf32 = vneg($Vu32.sf)", +tc_2a698a03, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000001110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +}  def V6_vnormamth : HInst<  (outs HvxVR:$Vd32),  (ins HvxVR:$Vu32), diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td index 23f4b3a..c11483b 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -3830,6 +3830,122 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2),  // V81 HVX Instructions. 
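The mappings that follow have a fixed shape: each new V81 intrinsic comes in a 64-byte and a 128-byte (_128B) flavour, and both forms select the same instruction, differing only in the UseHVX64B/UseHVX128B predicate. As a rough sketch of how one of these intrinsics is reached from user code (the builtin name is assumed here from the usual auto-generated __builtin_HEXAGON_<intrinsic> scheme and is not taken from this patch):

// Sketch only: assumes hexagon_types.h and the auto-generated builtin name;
// build with something like -mv81 -mhvx -mhvx-length=128B.
#include <hexagon_types.h>

HVX_Vector abs_qf16_from_hf(HVX_Vector V) {
  // Emits int_hexagon_V6_vabs_qf16_hf_128B, which the first pattern below
  // maps to the single V6_vabs_qf16_hf instruction defined earlier.
  return __builtin_HEXAGON_V6_vabs_qf16_hf_128B(V);
}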
+def: Pat<(int_hexagon_V6_vabs_qf16_hf HvxVR:$src1), +         (V6_vabs_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf16_hf_128B HvxVR:$src1), +         (V6_vabs_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf16_qf16 HvxVR:$src1), +         (V6_vabs_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf16_qf16_128B HvxVR:$src1), +         (V6_vabs_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_qf32 HvxVR:$src1), +         (V6_vabs_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_qf32_128B HvxVR:$src1), +         (V6_vabs_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_sf HvxVR:$src1), +         (V6_vabs_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vabs_qf32_sf_128B HvxVR:$src1), +         (V6_vabs_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), +         (V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[UseHVXV81, UseHVX64B]>; +def: Pat<(int_hexagon_V6_valign4_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), +         (V6_valign4 HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[UseHVXV81, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_bf_qf32 HvxWR:$src1), +         (V6_vconv_bf_qf32 HvxWR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_bf_qf32_128B HvxWR:$src1), +         (V6_vconv_bf_qf32 HvxWR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_f8_qf16 HvxVR:$src1), +         (V6_vconv_f8_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_f8_qf16_128B HvxVR:$src1), +         (V6_vconv_f8_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_h_hf_rnd HvxVR:$src1), +         (V6_vconv_h_hf_rnd HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_h_hf_rnd_128B HvxVR:$src1), +         (V6_vconv_h_hf_rnd HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_qf16_f8 HvxVR:$src1), +         (V6_vconv_qf16_f8 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_f8_128B HvxVR:$src1), +         (V6_vconv_qf16_f8 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_hf HvxVR:$src1), +         (V6_vconv_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_hf_128B HvxVR:$src1), +         (V6_vconv_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_qf16 HvxVR:$src1), +         (V6_vconv_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf16_qf16_128B HvxVR:$src1), +         (V6_vconv_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_qf32 HvxVR:$src1), +         (V6_vconv_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_qf32_128B HvxVR:$src1), +         
(V6_vconv_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_sf HvxVR:$src1), +         (V6_vconv_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_qf32_sf_128B HvxVR:$src1), +         (V6_vconv_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf HvxVR:$src1, HvxVR:$src2), +         (V6_veqhf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_128B HvxVR:$src1, HvxVR:$src2), +         (V6_veqhf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqhf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqhf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf HvxVR:$src1, HvxVR:$src2), +         (V6_veqsf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_128B HvxVR:$src1, HvxVR:$src2), +         (V6_veqsf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_veqsf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), +         (V6_veqsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_hf HvxVR:$src1), +         (V6_vilog2_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_hf_128B HvxVR:$src1), +         (V6_vilog2_hf HvxVR:$src1)>, 
Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf16 HvxVR:$src1), +         (V6_vilog2_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf16_128B HvxVR:$src1), +         (V6_vilog2_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf32 HvxVR:$src1), +         (V6_vilog2_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_qf32_128B HvxVR:$src1), +         (V6_vilog2_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_sf HvxVR:$src1), +         (V6_vilog2_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vilog2_sf_128B HvxVR:$src1), +         (V6_vilog2_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_hf HvxVR:$src1), +         (V6_vneg_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_hf_128B HvxVR:$src1), +         (V6_vneg_qf16_hf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_qf16 HvxVR:$src1), +         (V6_vneg_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf16_qf16_128B HvxVR:$src1), +         (V6_vneg_qf16_qf16 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_qf32 HvxVR:$src1), +         (V6_vneg_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_qf32_128B HvxVR:$src1), +         (V6_vneg_qf32_qf32 HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_sf HvxVR:$src1), +         (V6_vneg_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vneg_qf32_sf_128B HvxVR:$src1), +         (V6_vneg_qf32_sf HvxVR:$src1)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;  def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2),           (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;  def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2), diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 7ee280d..eadf020 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1815,7 +1815,7 @@ struct WeightedLeaf {    int Weight;    int InsertionOrder; -  WeightedLeaf() {} +  WeightedLeaf() = default;    WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :      Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 904aabed..fe700e1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -375,6 +375,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,        setOperationAction(ISD::FFLOOR, VT, Legal);        setOperationAction(ISD::FTRUNC, VT, Legal);        setOperationAction(ISD::FROUNDEVEN, VT, Legal); +      setOperationAction(ISD::FMINNUM, VT, Legal); +      setOperationAction(ISD::FMAXNUM, VT, Legal);      }      setOperationAction(ISD::CTPOP, GRLenVT, Legal);      
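Making ISD::FMINNUM/ISD::FMAXNUM legal for the LSX/LASX vector types lets llvm.minnum/llvm.maxnum select directly to vfmin/vfmax (and xvfmin/xvfmax) through the PatVrVrF/PatXrXrF patterns added below. For intuition, a scalar reference of the minnum semantics these nodes carry per the LangRef (a sketch, not the lowering itself):

#include <cmath>

// IEEE-754-2008 minNum: a quiet NaN loses to a number; if both operands are
// NaN, the result is NaN. The order of +0.0 and -0.0 is unspecified.
float minnum_ref(float A, float B) {
  if (std::isnan(A)) return B;
  if (std::isnan(B)) return A;
  return A < B ? A : B;
}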
setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal); @@ -461,6 +463,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,        setOperationAction(ISD::FFLOOR, VT, Legal);        setOperationAction(ISD::FTRUNC, VT, Legal);        setOperationAction(ISD::FROUNDEVEN, VT, Legal); +      setOperationAction(ISD::FMINNUM, VT, Legal); +      setOperationAction(ISD::FMAXNUM, VT, Legal);      }    } diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 610ba05..b502b056 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1558,6 +1558,10 @@ defm : PatXrXrF<fmul, "XVFMUL">;  // XVFDIV_{S/D}  defm : PatXrXrF<fdiv, "XVFDIV">; +// XVFMAX_{S/D}, XVFMIN_{S/D} +defm : PatXrXrF<fmaxnum, "XVFMAX">; +defm : PatXrXrF<fminnum, "XVFMIN">; +  // XVFMADD_{S/D}  def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),            (XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 6470842..6b74a4b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1760,6 +1760,10 @@ defm : PatVrVrF<fmul, "VFMUL">;  // VFDIV_{S/D}  defm : PatVrVrF<fdiv, "VFDIV">; +// VFMAX_{S/D}, VFMIN_{S/D} +defm : PatVrVrF<fmaxnum, "VFMAX">; +defm : PatVrVrF<fminnum, "VFMIN">; +  // VFMADD_{S/D}  def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),            (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp index 7d54565..6d69af5 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp @@ -39,7 +39,7 @@ LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)      : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,                                /*HasRelocationAddend=*/true) {} -LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {} +LoongArchELFObjectWriter::~LoongArchELFObjectWriter() = default;  unsigned LoongArchELFObjectWriter::getRelocType(const MCFixup &Fixup,                                                  const MCValue &Target, diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index f0e2bc4..08fa51d 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -38,7 +38,7 @@ public:    LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)        : Ctx(ctx), MCII(MCII) {} -  ~LoongArchMCCodeEmitter() override {} +  ~LoongArchMCCodeEmitter() override = default;    void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,                           SmallVectorImpl<MCFixup> &Fixups, diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp index e37f3a66..fb5cd5c2 100644 --- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp +++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp @@ -690,9 +690,9 @@ bool M68kAsmParser::parseRegisterName(MCRegister &RegNo, SMLoc Loc,      } else {        // Floating point control register.        
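This hunk, like the Mips, PowerPC, RISC-V, and X86 parser changes further down, is one mechanical migration: the variadic StringSwitch::Cases("a", "b", V) form becomes the initializer-list overload Cases({"a", "b"}, V). A minimal standalone sketch of the new form, modeled on the M68k switch that follows:

#include "llvm/ADT/StringSwitch.h"

enum FPCtrlReg { FPC, FPS, FPIAR, NoFPCtrlReg };

// Several spellings map to one value via a single braced Cases entry.
static FPCtrlReg classifyFPCtrlReg(llvm::StringRef Name) {
  return llvm::StringSwitch<FPCtrlReg>(Name)
      .Cases({"fpc", "fpcr"}, FPC)
      .Cases({"fps", "fpsr"}, FPS)
      .Cases({"fpi", "fpiar"}, FPIAR)
      .Default(NoFPCtrlReg);
}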
RegNo = StringSwitch<unsigned>(RegisterNameLower) -                  .Cases("fpc", "fpcr", M68k::FPC) -                  .Cases("fps", "fpsr", M68k::FPS) -                  .Cases("fpi", "fpiar", M68k::FPIAR) +                  .Cases({"fpc", "fpcr"}, M68k::FPC) +                  .Cases({"fps", "fpsr"}, M68k::FPS) +                  .Cases({"fpi", "fpiar"}, M68k::FPIAR)                    .Default(M68k::NoRegister);        assert(RegNo != M68k::NoRegister &&               "Unrecognized FP control register name"); diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 97379d7..f588e56 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -6176,7 +6176,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {    CC = StringSwitch<unsigned>(Name)             .Case("zero", 0) -           .Cases("at", "AT", 1) +           .Cases({"at", "AT"}, 1)             .Case("a0", 4)             .Case("a1", 5)             .Case("a2", 6) diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h index caef8fe7..b832b82 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h +++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h @@ -20,7 +20,7 @@ class MemoryLocation;  class NVPTXAAResult : public AAResultBase {  public: -  NVPTXAAResult() {} +  NVPTXAAResult() = default;    NVPTXAAResult(NVPTXAAResult &&Arg) : AAResultBase(std::move(Arg)) {}    /// Handle invalidation events from the new pass manager. diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index c667a09..996d653 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1836,7 +1836,7 @@ bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {    return true;  } -NVPTXScopes::NVPTXScopes(LLVMContext &C) { +NVPTXScopes::NVPTXScopes(LLVMContext &C) : Context(&C) {    Scopes[C.getOrInsertSyncScopeID("singlethread")] = NVPTX::Scope::Thread;    Scopes[C.getOrInsertSyncScopeID("")] = NVPTX::Scope::System;    Scopes[C.getOrInsertSyncScopeID("block")] = NVPTX::Scope::Block; @@ -1851,11 +1851,21 @@ NVPTX::Scope NVPTXScopes::operator[](SyncScope::ID ID) const {    auto S = Scopes.find(ID);    if (S == Scopes.end()) { -    // TODO: -    // - Add API to LLVMContext to get the name of a single scope. -    // - Use that API here to print an error containing the name -    //   of this Unknown ID. -    report_fatal_error(formatv("Could not find scope ID={}.", int(ID))); +    auto scopeName = Context->getSyncScopeName(ID); +    assert(scopeName.has_value() && "Scope name must exist."); + +    // Build list of supported syncscopes programmatically +    SmallVector<StringRef> supportedScopes; +    for (const auto &Entry : Scopes) { +      if (auto name = Context->getSyncScopeName(Entry.first)) +        supportedScopes.push_back(name->empty() ? 
"<empty string>" : *name); +    } + +    reportFatalUsageError( +        formatv("NVPTX backend does not support syncscope \"{0}\" (ID={1}).\n" +                "Supported syncscopes are: {2}.", +                scopeName.value(), int(ID), +                make_range(supportedScopes.begin(), supportedScopes.end())));    }    return S->second;  } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 1cb579b..d525531 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -35,6 +35,7 @@ struct NVPTXScopes {  private:    SmallMapVector<SyncScope::ID, NVPTX::Scope, 8> Scopes{}; +  LLVMContext *Context = nullptr;  };  class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index e8758aa..50827bd 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1562,12 +1562,17 @@ def : Pat<(int_nvvm_saturate_d f64:$a),     (CVT_f64_f64 $a, CvtSAT)>;  // Exp2  Log2  // -def : Pat<(int_nvvm_ex2_approx_ftz_f f32:$a), (EX2_APPROX_f32 $a, FTZ)>; -def : Pat<(int_nvvm_ex2_approx_f f32:$a), (EX2_APPROX_f32 $a, NoFTZ)>; +def : Pat<(f32 (int_nvvm_ex2_approx_ftz f32:$a)), (EX2_APPROX_f32 $a, FTZ)>; +def : Pat<(f32 (int_nvvm_ex2_approx f32:$a)), (EX2_APPROX_f32 $a, NoFTZ)>;  let Predicates = [hasPTX<70>, hasSM<75>] in { -  def : Pat<(int_nvvm_ex2_approx_f16 f16:$a), (EX2_APPROX_f16 $a)>; -  def : Pat<(int_nvvm_ex2_approx_f16x2 v2f16:$a), (EX2_APPROX_f16x2 $a)>; +  def : Pat<(f16 (int_nvvm_ex2_approx f16:$a)), (EX2_APPROX_f16 $a)>; +  def : Pat<(v2f16 (int_nvvm_ex2_approx v2f16:$a)), (EX2_APPROX_f16x2 $a)>; +} + +let Predicates = [hasPTX<78>, hasSM<90>] in { +  def : Pat<(bf16 (int_nvvm_ex2_approx_ftz bf16:$a)), (EX2_APPROX_bf16 $a)>; +  def : Pat<(v2bf16 (int_nvvm_ex2_approx_ftz v2bf16:$a)), (EX2_APPROX_bf16x2 $a)>;  }  def LG2_APPROX_f32 : diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 729c077..64593e6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -318,7 +318,7 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,        // answer. 
These include:        //        //   - nvvm_cos_approx_{f,ftz_f} -      //   - nvvm_ex2_approx_{d,f,ftz_f} +      //   - nvvm_ex2_approx(_ftz)        //   - nvvm_lg2_approx_{d,f,ftz_f}        //   - nvvm_sin_approx_{f,ftz_f}        //   - nvvm_sqrt_approx_{f,ftz_f} diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index bcb3f50..780e124 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2702,7 +2702,7 @@ static bool isSpecialLLVMGlobalArrayToSkip(const GlobalVariable *GV) {  static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {    return StringSwitch<bool>(GV->getName()) -      .Cases("llvm.global_ctors", "llvm.global_dtors", true) +      .Cases({"llvm.global_ctors", "llvm.global_dtors"}, true)        .Default(false);  } diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index da3efdc..0c2e44e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -360,6 +360,10 @@ let Predicates = [HasVSX, IsISAFuture] in {      def LXVPRLL : XForm_XTp5_RAB5<31, 621, (outs vsrprc:$XTp),                                    (ins (memr $RA):$addr, g8rc:$RB),                                    "lxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>; +    def LXVPB32X +        : XForm_XTp5_RAB5<31, 877, (outs vsrprc:$XTp), +                          (ins (memr $RA):$addr, g8rc:$RB), +                          "lxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;    }    let mayStore = 1 in { @@ -376,6 +380,10 @@ let Predicates = [HasVSX, IsISAFuture] in {          : XForm_XTp5_RAB5<31, 749, (outs),                            (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB),                            "stxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>; +    def STXVPB32X +        : XForm_XTp5_RAB5<31, 1005, (outs), +                          (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB), +                          "stxvpb32x $XTp, $addr, $RB", IIC_LdStLFD, []>;    }    def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB), diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 2fba090..b04e887 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -912,7 +912,7 @@ bool PPCTTIImpl::areInlineCompatible(const Function *Caller,  bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,                                         const Function *Callee, -                                       const ArrayRef<Type *> &Types) const { +                                       ArrayRef<Type *> Types) const {    // We need to ensure that argument promotion does not    // attempt to promote pointers to MMA types (__vector_pair diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 475472a..8d7f255 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -147,7 +147,7 @@ public:    bool areInlineCompatible(const Function *Caller,                             const Function *Callee) const override;    bool areTypesABICompatible(const Function *Caller, const Function *Callee, -                             const ArrayRef<Type *> &Types) const override; +                             ArrayRef<Type *> Types) const override;    bool supportsTailCallFor(const 
CallBase *CB) const override;  private: diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 282cf5d..3d5a55c 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -95,7 +95,8 @@ private:    void addVectorLoadStoreOperands(MachineInstr &I,                                    SmallVectorImpl<SrcOp> &SrcOps,                                    unsigned &CurOp, bool IsMasked, -                                  bool IsStrided) const; +                                  bool IsStridedOrIndexed, +                                  LLT *IndexVT = nullptr) const;    bool selectIntrinsicWithSideEffects(MachineInstr &I,                                        MachineIRBuilder &MIB) const; @@ -722,15 +723,17 @@ static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {  void RISCVInstructionSelector::addVectorLoadStoreOperands(      MachineInstr &I, SmallVectorImpl<SrcOp> &SrcOps, unsigned &CurOp, -    bool IsMasked, bool IsStrided) const { +    bool IsMasked, bool IsStridedOrIndexed, LLT *IndexVT) const {    // Base Pointer    auto PtrReg = I.getOperand(CurOp++).getReg();    SrcOps.push_back(PtrReg); -  // Stride -  if (IsStrided) { +  // Stride or Index +  if (IsStridedOrIndexed) {      auto StrideReg = I.getOperand(CurOp++).getReg();      SrcOps.push_back(StrideReg); +    if (IndexVT) +      *IndexVT = MRI->getType(StrideReg);    }    // Mask @@ -805,6 +808,70 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(      I.eraseFromParent();      return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);    } +  case Intrinsic::riscv_vloxei: +  case Intrinsic::riscv_vloxei_mask: +  case Intrinsic::riscv_vluxei: +  case Intrinsic::riscv_vluxei_mask: { +    bool IsMasked = IntrinID == Intrinsic::riscv_vloxei_mask || +                    IntrinID == Intrinsic::riscv_vluxei_mask; +    bool IsOrdered = IntrinID == Intrinsic::riscv_vloxei || +                     IntrinID == Intrinsic::riscv_vloxei_mask; +    LLT VT = MRI->getType(I.getOperand(0).getReg()); +    unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); + +    // Result vector +    const Register DstReg = I.getOperand(0).getReg(); + +    // Sources +    bool HasPassthruOperand = IntrinID != Intrinsic::riscv_vlm; +    unsigned CurOp = 2; +    SmallVector<SrcOp, 4> SrcOps; // Source registers. + +    // Passthru +    if (HasPassthruOperand) { +      auto PassthruReg = I.getOperand(CurOp++).getReg(); +      SrcOps.push_back(PassthruReg); +    } else { +      // Use NoRegister if there is no specified passthru. 
+      SrcOps.push_back(Register()); +    } +    LLT IndexVT; +    addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, true, &IndexVT); + +    RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT)); +    RISCVVType::VLMUL IndexLMUL = +        RISCVTargetLowering::getLMUL(getMVTForLLT(IndexVT)); +    unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); +    if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { +      reportFatalUsageError("The V extension does not support EEW=64 for index " +                            "values when XLEN=32"); +    } +    const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( +        IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), +        static_cast<unsigned>(IndexLMUL)); + +    auto PseudoMI = MIB.buildInstr(P->Pseudo, {DstReg}, SrcOps); + +    // Select VL +    auto VLOpFn = renderVLOp(I.getOperand(CurOp++)); +    for (auto &RenderFn : *VLOpFn) +      RenderFn(PseudoMI); + +    // SEW +    PseudoMI.addImm(Log2SEW); + +    // Policy +    uint64_t Policy = RISCVVType::MASK_AGNOSTIC; +    if (IsMasked) +      Policy = I.getOperand(CurOp++).getImm(); +    PseudoMI.addImm(Policy); + +    // Memref +    PseudoMI.cloneMemRefs(I); + +    I.eraseFromParent(); +    return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI); +  }    case Intrinsic::riscv_vsm:    case Intrinsic::riscv_vse:    case Intrinsic::riscv_vse_mask: @@ -847,6 +914,56 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(      I.eraseFromParent();      return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);    } +  case Intrinsic::riscv_vsoxei: +  case Intrinsic::riscv_vsoxei_mask: +  case Intrinsic::riscv_vsuxei: +  case Intrinsic::riscv_vsuxei_mask: { +    bool IsMasked = IntrinID == Intrinsic::riscv_vsoxei_mask || +                    IntrinID == Intrinsic::riscv_vsuxei_mask; +    bool IsOrdered = IntrinID == Intrinsic::riscv_vsoxei || +                     IntrinID == Intrinsic::riscv_vsoxei_mask; +    LLT VT = MRI->getType(I.getOperand(1).getReg()); +    unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); + +    // Sources +    unsigned CurOp = 1; +    SmallVector<SrcOp, 4> SrcOps; // Source registers. 
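The index-EEW guard used in the load case above is repeated verbatim in this store case below: indexed vector accesses carry the index element width as a log2 value, and a 64-bit index EEW (log2 == 6) is only available when XLEN is 64. Factored out as a sketch (hypothetical helper, not part of the patch):

// Index EEW is 8/16/32/64, carried as log2 (3..6); EEW=64 requires RV64.
static bool isLegalIndexEEW(unsigned IndexLog2EEW, bool IsRV64) {
  return IndexLog2EEW < 6 || IsRV64;
}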
+ +    // Store value +    auto PassthruReg = I.getOperand(CurOp++).getReg(); +    SrcOps.push_back(PassthruReg); + +    LLT IndexVT; +    addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, true, &IndexVT); + +    RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT)); +    RISCVVType::VLMUL IndexLMUL = +        RISCVTargetLowering::getLMUL(getMVTForLLT(IndexVT)); +    unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); +    if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { +      reportFatalUsageError("The V extension does not support EEW=64 for index " +                            "values when XLEN=32"); +    } +    const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( +        IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), +        static_cast<unsigned>(IndexLMUL)); + +    auto PseudoMI = MIB.buildInstr(P->Pseudo, {}, SrcOps); + +    // Select VL +    auto VLOpFn = renderVLOp(I.getOperand(CurOp++)); +    for (auto &RenderFn : *VLOpFn) +      RenderFn(PseudoMI); + +    // SEW +    PseudoMI.addImm(Log2SEW); + +    // Memref +    PseudoMI.cloneMemRefs(I); + +    I.eraseFromParent(); +    return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI); +  }    }  } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index e75dfe3..5b8cfb2 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -407,7 +407,6 @@ enum OperandType : unsigned {    OPERAND_SIMM5_PLUS1,    OPERAND_SIMM6,    OPERAND_SIMM6_NONZERO, -  OPERAND_SIMM8,    OPERAND_SIMM8_UNSIGNED,    OPERAND_SIMM10,    OPERAND_SIMM10_LSB0000_NONZERO, diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index b25a054..9078335 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -371,8 +371,8 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,    RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);    unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());    if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -    report_fatal_error("The V extension does not support EEW=64 for index " -                       "values when XLEN=32"); +    reportFatalUsageError("The V extension does not support EEW=64 for index " +                          "values when XLEN=32");    }    const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(        NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), @@ -444,8 +444,8 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,    RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);    unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());    if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -    report_fatal_error("The V extension does not support EEW=64 for index " -                       "values when XLEN=32"); +    reportFatalUsageError("The V extension does not support EEW=64 for index " +                          "values when XLEN=32");    }    const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(        NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), @@ -2223,8 +2223,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {        RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);        unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());        if (IndexLog2EEW 
== 6 && !Subtarget->is64Bit()) { -        report_fatal_error("The V extension does not support EEW=64 for index " -                           "values when XLEN=32"); +        reportFatalUsageError("The V extension does not support EEW=64 for " +                              "index values when XLEN=32");        }        const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(            IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), @@ -2457,8 +2457,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {        RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);        unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());        if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { -        report_fatal_error("The V extension does not support EEW=64 for index " -                           "values when XLEN=32"); +        reportFatalUsageError("The V extension does not support EEW=64 for " +                              "index values when XLEN=32");        }        const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(            IsMasked, IsOrdered, IndexLog2EEW, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c6a8b84..e0cf739 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -23946,7 +23946,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,                                 .Case("{t0}", RISCV::X5)                                 .Case("{t1}", RISCV::X6)                                 .Case("{t2}", RISCV::X7) -                               .Cases("{s0}", "{fp}", RISCV::X8) +                               .Cases({"{s0}", "{fp}"}, RISCV::X8)                                 .Case("{s1}", RISCV::X9)                                 .Case("{a0}", RISCV::X10)                                 .Case("{a1}", RISCV::X11) @@ -23983,38 +23983,38 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,    // use the ABI names in register constraint lists.    
if (Subtarget.hasStdExtF()) {      unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) -                        .Cases("{f0}", "{ft0}", RISCV::F0_F) -                        .Cases("{f1}", "{ft1}", RISCV::F1_F) -                        .Cases("{f2}", "{ft2}", RISCV::F2_F) -                        .Cases("{f3}", "{ft3}", RISCV::F3_F) -                        .Cases("{f4}", "{ft4}", RISCV::F4_F) -                        .Cases("{f5}", "{ft5}", RISCV::F5_F) -                        .Cases("{f6}", "{ft6}", RISCV::F6_F) -                        .Cases("{f7}", "{ft7}", RISCV::F7_F) -                        .Cases("{f8}", "{fs0}", RISCV::F8_F) -                        .Cases("{f9}", "{fs1}", RISCV::F9_F) -                        .Cases("{f10}", "{fa0}", RISCV::F10_F) -                        .Cases("{f11}", "{fa1}", RISCV::F11_F) -                        .Cases("{f12}", "{fa2}", RISCV::F12_F) -                        .Cases("{f13}", "{fa3}", RISCV::F13_F) -                        .Cases("{f14}", "{fa4}", RISCV::F14_F) -                        .Cases("{f15}", "{fa5}", RISCV::F15_F) -                        .Cases("{f16}", "{fa6}", RISCV::F16_F) -                        .Cases("{f17}", "{fa7}", RISCV::F17_F) -                        .Cases("{f18}", "{fs2}", RISCV::F18_F) -                        .Cases("{f19}", "{fs3}", RISCV::F19_F) -                        .Cases("{f20}", "{fs4}", RISCV::F20_F) -                        .Cases("{f21}", "{fs5}", RISCV::F21_F) -                        .Cases("{f22}", "{fs6}", RISCV::F22_F) -                        .Cases("{f23}", "{fs7}", RISCV::F23_F) -                        .Cases("{f24}", "{fs8}", RISCV::F24_F) -                        .Cases("{f25}", "{fs9}", RISCV::F25_F) -                        .Cases("{f26}", "{fs10}", RISCV::F26_F) -                        .Cases("{f27}", "{fs11}", RISCV::F27_F) -                        .Cases("{f28}", "{ft8}", RISCV::F28_F) -                        .Cases("{f29}", "{ft9}", RISCV::F29_F) -                        .Cases("{f30}", "{ft10}", RISCV::F30_F) -                        .Cases("{f31}", "{ft11}", RISCV::F31_F) +                        .Cases({"{f0}", "{ft0}"}, RISCV::F0_F) +                        .Cases({"{f1}", "{ft1}"}, RISCV::F1_F) +                        .Cases({"{f2}", "{ft2}"}, RISCV::F2_F) +                        .Cases({"{f3}", "{ft3}"}, RISCV::F3_F) +                        .Cases({"{f4}", "{ft4}"}, RISCV::F4_F) +                        .Cases({"{f5}", "{ft5}"}, RISCV::F5_F) +                        .Cases({"{f6}", "{ft6}"}, RISCV::F6_F) +                        .Cases({"{f7}", "{ft7}"}, RISCV::F7_F) +                        .Cases({"{f8}", "{fs0}"}, RISCV::F8_F) +                        .Cases({"{f9}", "{fs1}"}, RISCV::F9_F) +                        .Cases({"{f10}", "{fa0}"}, RISCV::F10_F) +                        .Cases({"{f11}", "{fa1}"}, RISCV::F11_F) +                        .Cases({"{f12}", "{fa2}"}, RISCV::F12_F) +                        .Cases({"{f13}", "{fa3}"}, RISCV::F13_F) +                        .Cases({"{f14}", "{fa4}"}, RISCV::F14_F) +                        .Cases({"{f15}", "{fa5}"}, RISCV::F15_F) +                        .Cases({"{f16}", "{fa6}"}, RISCV::F16_F) +                        .Cases({"{f17}", "{fa7}"}, RISCV::F17_F) +                        .Cases({"{f18}", "{fs2}"}, RISCV::F18_F) +                        .Cases({"{f19}", "{fs3}"}, RISCV::F19_F) +                        .Cases({"{f20}", "{fs4}"}, RISCV::F20_F) +                        .Cases({"{f21}", "{fs5}"}, 
RISCV::F21_F) +                        .Cases({"{f22}", "{fs6}"}, RISCV::F22_F) +                        .Cases({"{f23}", "{fs7}"}, RISCV::F23_F) +                        .Cases({"{f24}", "{fs8}"}, RISCV::F24_F) +                        .Cases({"{f25}", "{fs9}"}, RISCV::F25_F) +                        .Cases({"{f26}", "{fs10}"}, RISCV::F26_F) +                        .Cases({"{f27}", "{fs11}"}, RISCV::F27_F) +                        .Cases({"{f28}", "{ft8}"}, RISCV::F28_F) +                        .Cases({"{f29}", "{ft9}"}, RISCV::F29_F) +                        .Cases({"{f30}", "{ft10}"}, RISCV::F30_F) +                        .Cases({"{f31}", "{ft11}"}, RISCV::F31_F)                          .Default(RISCV::NoRegister);      if (FReg != RISCV::NoRegister) {        assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); diff --git a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp index a1c8e23..c58a5c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp @@ -48,7 +48,7 @@ class VXRMInfo {    } State = Uninitialized;  public: -  VXRMInfo() {} +  VXRMInfo() = default;    static VXRMInfo getUnknown() {      VXRMInfo Info; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index c31713e..1c6a5af 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -90,6 +90,7 @@ defvar ZfhminDExts = [ZfhminDExt, ZhinxminZdinxExt, ZhinxminZdinx32Ext];  //===----------------------------------------------------------------------===//  let Predicates = [HasHalfFPLoadStoreMove] in { +let canFoldAsLoad = 1 in  def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;  // Operands for stores are in the order srcreg, base, offset rather than diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp index 0a318e0..ed6d355 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp @@ -15,4 +15,4 @@  using namespace llvm;  SPIRVTargetStreamer::SPIRVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} -SPIRVTargetStreamer::~SPIRVTargetStreamer() {} +SPIRVTargetStreamer::~SPIRVTargetStreamer() = default; diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 9e11c3a..dd57b74 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -149,23 +149,23 @@ static FunctionType *getOriginalFunctionType(const Function &F) {          return isa<MDString>(N->getOperand(0)) &&                 cast<MDString>(N->getOperand(0))->getString() == F.getName();        }); -  // TODO: probably one function can have numerous type mutations, -  // so we should support this.    if (ThisFuncMDIt != NamedMD->op_end()) {      auto *ThisFuncMD = *ThisFuncMDIt; -    MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(1)); -    assert(MD && "MDNode operand is expected"); -    ConstantInt *Const = getConstInt(MD, 0); -    if (Const) { -      auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1)); -      assert(CMeta && "ConstantAsMetadata operand is expected"); -      assert(Const->getSExtValue() >= -1); -      // Currently -1 indicates return value, greater values mean -      // argument numbers. 
-      if (Const->getSExtValue() == -1) -        RetTy = CMeta->getType(); -      else -        ArgTypes[Const->getSExtValue()] = CMeta->getType(); +    for (unsigned I = 1; I != ThisFuncMD->getNumOperands(); ++I) { +      MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(I)); +      assert(MD && "MDNode operand is expected"); +      ConstantInt *Const = getConstInt(MD, 0); +      if (Const) { +        auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1)); +        assert(CMeta && "ConstantAsMetadata operand is expected"); +        assert(Const->getSExtValue() >= -1); +        // Currently -1 indicates return value, greater values mean +        // argument numbers. +        if (Const->getSExtValue() == -1) +          RetTy = CMeta->getType(); +        else +          ArgTypes[Const->getSExtValue()] = CMeta->getType(); +      }      }    } diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index 2d19f6de..44b6c66 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -81,7 +81,7 @@ private:    void initAvailableCapabilitiesForVulkan(const SPIRVSubtarget &ST);  public: -  RequirementHandler() {} +  RequirementHandler() = default;    void clear() {      MinimalCaps.clear();      AllCaps.clear(); diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 7dd0b95..5ba0356 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -69,7 +69,7 @@ static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {  }  // Pin SPIRVTargetObjectFile's vtables to this file. -SPIRVTargetObjectFile::~SPIRVTargetObjectFile() {} +SPIRVTargetObjectFile::~SPIRVTargetObjectFile() = default;  SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,                                         StringRef CPU, StringRef FS, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h index 9d0adbb..87ec256 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h @@ -16,7 +16,7 @@ namespace llvm {  /// This implementation is used for SystemZ ELF targets.  class SystemZELFTargetObjectFile : public TargetLoweringObjectFileELF {  public: -  SystemZELFTargetObjectFile() {} +  SystemZELFTargetObjectFile() = default;    /// Describe a TLS variable address within debug info.    
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h index 7845cdf..1bfc61f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h @@ -76,7 +76,7 @@ public:      BlockSet.insert(MBB);    }    ArrayRef<MachineBasicBlock *> getBlocks() const { return Blocks; } -  using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator; +  using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator;    block_iterator block_begin() const { return getBlocks().begin(); }    block_iterator block_end() const { return getBlocks().end(); }    inline iterator_range<block_iterator> blocks() const { @@ -96,7 +96,7 @@ public:    void addSubException(std::unique_ptr<WebAssemblyException> E) {      SubExceptions.push_back(std::move(E));    } -  using iterator = typename decltype(SubExceptions)::const_iterator; +  using iterator = decltype(SubExceptions)::const_iterator;    iterator begin() const { return SubExceptions.begin(); }    iterator end() const { return SubExceptions.end(); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index ff4d6469..ee575e3 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -207,8 +207,7 @@ template <> struct MappingTraits<WebAssemblyFunctionInfo> {  template <> struct CustomMappingTraits<BBNumberMap> {    static void inputOne(IO &YamlIO, StringRef Key,                         BBNumberMap &SrcToUnwindDest) { -    YamlIO.mapRequired(Key.str().c_str(), -                       SrcToUnwindDest[std::atoi(Key.str().c_str())]); +    YamlIO.mapRequired(Key, SrcToUnwindDest[std::atoi(Key.str().c_str())]);    }    static void output(IO &YamlIO, BBNumberMap &SrcToUnwindDest) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h index e92bf17..96b8a4e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h @@ -35,7 +35,7 @@ public:    virtual MachineBasicBlock *getHeader() const = 0;    virtual bool contains(const MachineBasicBlock *MBB) const = 0;    virtual unsigned getNumBlocks() const = 0; -  using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator; +  using block_iterator = ArrayRef<MachineBasicBlock *>::const_iterator;    virtual iterator_range<block_iterator> blocks() const = 0;    virtual bool isLoop() const = 0;  }; diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index b7ea672..bac3692 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2470,10 +2470,10 @@ bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,  // Report back its kind, or IOK_INVALID if does not evaluated as a known one  unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {    return StringSwitch<unsigned>(Name) -    .Cases("TYPE","type",IOK_TYPE) -    .Cases("SIZE","size",IOK_SIZE) -    .Cases("LENGTH","length",IOK_LENGTH) -    .Default(IOK_INVALID); +      .Cases({"TYPE", "type"}, IOK_TYPE) +      .Cases({"SIZE", "size"}, IOK_SIZE) +      .Cases({"LENGTH", "length"}, IOK_LENGTH) +  
    .Default(IOK_INVALID);  }  /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator @@ -2516,8 +2516,8 @@ unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {  unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {    return StringSwitch<unsigned>(Name.lower())        .Case("type", MOK_TYPE) -      .Cases("size", "sizeof", MOK_SIZEOF) -      .Cases("length", "lengthof", MOK_LENGTHOF) +      .Cases({"size", "sizeof"}, MOK_SIZEOF) +      .Cases({"length", "lengthof"}, MOK_LENGTHOF)        .Default(MOK_INVALID);  } @@ -2581,21 +2581,21 @@ bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {  bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size,                                                 StringRef *SizeStr) {    Size = StringSwitch<unsigned>(getTok().getString()) -    .Cases("BYTE", "byte", 8) -    .Cases("WORD", "word", 16) -    .Cases("DWORD", "dword", 32) -    .Cases("FLOAT", "float", 32) -    .Cases("LONG", "long", 32) -    .Cases("FWORD", "fword", 48) -    .Cases("DOUBLE", "double", 64) -    .Cases("QWORD", "qword", 64) -    .Cases("MMWORD","mmword", 64) -    .Cases("XWORD", "xword", 80) -    .Cases("TBYTE", "tbyte", 80) -    .Cases("XMMWORD", "xmmword", 128) -    .Cases("YMMWORD", "ymmword", 256) -    .Cases("ZMMWORD", "zmmword", 512) -    .Default(0); +             .Cases({"BYTE", "byte"}, 8) +             .Cases({"WORD", "word"}, 16) +             .Cases({"DWORD", "dword"}, 32) +             .Cases({"FLOAT", "float"}, 32) +             .Cases({"LONG", "long"}, 32) +             .Cases({"FWORD", "fword"}, 48) +             .Cases({"DOUBLE", "double"}, 64) +             .Cases({"QWORD", "qword"}, 64) +             .Cases({"MMWORD", "mmword"}, 64) +             .Cases({"XWORD", "xword"}, 80) +             .Cases({"TBYTE", "tbyte"}, 80) +             .Cases({"XMMWORD", "xmmword"}, 128) +             .Cases({"YMMWORD", "ymmword"}, 256) +             .Cases({"ZMMWORD", "zmmword"}, 512) +             .Default(0);    if (Size) {      if (SizeStr)        *SizeStr = getTok().getString(); @@ -2886,22 +2886,22 @@ bool X86AsmParser::parseATTOperand(OperandVector &Operands) {  // otherwise the EFLAGS Condition Code enumerator.  
X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {    return StringSwitch<X86::CondCode>(CC) -      .Case("o", X86::COND_O)          // Overflow -      .Case("no", X86::COND_NO)        // No Overflow -      .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal -      .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below -      .Cases("e", "z", X86::COND_E)    // Equal/Zero -      .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero -      .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above -      .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal -      .Case("s", X86::COND_S)          // Sign -      .Case("ns", X86::COND_NS)        // No Sign -      .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even -      .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd -      .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal -      .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less -      .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater -      .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal +      .Case("o", X86::COND_O)            // Overflow +      .Case("no", X86::COND_NO)          // No Overflow +      .Cases({"b", "nae"}, X86::COND_B)  // Below/Neither Above nor Equal +      .Cases({"ae", "nb"}, X86::COND_AE) // Above or Equal/Not Below +      .Cases({"e", "z"}, X86::COND_E)    // Equal/Zero +      .Cases({"ne", "nz"}, X86::COND_NE) // Not Equal/Not Zero +      .Cases({"be", "na"}, X86::COND_BE) // Below or Equal/Not Above +      .Cases({"a", "nbe"}, X86::COND_A)  // Above/Neither Below nor Equal +      .Case("s", X86::COND_S)            // Sign +      .Case("ns", X86::COND_NS)          // No Sign +      .Cases({"p", "pe"}, X86::COND_P)   // Parity/Parity Even +      .Cases({"np", "po"}, X86::COND_NP) // No Parity/Parity Odd +      .Cases({"l", "nge"}, X86::COND_L)  // Less/Neither Greater nor Equal +      .Cases({"ge", "nl"}, X86::COND_GE) // Greater or Equal/Not Less +      .Cases({"le", "ng"}, X86::COND_LE) // Less or Equal/Not Greater +      .Cases({"g", "nle"}, X86::COND_G)  // Greater/Neither Less nor Equal        .Default(X86::COND_INVALID);  } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 133406b..b97b508 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33034,12 +33034,13 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,        DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));    Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy) -                      : (Type *)FixedVectorType::get(ArgTy, 4); +                      : (Type *)FixedVectorType::get(ArgTy, 2);    TargetLowering::CallLoweringInfo CLI(DAG);    CLI.setDebugLoc(dl)        .setChain(DAG.getEntryNode()) -      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args)); +      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args)) +      .setIsPostTypeLegalization();    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); @@ -53347,6 +53348,80 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,    return SDValue();  } +// Look for a RMW operation that only touches one bit of a larger than legal +// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value. 
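// In C terms, the candidates are single-bit read-modify-write updates of a
// type wider than any legal x86 scalar (a sketch; `x` is an in-memory
// unsigned __int128 and `n` is known to be in range):
//
//   x |=  (unsigned __int128)1 << n;   // BTS: set bit n
//   x &= ~((unsigned __int128)1 << n); // BTR: reset bit n
//   x ^=  (unsigned __int128)1 << n;   // BTC: complement bit n
//
// Only one 32-bit word of `x` changes, so the RMW can be narrowed to it.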
+static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, +                              SelectionDAG &DAG, +                              const X86Subtarget &Subtarget) { +  using namespace SDPatternMatch; + +  // Only handle normal stores and its chain was a matching normal load. +  auto *Ld = dyn_cast<LoadSDNode>(St->getChain()); +  if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld || +      !ISD::isNormalLoad(Ld) || !Ld->isSimple() || +      Ld->getBasePtr() != St->getBasePtr() || +      Ld->getOffset() != St->getOffset()) +    return SDValue(); + +  SDValue LoadVal(Ld, 0); +  SDValue StoredVal = St->getValue(); +  EVT VT = StoredVal.getValueType(); + +  // Only narrow larger than legal scalar integers. +  if (!VT.isScalarInteger() || +      VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32)) +    return SDValue(); + +  // BTR: X & ~(1 << ShAmt) +  // BTS: X | (1 << ShAmt) +  // BTC: X ^ (1 << ShAmt) +  SDValue ShAmt; +  if (!StoredVal.hasOneUse() || +      !(sd_match(StoredVal, m_And(m_Specific(LoadVal), +                                  m_Not(m_Shl(m_One(), m_Value(ShAmt))))) || +        sd_match(StoredVal, +                 m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) || +        sd_match(StoredVal, +                 m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))))) +    return SDValue(); + +  // Ensure the shift amount is in bounds. +  KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); +  if (KnownAmt.getMaxValue().uge(VT.getSizeInBits())) +    return SDValue(); + +  // Split the shift into an alignment shift that moves the active i32 block to +  // the bottom bits for truncation and a modulo shift that can act on the i32. +  EVT AmtVT = ShAmt.getValueType(); +  SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, +                                 DAG.getSignedConstant(-32LL, DL, AmtVT)); +  SDValue ModuloAmt = +      DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT)); + +  // Compute the byte offset for the i32 block that is changed by the RMW. +  // combineTruncate will adjust the load for us in a similar way. +  EVT PtrVT = St->getBasePtr().getValueType(); +  SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT); +  SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs, +                                   DAG.getShiftAmountConstant(3, PtrVT, DL)); +  SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL, +                                            SDNodeFlags::NoUnsignedWrap); + +  // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store. 
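// (Index arithmetic, assuming little-endian layout: with AlignAmt = n & -32
// and ModuloAmt = n & 31, bit n lives at bit ModuloAmt of the 32-bit word at
// byte offset AlignAmt / 8. For example `x |= (unsigned __int128)1 << n`
// narrows to the equivalent of:
//
//   uint32_t *W = (uint32_t *)&x + (n >> 5); // AlignAmt / 8 bytes
//   *W |= 1u << (n & 31);                    // ModuloAmt
//
// The same align/modulo split is reused by the wide bit-test narrowing in
// combineSetCC below.)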
+  SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt); +  X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); + +  SDValue Mask = +      DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32), +                  DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8)); +  if (StoredVal.getOpcode() == ISD::AND) +    Mask = DAG.getNOT(DL, Mask, MVT::i32); + +  SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask); +  return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(), +                      Align(), St->getMemOperand()->getFlags()); +} +  static SDValue combineStore(SDNode *N, SelectionDAG &DAG,                              TargetLowering::DAGCombinerInfo &DCI,                              const X86Subtarget &Subtarget) { @@ -53573,6 +53648,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,      }    } +  if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget)) +    return R; +    // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)    //         store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)    if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && @@ -54505,8 +54583,9 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,    // truncation, see if we can convert the shift into a pointer offset instead.    // Limit this to normal (non-ext) scalar integer loads.    if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL && -      Src.hasOneUse() && Src.getOperand(0).hasOneUse() && -      ISD::isNormalLoad(Src.getOperand(0).getNode())) { +      Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) && +      (Src.getOperand(0).hasOneUse() || +       !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) {      auto *Ld = cast<LoadSDNode>(Src.getOperand(0));      if (Ld->isSimple() && VT.isByteSized() &&          isPowerOf2_64(VT.getSizeInBits())) { @@ -54529,8 +54608,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,          SDValue NewLoad =              DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),                          Align(), Ld->getMemOperand()->getFlags()); -        DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1), -                                      NewLoad.getValue(1)); +        DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);          return NewLoad;        }      } @@ -56306,6 +56384,7 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,  static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,                              TargetLowering::DAGCombinerInfo &DCI,                              const X86Subtarget &Subtarget) { +  using namespace SDPatternMatch;    const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();    const SDValue LHS = N->getOperand(0);    const SDValue RHS = N->getOperand(1); @@ -56364,6 +56443,37 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,        if (SDValue AndN = MatchAndCmpEq(RHS, LHS))          return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); +      // If we're performing a bit test on a larger than legal type, attempt +      // to (aligned) shift down the value to the bottom 32-bits and then +      // perform the bittest on the i32 value. +      // ICMP_ZERO(AND(X,SHL(1,IDX))) +      // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31)))) +      if (isNullConstant(RHS) && +          OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 
64 : 32)) { +        SDValue X, ShAmt; +        if (sd_match(LHS, m_OneUse(m_And(m_Value(X), +                                         m_Shl(m_One(), m_Value(ShAmt)))))) { +          // Only attempt this if the shift amount is known to be in bounds. +          KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); +          if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) { +            EVT AmtVT = ShAmt.getValueType(); +            SDValue AlignAmt = +                DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, +                            DAG.getSignedConstant(-32LL, DL, AmtVT)); +            SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, +                                            DAG.getConstant(31, DL, AmtVT)); +            SDValue Mask = DAG.getNode( +                ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32), +                DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8)); +            X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt); +            X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); +            X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask); +            return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32), +                                CC); +          } +        } +      } +        // cmpeq(trunc(x),C) --> cmpeq(x,C)        // cmpne(trunc(x),C) --> cmpne(x,C)        // iff x upper bits are zero. diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp index 090060e..3b96e70 100644 --- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -115,9 +115,9 @@ struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {    static constexpr MachineInstr *const ArgNodeSentinel = nullptr;    using GraphT = ImmutableGraph<MachineInstr *, int>; -  using Node = typename GraphT::Node; -  using Edge = typename GraphT::Edge; -  using size_type = typename GraphT::size_type; +  using Node = GraphT::Node; +  using Edge = GraphT::Edge; +  using size_type = GraphT::size_type;    MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,                       std::unique_ptr<Edge[]> Edges, size_type NodesSize,                       size_type EdgesSize, int NumFences = 0, int NumGadgets = 0) @@ -191,10 +191,10 @@ template <>  struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {    using GraphType = MachineGadgetGraph;    using Traits = llvm::GraphTraits<GraphType *>; -  using NodeRef = typename Traits::NodeRef; -  using EdgeRef = typename Traits::EdgeRef; -  using ChildIteratorType = typename Traits::ChildIteratorType; -  using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType; +  using NodeRef = Traits::NodeRef; +  using EdgeRef = Traits::EdgeRef; +  using ChildIteratorType = Traits::ChildIteratorType; +  using ChildEdgeIteratorType = Traits::ChildEdgeIteratorType;    DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} @@ -227,9 +227,6 @@ struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {  } // end namespace llvm -constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel; -constexpr int MachineGadgetGraph::GadgetEdgeSentinel; -  char X86LoadValueInjectionLoadHardeningPass::ID = 0;  void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage( @@ -335,7 +332,7 @@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(    L.computePhiInfo();    GraphBuilder Builder; -  using GraphIter = typename GraphBuilder::BuilderNodeRef; +  
using GraphIter = GraphBuilder::BuilderNodeRef;    DenseMap<MachineInstr *, GraphIter> NodeMap;    int FenceCount = 0, GadgetCount = 0;    auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) { diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 3d8d0a23..0b1430e 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -6562,7 +6562,7 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,  bool X86TTIImpl::areTypesABICompatible(const Function *Caller,                                         const Function *Callee, -                                       const ArrayRef<Type *> &Types) const { +                                       ArrayRef<Type *> Types) const {    if (!BaseT::areTypesABICompatible(Caller, Callee, Types))      return false; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 133b366..de5e1c2 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -296,7 +296,7 @@ public:    bool areInlineCompatible(const Function *Caller,                             const Function *Callee) const override;    bool areTypesABICompatible(const Function *Caller, const Function *Callee, -                             const ArrayRef<Type *> &Type) const override; +                             ArrayRef<Type *> Type) const override;    uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {      return ST->getMaxInlineSizeThreshold(); diff --git a/llvm/lib/TargetParser/PPCTargetParser.cpp b/llvm/lib/TargetParser/PPCTargetParser.cpp index d510445..f74d670 100644 --- a/llvm/lib/TargetParser/PPCTargetParser.cpp +++ b/llvm/lib/TargetParser/PPCTargetParser.cpp @@ -48,9 +48,9 @@ StringRef normalizeCPUName(StringRef CPUName) {    // accepting it. Clang has always ignored it and passed the    // generic CPU ID to the back end.    
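// For example, per the table below: "common" and "405" normalize to
// "generic", "power3" to "pwr3", and unrecognized names fall through
// unchanged via .Default(CPUName), e.g. a hypothetical "mycpu" is returned
// as-is.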
return StringSwitch<StringRef>(CPUName) -      .Cases("common", "405", "generic") -      .Cases("ppc440", "440fp", "440") -      .Cases("630", "power3", "pwr3") +      .Cases({"common", "405"}, "generic") +      .Cases({"ppc440", "440fp"}, "440") +      .Cases({"630", "power3"}, "pwr3")        .Case("G3", "g3")        .Case("G4", "g4")        .Case("G4+", "g4+") @@ -69,7 +69,7 @@ StringRef normalizeCPUName(StringRef CPUName) {        .Case("power9", "pwr9")        .Case("power10", "pwr10")        .Case("power11", "pwr11") -      .Cases("powerpc", "powerpc32", "ppc") +      .Cases({"powerpc", "powerpc32"}, "ppc")        .Case("powerpc64", "ppc64")        .Case("powerpc64le", "ppc64le")        .Default(CPUName); diff --git a/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp b/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp index cda07e8..f55bc9c 100644 --- a/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp +++ b/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp @@ -32,7 +32,7 @@ using namespace llvm::MachO;  using namespace llvm::MachO::DylibReader;  using TripleVec = std::vector<Triple>; -static typename TripleVec::iterator emplace(TripleVec &Container, Triple &&T) { +static TripleVec::iterator emplace(TripleVec &Container, Triple &&T) {    auto I = partition_point(Container, [=](const Triple &CT) {      return std::forward_as_tuple(CT.getArch(), CT.getOS(),                                   CT.getEnvironment()) < diff --git a/llvm/lib/TextAPI/RecordVisitor.cpp b/llvm/lib/TextAPI/RecordVisitor.cpp index d333b33..24971a7 100644 --- a/llvm/lib/TextAPI/RecordVisitor.cpp +++ b/llvm/lib/TextAPI/RecordVisitor.cpp @@ -15,7 +15,7 @@  using namespace llvm;  using namespace llvm::MachO; -RecordVisitor::~RecordVisitor() {} +RecordVisitor::~RecordVisitor() = default;  void RecordVisitor::visitObjCInterface(const ObjCInterfaceRecord &) {}  void RecordVisitor::visitObjCCategory(const ObjCCategoryRecord &) {} diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h index e05fe28..1e549f1 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCloner.h +++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h @@ -77,7 +77,7 @@ public:        : OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),          Builder(OrigF.getContext()), TTI(TTI) {} -  virtual ~BaseCloner() {} +  virtual ~BaseCloner() = default;    /// Create a clone for a continuation lowering.    
static Function *createClone(Function &OrigF, const Twine &Suffix, diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 5048561..a6ac761 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -3619,7 +3619,7 @@ struct AAIntraFnReachabilityFunction final        return true;      RQITy StackRQI(A, From, To, ExclusionSet, false); -    typename RQITy::Reachable Result; +    RQITy::Reachable Result;      if (!NonConstThis->checkQueryCache(A, StackRQI, Result))        return NonConstThis->isReachableImpl(A, StackRQI,                                             /*IsTemporaryRQI=*/true); @@ -5185,6 +5185,7 @@ struct AADereferenceableCallSiteReturned final  // ------------------------ Align Argument Attribute ------------------------  namespace { +  static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,                                      Value &AssociatedValue, const Use *U,                                      const Instruction *I, bool &TrackUse) { @@ -5200,6 +5201,28 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,        TrackUse = true;      return 0;    } +  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) +    switch (II->getIntrinsicID()) { +    case Intrinsic::ptrmask: { +      // Is it appropriate to pull attribute in initialization? +      const auto *ConstVals = A.getAAFor<AAPotentialConstantValues>( +          QueryingAA, IRPosition::value(*II->getOperand(1)), DepClassTy::NONE); +      const auto *AlignAA = A.getAAFor<AAAlign>( +          QueryingAA, IRPosition::value(*II), DepClassTy::NONE); +      if (ConstVals && ConstVals->isValidState() && ConstVals->isAtFixpoint()) { +        unsigned ShiftValue = std::min(ConstVals->getAssumedMinTrailingZeros(), +                                       Value::MaxAlignmentExponent); +        Align ConstAlign(UINT64_C(1) << ShiftValue); +        if (ConstAlign >= AlignAA->getKnownAlign()) +          return Align(1).value(); +      } +      if (AlignAA) +        return AlignAA->getKnownAlign().value(); +      break; +    } +    default: +      break; +    }    MaybeAlign MA;    if (const auto *CB = dyn_cast<CallBase>(I)) { @@ -5499,6 +5522,44 @@ struct AAAlignCallSiteReturned final    AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)        : Base(IRP, A) {} +  ChangeStatus updateImpl(Attributor &A) override { +    Instruction *I = getIRPosition().getCtxI(); +    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { +      switch (II->getIntrinsicID()) { +      case Intrinsic::ptrmask: { +        Align Alignment; +        bool Valid = false; + +        const auto *ConstVals = A.getAAFor<AAPotentialConstantValues>( +            *this, IRPosition::value(*II->getOperand(1)), DepClassTy::REQUIRED); +        if (ConstVals && ConstVals->isValidState()) { +          unsigned ShiftValue = +              std::min(ConstVals->getAssumedMinTrailingZeros(), +                       Value::MaxAlignmentExponent); +          Alignment = Align(UINT64_C(1) << ShiftValue); +          Valid = true; +        } + +        const auto *AlignAA = +            A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))), +                                DepClassTy::REQUIRED); +        if (AlignAA && AlignAA->isValidState()) { +          Alignment = std::max(AlignAA->getAssumedAlign(), Alignment); +          Valid = true; +        } + +        if (Valid) +          return 
clampStateAndIndicateChange<StateType>( +              this->getState(), +              std::min(this->getAssumedAlign(), Alignment).value()); +        break; +      } +      default: +        break; +      } +    } +    return Base::updateImpl(A); +  };    /// See AbstractAttribute::trackStatistics()    void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }  }; @@ -10701,7 +10762,7 @@ struct AAInterFnReachabilityFunction      auto *NonConstThis = const_cast<AAInterFnReachabilityFunction *>(this);      RQITy StackRQI(A, From, To, ExclusionSet, false); -    typename RQITy::Reachable Result; +    RQITy::Reachable Result;      if (!NonConstThis->checkQueryCache(A, StackRQI, Result))        return NonConstThis->isReachableImpl(A, StackRQI,                                             /*IsTemporaryRQI=*/true); diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 894d83f..d35ae47 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1034,11 +1034,11 @@ private:  } // namespace  template <> -struct llvm::DenseMapInfo<typename CallsiteContextGraph< +struct llvm::DenseMapInfo<CallsiteContextGraph<      ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>      : public DenseMapInfo<std::pair<Instruction *, unsigned>> {};  template <> -struct llvm::DenseMapInfo<typename CallsiteContextGraph< +struct llvm::DenseMapInfo<CallsiteContextGraph<      IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>      : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};  template <> diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index d7eb745..2a87a0f 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -208,7 +208,7 @@ namespace KernelInfo {  // };  #define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX)                                    \ -  constexpr const unsigned MEMBER##Idx = IDX; +  constexpr unsigned MEMBER##Idx = IDX;  KERNEL_ENVIRONMENT_IDX(Configuration, 0)  KERNEL_ENVIRONMENT_IDX(Ident, 1) @@ -216,7 +216,7 @@ KERNEL_ENVIRONMENT_IDX(Ident, 1)  #undef KERNEL_ENVIRONMENT_IDX  #define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX)                      \ -  constexpr const unsigned MEMBER##Idx = IDX; +  constexpr unsigned MEMBER##Idx = IDX;  KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)  KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1) @@ -258,7 +258,7 @@ KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)  GlobalVariable *  getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) { -  constexpr const int InitKernelEnvironmentArgNo = 0; +  constexpr int InitKernelEnvironmentArgNo = 0;    return cast<GlobalVariable>(        KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)            ->stripPointerCasts()); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 3ddf182..cbaff29 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3997,6 +3997,27 @@ static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I,    return nullptr;  } +/// Fold select(X >s 0, 0, -X) | smax(X, 0) --> abs(X) +///      select(X <s 0, -X, 0) | smax(X, 0) --> abs(X) +static Value *FoldOrOfSelectSmaxToAbs(BinaryOperator &I, +                       
               InstCombiner::BuilderTy &Builder) { +  Value *X; +  Value *Sel; +  if (match(&I, +            m_c_Or(m_Value(Sel), m_OneUse(m_SMax(m_Value(X), m_ZeroInt()))))) { +    auto NegX = m_Neg(m_Specific(X)); +    if (match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(X), +                                           m_ZeroInt()), +                            m_ZeroInt(), NegX)) || +        match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(X), +                                           m_ZeroInt()), +                            NegX, m_ZeroInt()))) +      return Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, +                                           Builder.getFalse()); +  } +  return nullptr; +} +  // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches  // here. We should standardize that construct where it is needed or choose some  // other way to ensure that commutated variants of patterns are not missed. @@ -4545,6 +4566,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {      if (Value *V = SimplifyAddWithRemainder(I))        return replaceInstUsesWith(I, V); +  if (Value *Res = FoldOrOfSelectSmaxToAbs(I, Builder)) +    return replaceInstUsesWith(I, Res); +    return nullptr;  } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index f5130da..9572f9d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3599,6 +3599,21 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {                                   m_Not(m_Specific(SelCond->getTrueValue())));        if (MayNeedFreeze)          C = Builder.CreateFreeze(C); +      if (!ProfcheckDisableMetadataFixes) { +        Value *C2 = nullptr, *A2 = nullptr, *B2 = nullptr; +        if (match(CondVal, m_LogicalAnd(m_Specific(C), m_Value(A2))) && +            SelCond) { +          return SelectInst::Create(C, A, B, "", nullptr, SelCond); +        } else if (match(FalseVal, +                         m_LogicalAnd(m_Not(m_Value(C2)), m_Value(B2))) && +                   SelFVal) { +          SelectInst *NewSI = SelectInst::Create(C, A, B, "", nullptr, SelFVal); +          NewSI->swapProfMetadata(); +          return NewSI; +        } else { +          return createSelectInstWithUnknownProfile(C, A, B); +        } +      }        return SelectInst::Create(C, A, B);      } @@ -3615,6 +3630,20 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {                                   m_Not(m_Specific(SelFVal->getTrueValue())));        if (MayNeedFreeze)          C = Builder.CreateFreeze(C); +      if (!ProfcheckDisableMetadataFixes) { +        Value *C2 = nullptr, *A2 = nullptr, *B2 = nullptr; +        if (match(CondVal, m_LogicalAnd(m_Not(m_Value(C2)), m_Value(A2))) && +            SelCond) { +          SelectInst *NewSI = SelectInst::Create(C, B, A, "", nullptr, SelCond); +          NewSI->swapProfMetadata(); +          return NewSI; +        } else if (match(FalseVal, m_LogicalAnd(m_Specific(C), m_Value(B2))) && +                   SelFVal) { +          return SelectInst::Create(C, B, A, "", nullptr, SelFVal); +        } else { +          return createSelectInstWithUnknownProfile(C, B, A); +        } +      }        return SelectInst::Create(C, B, A);      }    } diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp index 80e77e09..a2fad02 100644 --- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp @@ -161,7 +161,7 @@ template <char NsanTypeId>  class ShadowTypeConfigImpl : public ShadowTypeConfig {  public:    char getNsanTypeId() const override { return NsanTypeId; } -  static constexpr const char kNsanTypeId = NsanTypeId; +  static constexpr char kNsanTypeId = NsanTypeId;  };  // `double` (`d`) shadow type. diff --git a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp index 89980d5..a577f51 100644 --- a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp +++ b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp @@ -122,7 +122,8 @@ DropUnnecessaryAssumesPass::run(Function &F, FunctionAnalysisManager &FAM) {      Value *Cond = Assume->getArgOperand(0);      // Don't drop type tests, which have special semantics. -    if (match(Cond, m_Intrinsic<Intrinsic::type_test>())) +    if (match(Cond, m_Intrinsic<Intrinsic::type_test>()) || +        match(Cond, m_Intrinsic<Intrinsic::public_type_test>()))        continue;      SmallVector<Value *> Affected; diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp index a06f832..d564e32 100644 --- a/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -514,7 +514,7 @@ public:  class GVNSink {  public: -  GVNSink() {} +  GVNSink() = default;    bool run(Function &F) {      LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 3487e81..7e70ba2 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -245,11 +245,14 @@ raw_ostream &operator<<(raw_ostream &OS, ShapeInfo SI) {  } // namespace -static bool isUniformShape(Value *V) { +static bool isShapePreserving(Value *V) {    Instruction *I = dyn_cast<Instruction>(V);    if (!I)      return true; +  if (isa<SelectInst>(I)) +    return true; +    if (I->isBinaryOp())      return true; @@ -300,6 +303,16 @@ static bool isUniformShape(Value *V) {    }  } +/// Return an iterator over the operands of \p I that should share shape +/// information with \p I. +static iterator_range<Use *> getShapedOperandsForInst(Instruction *I) { +  assert(isShapePreserving(I) && +         "Can't retrieve shaped operands for an instruction that does not " +         "preserve shape information"); +  auto Ops = I->operands(); +  return isa<SelectInst>(I) ? drop_begin(Ops) : Ops; +} +  /// Return the ShapeInfo for the result of \p I, it it can be determined.  static std::optional<ShapeInfo>  computeShapeInfoForInst(Instruction *I, @@ -329,9 +342,8 @@ computeShapeInfoForInst(Instruction *I,        return OpShape->second;    } -  if (isUniformShape(I) || isa<SelectInst>(I)) { -    auto Ops = I->operands(); -    auto ShapedOps = isa<SelectInst>(I) ? drop_begin(Ops) : Ops; +  if (isShapePreserving(I)) { +    auto ShapedOps = getShapedOperandsForInst(I);      // Find the first operand that has a known shape and use that.      
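// (getShapedOperandsForInst makes the select special case explicit: in
// `%r = select i1 %c, %A, %B` the scalar condition %c carries no matrix
// shape, so only %A and %B are searched below; for binary operators and the
// other shape-preserving instructions every operand is a candidate.)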
for (auto &Op : ShapedOps) {        auto OpShape = ShapeMap.find(Op.get()); @@ -710,10 +722,9 @@ public:        case Intrinsic::matrix_column_major_store:          return true;        default: -        return isUniformShape(II); +        break;        } -    return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V) || -           isa<SelectInst>(V); +    return isShapePreserving(V) || isa<StoreInst>(V) || isa<LoadInst>(V);    }    /// Propagate the shape information of instructions to their users. @@ -800,9 +811,8 @@ public:        } else if (isa<StoreInst>(V)) {          // Nothing to do.  We forward-propagated to this so we would just          // backward propagate to an instruction with an already known shape. -      } else if (isUniformShape(V) || isa<SelectInst>(V)) { -        auto Ops = cast<Instruction>(V)->operands(); -        auto ShapedOps = isa<SelectInst>(V) ? drop_begin(Ops) : Ops; +      } else if (isShapePreserving(V)) { +        auto ShapedOps = getShapedOperandsForInst(cast<Instruction>(V));          // Propagate to all operands.          ShapeInfo Shape = ShapeMap[V];          for (Use &U : ShapedOps) { diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index bb6c879..239526e 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -337,7 +337,7 @@ static void buildPartialUnswitchConditionalBranch(  static void buildPartialInvariantUnswitchConditionalBranch(      BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,      BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, -    MemorySSAUpdater *MSSAU) { +    MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {    ValueToValueMapTy VMap;    for (auto *Val : reverse(ToDuplicate)) {      Instruction *Inst = cast<Instruction>(Val); @@ -377,8 +377,19 @@ static void buildPartialInvariantUnswitchConditionalBranch(    IRBuilder<> IRB(&BB);    IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());    Value *Cond = VMap[ToDuplicate[0]]; -  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, -                   Direction ? &NormalSucc : &UnswitchedSucc); +  // The expectation is that ToDuplicate[0] is the condition used by the +  // OriginalBranch, case in which we can clone the profile metadata from there. +  auto *ProfData = +      !ProfcheckDisableMetadataFixes && +              ToDuplicate[0] == skipTrivialSelect(OriginalBranch.getCondition()) +          ? OriginalBranch.getMetadata(LLVMContext::MD_prof) +          : nullptr; +  auto *BR = +      IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, +                       Direction ? &NormalSucc : &UnswitchedSucc, ProfData); +  if (!ProfData) +    setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(), +                                                DEBUG_TYPE);  }  /// Rewrite the PHI nodes in an unswitched loop exit basic block. @@ -2515,7 +2526,7 @@ static void unswitchNontrivialInvariants(      // the branch in the split block.      
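// Note on the new OriginalBranch parameter above: the cloned conditional
// branch may reuse the original !prof branch weights only when it tests the
// same condition value, i.e. when
// ToDuplicate[0] == skipTrivialSelect(BI->getCondition()); otherwise the
// recorded weights describe a different predicate, and the new branch is
// explicitly marked as having unknown weights instead.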
if (PartiallyInvariant)        buildPartialInvariantUnswitchConditionalBranch( -          *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU); +          *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);      else {        buildPartialUnswitchConditionalBranch(            *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 0f3978f..0a8f5ea 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -143,8 +143,8 @@ struct SubGraphTraits {    class WrappedSuccIterator        : public iterator_adaptor_base<              WrappedSuccIterator, BaseSuccIterator, -            typename std::iterator_traits<BaseSuccIterator>::iterator_category, -            NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { +            std::iterator_traits<BaseSuccIterator>::iterator_category, NodeRef, +            std::ptrdiff_t, NodeRef *, NodeRef> {      SmallDenseSet<RegionNode *> *Nodes;    public: @@ -558,11 +558,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {    } else {      // Test for successors as back edge      BasicBlock *BB = N->getNodeAs<BasicBlock>(); -    BranchInst *Term = cast<BranchInst>(BB->getTerminator()); - -    for (BasicBlock *Succ : Term->successors()) -      if (Visited.count(Succ)) -        Loops[Succ] = BB; +    if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator())) +      for (BasicBlock *Succ : Term->successors()) +        if (Visited.count(Succ)) +          Loops[Succ] = BB;    }  } @@ -594,7 +593,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {    for (BasicBlock *P : predecessors(BB)) {      // Ignore it if it's a branch from outside into our region entry -    if (!ParentRegion->contains(P)) +    if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))        continue;      Region *R = RI->getRegionFor(P); @@ -1402,13 +1401,17 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {  /// Run the transformation for each region found  bool StructurizeCFG::run(Region *R, DominatorTree *DT,                           const TargetTransformInfo *TTI) { -  if (R->isTopLevelRegion()) +  // CallBr and its corresponding direct target blocks are for now ignored by +  // this pass. This is not a limitation for the currently intended uses cases +  // of callbr in the AMDGPU backend. +  // Parent and child regions are not affected by this (current) restriction. +  // See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details. +  if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))      return false;    this->DT = DT;    this->TTI = TTI;    Func = R->getEntry()->getParent(); -  assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");    ParentRegion = R; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 46f2903..a03cf6e 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3416,7 +3416,11 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C,    // Create integer constant expression.    
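// For a 1-bit constant, sign extension maps true to -1, which is misleading
// for a boolean in debug info; the fix below zero-extends i1 only:
//
//   llvm::APInt(1, 1).trySExtValue(); // std::optional holding -1
//   llvm::APInt(1, 1).tryZExtValue(); // std::optional holding 1
//
// Wider constants keep sign extension so negative values are preserved.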
auto createIntegerExpression = [&DIB](const Constant &CV) -> DIExpression * {      const APInt &API = cast<ConstantInt>(&CV)->getValue(); -    std::optional<int64_t> InitIntOpt = API.trySExtValue(); +    std::optional<int64_t> InitIntOpt; +    if (API.getBitWidth() == 1) +      InitIntOpt = API.tryZExtValue(); +    else +      InitIntOpt = API.trySExtValue();      return InitIntOpt ? DIB.createConstantValueExpression(                              static_cast<uint64_t>(*InitIntOpt))                        : nullptr; diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 8be471b..6e60b94 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -992,9 +992,12 @@ BranchProbability llvm::getBranchProbability(BranchInst *B,    uint64_t Weight0, Weight1;    if (!extractBranchWeights(*B, Weight0, Weight1))      return BranchProbability::getUnknown(); +  uint64_t Denominator = Weight0 + Weight1; +  if (Denominator == 0) +    return BranchProbability::getUnknown();    if (!ForFirstTarget)      std::swap(Weight0, Weight1); -  return BranchProbability::getBranchProbability(Weight0, Weight0 + Weight1); +  return BranchProbability::getBranchProbability(Weight0, Denominator);  }  bool llvm::setBranchProbability(BranchInst *B, BranchProbability P, diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 94c5c170..e86ab13 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -158,6 +158,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {    SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;    // Redirect exiting edges through a control flow hub.    ControlFlowHub CHub; +  bool Changed = false;    for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {      BasicBlock *BB = ExitingBlocks[I]; @@ -182,6 +183,10 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {          bool UpdatedLI = false;          BasicBlock *NewSucc =              SplitCallBrEdge(BB, Succ, J, &DTU, nullptr, &LI, &UpdatedLI); +        // SplitCallBrEdge modifies the CFG because it creates an intermediate +        // block. So we need to set the changed flag no matter what the +        // ControlFlowHub is going to do later. +        Changed = true;          // Even if CallBr and Succ do not have a common parent loop, we need to          // add the new target block to the parent loop of the current loop.          
if (!UpdatedLI) @@ -207,6 +212,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {    bool ChangedCFG;    std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(        &DTU, GuardBlocks, "loop.exit", MaxBooleansInControlFlowHub.getValue()); +  ChangedCFG |= Changed;    if (!ChangedCFG)      return false; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 5298728..04b0562 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -167,7 +167,7 @@ public:                                DebugLoc DL = DebugLoc::getUnknown(),                                const Twine &Name = "") {      return tryInsertInstruction( -        new VPInstruction(Opcode, Operands, Flags, DL, Name)); +        new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));    }    VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, @@ -184,7 +184,7 @@ public:                                       DebugLoc DL = DebugLoc::getUnknown(),                                       const Twine &Name = "") {      return tryInsertInstruction( -        new VPInstruction(Opcode, Operands, WrapFlags, DL, Name)); +        new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));    }    VPInstruction *createNot(VPValue *Operand, @@ -205,7 +205,7 @@ public:      return tryInsertInstruction(new VPInstruction(          Instruction::BinaryOps::Or, {LHS, RHS}, -        VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name)); +        VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));    }    VPInstruction *createLogicalAnd(VPValue *LHS, VPValue *RHS, @@ -221,7 +221,7 @@ public:                 std::optional<FastMathFlags> FMFs = std::nullopt) {      auto *Select =          FMFs ? 
new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, -                                 *FMFs, DL, Name) +                                 *FMFs, {}, DL, Name)               : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},                                   DL, Name);      return tryInsertInstruction(Select); @@ -235,7 +235,7 @@ public:      assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&             Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");      return tryInsertInstruction( -        new VPInstruction(Instruction::ICmp, {A, B}, Pred, DL, Name)); +        new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));    }    /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A @@ -246,7 +246,7 @@ public:      assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&             Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");      return tryInsertInstruction( -        new VPInstruction(Instruction::FCmp, {A, B}, Pred, DL, Name)); +        new VPInstruction(Instruction::FCmp, {A, B}, Pred, {}, DL, Name));    }    VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, @@ -254,7 +254,7 @@ public:                                const Twine &Name = "") {      return tryInsertInstruction(          new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset}, -                          GEPNoWrapFlags::none(), DL, Name)); +                          GEPNoWrapFlags::none(), {}, DL, Name));    }    VPInstruction *createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, @@ -262,7 +262,7 @@ public:                                      DebugLoc DL = DebugLoc::getUnknown(),                                      const Twine &Name = "") {      return tryInsertInstruction(new VPInstruction( -        VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, DL, Name)); +        VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));    }    VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset, @@ -270,7 +270,7 @@ public:                                    const Twine &Name = "") {      return tryInsertInstruction(          new VPInstruction(VPInstruction::WidePtrAdd, {Ptr, Offset}, -                          GEPNoWrapFlags::none(), DL, Name)); +                          GEPNoWrapFlags::none(), {}, DL, Name));    }    VPPhi *createScalarPhi(ArrayRef<VPValue *> IncomingValues, DebugLoc DL, @@ -303,9 +303,11 @@ public:    }    VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op, -                                  Type *ResultTy, DebugLoc DL) { +                                  Type *ResultTy, DebugLoc DL, +                                  const VPIRFlags &Flags = {}, +                                  const VPIRMetadata &Metadata = {}) {      return tryInsertInstruction( -        new VPInstructionWithType(Opcode, Op, ResultTy, {}, DL)); +        new VPInstructionWithType(Opcode, Op, ResultTy, DL, Flags, Metadata));    }    VPValue *createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 34b405c..bf3f52c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -20975,6 +20975,27 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,    if (isa<PHINode>(S.getMainOp()) ||        isVectorLikeInstWithConstOps(S.getMainOp()))      return nullptr; +  // If the parent node is non-schedulable and the current 
node is copyable, and +  // any of parent instructions are used outside several basic blocks or in +  // bin-op node - cancel scheduling, it may cause wrong def-use deps in +  // analysis, leading to a crash. +  // Non-scheduled nodes may not have related ScheduleData model, which may lead +  // to a skipped dep analysis. +  if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() && +      EI.UserTE->doesNotNeedToSchedule() && +      EI.UserTE->getOpcode() != Instruction::PHI && +      any_of(EI.UserTE->Scalars, [](Value *V) { +        auto *I = dyn_cast<Instruction>(V); +        if (!I || I->hasOneUser()) +          return false; +        for (User *U : I->users()) { +          auto *UI = cast<Instruction>(U); +          if (isa<BinaryOperator>(UI)) +            return true; +        } +        return false; +      })) +    return std::nullopt;    bool HasCopyables = S.areInstructionsWithCopyableElements();    if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||         all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) { diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp index 9c869dd..d354933 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp @@ -92,7 +92,7 @@ void MemDGNode::print(raw_ostream &OS, bool PrintDeps) const {    DGNode::print(OS, false);    if (PrintDeps) {      // Print memory preds. -    static constexpr const unsigned Indent = 4; +    static constexpr unsigned Indent = 4;      for (auto *Pred : MemPreds)        OS.indent(Indent) << "<-" << *Pred->getInstruction() << "\n";    } diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index 86dbd21..5534da9 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -25,14 +25,14 @@ static cl::opt<bool>                            "emit new instructions (*very* expensive)."));  #endif // NDEBUG -static constexpr const unsigned long StopAtDisabled = +static constexpr unsigned long StopAtDisabled =      std::numeric_limits<unsigned long>::max();  static cl::opt<unsigned long>      StopAt("sbvec-stop-at", cl::init(StopAtDisabled), cl::Hidden,             cl::desc("Vectorize if the invocation count is < than this. 
0 "                      "disables vectorization.")); -static constexpr const unsigned long StopBundleDisabled = +static constexpr unsigned long StopBundleDisabled =      std::numeric_limits<unsigned long>::max();  static cl::opt<unsigned long>      StopBundle("sbvec-stop-bndl", cl::init(StopBundleDisabled), cl::Hidden, diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp index ed2f80b..2de6921 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp @@ -43,7 +43,7 @@ cl::opt<std::string> AllowFiles(      "sbvec-allow-files", cl::init(".*"), cl::Hidden,      cl::desc("Run the vectorizer only on file paths that match any in the "               "list of comma-separated regex's.")); -static constexpr const char AllowFilesDelim = ','; +static constexpr char AllowFilesDelim = ',';  SandboxVectorizerPass::SandboxVectorizerPass() : FPM("fpm") {    if (UserDefinedPassPipeline == DefaultPipelineMagicStr) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 08c9c15..cfe1f1e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -939,7 +939,7 @@ class VPIRMetadata {    SmallVector<std::pair<unsigned, MDNode *>> Metadata;  public: -  VPIRMetadata() {} +  VPIRMetadata() = default;    /// Adds metatadata that can be preserved from the original instruction    /// \p I. @@ -950,12 +950,9 @@ public:    VPIRMetadata(Instruction &I, LoopVersioning *LVer);    /// Copy constructor for cloning. -  VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {} +  VPIRMetadata(const VPIRMetadata &Other) = default; -  VPIRMetadata &operator=(const VPIRMetadata &Other) { -    Metadata = Other.Metadata; -    return *this; -  } +  VPIRMetadata &operator=(const VPIRMetadata &Other) = default;    /// Add all metadata to \p I.    
void applyMetadata(Instruction &I) const; @@ -1107,14 +1104,14 @@ public:          VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {}    VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, -                const VPIRFlags &Flags, DebugLoc DL = DebugLoc::getUnknown(), -                const Twine &Name = ""); +                const VPIRFlags &Flags, const VPIRMetadata &MD = {}, +                DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");    VP_CLASSOF_IMPL(VPDef::VPInstructionSC)    VPInstruction *clone() override { -    SmallVector<VPValue *, 2> Operands(operands()); -    auto *New = new VPInstruction(Opcode, Operands, *this, getDebugLoc(), Name); +    auto *New = new VPInstruction(Opcode, operands(), *this, *this, +                                  getDebugLoc(), Name);      if (getUnderlyingValue())        New->setUnderlyingValue(getUnderlyingInstr());      return New; @@ -1196,7 +1193,14 @@ public:    VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,                          Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL,                          const Twine &Name = "") -      : VPInstruction(Opcode, Operands, Flags, DL, Name), ResultTy(ResultTy) {} +      : VPInstruction(Opcode, Operands, Flags, {}, DL, Name), +        ResultTy(ResultTy) {} + +  VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands, +                        Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, +                        const VPIRMetadata &Metadata, const Twine &Name = "") +      : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name), +        ResultTy(ResultTy) {}    static inline bool classof(const VPRecipeBase *R) {      // VPInstructionWithType are VPInstructions with specific opcodes requiring @@ -1221,10 +1225,9 @@ public:    }    VPInstruction *clone() override { -    SmallVector<VPValue *, 2> Operands(operands());      auto *New = -        new VPInstructionWithType(getOpcode(), Operands, getResultType(), *this, -                                  getDebugLoc(), getName()); +        new VPInstructionWithType(getOpcode(), operands(), getResultType(), +                                  *this, getDebugLoc(), getName());      New->setUnderlyingValue(getUnderlyingValue());      return New;    } @@ -3206,6 +3209,9 @@ protected:        : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),          Alignment(Alignment), Consecutive(Consecutive), Reverse(Reverse) {      assert((Consecutive || !Reverse) && "Reverse implies consecutive"); +    assert(isa<VPVectorEndPointerRecipe>(getAddr()) || +           !Reverse && +               "Reversed acccess without VPVectorEndPointerRecipe address?");    }  public: @@ -3977,7 +3983,7 @@ class VPIRBasicBlock : public VPBasicBlock {          IRBB(IRBB) {}  public: -  ~VPIRBasicBlock() override {} +  ~VPIRBasicBlock() override = default;    static inline bool classof(const VPBlockBase *V) {      return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC; @@ -4029,7 +4035,7 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {          IsReplicator(IsReplicator) {}  public: -  ~VPRegionBlock() override {} +  ~VPRegionBlock() override = default;    /// Method to support type inquiry through isa, cast, and dyn_cast.    
static inline bool classof(const VPBlockBase *V) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index b5b98c6..b57c448 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -313,7 +313,8 @@ private:      // Check for recipes that do not have opcodes.      if constexpr (std::is_same_v<RecipeTy, VPScalarIVStepsRecipe> ||                    std::is_same_v<RecipeTy, VPCanonicalIVPHIRecipe> || -                  std::is_same_v<RecipeTy, VPDerivedIVRecipe>) +                  std::is_same_v<RecipeTy, VPDerivedIVRecipe> || +                  std::is_same_v<RecipeTy, VPVectorEndPointerRecipe>)        return DefR;      else        return DefR && DefR->getOpcode() == Opcode; @@ -686,6 +687,64 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {    return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});  } +template <typename Addr_t, typename Mask_t> struct Load_match { +  Addr_t Addr; +  Mask_t Mask; + +  Load_match(Addr_t Addr, Mask_t Mask) : Addr(Addr), Mask(Mask) {} + +  template <typename OpTy> bool match(const OpTy *V) const { +    auto *Load = dyn_cast<VPWidenLoadRecipe>(V); +    if (!Load || !Addr.match(Load->getAddr()) || !Load->isMasked() || +        !Mask.match(Load->getMask())) +      return false; +    return true; +  } +}; + +/// Match a (possibly reversed) masked load. +template <typename Addr_t, typename Mask_t> +inline Load_match<Addr_t, Mask_t> m_MaskedLoad(const Addr_t &Addr, +                                               const Mask_t &Mask) { +  return Load_match<Addr_t, Mask_t>(Addr, Mask); +} + +template <typename Addr_t, typename Val_t, typename Mask_t> struct Store_match { +  Addr_t Addr; +  Val_t Val; +  Mask_t Mask; + +  Store_match(Addr_t Addr, Val_t Val, Mask_t Mask) +      : Addr(Addr), Val(Val), Mask(Mask) {} + +  template <typename OpTy> bool match(const OpTy *V) const { +    auto *Store = dyn_cast<VPWidenStoreRecipe>(V); +    if (!Store || !Addr.match(Store->getAddr()) || +        !Val.match(Store->getStoredValue()) || !Store->isMasked() || +        !Mask.match(Store->getMask())) +      return false; +    return true; +  } +}; + +/// Match a (possibly reversed) masked store. +template <typename Addr_t, typename Val_t, typename Mask_t> +inline Store_match<Addr_t, Val_t, Mask_t> +m_MaskedStore(const Addr_t &Addr, const Val_t &Val, const Mask_t &Mask) { +  return Store_match<Addr_t, Val_t, Mask_t>(Addr, Val, Mask); +} + +template <typename Op0_t, typename Op1_t> +using VectorEndPointerRecipe_match = +    Recipe_match<std::tuple<Op0_t, Op1_t>, 0, +                 /*Commutative*/ false, VPVectorEndPointerRecipe>; + +template <typename Op0_t, typename Op1_t> +VectorEndPointerRecipe_match<Op0_t, Op1_t> m_VecEndPtr(const Op0_t &Op0, +                                                       const Op1_t &Op1) { +  return VectorEndPointerRecipe_match<Op0_t, Op1_t>(Op0, Op1); +} +  /// Match a call argument at a given argument index.  template <typename Opnd_t> struct Argument_match {    /// Call argument index to match. 
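// A minimal sketch of how the new matchers compose (matcher names from this
// hunk; `R` is a hypothetical recipe being inspected):
//
//   using namespace llvm::VPlanPatternMatch;
//   VPValue *Addr = nullptr, *Val = nullptr, *Mask = nullptr;
//   // A masked wide store whose address is a VPVectorEndPointerRecipe,
//   // i.e. the reversed-access form.
//   bool IsReverseMaskedStore = match(
//       &R, m_MaskedStore(m_VecEndPtr(m_VPValue(Addr), m_VPValue()),
//                         m_VPValue(Val), m_VPValue(Mask)));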
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f9c15a3..1ee405a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -162,8 +162,12 @@ bool VPRecipeBase::mayHaveSideEffects() const {    case VPPredInstPHISC:    case VPVectorEndPointerSC:      return false; -  case VPInstructionSC: -    return mayWriteToMemory(); +  case VPInstructionSC: { +    auto *VPI = cast<VPInstruction>(this); +    return mayWriteToMemory() || +           VPI->getOpcode() == VPInstruction::BranchOnCount || +           VPI->getOpcode() == VPInstruction::BranchOnCond; +  }    case VPWidenCallSC: {      Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();      return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn(); @@ -490,10 +494,10 @@ template class VPUnrollPartAccessor<3>;  }  VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, -                             const VPIRFlags &Flags, DebugLoc DL, -                             const Twine &Name) +                             const VPIRFlags &Flags, const VPIRMetadata &MD, +                             DebugLoc DL, const Twine &Name)      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, Flags, DL), -      VPIRMetadata(), Opcode(Opcode), Name(Name.str()) { +      VPIRMetadata(MD), Opcode(Opcode), Name(Name.str()) {    assert(flagsValidForOpcode(getOpcode()) &&           "Set flags not supported for the provided opcode");    assert((getNumOperandsForOpcode(Opcode) == -1u || @@ -1241,6 +1245,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {    case Instruction::Select:    case Instruction::PHI:    case VPInstruction::AnyOf: +  case VPInstruction::BranchOnCond: +  case VPInstruction::BranchOnCount:    case VPInstruction::Broadcast:    case VPInstruction::BuildStructVector:    case VPInstruction::BuildVector: diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.h b/llvm/lib/Transforms/Vectorize/VPlanSLP.h index 77ff36c..44972c68 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanSLP.h +++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.h @@ -89,8 +89,7 @@ class VPlanSlp {    /// Width of the widest combined bundle in bits.    unsigned WidestBundleBits = 0; -  using MultiNodeOpTy = -      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>; +  using MultiNodeOpTy = std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;    // Input operand bundles for the current multi node. Each multi node operand    // bundle contains values not matching the multi node's opcode. They will diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6a8231b..9d9bb14 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -151,7 +151,27 @@ static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {  static bool sinkScalarOperands(VPlan &Plan) {    auto Iter = vp_depth_first_deep(Plan.getEntry()); +  bool ScalarVFOnly = Plan.hasScalarVFOnly();    bool Changed = false; + +  auto IsValidSinkCandidate = [ScalarVFOnly](VPBasicBlock *SinkTo, +                                             VPSingleDefRecipe *Candidate) { +    // We only know how to duplicate VPReplicateRecipes and +    // VPScalarIVStepsRecipes for now. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.h b/llvm/lib/Transforms/Vectorize/VPlanSLP.h
index 77ff36c..44972c68 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanSLP.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.h
@@ -89,8 +89,7 @@ class VPlanSlp {
   /// Width of the widest combined bundle in bits.
   unsigned WidestBundleBits = 0;
 
-  using MultiNodeOpTy =
-      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
+  using MultiNodeOpTy = std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
 
   // Input operand bundles for the current multi node. Each multi node operand
   // bundle contains values not matching the multi node's opcode. They will
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6a8231b..9d9bb14 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -151,7 +151,27 @@ static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
 
 static bool sinkScalarOperands(VPlan &Plan) {
   auto Iter = vp_depth_first_deep(Plan.getEntry());
+  bool ScalarVFOnly = Plan.hasScalarVFOnly();
   bool Changed = false;
+
+  auto IsValidSinkCandidate = [ScalarVFOnly](VPBasicBlock *SinkTo,
+                                             VPSingleDefRecipe *Candidate) {
+    // We only know how to duplicate VPReplicateRecipes and
+    // VPScalarIVStepsRecipes for now.
+    if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Candidate))
+      return false;
+
+    if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() ||
+        Candidate->mayReadOrWriteMemory())
+      return false;
+
+    if (auto *RepR = dyn_cast<VPReplicateRecipe>(Candidate))
+      if (!ScalarVFOnly && RepR->isSingleScalar())
+        return false;
+
+    return true;
+  };
+
   // First, collect the operands of all recipes in replicate blocks as seeds for
   // sinking.
   SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
@@ -159,51 +179,37 @@ static bool sinkScalarOperands(VPlan &Plan) {
     VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
     if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
       continue;
-    VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
-    if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
+    VPBasicBlock *VPBB = cast<VPBasicBlock>(EntryVPBB->getSuccessors().front());
+    if (VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
       continue;
     for (auto &Recipe : *VPBB) {
-      for (VPValue *Op : Recipe.operands())
+      for (VPValue *Op : Recipe.operands()) {
         if (auto *Def =
                 dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
-          WorkList.insert({VPBB, Def});
+          if (IsValidSinkCandidate(VPBB, Def))
+            WorkList.insert({VPBB, Def});
+      }
     }
   }
 
-  bool ScalarVFOnly = Plan.hasScalarVFOnly();
   // Try to sink each replicate or scalar IV steps recipe in the worklist.
   for (unsigned I = 0; I != WorkList.size(); ++I) {
     VPBasicBlock *SinkTo;
     VPSingleDefRecipe *SinkCandidate;
     std::tie(SinkTo, SinkCandidate) = WorkList[I];
-    if (SinkCandidate->getParent() == SinkTo ||
-        SinkCandidate->mayHaveSideEffects() ||
-        SinkCandidate->mayReadOrWriteMemory())
-      continue;
-    if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
-      if (!ScalarVFOnly && RepR->isSingleScalar())
-        continue;
-    } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
-      continue;
-    bool NeedsDuplicating = false;
     // All recipe users of the sink candidate must be in the same block SinkTo
-    // or all users outside of SinkTo must be uniform-after-vectorization (
-    // i.e., only first lane is used) . In the latter case, we need to duplicate
-    // SinkCandidate.
-    auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
-                            SinkCandidate](VPUser *U) {
-      auto *UI = cast<VPRecipeBase>(U);
-      if (UI->getParent() == SinkTo)
-        return true;
-      NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
-      // We only know how to duplicate VPReplicateRecipes and
-      // VPScalarIVStepsRecipes for now.
-      return NeedsDuplicating &&
-             isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(SinkCandidate);
-    };
-    if (!all_of(SinkCandidate->users(), CanSinkWithUser))
+    // or all users outside of SinkTo must have only their first lane used. In
+    // the latter case, we need to duplicate SinkCandidate.
+    auto UsersOutsideSinkTo =
+        make_filter_range(SinkCandidate->users(), [SinkTo](VPUser *U) {
+          return cast<VPRecipeBase>(U)->getParent() != SinkTo;
+        });
+    if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
+          return !U->onlyFirstLaneUsed(SinkCandidate);
+        }))
       continue;
+    bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
 
     if (NeedsDuplicating) {
       if (ScalarVFOnly)
@@ -230,7 +236,8 @@ static bool sinkScalarOperands(VPlan &Plan) {
     for (VPValue *Op : SinkCandidate->operands())
       if (auto *Def =
              dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
-        WorkList.insert({SinkTo, Def});
+        if (IsValidSinkCandidate(SinkTo, Def))
+          WorkList.insert({SinkTo, Def});
     Changed = true;
   }
   return Changed;
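The rewritten legality check above asks two separate questions: does any user outside the sink block need more than the first lane (if so, give up), and does any user remain outside at all (if so, the candidate must be duplicated rather than moved). The same shape in standalone C++, with a toy User record; the real code uses make_filter_range and any_of from llvm/ADT/STLExtras.h:

#include <algorithm>
#include <vector>

// Toy user record; the real code queries cast<VPRecipeBase>(U)->getParent()
// and U->onlyFirstLaneUsed(SinkCandidate).
struct User {
  int Block;
  bool OnlyFirstLaneUsed;
};

// Sinking into block SinkTo is legal iff no outside user needs more than
// the first lane; duplication is needed iff any user stays outside.
bool canSinkTo(int SinkTo, const std::vector<User> &Users,
               bool &NeedsDuplicating) {
  auto Outside = [SinkTo](const User &U) { return U.Block != SinkTo; };
  if (std::any_of(Users.begin(), Users.end(), [&](const User &U) {
        return Outside(U) && !U.OnlyFirstLaneUsed;
      }))
    return false;
  NeedsDuplicating = std::any_of(Users.begin(), Users.end(), Outside);
  return true;
}

Walking the filtered range twice in the original (once for any_of, once for empty()) stays cheap because filter iterators are lazy.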
@@ -1056,13 +1063,9 @@ static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
   return nullptr;
 }
 
-/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
-  VPlan *Plan = R.getParent()->getPlan();
-
-  auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
-  if (!Def)
-    return;
+/// Try to simplify VPSingleDefRecipe \p Def.
+static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
+  VPlan *Plan = Def->getParent()->getPlan();
 
   // Simplification of live-in IR values for SingleDef recipes using
   // InstSimplifyFolder.
@@ -1072,7 +1075,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
     return Def->replaceAllUsesWith(V);
 
   // Fold PredPHI LiveIn -> LiveIn.
-  if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
+  if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(Def)) {
     VPValue *Op = PredPHI->getOperand(0);
     if (Op->isLiveIn())
       PredPHI->replaceAllUsesWith(Op);
@@ -1091,12 +1094,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
         return;
 
       if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
-        unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
+        unsigned ExtOpcode = match(Def->getOperand(0), m_SExt(m_VPValue()))
                                  ? Instruction::SExt
                                  : Instruction::ZExt;
         auto *Ext = Builder.createWidenCast(Instruction::CastOps(ExtOpcode), A,
                                             TruncTy);
-        if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
+        if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
           // UnderlyingExt has distinct return type, used to retain legacy cost.
           Ext->setUnderlyingValue(UnderlyingExt);
         }
@@ -1159,7 +1162,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
         Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
 
   // x && !x -> 0
-  if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
+  if (match(Def, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
     return Def->replaceAllUsesWith(Plan->getFalse());
 
   if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
@@ -1187,8 +1190,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
     return Def->replaceAllUsesWith(A);
 
   if (match(Def, m_c_Mul(m_VPValue(A), m_ZeroInt())))
-    return Def->replaceAllUsesWith(R.getOperand(0) == A ? R.getOperand(1)
-                                                        : R.getOperand(0));
+    return Def->replaceAllUsesWith(
+        Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
 
   if (match(Def, m_Not(m_VPValue(A)))) {
     if (match(A, m_Not(m_VPValue(A))))
@@ -1217,8 +1220,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
         }
         // If Cmp doesn't have a debug location, use the one from the negation,
         // to preserve the location.
-        if (!Cmp->getDebugLoc() && R.getDebugLoc())
-          Cmp->setDebugLoc(R.getDebugLoc());
+        if (!Cmp->getDebugLoc() && Def->getDebugLoc())
+          Cmp->setDebugLoc(Def->getDebugLoc());
       }
     }
   }
@@ -1244,7 +1247,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   if (match(Def, m_Intrinsic<Intrinsic::vp_merge>(m_True(), m_VPValue(A),
                                                   m_VPValue(X), m_VPValue())) &&
       match(A, m_c_BinaryOr(m_Specific(X), m_VPValue(Y))) &&
-      TypeInfo.inferScalarType(R.getVPSingleValue())->isIntegerTy(1)) {
+      TypeInfo.inferScalarType(Def)->isIntegerTy(1)) {
     Def->setOperand(1, Def->getOperand(0));
     Def->setOperand(0, Y);
     return;
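Most of the simplifyRecipe folds above are classic peepholes (x && !x -> false, !!x -> x, x * 0 -> 0) expressed through the pattern matchers. A standalone miniature of the same rewrite style, on a toy Expr type rather than VPlan recipes:

#include <cassert>

// Toy expression node; just enough structure for two classic folds.
struct Expr {
  enum Kind { Var, Not, And, False } K;
  const Expr *L = nullptr;
  const Expr *R = nullptr;
};

// Returns a simplified form of E, or E itself if no rule applies.
const Expr *simplify(const Expr *E) {
  static const Expr FalseExpr{Expr::False};
  if (E->K == Expr::And) {
    // x && !x -> false (checked in both operand orders).
    if (E->R->K == Expr::Not && E->R->L == E->L)
      return &FalseExpr;
    if (E->L->K == Expr::Not && E->L->L == E->R)
      return &FalseExpr;
  }
  // !!x -> x
  if (E->K == Expr::Not && E->L->K == Expr::Not)
    return E->L->L;
  return E;
}

int main() {
  Expr X{Expr::Var};
  Expr NotX{Expr::Not, &X};
  Expr AndXNotX{Expr::And, &X, &NotX};
  assert(simplify(&AndXNotX)->K == Expr::False);
  assert(simplify(&NotX) == &NotX);
}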
@@ -1252,35 +1255,41 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {
     if (Phi->getOperand(0) == Phi->getOperand(1))
-      Def->replaceAllUsesWith(Phi->getOperand(0));
+      Phi->replaceAllUsesWith(Phi->getOperand(0));
     return;
   }
 
   // Look through ExtractLastElement (BuildVector ....).
-  if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
-                            m_ExtractLastLanePerPart(m_BuildVector())))) {
-    auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+  if (match(Def, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
+                             m_ExtractLastLanePerPart(m_BuildVector())))) {
+    auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
     Def->replaceAllUsesWith(
         BuildVector->getOperand(BuildVector->getNumOperands() - 1));
     return;
   }
 
   // Look through ExtractPenultimateElement (BuildVector ....).
-  if (match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
-                    m_BuildVector()))) {
-    auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+  if (match(Def, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
+                     m_BuildVector()))) {
+    auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
     Def->replaceAllUsesWith(
         BuildVector->getOperand(BuildVector->getNumOperands() - 2));
     return;
   }
 
   uint64_t Idx;
-  if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
-    auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+  if (match(Def, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
+    auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
     Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
     return;
   }
 
+  if (match(Def, m_BuildVector()) && all_equal(Def->operands())) {
+    Def->replaceAllUsesWith(
+        Builder.createNaryOp(VPInstruction::Broadcast, Def->getOperand(0)));
+    return;
+  }
+
   if (auto *Phi = dyn_cast<VPPhi>(Def)) {
     if (Phi->getNumOperands() == 1)
       Phi->replaceAllUsesWith(Phi->getOperand(0));
@@ -1297,7 +1306,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
       isa<VPPhi>(X)) {
     auto *Phi = cast<VPPhi>(X);
     if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
-        Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) {
+        Phi->getNumUsers() == 1 && (*Phi->user_begin() == Def)) {
       Phi->setOperand(0, Y);
       Def->replaceAllUsesWith(Phi);
       return;
@@ -1305,7 +1314,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   }
 
   // VPVectorPointer for part 0 can be replaced by their start pointer.
-  if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
+  if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(Def)) {
     if (VecPtr->isFirstPart()) {
       VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
       return;
@@ -1360,9 +1369,9 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
       Plan.getEntry());
   VPTypeAnalysis TypeInfo(Plan);
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
-    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      simplifyRecipe(R, TypeInfo);
-    }
+    for (VPRecipeBase &R : make_early_inc_range(*VPBB))
+      if (auto *Def = dyn_cast<VPSingleDefRecipe>(&R))
+        simplifyRecipe(Def, TypeInfo);
   }
 }
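The BuildVector folds above are pure index bookkeeping: extracting lane Idx from an explicitly built vector is just operand Idx, and a BuildVector whose operands are all equal is a broadcast of operand 0. In miniature, with plain values instead of recipes (all_equal is LLVM's helper; std::adjacent_find is the standard-library equivalent used here):

#include <algorithm>
#include <cassert>
#include <functional>
#include <vector>

using Value = int;

// ExtractElement(BuildVector(ops...), Idx) -> ops[Idx]
Value extractFromBuild(const std::vector<Value> &Ops, unsigned Idx) {
  assert(Idx < Ops.size() && "lane out of range");
  return Ops[Idx];
}

// BuildVector(v, v, ..., v) -> Broadcast(v)
bool foldsToBroadcast(const std::vector<Value> &Ops) {
  return std::adjacent_find(Ops.begin(), Ops.end(),
                            std::not_equal_to<>()) == Ops.end();
}

int main() {
  std::vector<Value> Ops{7, 7, 7, 7};
  assert(extractFromBuild(Ops, 2) == 7);
  assert(foldsToBroadcast(Ops));
  assert(!foldsToBroadcast({1, 2, 1, 1}));
}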
@@ -2515,90 +2524,102 @@ void VPlanTransforms::addActiveLaneMask(
   HeaderMask->eraseFromParent();
 }
 
+template <typename Op0_t, typename Op1_t> struct RemoveMask_match {
+  Op0_t In;
+  Op1_t &Out;
+
+  RemoveMask_match(const Op0_t &In, Op1_t &Out) : In(In), Out(Out) {}
+
+  template <typename OpTy> bool match(OpTy *V) const {
+    if (m_Specific(In).match(V)) {
+      Out = nullptr;
+      return true;
+    }
+    if (m_LogicalAnd(m_Specific(In), m_VPValue(Out)).match(V))
+      return true;
+    return false;
+  }
+};
+
+/// Match a specific mask \p In, or a combination of it (logical-and In, Out).
+/// Returns the remaining part \p Out if so, or nullptr otherwise.
+template <typename Op0_t, typename Op1_t>
+static inline RemoveMask_match<Op0_t, Op1_t> m_RemoveMask(const Op0_t &In,
+                                                          Op1_t &Out) {
+  return RemoveMask_match<Op0_t, Op1_t>(In, Out);
+}
+
 /// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
 /// EVL-based recipe without the header mask. Returns nullptr if no EVL-based
 /// recipe could be created.
 /// \p HeaderMask  Header Mask.
 /// \p CurRecipe   Recipe to be transform.
 /// \p TypeInfo    VPlan-based type analysis.
-/// \p AllOneMask  The vector mask parameter of vector-predication intrinsics.
 /// \p EVL         The explicit vector length parameter of vector-predication
 /// intrinsics.
 static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
                                        VPRecipeBase &CurRecipe,
-                                       VPTypeAnalysis &TypeInfo,
-                                       VPValue &AllOneMask, VPValue &EVL) {
-  // FIXME: Don't transform recipes to EVL recipes if they're not masked by the
-  // header mask.
-  auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
-    assert(OrigMask && "Unmasked recipe when folding tail");
-    // HeaderMask will be handled using EVL.
-    VPValue *Mask;
-    if (match(OrigMask, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask))))
-      return Mask;
-    return HeaderMask == OrigMask ? nullptr : OrigMask;
-  };
+                                       VPTypeAnalysis &TypeInfo, VPValue &EVL) {
+  VPlan *Plan = CurRecipe.getParent()->getPlan();
+  VPValue *Addr, *Mask, *EndPtr;
 
   /// Adjust any end pointers so that they point to the end of EVL lanes not VF.
-  auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * {
-    auto *EndPtr = dyn_cast<VPVectorEndPointerRecipe>(Addr);
-    if (!EndPtr)
-      return Addr;
-    assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() &&
-           "VPVectorEndPointerRecipe with non-VF VF operand?");
-    assert(
-        all_of(EndPtr->users(),
-               [](VPUser *U) {
-                 return cast<VPWidenMemoryRecipe>(U)->isReverse();
-               }) &&
-        "VPVectorEndPointRecipe not used by reversed widened memory recipe?");
-    VPVectorEndPointerRecipe *EVLAddr = EndPtr->clone();
-    EVLAddr->insertBefore(&CurRecipe);
-    EVLAddr->setOperand(1, &EVL);
-    return EVLAddr;
+  auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
+    auto *EVLEndPtr = cast<VPVectorEndPointerRecipe>(EndPtr)->clone();
+    EVLEndPtr->insertBefore(&CurRecipe);
+    EVLEndPtr->setOperand(1, &EVL);
+    return EVLEndPtr;
   };
 
-  return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe)
-      .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
-        VPValue *NewMask = GetNewMask(L->getMask());
-        VPValue *NewAddr = GetNewAddr(L->getAddr());
-        return new VPWidenLoadEVLRecipe(*L, NewAddr, EVL, NewMask);
-      })
-      .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
-        VPValue *NewMask = GetNewMask(S->getMask());
-        VPValue *NewAddr = GetNewAddr(S->getAddr());
-        return new VPWidenStoreEVLRecipe(*S, NewAddr, EVL, NewMask);
-      })
-      .Case<VPInterleaveRecipe>([&](VPInterleaveRecipe *IR) {
-        VPValue *NewMask = GetNewMask(IR->getMask());
-        return new VPInterleaveEVLRecipe(*IR, EVL, NewMask);
-      })
-      .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
-        VPValue *NewMask = GetNewMask(Red->getCondOp());
-        return new VPReductionEVLRecipe(*Red, EVL, NewMask);
-      })
-      .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
-        VPValue *LHS, *RHS;
-        // Transform select with a header mask condition
-        //   select(header_mask, LHS, RHS)
-        // into vector predication merge.
-        //   vp.merge(all-true, LHS, RHS, EVL)
-        if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS),
-                                 m_VPValue(RHS))))
-          return nullptr;
-        // Use all true as the condition because this transformation is
-        // limited to selects whose condition is a header mask.
-        return new VPWidenIntrinsicRecipe(
-            Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
-            TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
-      })
-      .Default([&](VPRecipeBase *R) { return nullptr; });
+  if (match(&CurRecipe,
+            m_MaskedLoad(m_VPValue(Addr), m_RemoveMask(HeaderMask, Mask))) &&
+      !cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
+    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
+                                    EVL, Mask);
+
+  if (match(&CurRecipe,
+            m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
+      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+      cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
+    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
+                                    AdjustEndPtr(EndPtr), EVL, Mask);
+
+  if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
+                                      m_RemoveMask(HeaderMask, Mask))) &&
+      !cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
+                                     EVL, Mask);
+
+  if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
+                                      m_RemoveMask(HeaderMask, Mask))) &&
+      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+      cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
+                                     AdjustEndPtr(EndPtr), EVL, Mask);
+
+  if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
+    if (Rdx->isConditional() &&
+        match(Rdx->getCondOp(), m_RemoveMask(HeaderMask, Mask)))
+      return new VPReductionEVLRecipe(*Rdx, EVL, Mask);
+
+  if (auto *Interleave = dyn_cast<VPInterleaveRecipe>(&CurRecipe))
+    if (Interleave->getMask() &&
+        match(Interleave->getMask(), m_RemoveMask(HeaderMask, Mask)))
+      return new VPInterleaveEVLRecipe(*Interleave, EVL, Mask);
+
+  VPValue *LHS, *RHS;
+  if (match(&CurRecipe,
+            m_Select(m_Specific(HeaderMask), m_VPValue(LHS), m_VPValue(RHS))))
+    return new VPWidenIntrinsicRecipe(
+        Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL},
+        TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc());
+
+  return nullptr;
 }
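m_RemoveMask, which the rewritten optimizeMaskToEVL above applies to every load, store, reduction, and interleave mask, factors the header mask out of a recipe's mask: the mask is either exactly the header mask (nothing remains) or (logical-and header-mask, rest) (rest remains). The same logic standalone, with a toy Mask type in place of VPValue:

#include <cassert>

// Toy mask value: AndLHS/AndRHS are set iff the value is a logical-and.
struct Mask {
  const Mask *AndLHS = nullptr;
  const Mask *AndRHS = nullptr;
};

// True iff V is In itself or (logical-and In, Rest); Out receives Rest,
// or nullptr when nothing remains once In is stripped.
bool removeMask(const Mask *In, const Mask *V, const Mask *&Out) {
  if (V == In) {
    Out = nullptr;
    return true;
  }
  if (V->AndLHS == In) {
    Out = V->AndRHS;
    return true;
  }
  return false;
}

int main() {
  Mask Header, Rest;
  Mask Combined{&Header, &Rest};
  const Mask *Out = nullptr;
  assert(removeMask(&Header, &Header, Out) && Out == nullptr);
  assert(removeMask(&Header, &Combined, Out) && Out == &Rest);
  assert(!removeMask(&Header, &Rest, Out));
}

Packaging this as a matcher lets each case above test the mask shape and bind the leftover mask in a single match() expression, instead of the old GetNewMask helper plus TypeSwitch.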
 
 /// Replace recipes with their EVL variants.
 static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   VPTypeAnalysis TypeInfo(Plan);
-  VPValue *AllOneMask = Plan.getTrue();
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
@@ -2658,7 +2679,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
             ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
         VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
             Intrinsic::experimental_vp_splice,
-            {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
+            {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
             TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
         VPSplice->insertBefore(&R);
         R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
@@ -2692,7 +2713,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   for (VPUser *U : collectUsersRecursively(EVLMask)) {
     auto *CurRecipe = cast<VPRecipeBase>(U);
     VPRecipeBase *EVLRecipe =
-        optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
+        optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
     if (!EVLRecipe)
       continue;
 
@@ -4168,7 +4189,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   unsigned VFMinVal = VF.getKnownMinValue();
   SmallVector<VPInterleaveRecipe *> StoreGroups;
   for (auto &R : *VectorLoop->getEntryBasicBlock()) {
-    if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount()))
+    if (isa<VPCanonicalIVPHIRecipe>(&R))
       continue;
 
     if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe>(&R) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 8c23e78..c6380d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -32,22 +32,17 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
 }
 
 VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
-  VPValue *Expanded = nullptr;
   if (auto *E = dyn_cast<SCEVConstant>(Expr))
-    Expanded = Plan.getOrAddLiveIn(E->getValue());
-  else {
-    auto *U = dyn_cast<SCEVUnknown>(Expr);
-    // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
-    // value. Otherwise the value may be defined in a loop and using it directly
-    // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
-    // form.
-    if (U && !isa<Instruction>(U->getValue())) {
-      Expanded = Plan.getOrAddLiveIn(U->getValue());
-    } else {
-      Expanded = new VPExpandSCEVRecipe(Expr);
-      Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
-    }
-  }
+    return Plan.getOrAddLiveIn(E->getValue());
+  // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
+  // value. Otherwise the value may be defined in a loop and using it directly
+  // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
+  // form.
+  auto *U = dyn_cast<SCEVUnknown>(Expr);
+  if (U && !isa<Instruction>(U->getValue()))
+    return Plan.getOrAddLiveIn(U->getValue());
+  auto *Expanded = new VPExpandSCEVRecipe(Expr);
+  Plan.getEntry()->appendRecipe(Expanded);
   return Expanded;
 }
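One element-wise detail behind the select -> vp.merge rewrite in optimizeMaskToEVL above: llvm.vp.merge with an all-true mask takes the on-true operand for lanes below EVL and the on-false operand for the remaining lanes, which coincides with select(header_mask, ...) exactly when the header mask is "lane index < EVL". A plain C++ model of that semantics (assumes equal-length vectors; not the intrinsic itself):

#include <algorithm>
#include <cassert>
#include <vector>

// Element-wise model of llvm.vp.merge with an all-true mask: lanes below
// EVL come from OnTrue, the rest from OnFalse.
std::vector<int> vpMergeAllTrue(const std::vector<int> &OnTrue,
                                const std::vector<int> &OnFalse,
                                unsigned EVL) {
  std::vector<int> Out(OnFalse);
  std::copy(OnTrue.begin(),
            OnTrue.begin() + std::min<size_t>(EVL, OnTrue.size()),
            Out.begin());
  return Out;
}

int main() {
  std::vector<int> L{1, 2, 3, 4}, R{9, 9, 9, 9};
  assert((vpMergeAllTrue(L, R, 2) == std::vector<int>{1, 2, 9, 9}));
}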
