55 files changed, 803 insertions, 362 deletions
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b8c540c..9f8ac6e 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -849,17 +849,12 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
 /// %sum.2 = select %cmp, %add, %sum.1
 RecurrenceDescriptor::InstDesc
 RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) {
-  SelectInst *SI = dyn_cast<SelectInst>(I);
-  if (!SI)
-    return InstDesc(false, I);
-
-  CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
+  Value *TrueVal, *FalseVal;
   // Only handle single use cases for now.
-  if (!CI || !CI->hasOneUse())
+  if (!match(I,
+             m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal))))
     return InstDesc(false, I);
 
-  Value *TrueVal = SI->getTrueValue();
-  Value *FalseVal = SI->getFalseValue();
   // Handle only when either of operands of select instruction is a PHI
   // node for now.
   if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) ||
@@ -886,7 +881,7 @@ RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) {
   if (!IPhi || IPhi != FalseVal)
     return InstDesc(false, I);
 
-  return InstDesc(true, SI);
+  return InstDesc(true, I);
 }
 
 RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 4e38626..e08ef60 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6644,7 +6644,7 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
            "Invalid mask width");
     // If index-width (mask size) is less than pointer-size then mask is
     // 1-extended.
-    if (match(Op1, m_PtrToInt(m_Specific(Op0))))
+    if (match(Op1, m_PtrToIntOrAddr(m_Specific(Op0))))
       return Op0;
 
     // NOTE: We may have attributes associated with the return value of the
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index ab37338..0b2e3fc 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -393,7 +393,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
 /// \param AA        The AliasAnalysis we used for our search.
 /// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
 
-LLVM_ATTRIBUTE_UNUSED static void
+[[maybe_unused]] static void
 checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
                    const MemoryLocation &StartLoc, const MemorySSA &MSSA,
                    const UpwardsMemoryQuery &Query, BatchAAResults &AA,
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 3fab6b0..a64b93d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6417,8 +6417,18 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S,
   case scSequentialUMinExpr:
     return GetGCDMultiple(cast<SCEVNAryExpr>(S));
   case scUnknown: {
-    // ask ValueTracking for known bits
+    // Ask ValueTracking for known bits. SCEVUnknown only become available at
+    // the point their underlying IR instruction has been defined. If CtxI was
+    // not provided, use:
+    // * the first instruction in the entry block if it is an argument
+    // * the instruction itself otherwise.
     const SCEVUnknown *U = cast<SCEVUnknown>(S);
+    if (!CtxI) {
+      if (isa<Argument>(U->getValue()))
+        CtxI = &*F.getEntryBlock().begin();
+      else if (auto *I = dyn_cast<Instruction>(U->getValue()))
+        CtxI = I;
+    }
     unsigned Known =
         computeKnownBits(U->getValue(), getDataLayout(), &AC, CtxI, &DT)
             .countMinTrailingZeros();
@@ -15761,6 +15771,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
           const SCEV *OneAlignedUp =
               GetNextSCEVDividesByDivisor(One, DividesBy);
           To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
+        } else {
+          if (LHS->getType()->isPointerTy()) {
+            LHS = SE.getLosslessPtrToIntExpr(LHS);
+            RHS = SE.getLosslessPtrToIntExpr(RHS);
+            if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS))
+              break;
+          }
+          auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) {
+            const SCEV *Sub = SE.getMinusSCEV(A, B);
+            AddRewrite(Sub, Sub,
+                       SE.getUMaxExpr(Sub, SE.getOne(From->getType())));
+          };
+          AddSubRewrite(LHS, RHS);
+          AddSubRewrite(RHS, LHS);
+          continue;
         }
         break;
       default:
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index 1f751ee..e7f0b2c 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -60,6 +60,36 @@ void StaticDataProfileInfo::addConstantProfileCount(
     OriginalCount = getInstrMaxCountValue();
 }
 
+StaticDataProfileInfo::StaticDataHotness
+StaticDataProfileInfo::getConstantHotnessUsingProfileCount(
+    const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const {
+  // The accummulated counter shows the constant is hot. Return enum 'hot'
+  // whether this variable is seen by unprofiled functions or not.
+  if (PSI->isHotCount(Count))
+    return StaticDataHotness::Hot;
+  // The constant is not hot, and seen by unprofiled functions. We don't want to
+  // assign it to unlikely sections, even if the counter says 'cold'. So return
+  // enum 'LukewarmOrUnknown'.
+  if (ConstantWithoutCounts.count(C))
+    return StaticDataHotness::LukewarmOrUnknown;
+  // The accummulated counter shows the constant is cold so return enum 'cold'.
+  if (PSI->isColdCount(Count))
+    return StaticDataHotness::Cold;
+
+  return StaticDataHotness::LukewarmOrUnknown;
+}
+
+StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const {
+  switch (Hotness) {
+  case StaticDataHotness::Cold:
+    return "unlikely";
+  case StaticDataHotness::Hot:
+    return "hot";
+  default:
+    return "";
+  }
+}
+
 std::optional<uint64_t>
 StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
   auto I = ConstantProfileCounts.find(C);
@@ -70,23 +100,10 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
 
 StringRef StaticDataProfileInfo::getConstantSectionPrefix(
     const Constant *C, const ProfileSummaryInfo *PSI) const {
-  auto Count = getConstantProfileCount(C);
+  std::optional<uint64_t> Count = getConstantProfileCount(C);
   if (!Count)
     return "";
-  // The accummulated counter shows the constant is hot. Return 'hot' whether
-  // this variable is seen by unprofiled functions or not.
-  if (PSI->isHotCount(*Count))
-    return "hot";
-  // The constant is not hot, and seen by unprofiled functions. We don't want to
-  // assign it to unlikely sections, even if the counter says 'cold'. So return
-  // an empty prefix before checking whether the counter is cold.
-  if (ConstantWithoutCounts.count(C))
-    return "";
-  // The accummulated counter shows the constant is cold. Return 'unlikely'.
-  if (PSI->isColdCount(*Count))
-    return "unlikely";
-  // The counter says lukewarm. Return an empty prefix.
-  return "";
+  return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count));
 }
 
 bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 219bbc9..05fffe9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1437,7 +1437,8 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
           BrProbEnabled,
           MF.hasBBSections() && NumMBBSectionRanges > 1,
           static_cast<bool>(BBAddrMapSkipEmitBBEntries),
-          HasCalls};
+          HasCalls,
+          false};
 }
 
 void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 4931403..53f1cfe2 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -770,7 +770,7 @@ struct PartwordMaskValues {
   Value *Inv_Mask = nullptr;
 };
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
   auto PrintObj = [&O](auto *V) {
     if (V)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 3812823..04d9309 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -112,7 +112,7 @@ APInt GISelValueTracking::getKnownOnes(Register R) {
   return getKnownBits(R).One;
 }
 
-LLVM_ATTRIBUTE_UNUSED static void
+[[maybe_unused]] static void
 dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
   dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
          << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
@@ -2013,6 +2013,43 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
     FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
     break;
   }
+  case TargetOpcode::G_ADD: {
+    Register Src2 = MI.getOperand(2).getReg();
+    unsigned Src2NumSignBits =
+        computeNumSignBits(Src2, DemandedElts, Depth + 1);
+    if (Src2NumSignBits <= 2)
+      return 1; // Early out.
+
+    Register Src1 = MI.getOperand(1).getReg();
+    unsigned Src1NumSignBits =
+        computeNumSignBits(Src1, DemandedElts, Depth + 1);
+    if (Src1NumSignBits == 1)
+      return 1; // Early Out.
+
+    // Special case decrementing a value (ADD X, -1):
+    KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
+    if (Known2.isAllOnes()) {
+      KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
+      // If the input is known to be 0 or 1, the output is 0/-1, which is all
+      // sign bits set.
+      if ((Known1.Zero | 1).isAllOnes())
+        return TyBits;
+
+      // If we are subtracting one from a positive number, there is no carry
+      // out of the result.
+      if (Known1.isNonNegative()) {
+        FirstAnswer = Src1NumSignBits;
+        break;
+      }
+
+      // Otherwise, we treat this like an ADD.
+    }
+
+    // Add can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
+    break;
+  }
   case TargetOpcode::G_FCMP:
   case TargetOpcode::G_ICMP: {
     bool IsFP = Opcode == TargetOpcode::G_FCMP;
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 0e38017..d2f2c3e 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -661,7 +661,10 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
 void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
                                SmallVectorImpl<SlotIndex> *EndPoints) {
   LiveQueryResult LRQ = LR.Query(Kill);
-  VNInfo *VNI = LRQ.valueOutOrDead();
+  // LR may have liveness reachable from early clobber slot, which may be
+  // only live-in instead of live-out of the instruction.
+  // For example, LR =[1r, 3r), Kill = 3e, we have to prune [3e, 3r) of LR.
+  VNInfo *VNI = LRQ.valueOutOrDead() ? LRQ.valueOutOrDead() : LRQ.valueIn();
   if (!VNI)
     return;
 
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 3268c26..9662511 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1551,7 +1551,7 @@ LLVM_DUMP_METHOD void ILPValue::dump() const {
   dbgs() << *this << '\n';
 }
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 raw_ostream &llvm::operator<<(raw_ostream &OS, const ILPValue &Val) {
   Val.print(OS);
   return OS;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e153842..358e060 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -658,13 +658,13 @@ namespace {
                           bool InexpensiveOnly = false,
                           std::optional<EVT> OutVT = std::nullopt);
     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
-    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
-    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
-    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
+    SDValue buildRsqrtEstimate(SDValue Op);
+    SDValue buildSqrtEstimate(SDValue Op);
+    SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
-                                SDNodeFlags Flags, bool Reciprocal);
+                                bool Reciprocal);
     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
-                                SDNodeFlags Flags, bool Reciprocal);
+                                bool Reciprocal);
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -5044,7 +5044,6 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
 
   unsigned Opc = N->getOpcode();
   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
-  ConstantSDNode *N1C = isConstOrConstSplat(N1);
 
   // X / undef -> undef
   // X % undef -> undef
@@ -5076,7 +5075,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
   // it's a 1.
-  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
+  if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1))
     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
 
   return SDValue();
@@ -18591,20 +18590,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
     // If this FDIV is part of a reciprocal square root, it may be folded
     // into a target-specific square root estimate instruction.
     if (N1.getOpcode() == ISD::FSQRT) {
-      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
+      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
-      if (SDValue RV =
-              buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
         AddToWorklist(RV.getNode());
         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
       }
     } else if (N1.getOpcode() == ISD::FP_ROUND &&
                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
-      if (SDValue RV =
-              buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
         AddToWorklist(RV.getNode());
         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -18636,7 +18633,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
             SDValue AAZ =
                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
-            if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+            if (SDValue Rsqrt = buildRsqrtEstimate(AAZ))
               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
 
             // Estimate creation failed. Clean up speculatively created nodes.
@@ -18646,7 +18643,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
 
         // We found a FSQRT, so try to make this fold:
         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
-        if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
+        if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) {
           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
           AddToWorklist(Div.getNode());
           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
@@ -18743,11 +18740,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
     return SDValue();
 
   // FSQRT nodes have flags that propagate to the created nodes.
+  SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
   //       transform the fdiv, we may produce a sub-optimal estimate sequence
   //       because the reciprocal calculation may not have to filter out a
   //       0.0 input.
-  return buildSqrtEstimate(N0, Flags);
+  return buildSqrtEstimate(N0);
 }
 
 /// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -29744,28 +29742,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
 /// As a result, we precompute A/2 prior to the iteration loop.
 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
-                                         unsigned Iterations,
-                                         SDNodeFlags Flags, bool Reciprocal) {
+                                         unsigned Iterations, bool Reciprocal) {
   EVT VT = Arg.getValueType();
   SDLoc DL(Arg);
   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
 
   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
   // this entire sequence requires only one FP constant.
-  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
-  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
+  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
 
   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
   for (unsigned i = 0; i < Iterations; ++i) {
-    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
-    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
-    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
-    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
   }
 
   // If non-reciprocal square root is requested, multiply the result by Arg.
   if (!Reciprocal)
-    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
 
   return Est;
 }
@@ -29776,8 +29773,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
 ///     =>
 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
-                                         unsigned Iterations,
-                                         SDNodeFlags Flags, bool Reciprocal) {
+                                         unsigned Iterations, bool Reciprocal) {
   EVT VT = Arg.getValueType();
   SDLoc DL(Arg);
   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -29790,9 +29786,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
   // Newton iterations for reciprocal square root:
   // E = (E * -0.5) * ((A * E) * E + -3.0)
   for (unsigned i = 0; i < Iterations; ++i) {
-    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
-    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
-    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
+    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est);
+    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est);
+    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree);
 
     // When calculating a square root at the last iteration build:
     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -29800,13 +29796,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
     SDValue LHS;
     if (Reciprocal || (i + 1) < Iterations) {
       // RSQRT: LHS = (E * -0.5)
-      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
+      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
     } else {
       // SQRT: LHS = (A * E) * -0.5
-      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
+      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf);
     }
 
-    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
+    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS);
   }
 
   return Est;
@@ -29815,8 +29811,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
 /// Op can be zero.
-SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
-                                           bool Reciprocal) {
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) {
   if (LegalDAG)
     return SDValue();
 
@@ -29844,8 +29839,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
 
     if (Iterations > 0)
       Est = UseOneConstNR
-            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
-            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+                ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal)
+                : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal);
     if (!Reciprocal) {
       SDLoc DL(Op);
       // Try the target specific test first.
@@ -29863,12 +29858,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
   return SDValue();
 }
 
-SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
-  return buildSqrtEstimateImpl(Op, Flags, true);
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) {
+  return buildSqrtEstimateImpl(Op, true);
 }
 
-SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
-  return buildSqrtEstimateImpl(Op, Flags, false);
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) {
+  return buildSqrtEstimateImpl(Op, false);
 }
 
 /// Return true if there is any possibility that the two addresses overlap.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c9aeef7..90edaf3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5063,8 +5063,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     break;
   case ISD::ADD:
   case ISD::ADDC:
-    // Add can have at most one carry bit.  Thus we know that the output
-    // is, at worst, one more bit than the inputs.
+    // TODO: Move Operand 1 check before Operand 0 check
     Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
     if (Tmp == 1) return 1; // Early out.
 
@@ -5088,6 +5087,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
 
     Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
     if (Tmp2 == 1) return 1; // Early out.
+
+    // Add can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
     return std::min(Tmp, Tmp2) - 1;
   case ISD::SUB:
     Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -6403,8 +6405,9 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
   if (VT.isScalableVector())
     return SDValue();
 
-  // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
-  // simplified to one big BUILD_VECTOR.
+  // A CONCAT_VECTOR of scalar sources, such as UNDEF, BUILD_VECTOR and
+  // single-element INSERT_VECTOR_ELT operands can be simplified to one big
+  // BUILD_VECTOR.
   // FIXME: Add support for SCALAR_TO_VECTOR as well.
   EVT SVT = VT.getScalarType();
   SmallVector<SDValue, 16> Elts;
@@ -6414,6 +6417,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
       Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT));
     else if (Op.getOpcode() == ISD::BUILD_VECTOR)
       Elts.append(Op->op_begin(), Op->op_end());
+    else if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+             OpVT.getVectorNumElements() == 1 &&
+             isNullConstant(Op.getOperand(2)))
+      Elts.push_back(Op.getOperand(1));
     else
       return SDValue();
   }
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 7a0256f..fa39603 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -338,9 +338,13 @@ static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
     if (FilePath.empty()) {
       // If we had a DW_AT_decl_file, but got no file then we need to emit a
       // warning.
+      const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+          Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
+      // Check if there is no DW_AT_decl_line attribute, and don't report an
+      // error if it isn't there.
+      if (DwarfFileIdx == UINT32_MAX)
+        return;
       Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) {
-        const uint64_t DwarfFileIdx = dwarf::toUnsigned(
-            Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
         OS << "error: function DIE at " << HEX32(Die.getOffset())
            << " has an invalid file index " << DwarfFileIdx
            << " in its DW_AT_decl_file attribute, unable to create a single "
@@ -629,6 +633,10 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
   size_t NumBefore = Gsym.getNumFunctionInfos();
   auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
     DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
+    // Apple uses DW_AT_GNU_dwo_id for things other than split DWARF.
+    if (IsMachO)
+      return ReturnDie;
+
     if (DwarfUnit.getDWOId()) {
       DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
       if (!DWOCU->isDWOUnit())
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index 5b3c05e..6c7e27e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -260,22 +260,17 @@ public:
     }
 
     // Run finalization actions.
-    using WrapperFunctionCall = orc::shared::WrapperFunctionCall;
-    runFinalizeActions(
-        G->allocActions(),
-        [this, OnFinalized = std::move(OnFinalized)](
-            Expected<std::vector<WrapperFunctionCall>> DeallocActions) mutable {
-          completeFinalization(std::move(OnFinalized),
-                               std::move(DeallocActions));
-        });
-  }
+    auto DeallocActions = runFinalizeActions(G->allocActions());
+    if (!DeallocActions) {
+      OnFinalized(DeallocActions.takeError());
+      return;
+    }
 
-  void abandon(OnAbandonedFunction OnAbandoned) override {
-    Error Err = Error::success();
-    if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments))
-      Err = joinErrors(std::move(Err), errorCodeToError(EC));
-    if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
-      Err = joinErrors(std::move(Err), errorCodeToError(EC));
+    // Release the finalize segments slab.
+    if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) {
+      OnFinalized(errorCodeToError(EC));
+      return;
+    }
 
 #ifndef NDEBUG
     // Set 'G' to null to flag that we've been successfully finalized.
@@ -284,22 +279,17 @@ public:
     G = nullptr;
 #endif
 
-    OnAbandoned(std::move(Err));
+    // Continue with finalized allocation.
+    OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
+                                            std::move(*DeallocActions)));
   }
 
-private:
-  void completeFinalization(
-      OnFinalizedFunction OnFinalized,
-      Expected<std::vector<orc::shared::WrapperFunctionCall>> DeallocActions) {
-
-    if (!DeallocActions)
-      return OnFinalized(DeallocActions.takeError());
-
-    // Release the finalize segments slab.
-    if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) {
-      OnFinalized(errorCodeToError(EC));
-      return;
-    }
+  void abandon(OnAbandonedFunction OnAbandoned) override {
+    Error Err = Error::success();
+    if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments))
+      Err = joinErrors(std::move(Err), errorCodeToError(EC));
+    if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
+      Err = joinErrors(std::move(Err), errorCodeToError(EC));
 
 #ifndef NDEBUG
     // Set 'G' to null to flag that we've been successfully finalized.
@@ -308,11 +298,10 @@ private:
     G = nullptr;
 #endif
 
-    // Continue with finalized allocation.
-    OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
-                                            std::move(*DeallocActions)));
+    OnAbandoned(std::move(Err));
   }
 
+private:
   Error applyProtections() {
     for (auto &KV : BL.segments()) {
       const auto &AG = KV.first;
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
index 7b327af..7e606c6a 100644
--- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -91,19 +91,9 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
       sys::Memory::InvalidateInstructionCache(Base.toPtr<void *>(), Size);
   }
 
-  std::vector<shared::WrapperFunctionCall> DeinitializeActions;
-  {
-    std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P;
-    auto F = P.get_future();
-    shared::runFinalizeActions(
-        AI.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) {
-          P.set_value(std::move(R));
-        });
-    if (auto DeinitializeActionsOrErr = F.get())
-      DeinitializeActions = std::move(*DeinitializeActionsOrErr);
-    else
-      return OnInitialized(DeinitializeActionsOrErr.takeError());
-  }
+  auto DeinitializeActions = shared::runFinalizeActions(AI.Actions);
+  if (!DeinitializeActions)
+    return OnInitialized(DeinitializeActions.takeError());
 
   {
     std::lock_guard<std::mutex> Lock(Mutex);
@@ -111,7 +101,7 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
     // This is the maximum range whose permission have been possibly modified
     auto &Alloc = Allocations[MinAddr];
     Alloc.Size = MaxAddr - MinAddr;
-    Alloc.DeinitializationActions = std::move(DeinitializeActions);
+    Alloc.DeinitializationActions = std::move(*DeinitializeActions);
     Reservations[AI.MappingBase.toPtr<void *>()].Allocations.push_back(MinAddr);
   }
 
@@ -128,10 +118,10 @@ void InProcessMemoryMapper::deinitialize(
 
     for (auto Base : llvm::reverse(Bases)) {
 
-      shared::runDeallocActions(
-          Allocations[Base].DeinitializationActions, [&](Error Err) {
-            AllErr = joinErrors(std::move(AllErr), std::move(Err));
-          });
+      if (Error Err = shared::runDeallocActions(
+              Allocations[Base].DeinitializationActions)) {
+        AllErr = joinErrors(std::move(AllErr), std::move(Err));
+      }
 
       // Reset protections to read/write so the area can be reused
       if (auto EC = sys::Memory::protectMappedMemory(
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
index 08ab0c6..91f2899 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
@@ -12,39 +12,31 @@ namespace llvm {
 namespace orc {
 namespace shared {
 
-void runFinalizeActions(AllocActions &AAs,
-                        OnRunFinalizeActionsCompleteFn OnComplete) {
+Expected<std::vector<WrapperFunctionCall>>
+runFinalizeActions(AllocActions &AAs) {
   std::vector<WrapperFunctionCall> DeallocActions;
   DeallocActions.reserve(numDeallocActions(AAs));
 
   for (auto &AA : AAs) {
     if (AA.Finalize)
-
-      if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) {
-        while (!DeallocActions.empty()) {
-          Err = joinErrors(std::move(Err),
-                           DeallocActions.back().runWithSPSRetErrorMerged());
-          DeallocActions.pop_back();
-        }
-        return OnComplete(std::move(Err));
-      }
+      if (auto Err = AA.Finalize.runWithSPSRetErrorMerged())
+        return joinErrors(std::move(Err), runDeallocActions(DeallocActions));
 
     if (AA.Dealloc)
       DeallocActions.push_back(std::move(AA.Dealloc));
   }
 
   AAs.clear();
-  OnComplete(std::move(DeallocActions));
+  return DeallocActions;
 }
 
-void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs,
-                       OnRunDeallocActionsComeleteFn OnComplete) {
+Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs) {
   Error Err = Error::success();
   while (!DAs.empty()) {
     Err = joinErrors(std::move(Err), DAs.back().runWithSPSRetErrorMerged());
     DAs = DAs.drop_back();
   }
-  OnComplete(std::move(Err));
+  return Err;
 }
 
 } // namespace shared
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
index 8c24b1f..4fbf232 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
@@ -9,10 +9,8 @@
 #include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h"
 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
-#include "llvm/Support/MSVCErrorWorkarounds.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/WindowsError.h"
-#include <future>
 #include <sstream>
 
 #if defined(LLVM_ON_UNIX)
@@ -183,24 +181,15 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize(
   }
 
   // Run finalization actions and get deinitlization action list.
-  std::vector<shared::WrapperFunctionCall> DeinitializeActions;
-  {
-    std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P;
-    auto F = P.get_future();
-    shared::runFinalizeActions(
-        FR.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) {
-          P.set_value(std::move(R));
-        });
-    if (auto DeinitializeActionsOrErr = F.get())
-      DeinitializeActions = std::move(*DeinitializeActionsOrErr);
-    else
-      return DeinitializeActionsOrErr.takeError();
+  auto DeinitializeActions = shared::runFinalizeActions(FR.Actions);
+  if (!DeinitializeActions) {
+    return DeinitializeActions.takeError();
   }
 
   {
     std::lock_guard<std::mutex> Lock(Mutex);
     Allocations[MinAddr].DeinitializationActions =
-        std::move(DeinitializeActions);
+        std::move(*DeinitializeActions);
     Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr);
   }
 
@@ -221,11 +210,10 @@ Error ExecutorSharedMemoryMapperService::deinitialize(
     std::lock_guard<std::mutex> Lock(Mutex);
 
     for (auto Base : llvm::reverse(Bases)) {
-      shared::runDeallocActions(
-          Allocations[Base].DeinitializationActions, [&](Error Err) {
-            if (Err)
-              AllErr = joinErrors(std::move(AllErr), std::move(Err));
-          });
+      if (Error Err = shared::runDeallocActions(
+              Allocations[Base].DeinitializationActions)) {
+        AllErr = joinErrors(std::move(AllErr), std::move(Err));
+      }
 
       // Remove the allocation from the allocation list of its reservation
       for (auto &Reservation : Reservations) {
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 3842b1a..6a9ef2e 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -741,7 +741,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
       assert(!CI2->isZero() && "And zero handled above");
       if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
         // If and'ing the address of a global with a constant, fold it.
-        if (CE1->getOpcode() == Instruction::PtrToInt &&
+        if ((CE1->getOpcode() == Instruction::PtrToInt ||
+             CE1->getOpcode() == Instruction::PtrToAddr) &&
             isa<GlobalValue>(CE1->getOperand(0))) {
           GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
 
diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp
index eef8a21..6b7b4f1 100644
--- a/llvm/lib/ObjCopy/ConfigManager.cpp
+++ b/llvm/lib/ObjCopy/ConfigManager.cpp
@@ -122,14 +122,14 @@ ConfigManager::getDXContainerConfig() const {
   if (!Common.AddGnuDebugLink.empty() || !Common.SplitDWO.empty() ||
       !Common.AllocSectionsPrefix.empty() ||
       Common.DiscardMode != DiscardType::None || !Common.AddSection.empty() ||
-      !Common.DumpSection.empty() || !Common.KeepSection.empty() ||
-      !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
-      !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() ||
-      Common.ExtractDWO || Common.OnlyKeepDebug || Common.StripAllGNU ||
-      Common.StripDWO || Common.StripDebug || Common.StripNonAlloc ||
-      Common.StripSections || Common.StripUnneeded ||
-      Common.DecompressDebugSections || Common.GapFill != 0 ||
-      Common.PadTo != 0 || Common.ChangeSectionLMAValAll != 0 ||
+      !Common.KeepSection.empty() || !Common.SectionsToRename.empty() ||
+      !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() ||
+      !Common.SetSectionType.empty() || Common.ExtractDWO ||
+      Common.OnlyKeepDebug || Common.StripAllGNU || Common.StripDWO ||
+      Common.StripDebug || Common.StripNonAlloc || Common.StripSections ||
+      Common.StripUnneeded || Common.DecompressDebugSections ||
+      Common.GapFill != 0 || Common.PadTo != 0 ||
+      Common.ChangeSectionLMAValAll != 0 ||
       !Common.ChangeSectionAddress.empty()) {
     return createStringError(llvm::errc::invalid_argument,
                              "option is not supported for DXContainer");
diff --git a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp
index d7f3c0d..95ab3d9 100644
--- a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp
+++ b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp
@@ -9,8 +9,10 @@
 #include "llvm/ObjCopy/DXContainer/DXContainerObjcopy.h"
 #include "DXContainerReader.h"
 #include "DXContainerWriter.h"
+#include "llvm/BinaryFormat/DXContainer.h"
 #include "llvm/ObjCopy/CommonConfig.h"
 #include "llvm/ObjCopy/DXContainer/DXContainerConfig.h"
+#include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
@@ -42,7 +44,47 @@ static Error extractPartAsObject(StringRef PartName, StringRef OutFilename,
                          "part '%s' not found", PartName.str().c_str());
 }
 
+static Error dumpPartToFile(StringRef PartName, StringRef Filename,
+                            StringRef InputFilename, Object &Obj) {
+  auto PartIter = llvm::find_if(
+      Obj.Parts, [&PartName](const Part &P) { return P.Name == PartName; });
+  if (PartIter == Obj.Parts.end())
+    return createFileError(Filename,
+                           std::make_error_code(std::errc::invalid_argument),
+                           "part '%s' not found", PartName.str().c_str());
+  ArrayRef<uint8_t> Contents = PartIter->Data;
+  // The DXContainer format is a bit odd because the part-specific headers are
+  // contained inside the part data itself. For parts that contain LLVM bitcode
+  // when we dump the part we want to skip the part-specific header so that we
+  // get a valid .bc file that we can inspect. All the data contained inside the
+  // program header is pulled out of the bitcode, so the header can be
+  // reconstructed if needed from the bitcode itself. More comprehensive
+  // documentation on the DXContainer format can be found at
+  // https://llvm.org/docs/DirectX/DXContainer.html.
+
+  if (PartName == "DXIL" || PartName == "STAT")
+    Contents = Contents.drop_front(sizeof(llvm::dxbc::ProgramHeader));
+  if (Contents.empty())
+    return createFileError(Filename, object_error::parse_failed,
+                           "part '%s' is empty", PartName.str().c_str());
+  Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+      FileOutputBuffer::create(Filename, Contents.size());
+  if (!BufferOrErr)
+    return createFileError(Filename, BufferOrErr.takeError());
+  std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+  llvm::copy(Contents, Buf->getBufferStart());
+  if (Error E = Buf->commit())
+    return createFileError(Filename, std::move(E));
+  return Error::success();
+}
+
 static Error handleArgs(const CommonConfig &Config, Object &Obj) {
+  for (StringRef Flag : Config.DumpSection) {
+    auto [SecName, FileName] = Flag.split("=");
+    if (Error E = dumpPartToFile(SecName, FileName, Config.InputFilename, Obj))
+      return E;
+  }
+
   // Extract all sections before any modifications.
   for (StringRef Flag : Config.ExtractSection) {
     StringRef SectionName;
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 53699ce0..f256e7b 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -837,7 +837,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
     Version = Data.getU8(Cur);
     if (!Cur)
       break;
-    if (Version < 2 || Version > 3)
+    if (Version < 2 || Version > 4)
       return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " +
                          Twine(static_cast<int>(Version)));
     Feature = Data.getU8(Cur); // Feature byte
@@ -852,6 +852,11 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
                          "callsite offsets feature is enabled: version = " +
                          Twine(static_cast<int>(Version)) +
                          " feature = " + Twine(static_cast<int>(Feature)));
+    if (FeatEnable.BBHash && Version < 4)
+      return createError("version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when "
+                         "basic block hash feature is enabled: version = " +
+                         Twine(static_cast<int>(Version)) +
+                         " feature = " + Twine(static_cast<int>(Feature)));
     uint32_t NumBlocksInBBRange = 0;
     uint32_t NumBBRanges = 1;
     typename ELFFile<ELFT>::uintX_t RangeBaseAddress = 0;
@@ -907,6 +912,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
           uint32_t Size = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr) +
                           LastCallsiteEndOffset;
           uint32_t MD = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+          uint64_t Hash = FeatEnable.BBHash ? Data.getU64(Cur) : 0;
           Expected<BBAddrMap::BBEntry::Metadata> MetadataOrErr =
               BBAddrMap::BBEntry::Metadata::decode(MD);
           if (!MetadataOrErr) {
@@ -914,7 +920,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
             break;
           }
           BBEntries.push_back({ID, Offset + PrevBBEndOffset, Size,
-                               *MetadataOrErr, CallsiteEndOffsets});
+                               *MetadataOrErr, CallsiteEndOffsets, Hash});
           PrevBBEndOffset += Offset + Size;
         }
         TotalNumBlocks += BBEntries.size();
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index faeeab3..8b75fbe 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1465,7 +1465,7 @@ void ELFState<ELFT>::writeSectionContent(
   for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) {
     // Write version and feature values.
     if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) {
-      if (E.Version > 3)
+      if (E.Version > 4)
         WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: "
                              << static_cast<int>(E.Version)
                              << "; encoding using the most recent version";
@@ -1526,6 +1526,12 @@ void ELFState<ELFT>::writeSectionContent(
         }
         SHeader.sh_size += CBA.writeULEB128(BBE.Size);
         SHeader.sh_size += CBA.writeULEB128(BBE.Metadata);
+        if (FeatureOrErr->BBHash || BBE.Hash.has_value()) {
+          uint64_t Hash =
+              BBE.Hash.has_value() ? BBE.Hash.value() : llvm::yaml::Hex64(0);
+          CBA.write<uint64_t>(Hash, ELFT::Endianness);
+          SHeader.sh_size += 8;
+        }
       }
     }
     if (!PGOAnalyses)
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index d9cce1e..421d6603 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -1887,6 +1887,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry>::mapping(
   IO.mapRequired("Size", E.Size);
   IO.mapRequired("Metadata", E.Metadata);
   IO.mapOptional("CallsiteEndOffsets", E.CallsiteEndOffsets);
+  IO.mapOptional("Hash", E.Hash);
 }
 
 void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping(
diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp
index 804ff07..41f5187 100644
--- a/llvm/lib/Support/TextEncoding.cpp
+++ b/llvm/lib/Support/TextEncoding.cpp
@@ -54,9 +54,9 @@ static std::optional<TextEncoding> getKnownEncoding(StringRef Name) {
   return std::nullopt;
 }
 
-LLVM_ATTRIBUTE_UNUSED static void
-HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength,
-               SmallVectorImpl<char> &Result) {
+[[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output,
+                                            size_t &OutputLength,
+                                            SmallVectorImpl<char> &Result) {
   // No space left in output buffer. Double the size of the underlying
   // memory in the SmallVectorImpl, adjust pointer and length and continue
   // the conversion.
diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
index 8d66348..6f8e091 100644
--- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp
+++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
@@ -476,7 +476,7 @@ nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount) {
       std::min(NormalizedName.size(), UnicodeNameToCodepointLargestNameSize) +
       1;
 
-  LLVM_ATTRIBUTE_UNUSED static std::size_t Rows =
+  [[maybe_unused]] static std::size_t Rows =
       UnicodeNameToCodepointLargestNameSize + 1;
 
   std::vector<char> Distances(
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 2c3870c..636d4f8a 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -8217,6 +8217,8 @@ bool AArch64AsmParser::parseDataExpr(const MCExpr *&Res) {
       Spec = AArch64::S_GOTPCREL;
     else if (Identifier == "plt")
       Spec = AArch64::S_PLT;
+    else if (Identifier == "funcinit")
+      Spec = AArch64::S_FUNCINIT;
   }
   if (Spec == AArch64::S_None)
     return Error(Loc, "invalid relocation specifier");
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index a388216..892b8da 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -232,6 +232,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(const MCFixup &Fixup,
       }
       if (RefKind == AArch64::S_AUTH || RefKind == AArch64::S_AUTHADDR)
         return ELF::R_AARCH64_AUTH_ABS64;
+      if (RefKind == AArch64::S_FUNCINIT)
+        return ELF::R_AARCH64_FUNCINIT64;
       return ELF::R_AARCH64_ABS64;
     }
     case AArch64::fixup_aarch64_add_imm12:
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 2b5cf34..bc090c6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -40,6 +40,7 @@ const MCAsmInfo::AtSpecifier ELFAtSpecifiers[] = {
     {AArch64::S_GOT, "GOT"},
     {AArch64::S_GOTPCREL, "GOTPCREL"},
     {AArch64::S_PLT, "PLT"},
+    {AArch64::S_FUNCINIT, "FUNCINIT"},
 };
 
 const MCAsmInfo::AtSpecifier MachOAtSpecifiers[] = {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 0dfa61b..f2acff5 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -164,6 +164,7 @@ enum {
   // ELF relocation specifiers in data directives:
   S_PLT          = 0x400,
   S_GOTPCREL,
+  S_FUNCINIT,
 
   // Mach-O @ relocation specifiers:
   S_MACHO_GOT,
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index dd6fa16..d71f728 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -130,6 +130,12 @@ SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI)
   if (auto *CalledFunction = CB.getCalledFunction())
     CalledFn = SMEAttrs(*CalledFunction, TLI);
 
+  // An `invoke` of an agnostic ZA function may not return normally (it may
+  // resume in an exception block). In this case, it acts like a private ZA
+  // callee and may require a ZA save to be set up before it is called.
+  if (isa<InvokeInst>(CB))
+    CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false);
+
   // FIXME: We probably should not allow SME attributes on direct calls but
   // clang duplicates streaming mode attributes at each callsite.
   assert((IsIndirect ||
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 80e985d..a2841c11 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -18168,7 +18168,7 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
   return CacheLineAlign;
 }
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
   assert(N->getOpcode() == ISD::CopyFromReg);
   do {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a44a247..d516330 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -865,22 +865,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       return;
     }
 
-    if (DestReg == AMDGPU::VCC_LO) {
-      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
-        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-      } else {
+    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
+      if (DestReg == AMDGPU::VCC_LO) {
         // FIXME: Hack until VReg_1 removed.
         assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
         BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
-          .addImm(0)
-          .addReg(SrcReg, getKillRegState(KillSrc));
+            .addImm(0)
+            .addReg(SrcReg, getKillRegState(KillSrc));
+        return;
       }
 
-      return;
-    }
-
-    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
       reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
       return;
     }
@@ -898,22 +892,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       return;
     }
 
-    if (DestReg == AMDGPU::VCC) {
-      if (AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
-        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-      } else {
+    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
+      if (DestReg == AMDGPU::VCC) {
         // FIXME: Hack until VReg_1 removed.
         assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
         BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
-          .addImm(0)
-          .addReg(SrcReg, getKillRegState(KillSrc));
+            .addImm(0)
+            .addReg(SrcReg, getKillRegState(KillSrc));
+        return;
       }
 
-      return;
-    }
-
-    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
       reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
       return;
     }
diff --git a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
index 39e651d..8945ec3 100644
--- a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
+++ b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
@@ -166,7 +166,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
 }
 
 // TODO
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 static DecodeStatus DecodesFPR128RegisterClass(MCInst &Inst, uint64_t RegNo,
                                                uint64_t Address,
                                                const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index ca81d30..8ace2d2 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Support/MD5.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <cstdint>
 #include <optional>
 
 using namespace llvm;
@@ -193,7 +194,12 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {
         dxbc::PSV::v2::ResourceBindInfo BindInfo;
         BindInfo.Type = Type;
         BindInfo.LowerBound = Binding.LowerBound;
-        BindInfo.UpperBound = Binding.LowerBound + Binding.Size - 1;
+        assert(Binding.Size == UINT32_MAX ||
+               (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX &&
+                   "Resource range is too large");
+        BindInfo.UpperBound = (Binding.Size == UINT32_MAX)
+                                  ? UINT32_MAX
+                                  : Binding.LowerBound + Binding.Size - 1;
         BindInfo.Space = Binding.Space;
         BindInfo.Kind = static_cast<dxbc::PSV::ResourceKind>(Kind);
         BindInfo.Flags = Flags;
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 974f653..5f180d6 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -667,11 +667,10 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
   return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable);
 }
 
-LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily.
-    static DecodeStatus
-    DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo,
-                              uint64_t /*Address*/,
-                              const MCDisassembler *Decoder) {
+[[maybe_unused]] // Suppress warning temporarily.
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                              uint64_t /*Address*/,
+                                              const MCDisassembler *Decoder) {
   static const MCPhysReg HvxVQRDecoderTable[] = {
       Hexagon::VQ0,  Hexagon::VQ1,  Hexagon::VQ2,  Hexagon::VQ3,
       Hexagon::VQ4,  Hexagon::VQ5,  Hexagon::VQ6,  Hexagon::VQ7};
diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index 5dde47a..a3296e0 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -419,8 +419,8 @@ namespace {
 
   using HCE = HexagonConstExtenders;
 
-  LLVM_ATTRIBUTE_UNUSED
-  raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) {
+  [[maybe_unused]]
+  raw_ostream &operator<<(raw_ostream &OS, const OffsetRange &OR) {
     if (OR.Min > OR.Max)
       OS << '!';
     OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align)
@@ -435,8 +435,8 @@ namespace {
     const HexagonRegisterInfo &HRI;
   };
 
-  LLVM_ATTRIBUTE_UNUSED
-  raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) {
+  [[maybe_unused]]
+  raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &P) {
     if (P.Rs.Reg != 0)
       OS << printReg(P.Rs.Reg, &P.HRI, P.Rs.Sub);
     else
@@ -451,8 +451,8 @@ namespace {
     const HexagonRegisterInfo &HRI;
   };
 
-  LLVM_ATTRIBUTE_UNUSED
-  raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) {
+  [[maybe_unused]]
+  raw_ostream &operator<<(raw_ostream &OS, const PrintExpr &P) {
     OS << "## " << (P.Ex.Neg ? "- " : "+ ");
     if (P.Ex.Rs.Reg != 0)
       OS << printReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub);
@@ -469,15 +469,15 @@ namespace {
     const HexagonRegisterInfo &HRI;
   };
 
-  LLVM_ATTRIBUTE_UNUSED
-  raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) {
+  [[maybe_unused]]
+  raw_ostream &operator<<(raw_ostream &OS, const PrintInit &P) {
     OS << '[' << P.ExtI.first << ", "
        << PrintExpr(P.ExtI.second, P.HRI) << ']';
     return OS;
   }
 
-  LLVM_ATTRIBUTE_UNUSED
-  raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) {
+  [[maybe_unused]]
+  raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtDesc &ED) {
     assert(ED.OpNum != -1u);
     const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent();
     const MachineFunction &MF = *MBB.getParent();
@@ -493,8 +493,8 @@ namespace {
     return OS;
   }
 
-  LLVM_ATTRIBUTE_UNUSED
-  raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) {
+  [[maybe_unused]]
+  raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtRoot &ER) {
     switch (ER.Kind) {
       case MachineOperand::MO_Immediate:
         OS << "imm:" << ER.V.ImmVal;
@@ -527,8 +527,8 @@ namespace {
     return OS;
   }
 
-  LLVM_ATTRIBUTE_UNUSED
-  raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) {
+  [[maybe_unused]]
+  raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtValue &EV) {
     OS << HCE::ExtRoot(EV) << "  off:" << EV.Offset;
     return OS;
   }
@@ -540,8 +540,8 @@ namespace {
     const HexagonRegisterInfo &HRI;
   };
 
-  LLVM_ATTRIBUTE_UNUSED
-  raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) {
+  [[maybe_unused]]
+  raw_ostream &operator<<(raw_ostream &OS, const PrintIMap &P) {
     OS << "{\n";
     for (const std::pair<const HCE::ExtenderInit, HCE::IndexList> &Q : P.IMap) {
       OS << "  " << PrintInit(Q.first, P.HRI) << " -> {";
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 4d96cfa..c7a4f68 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -789,7 +789,7 @@ struct ShuffleMask {
   }
 };
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
   SM.print(OS);
   return OS;
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 87d052b..e4c0a16 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -364,7 +364,7 @@ private:
   const HexagonVectorCombine &HVC;
 };
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
   OS << "Inst: " << AI.Inst << "  " << *AI.Inst << '\n';
   OS << "Addr: " << *AI.Addr << '\n';
@@ -375,7 +375,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
   return OS;
 }
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
   OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
   OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
@@ -394,7 +394,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
   return OS;
 }
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 raw_ostream &operator<<(raw_ostream &OS,
                         const AlignVectors::ByteSpan::Block &B) {
   OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
@@ -408,7 +408,7 @@ raw_ostream &operator<<(raw_ostream &OS,
   return OS;
 }
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
   OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
   for (const AlignVectors::ByteSpan::Block &B : BS)
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index fa8ae60..2ff5843 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -111,7 +111,7 @@ namespace {
    friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
   };
 
-  LLVM_ATTRIBUTE_UNUSED
+  [[maybe_unused]]
   raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
     const ChainOfDependences &CD = D.Chain;
     int ChainSize = CD.size();
@@ -144,7 +144,7 @@ namespace {
     bool isDefined() { return Inst2Replace != nullptr; }
   };
 
-  LLVM_ATTRIBUTE_UNUSED
+  [[maybe_unused]]
   raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
     OS << "** ReuseValue ***\n";
     OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index ca98269..e3094b4 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -275,7 +275,7 @@ namespace HexagonII {
     INST_ICLASS_ALU32_3   = 0xf0000000
   };
 
-  LLVM_ATTRIBUTE_UNUSED
+  [[maybe_unused]]
   static unsigned getMemAccessSizeInBytes(MemAccessSize S) {
     switch (S) {
       case ByteAccess:        return 1;
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 96ad5c6..0a8838c 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -156,13 +156,13 @@ FunctionPass *llvm::createRISCVVLOptimizerPass() {
   return new RISCVVLOptimizer();
 }
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) {
   OI.print(OS);
   return OS;
 }
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 static raw_ostream &operator<<(raw_ostream &OS,
                                const std::optional<OperandInfo> &OI) {
   if (OI)
diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h
index a0cf881..5a06ea3 100644
--- a/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/llvm/lib/Target/SystemZ/SystemZ.h
@@ -24,6 +24,7 @@ class SystemZTargetMachine;
 
 namespace SystemZ {
 // Condition-code mask values.
+const unsigned CCMASK_NONE = 0;
 const unsigned CCMASK_0 = 1 << 3;
 const unsigned CCMASK_1 = 1 << 2;
 const unsigned CCMASK_2 = 1 << 1;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 3b7d11a..de28faf 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "SystemZConstantPoolValue.h"
 #include "SystemZMachineFunctionInfo.h"
 #include "SystemZTargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -24,6 +25,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsS390.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
@@ -1514,6 +1516,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
     default:
       break;
     }
+  } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
+    if (StringRef("{@cc}").compare(Constraint) == 0)
+      return C_Other;
   }
   return TargetLowering::getConstraintType(Constraint);
 }
@@ -1707,6 +1712,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
       return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                  SystemZMC::VR128Regs, 32);
     }
+    if (Constraint[1] == '@') {
+      if (StringRef("{@cc}").compare(Constraint) == 0)
+        return std::make_pair(0u, &SystemZ::GR32BitRegClass);
+    }
   }
   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 }
@@ -1737,6 +1746,38 @@ Register SystemZTargetLowering::getExceptionSelectorRegister(
   return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
 }
 
+// Convert condition code in CCReg to an i32 value.
+static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
+  SDLoc DL(CCReg);
+  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
+  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+                     DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+}
+
+// Lower @cc targets via setcc.
+SDValue SystemZTargetLowering::LowerAsmOutputForConstraint(
+    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+  if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
+    return SDValue();
+
+  // Check that return type is valid.
+  if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+      OpInfo.ConstraintVT.getSizeInBits() < 8)
+    report_fatal_error("Glue output operand is of invalid type");
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MRI.addLiveIn(SystemZ::CC);
+
+  if (Glue.getNode()) {
+    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
+    Chain = Glue.getValue(1);
+  } else
+    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
+  return getCCResult(DAG, Glue);
+}
+
 void SystemZTargetLowering::LowerAsmOperandForConstraint(
     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
     SelectionDAG &DAG) const {
@@ -5300,14 +5341,6 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                  Node->getMemoryVT(), Node->getMemOperand());
 }
 
-// Convert condition code in CCReg to an i32 value.
-static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
-  SDLoc DL(CCReg);
-  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
-  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
-                     DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
-}
-
 SDValue
 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                               SelectionDAG &DAG) const {
@@ -8723,95 +8756,247 @@ SDValue SystemZTargetLowering::combineSETCC(
   return SDValue();
 }
 
-static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
+static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
+  switch (Val.getOpcode()) {
+  default:
+    return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
+  case SystemZISD::IPM:
+    if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
+        Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
+      return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
+    return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
+  case SystemZISD::SELECT_CCMASK: {
+    SDValue Op4CCReg = Val.getOperand(4);
+    if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
+        Op4CCReg.getOpcode() == SystemZISD::TM) {
+      auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
+      if (OpCC != SDValue())
+        return std::make_pair(OpCC, OpCCValid);
+    }
+    auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
+    if (!CCValid)
+      return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
+    int CCValidVal = CCValid->getZExtValue();
+    return std::make_pair(Op4CCReg, CCValidVal);
+  }
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
+    if (Op0CC != SDValue())
+      return std::make_pair(Op0CC, Op0CCValid);
+    return findCCUse(Val.getOperand(1));
+  }
+}
+
+static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
+                          SelectionDAG &DAG);
+
+SmallVector<SDValue, 4> static simplifyAssumingCCVal(SDValue &Val, SDValue &CC,
+                                                     SelectionDAG &DAG) {
+  SDLoc DL(Val);
+  auto Opcode = Val.getOpcode();
+  switch (Opcode) {
+  default:
+    return {};
+  case ISD::Constant:
+    return {Val, Val, Val, Val};
+  case SystemZISD::IPM: {
+    SDValue IPMOp0 = Val.getOperand(0);
+    if (IPMOp0 != CC)
+      return {};
+    SmallVector<SDValue, 4> ShiftedCCVals;
+    for (auto CC : {0, 1, 2, 3})
+      ShiftedCCVals.emplace_back(
+          DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
+    return ShiftedCCVals;
+  }
+  case SystemZISD::SELECT_CCMASK: {
+    SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
+    auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
+    auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
+    if (!CCValid || !CCMask)
+      return {};
+
+    int CCValidVal = CCValid->getZExtValue();
+    int CCMaskVal = CCMask->getZExtValue();
+    const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
+    const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
+    if (TrueSDVals.empty() || FalseSDVals.empty())
+      return {};
+    SDValue Op4CCReg = Val.getOperand(4);
+    if (Op4CCReg != CC)
+      combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
+    if (Op4CCReg != CC)
+      return {};
+    SmallVector<SDValue, 4> MergedSDVals;
+    for (auto &CCVal : {0, 1, 2, 3})
+      MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
+                                    ? TrueSDVals[CCVal]
+                                    : FalseSDVals[CCVal]);
+    return MergedSDVals;
+  }
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SRA:
+    // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
+    // would clobber CC).
+    if (!Val.hasOneUse())
+      return {};
+    [[fallthrough]];
+  case ISD::SHL:
+  case ISD::SRL:
+    SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
+    const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
+    const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
+    if (Op0SDVals.empty() || Op1SDVals.empty())
+      return {};
+    SmallVector<SDValue, 4> BinaryOpSDVals;
+    for (auto CCVal : {0, 1, 2, 3})
+      BinaryOpSDVals.emplace_back(DAG.getNode(
+          Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
+    return BinaryOpSDVals;
+  }
+}
+
+static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
+                          SelectionDAG &DAG) {
   // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
   // set by the CCReg instruction using the CCValid / CCMask masks,
-  // If the CCReg instruction is itself a ICMP testing the condition
+  // If the CCReg instruction is itself a ICMP / TM  testing the condition
   // code set by some other instruction, see whether we can directly
   // use that condition code.
-
-  // Verify that we have an ICMP against some constant.
-  if (CCValid != SystemZ::CCMASK_ICMP)
-    return false;
-  auto *ICmp = CCReg.getNode();
-  if (ICmp->getOpcode() != SystemZISD::ICMP)
-    return false;
-  auto *CompareLHS = ICmp->getOperand(0).getNode();
-  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
-  if (!CompareRHS)
+  auto *CCNode = CCReg.getNode();
+  if (!CCNode)
     return false;
 
-  // Optimize the case where CompareLHS is a SELECT_CCMASK.
-  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
-    // Verify that we have an appropriate mask for a EQ or NE comparison.
-    bool Invert = false;
-    if (CCMask == SystemZ::CCMASK_CMP_NE)
-      Invert = !Invert;
-    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
+  if (CCNode->getOpcode() == SystemZISD::TM) {
+    if (CCValid != SystemZ::CCMASK_TM)
       return false;
-
-    // Verify that the ICMP compares against one of select values.
-    auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
-    if (!TrueVal)
-      return false;
-    auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
-    if (!FalseVal)
+    auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
+      auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
+      auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
+      if (!Op0Node || !Op1Node)
+        return -1;
+      auto Op0APVal = Op0Node->getAPIntValue();
+      auto Op1APVal = Op1Node->getAPIntValue();
+      auto Result = Op0APVal & Op1APVal;
+      bool AllOnes = Result == Op1APVal;
+      bool AllZeros = Result == 0;
+      bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0;
+      return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
+    };
+    SDValue Op0 = CCNode->getOperand(0);
+    SDValue Op1 = CCNode->getOperand(1);
+    auto [Op0CC, Op0CCValid] = findCCUse(Op0);
+    if (Op0CC == SDValue())
       return false;
-    if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue())
-      Invert = !Invert;
-    else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue())
+    const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
+    const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
+    if (Op0SDVals.empty() || Op1SDVals.empty())
       return false;
-
-    // Compute the effective CC mask for the new branch or select.
-    auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
-    auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
-    if (!NewCCValid || !NewCCMask)
-      return false;
-    CCValid = NewCCValid->getZExtValue();
-    CCMask = NewCCMask->getZExtValue();
-    if (Invert)
-      CCMask ^= CCValid;
-
-    // Return the updated CCReg link.
-    CCReg = CompareLHS->getOperand(4);
+    int NewCCMask = 0;
+    for (auto CC : {0, 1, 2, 3}) {
+      auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
+      if (CCVal < 0)
+        return false;
+      NewCCMask <<= 1;
+      NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
+    }
+    NewCCMask &= Op0CCValid;
+    CCReg = Op0CC;
+    CCMask = NewCCMask;
+    CCValid = Op0CCValid;
     return true;
   }
+  if (CCNode->getOpcode() != SystemZISD::ICMP ||
+      CCValid != SystemZ::CCMASK_ICMP)
+    return false;
 
-  // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
-  if (CompareLHS->getOpcode() == ISD::SRA) {
-    auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
-    if (!SRACount || SRACount->getZExtValue() != 30)
-      return false;
-    auto *SHL = CompareLHS->getOperand(0).getNode();
-    if (SHL->getOpcode() != ISD::SHL)
-      return false;
-    auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
-    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
-      return false;
-    auto *IPM = SHL->getOperand(0).getNode();
-    if (IPM->getOpcode() != SystemZISD::IPM)
-      return false;
-
-    // Avoid introducing CC spills (because SRA would clobber CC).
-    if (!CompareLHS->hasOneUse())
-      return false;
-    // Verify that the ICMP compares against zero.
-    if (CompareRHS->getZExtValue() != 0)
+  SDValue CmpOp0 = CCNode->getOperand(0);
+  SDValue CmpOp1 = CCNode->getOperand(1);
+  SDValue CmpOp2 = CCNode->getOperand(2);
+  auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
+  if (Op0CC != SDValue()) {
+    const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
+    const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
+    if (Op0SDVals.empty() || Op1SDVals.empty())
       return false;
 
-    // Compute the effective CC mask for the new branch or select.
-    CCMask = SystemZ::reverseCCMask(CCMask);
-
-    // Return the updated CCReg link.
-    CCReg = IPM->getOperand(0);
+    auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
+    auto CmpTypeVal = CmpType->getZExtValue();
+    const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
+                                               const SDValue &Op1Val) {
+      auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
+      auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
+      if (!Op0Node || !Op1Node)
+        return -1;
+      auto Op0APVal = Op0Node->getAPIntValue();
+      auto Op1APVal = Op1Node->getAPIntValue();
+      if (CmpTypeVal == SystemZICMP::SignedOnly)
+        return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
+      return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
+    };
+    int NewCCMask = 0;
+    for (auto CC : {0, 1, 2, 3}) {
+      auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
+      if (CCVal < 0)
+        return false;
+      NewCCMask <<= 1;
+      NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
+    }
+    NewCCMask &= Op0CCValid;
+    CCMask = NewCCMask;
+    CCReg = Op0CC;
+    CCValid = Op0CCValid;
     return true;
   }
 
   return false;
 }
 
-SDValue SystemZTargetLowering::combineBR_CCMASK(
-    SDNode *N, DAGCombinerInfo &DCI) const {
+// Merging versus split in multiple branches cost.
+TargetLoweringBase::CondMergingParams
+SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
+                                                     const Value *Lhs,
+                                                     const Value *Rhs) const {
+  const auto isFlagOutOpCC = [](const Value *V) {
+    using namespace llvm::PatternMatch;
+    const Value *RHSVal;
+    const APInt *RHSC;
+    if (const auto *I = dyn_cast<Instruction>(V)) {
+      // PatternMatch.h provides concise tree-based pattern match of llvm IR.
+      if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
+          match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
+        if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
+          if (CB->isInlineAsm()) {
+            const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
+            return IA &&
+                   IA->getConstraintString().find("{@cc}") != std::string::npos;
+          }
+        }
+      }
+    }
+    return false;
+  };
+  // Pattern (ICmp %asm) or (ICmp (And %asm)).
+  // Cost of longest dependency chain (ICmp, And) is 2. CostThreshold or
+  // BaseCost can be set >=2. If cost of instruction <= CostThreshold
+  // conditionals will be merged or else conditionals will be split.
+  if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
+    return {3, 0, -1};
+  // Default.
+  return {-1, -1, -1};
+}
+
+SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
+                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
 
   // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
@@ -8824,8 +9009,7 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(
   int CCMaskVal = CCMask->getZExtValue();
   SDValue Chain = N->getOperand(0);
   SDValue CCReg = N->getOperand(4);
-
-  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
+  if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG))
     return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                        Chain,
                        DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
@@ -8848,16 +9032,80 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
   int CCMaskVal = CCMask->getZExtValue();
   SDValue CCReg = N->getOperand(4);
 
-  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
-    return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
-                       N->getOperand(0), N->getOperand(1),
-                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
-                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
-                       CCReg);
+  bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
+
+  // Populate SDVals vector for each condition code ccval for given Val, which
+  // can again be another nested select_ccmask with the same CC.
+  const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
+    if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
+      SmallVector<SDValue, 4> Res;
+      if (Val.getOperand(4) != CCReg)
+        return SmallVector<SDValue, 4>{};
+      SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
+      auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
+      if (!CCMask)
+        return SmallVector<SDValue, 4>{};
+
+      int CCMaskVal = CCMask->getZExtValue();
+      for (auto &CC : {0, 1, 2, 3})
+        Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
+                                                              : FalseVal);
+      return Res;
+    }
+    return SmallVector<SDValue, 4>{Val, Val, Val, Val};
+  };
+  // Attempting to optimize TrueVal/FalseVal in outermost select_ccmask either
+  // with CCReg found by combineCCMask or original CCReg.
+  SDValue TrueVal = N->getOperand(0);
+  SDValue FalseVal = N->getOperand(1);
+  auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
+  auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
+  // TrueSDVals/FalseSDVals might be empty in case of non-constant
+  // TrueVal/FalseVal for select_ccmask, which can not be optimized further.
+  if (TrueSDVals.empty())
+    TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
+  if (FalseSDVals.empty())
+    FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
+  if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
+    SmallSet<SDValue, 4> MergedSDValsSet;
+    // Ignoring CC values outside CCValiid.
+    for (auto CC : {0, 1, 2, 3}) {
+      if ((CCValidVal & ((1 << (3 - CC)))) != 0)
+        MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
+                                   ? TrueSDVals[CC]
+                                   : FalseSDVals[CC]);
+    }
+    if (MergedSDValsSet.size() == 1)
+      return *MergedSDValsSet.begin();
+    if (MergedSDValsSet.size() == 2) {
+      auto BeginIt = MergedSDValsSet.begin();
+      SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
+      if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
+        std::swap(NewTrueVal, NewFalseVal);
+      int NewCCMask = 0;
+      for (auto CC : {0, 1, 2, 3}) {
+        NewCCMask <<= 1;
+        NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
+                         ? (TrueSDVals[CC] == NewTrueVal)
+                         : (FalseSDVals[CC] == NewTrueVal);
+      }
+      CCMaskVal = NewCCMask;
+      CCMaskVal &= CCValidVal;
+      TrueVal = NewTrueVal;
+      FalseVal = NewFalseVal;
+      IsCombinedCCReg = true;
+    }
+  }
+
+  if (IsCombinedCCReg)
+    return DAG.getNode(
+        SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
+        FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
+        DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
+
   return SDValue();
 }
 
-
 SDValue SystemZTargetLowering::combineGET_CCMASK(
     SDNode *N, DAGCombinerInfo &DCI) const {
 
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index f8706b7..d5b7603 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -533,6 +533,18 @@ public:
   }
 
   const char *getTargetNodeName(unsigned Opcode) const override;
+
+  // This function currently returns cost for srl/ipm/cc sequence for merging.
+  CondMergingParams
+  getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
+                                const Value *Rhs) const override;
+
+  // Handle Lowering flag assembly outputs.
+  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+                                      const SDLoc &DL,
+                                      const AsmOperandInfo &Constraint,
+                                      SelectionDAG &DAG) const override;
+
   std::pair<unsigned, const TargetRegisterClass *>
   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                StringRef Constraint, MVT VT) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 47c24fc..f973949 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -601,6 +601,29 @@ static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
   MachineOperand Src = MI.getOperand(3);
   MachineOperand Len = MI.getOperand(4);
 
+  // If the length is a constant, we don't actually need the check.
+  if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
+    if (Def->getOpcode() == WebAssembly::CONST_I32 ||
+        Def->getOpcode() == WebAssembly::CONST_I64) {
+      if (Def->getOperand(1).getImm() == 0) {
+        // A zero-length memcpy is a no-op.
+        MI.eraseFromParent();
+        return BB;
+      }
+      // A non-zero-length memcpy doesn't need a zero check.
+      unsigned MemoryCopy =
+          Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
+      BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
+          .add(DstMem)
+          .add(SrcMem)
+          .add(Dst)
+          .add(Src)
+          .add(Len);
+      MI.eraseFromParent();
+      return BB;
+    }
+  }
+
   // We're going to add an extra use to `Len` to test if it's zero; that
   // use shouldn't be a kill, even if the original use is.
   MachineOperand NoKillLen = Len;
@@ -669,6 +692,28 @@ static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
   MachineOperand Val = MI.getOperand(2);
   MachineOperand Len = MI.getOperand(3);
 
+  // If the length is a constant, we don't actually need the check.
+  if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
+    if (Def->getOpcode() == WebAssembly::CONST_I32 ||
+        Def->getOpcode() == WebAssembly::CONST_I64) {
+      if (Def->getOperand(1).getImm() == 0) {
+        // A zero-length memset is a no-op.
+        MI.eraseFromParent();
+        return BB;
+      }
+      // A non-zero-length memset doesn't need a zero check.
+      unsigned MemoryFill =
+          Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
+      BuildMI(*BB, MI, DL, TII.get(MemoryFill))
+          .add(Mem)
+          .add(Dst)
+          .add(Val)
+          .add(Len);
+      MI.eraseFromParent();
+      return BB;
+    }
+  }
+
   // We're going to add an extra use to `Len` to test if it's zero; that
   // use shouldn't be a kill, even if the original use is.
   MachineOperand NoKillLen = Len;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 28fa2cd..b81641f 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -414,6 +414,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
 
   getActionDefinitionsBuilder(G_SEXT_INREG).lower();
 
+  getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
+
   // fp constants
   getActionDefinitionsBuilder(G_FCONSTANT)
       .legalFor({s32, s64})
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 3af8b3e..2bf016a 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1335,9 +1335,8 @@ def ProcessorFeatures {
     !listconcat(ARLFeatures, ARLSAdditionalFeatures);
 
   // Pantherlake
-  list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
   list<SubtargetFeature> PTLFeatures =
-    !listremove(!listconcat(ARLSFeatures, PTLAdditionalFeatures), [FeatureWIDEKL]);
+    !listremove(ARLSFeatures, [FeatureWIDEKL]);
 
 
   // Clearwaterforest
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eea84a2..c32b1a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3624,6 +3624,16 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
       match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
       match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
     BaseCost += 1;
+
+  // For OR conditions with EQ comparisons, prefer splitting into branches
+  // (unless CCMP is available). OR+EQ cannot be optimized via bitwise ops,
+  // unlike OR+NE which becomes (P|Q)!=0. Similarly, don't split signed
+  // comparisons (SLT, SGT) that can be optimized.
+  if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or &&
+      match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
+      match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
+    return {-1, -1, -1};
+
   return {BaseCost, BrMergingLikelyBias.getValue(),
           BrMergingUnlikelyBias.getValue()};
 }
@@ -3787,7 +3797,7 @@ static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
 
 /// Return true if every element in Mask, is an in-place blend/select mask or is
 /// undef.
-LLVM_ATTRIBUTE_UNUSED static bool isBlendOrUndef(ArrayRef<int> Mask) {
+[[maybe_unused]] static bool isBlendOrUndef(ArrayRef<int> Mask) {
   unsigned NumElts = Mask.size();
   for (auto [I, M] : enumerate(Mask))
     if (!isUndefOrEqual(M, I) && !isUndefOrEqual(M, I + NumElts))
@@ -8096,7 +8106,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl,
   return DstVec;
 }
 
-LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) {
+[[maybe_unused]] static bool isHorizOp(unsigned Opcode) {
   switch (Opcode) {
   case X86ISD::PACKSS:
   case X86ISD::PACKUS:
@@ -20813,7 +20823,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
     // for DAG type consistency we have to match the FP operand type.
 
     APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000));
-    LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
+    [[maybe_unused]] APFloat::opStatus Status = APFloat::opOK;
     bool LosesInfo = false;
     if (TheVT == MVT::f64)
       // The rounding mode is irrelevant as the conversion should be exact.
@@ -22856,7 +22866,7 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
   // be generated by the memcmp expansion pass with oversized integer compares
   // (see PR33325).
   bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
-  if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
+  if (isNullConstant(Y) && OpSize == 128 && !IsOrXorXorTreeCCZero)
     return SDValue();
 
   // Don't perform this combine if constructing the vector will be expensive.
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index edca7c1..1932a3a 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -175,7 +175,7 @@ constexpr FeatureBitset FeaturesArrowlakeS =
     FeaturesArrowlake | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 |
     FeatureSM4;
 constexpr FeatureBitset FeaturesPantherlake =
-    (FeaturesArrowlakeS ^ FeatureWIDEKL) | FeaturePREFETCHI;
+    (FeaturesArrowlakeS ^ FeatureWIDEKL);
 constexpr FeatureBitset FeaturesClearwaterforest =
     (FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 |
     FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 6b67b48..09cb225 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2979,10 +2979,14 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op,
          "Op must be either i1 or vector of i1.");
   if (SI.getCondition()->getType() != Op->getType())
     return nullptr;
-  if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL))
-    return SelectInst::Create(Op,
-                              IsAnd ? V : ConstantInt::getTrue(Op->getType()),
-                              IsAnd ? ConstantInt::getFalse(Op->getType()) : V);
+  if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL)) {
+    Instruction *MDFrom = nullptr;
+    if (!ProfcheckDisableMetadataFixes)
+      MDFrom = &SI;
+    return SelectInst::Create(
+        Op, IsAnd ? V : ConstantInt::getTrue(Op->getType()),
+        IsAnd ? ConstantInt::getFalse(Op->getType()) : V, "", nullptr, MDFrom);
+  }
   return nullptr;
 }
 
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index ff5f390..66e45ec 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -266,8 +266,7 @@ void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI,
     if (!ProfcheckDisableMetadataFixes)
       BI->setMetadata(LLVMContext::MD_prof,
                       SI->getMetadata(LLVMContext::MD_prof));
-    DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock},
-                       {DominatorTree::Insert, StartBlock, NewBlock}});
+    DTU->applyUpdates({{DominatorTree::Insert, StartBlock, NewBlock}});
   } else {
     BasicBlock *EndBlock = SIUse->getParent();
     BasicBlock *NewBlockT = BasicBlock::Create(
@@ -1479,10 +1478,13 @@ bool DFAJumpThreading::run(Function &F) {
   DTU->flush();
 
 #ifdef EXPENSIVE_CHECKS
-  assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full));
   verifyFunction(F, &dbgs());
 #endif
 
+  if (MadeChanges && VerifyDomInfo)
+    assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full) &&
+           "Failed to maintain validity of domtree!");
+
   return MadeChanges;
 }
 
diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 213d0f3..1335665 100644
--- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -39,10 +39,11 @@ public:
 private:
   AliasAnalysis *AA;
 };
+} // namespace
 
 /// iterativelyFlattenCFG - Call FlattenCFG on all the blocks in the function,
 /// iterating until no more changes are made.
-bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
+static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
   bool Changed = false;
   bool LocalChange = true;
 
@@ -67,7 +68,6 @@ bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
   }
   return Changed;
 }
-} // namespace
 
 char FlattenCFGLegacyPass::ID = 0;
 
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index b9d332b..578fec7 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -118,9 +118,13 @@ STATISTIC(
 STATISTIC(NumDeleted, "Number of instructions deleted");
 STATISTIC(NumVectorized, "Number of vectorized aggregates");
 
+namespace llvm {
 /// Disable running mem2reg during SROA in order to test or debug SROA.
 static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
                                      cl::Hidden);
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
 namespace {
 
 class AllocaSliceRewriter;
@@ -1777,7 +1781,8 @@ static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
   }
 
   Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
-                              LI.getName() + ".sroa.speculated");
+                              LI.getName() + ".sroa.speculated",
+                              ProfcheckDisableMetadataFixes ? nullptr : &SI);
 
   LLVM_DEBUG(dbgs() << "          speculated to: " << *V << "\n");
   LI.replaceAllUsesWith(V);
@@ -2662,7 +2667,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
   for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
     Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
 
-  V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
+  // No profiling support for vector selects.
+  V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,
+                                         DEBUG_TYPE, Name + "blend");
 
   LLVM_DEBUG(dbgs() << "    blend: " << *V << "\n");
   return V;
@@ -4360,10 +4367,13 @@ private:
     };
 
     Value *Cond, *True, *False;
+    Instruction *MDFrom = nullptr;
     if (auto *SI = dyn_cast<SelectInst>(Sel)) {
       Cond = SI->getCondition();
       True = SI->getTrueValue();
       False = SI->getFalseValue();
+      if (!ProfcheckDisableMetadataFixes)
+        MDFrom = SI;
     } else {
       Cond = Sel->getOperand(0);
       True = ConstantInt::get(Sel->getType(), 1);
@@ -4383,8 +4393,12 @@ private:
         IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
                       False->getName() + ".sroa.gep", NW);
 
-    Value *NSel =
-        IRB.CreateSelect(Cond, NTrue, NFalse, Sel->getName() + ".sroa.sel");
+    Value *NSel = MDFrom
+                      ? IRB.CreateSelect(Cond, NTrue, NFalse,
+                                         Sel->getName() + ".sroa.sel", MDFrom)
+                      : IRB.CreateSelectWithUnknownProfile(
+                            Cond, NTrue, NFalse, DEBUG_TYPE,
+                            Sel->getName() + ".sroa.sel");
     Visited.erase(&GEPI);
     GEPI.replaceAllUsesWith(NSel);
     GEPI.eraseFromParent();
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 2ee91a9..0f3978f 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -47,6 +47,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
 #include <cassert>
 #include <utility>
 
@@ -321,7 +322,7 @@ class StructurizeCFG {
 
   void collectInfos();
 
-  void insertConditions(bool Loops);
+  void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter);
 
   void simplifyConditions();
 
@@ -671,10 +672,9 @@ void StructurizeCFG::collectInfos() {
 }
 
 /// Insert the missing branch conditions
-void StructurizeCFG::insertConditions(bool Loops) {
+void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) {
   BranchVector &Conds = Loops ? LoopConds : Conditions;
   Value *Default = Loops ? BoolTrue : BoolFalse;
-  SSAUpdater PhiInserter;
 
   for (BranchInst *Term : Conds) {
     assert(Term->isConditional());
@@ -683,8 +683,9 @@ void StructurizeCFG::insertConditions(bool Loops) {
     BasicBlock *SuccTrue = Term->getSuccessor(0);
     BasicBlock *SuccFalse = Term->getSuccessor(1);
 
-    PhiInserter.Initialize(Boolean, "");
-    PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
+    unsigned Variable = PhiInserter.AddVariable("", Boolean);
+    PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent,
+                                  Default);
 
     BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
 
@@ -697,7 +698,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
         ParentInfo = PI;
         break;
       }
-      PhiInserter.AddAvailableValue(BB, PI.Pred);
+      PhiInserter.AddAvailableValue(Variable, BB, PI.Pred);
       Dominator.addAndRememberBlock(BB);
     }
 
@@ -706,9 +707,9 @@ void StructurizeCFG::insertConditions(bool Loops) {
       CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
     } else {
       if (!Dominator.resultIsRememberedBlock())
-        PhiInserter.AddAvailableValue(Dominator.result(), Default);
+        PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default);
 
-      Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
+      PhiInserter.AddUse(Variable, &Term->getOperandUse(0));
     }
   }
 }
@@ -1414,8 +1415,12 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT,
   orderNodes();
   collectInfos();
   createFlow();
-  insertConditions(false);
-  insertConditions(true);
+
+  SSAUpdaterBulk PhiInserter;
+  insertConditions(false, PhiInserter);
+  insertConditions(true, PhiInserter);
+  PhiInserter.RewriteAndOptimizeAllUses(*DT);
+
   setPhiValues();
   simplifyHoistedPhis();
   simplifyConditions();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8ca3bed..23f5623 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -24,12 +24,9 @@
 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
 #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
 
-#include "VPlanAnalysis.h"
 #include "VPlanValue.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/ilist.h"
@@ -41,10 +38,11 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/InstructionCost.h"
-#include <algorithm>
 #include <cassert>
 #include <cstddef>
+#include <functional>
 #include <string>
+#include <utility>
 
 namespace llvm {
 
@@ -346,13 +344,6 @@ public:
   /// Return the cost of the block.
   virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;
 
-  /// Return true if it is legal to hoist instructions into this block.
-  bool isLegalToHoistInto() {
-    // There are currently no constraints that prevent an instruction to be
-    // hoisted into a VPBlockBase.
-    return true;
-  }
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
     OS << getName();