69 files changed, 965 insertions, 604 deletions
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 371ad41..edbeede 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1023,11 +1023,44 @@ static void computeKnownBitsFromOperator(const Operator *I,
     break;
   }
   case Instruction::Select: {
-    computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
-    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
-
+    auto ComputeForArm = [&](Value *Arm, bool Invert) {
+      KnownBits Res(Known.getBitWidth());
+      computeKnownBits(Arm, Res, Depth + 1, Q);
+      // If we have a constant arm, we are done.
+      if (Res.isConstant())
+        return Res;
+
+      // See what condition implies about the bits of the two select arms.
+      KnownBits CondRes(Res.getBitWidth());
+      computeKnownBitsFromCond(Arm, I->getOperand(0), CondRes, Depth + 1, Q,
+                               Invert);
+      // If we don't get any information from the condition, no reason to
+      // proceed.
+      if (CondRes.isUnknown())
+        return Res;
+
+      // We can have conflict if the condition is dead. I.e if we have
+      // (x | 64) < 32 ? (x | 64) : y
+      // we will have conflict at bit 6 from the condition/the `or`.
+      // In that case just return. Its not particularly important
+      // what we do, as this select is going to be simplified soon.
+      CondRes = CondRes.unionWith(Res);
+      if (CondRes.hasConflict())
+        return Res;
+
+      // Finally make sure the information we found is valid. This is relatively
+      // expensive so it's left for the very end.
+      if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
+        return Res;
+
+      // Finally, we know we get information from the condition and its valid,
+      // so return it.
+      return CondRes;
+    };
     // Only known if known in both the LHS and RHS.
-    Known = Known.intersectWith(Known2);
+    Known =
+        ComputeForArm(I->getOperand(1), /*Invert=*/false)
+            .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true));
     break;
   }
   case Instruction::FPTrunc:
@@ -5709,7 +5742,7 @@ llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
       // looking for, then.
       if (*req_idx != *i)
         return FindInsertedValue(I->getAggregateOperand(), idx_range,
-                                 *InsertBefore);
+                                 InsertBefore);
     }
     // If we end up here, the indices of the insertvalue match with those
     // requested (though possibly only partially). Now we recursively look at
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 93fb2a8..0eb9c24 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -19,7 +19,7 @@
 using namespace llvm;
 
 PreservedAnalyses BitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
-  // RemoveDIs: there's no bitcode representation of the DPValue debug-info,
+  // RemoveDIs: there's no bitcode representation of the DbgRecord debug-info,
   // convert to dbg.values before writing out.
   bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
   if (IsNewDbgInfoFormat)
@@ -56,8 +56,8 @@ namespace {
     StringRef getPassName() const override { return "Bitcode Writer"; }
 
     bool runOnModule(Module &M) override {
-      // RemoveDIs: there's no bitcode representation of the DPValue debug-info,
-      // convert to dbg.values before writing out.
+      // RemoveDIs: there's no bitcode representation of the DbgRecord
+      // debug-info, convert to dbg.values before writing out.
       bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
       if (IsNewDbgInfoFormat)
         M.convertFromNewDbgValues();
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 55cdc3c..2e8e7d0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -369,7 +369,8 @@ void AppleAccelTableWriter::emit() const {
 DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die,
                                            const uint32_t UnitID,
                                            const bool IsTU)
-    : OffsetVal(&Die), DieTag(Die.getTag()), IsTU(IsTU), UnitID(UnitID) {}
+    : OffsetVal(&Die), DieTag(Die.getTag()), AbbrevNumber(0), IsTU(IsTU),
+      UnitID(UnitID) {}
 
 void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) {
   assert(CompUnitCount > 0 && "Index must have at least one CU.");
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index a4b819a..746926e56 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -217,13 +217,14 @@ void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) {
   // to the start and end position in the vector with VarLocsBeforeInst. This
   // block includes VarLocs for any DPValues attached to that instruction.
   for (auto &P : Builder.VarLocsBeforeInst) {
-    // Process VarLocs attached to a DPValue alongside their marker Instruction.
+    // Process VarLocs attached to a DbgRecord alongside their marker
+    // Instruction.
     if (isa<const DbgRecord *>(P.first))
       continue;
     const Instruction *I = cast<const Instruction *>(P.first);
     unsigned BlockStart = VarLocRecords.size();
-    // Any VarLocInfos attached to a DPValue should now be remapped to their
-    // marker Instruction, in order of DPValue appearance and prior to any
+    // Any VarLocInfos attached to a DbgRecord should now be remapped to their
+    // marker Instruction, in order of DbgRecord appearance and prior to any
     // VarLocInfos attached directly to that instruction.
     for (const DPValue &DPV : DPValue::filter(I->getDbgRecordRange())) {
       // Even though DPV defines a variable location, VarLocsBeforeInst can
@@ -1649,7 +1650,7 @@ void AssignmentTrackingLowering::processUntaggedInstruction(
     Ops.push_back(dwarf::DW_OP_deref);
     DIE = DIExpression::prependOpcodes(DIE, Ops, /*StackValue=*/false,
                                        /*EntryValue=*/false);
-    // Find a suitable insert point, before the next instruction or DPValue
+    // Find a suitable insert point, before the next instruction or DbgRecord
     // after I.
     auto InsertBefore = getNextNode(&I);
     assert(InsertBefore && "Shouldn't be inserting after a terminator");
@@ -1886,21 +1887,21 @@ void AssignmentTrackingLowering::resetInsertionPoint(DPValue &After) {
 }
 
 void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
-  // If the block starts with DPValues, we need to process those DPValues as
+  // If the block starts with DbgRecords, we need to process those DbgRecords as
   // their own frame without processing any instructions first.
-  bool ProcessedLeadingDPValues = !BB.begin()->hasDbgRecords();
+  bool ProcessedLeadingDbgRecords = !BB.begin()->hasDbgRecords();
   for (auto II = BB.begin(), EI = BB.end(); II != EI;) {
     assert(VarsTouchedThisFrame.empty());
     // Process the instructions in "frames". A "frame" includes a single
     // non-debug instruction followed any debug instructions before the
     // next non-debug instruction.
 
-    // Skip the current instruction if it has unprocessed DPValues attached (see
-    // comment above `ProcessedLeadingDPValues`).
-    if (ProcessedLeadingDPValues) {
+    // Skip the current instruction if it has unprocessed DbgRecords attached
+    // (see comment above `ProcessedLeadingDbgRecords`).
+    if (ProcessedLeadingDbgRecords) {
       // II is now either a debug intrinsic, a non-debug instruction with no
-      // attached DPValues, or a non-debug instruction with attached processed
-      // DPValues.
+      // attached DbgRecords, or a non-debug instruction with attached processed
+      // DbgRecords.
       // II has not been processed.
       if (!isa<DbgInfoIntrinsic>(&*II)) {
         if (II->isTerminator())
@@ -1912,8 +1913,8 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
       }
     }
     // II is now either a debug intrinsic, a non-debug instruction with no
-    // attached DPValues, or a non-debug instruction with attached unprocessed
-    // DPValues.
+    // attached DbgRecords, or a non-debug instruction with attached unprocessed
+    // DbgRecords.
     if (II != EI && II->hasDbgRecords()) {
       // Skip over non-variable debug records (i.e., labels). They're going to
       // be read from IR (possibly re-ordering them within the debug record
@@ -1924,7 +1925,7 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
         assert(LiveSet->isValid());
       }
     }
-    ProcessedLeadingDPValues = true;
+    ProcessedLeadingDbgRecords = true;
     while (II != EI) {
       auto *Dbg = dyn_cast<DbgInfoIntrinsic>(&*II);
       if (!Dbg)
@@ -1934,9 +1935,9 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
       assert(LiveSet->isValid());
       ++II;
     }
-    // II is now a non-debug instruction either with no attached DPValues, or
-    // with attached processed DPValues. II has not been processed, and all
-    // debug instructions or DPValues in the frame preceding II have been
+    // II is now a non-debug instruction either with no attached DbgRecords, or
+    // with attached processed DbgRecords. II has not been processed, and all
+    // debug instructions or DbgRecords in the frame preceding II have been
     // processed.
 
     // We've processed everything in the "frame". Now determine which variables
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 59a0c64..055e275 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2946,7 +2946,7 @@ class TypePromotionTransaction {
       Instruction *PrevInst;
       BasicBlock *BB;
     } Point;
-    std::optional<DPValue::self_iterator> BeforeDPValue = std::nullopt;
+    std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
 
     /// Remember whether or not the instruction had a previous instruction.
     bool HasPrevInstruction;
@@ -2958,9 +2958,9 @@ class TypePromotionTransaction {
       BasicBlock *BB = Inst->getParent();
 
       // Record where we would have to re-insert the instruction in the sequence
-      // of DPValues, if we ended up reinserting.
+      // of DbgRecords, if we ended up reinserting.
       if (BB->IsNewDbgInfoFormat)
-        BeforeDPValue = Inst->getDbgReinsertionPosition();
+        BeforeDbgRecord = Inst->getDbgReinsertionPosition();
 
       if (HasPrevInstruction) {
         Point.PrevInst = &*std::prev(Inst->getIterator());
@@ -2983,7 +2983,7 @@ class TypePromotionTransaction {
           Inst->insertBefore(*Point.BB, Position);
       }
 
-      Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDPValue);
+      Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
     }
   };
 
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 4ed44d1..8efe67a 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -982,7 +982,7 @@ void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V,
 }
 
 void llvm::printMIR(raw_ostream &OS, const Module &M) {
-  // RemoveDIs: as there's no textual form for DPValues yet, print debug-info
+  // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info
   // in dbg.value format.
   bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
   if (IsNewDbgInfoFormat)
@@ -996,7 +996,7 @@ void llvm::printMIR(raw_ostream &OS, const Module &M) {
 }
 
 void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) {
-  // RemoveDIs: as there's no textual form for DPValues yet, print debug-info
+  // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info
   // in dbg.value format.
   bool IsNewDbgInfoFormat = MF.getFunction().IsNewDbgInfoFormat;
   if (IsNewDbgInfoFormat)
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index 40898d2..f65d532 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -645,12 +645,13 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
       DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt());
     }
 
-    // Duplicate implementation for DPValues, the non-instruction debug-info
-    // record. Helper lambda for moving DPValues to the end block.
-    auto TransferDPValues = [&](Instruction &I) {
-      for (auto &DPValue : llvm::make_early_inc_range(I.getDbgRecordRange())) {
-        DPValue.removeFromParent();
-        EndBlock->insertDbgRecordBefore(&DPValue,
+    // Duplicate implementation for DbgRecords, the non-instruction debug-info
+    // format. Helper lambda for moving DbgRecords to the end block.
+    auto TransferDbgRecords = [&](Instruction &I) {
+      for (auto &DbgRecord :
+           llvm::make_early_inc_range(I.getDbgRecordRange())) {
+        DbgRecord.removeFromParent();
+        EndBlock->insertDbgRecordBefore(&DbgRecord,
                                         EndBlock->getFirstInsertionPt());
       }
     };
@@ -660,7 +661,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
     // middle" of the select group.
     auto R = make_range(std::next(SI.getI()->getIterator()),
                         std::next(LastSI.getI()->getIterator()));
-    llvm::for_each(R, TransferDPValues);
+    llvm::for_each(R, TransferDbgRecords);
 
     // These are the new basic blocks for the conditional branch.
     // At least one will become an actual new basic block.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 735cec8..40b078a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2799,8 +2799,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
         // Limit this to after legalization if the add has wrap flags
         (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
                                        !N->getFlags().hasNoSignedWrap()))) {
-      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
-                                DAG.getAllOnesConstant(DL, VT));
+      SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
     }
   }
@@ -3025,8 +3024,7 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
       // Limit this to after legalization if the add has wrap flags
       (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
                                      !N0->getFlags().hasNoSignedWrap()))) {
-    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
-                              DAG.getAllOnesConstant(DL, VT));
+    SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
   }
 
@@ -3789,63 +3787,34 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
       return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
   }
 
-  // fold ((A+(B+or-C))-B) -> A+or-C
-  if (N0.getOpcode() == ISD::ADD &&
-      (N0.getOperand(1).getOpcode() == ISD::SUB ||
-       N0.getOperand(1).getOpcode() == ISD::ADD) &&
-      N0.getOperand(1).getOperand(0) == N1)
-    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
-                       N0.getOperand(1).getOperand(1));
-
-  // fold ((A+(C+B))-B) -> A+C
-  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
-      N0.getOperand(1).getOperand(1) == N1)
-    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
-                       N0.getOperand(1).getOperand(0));
+  SDValue A, B, C;
+
+  // fold ((A+(B+C))-B) -> A+C
+  if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
+    return DAG.getNode(ISD::ADD, DL, VT, A, C);
+
+  // fold ((A+(B-C))-B) -> A-C
+  if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
+    return DAG.getNode(ISD::SUB, DL, VT, A, C);
 
   // fold ((A-(B-C))-C) -> A-B
-  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
-      N0.getOperand(1).getOperand(1) == N1)
-    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
-                       N0.getOperand(1).getOperand(0));
+  if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
+    return DAG.getNode(ISD::SUB, DL, VT, A, B);
 
   // fold (A-(B-C)) -> A+(C-B)
-  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
+  if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
     return DAG.getNode(ISD::ADD, DL, VT, N0,
-                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
-                                   N1.getOperand(0)));
+                       DAG.getNode(ISD::SUB, DL, VT, C, B));
 
   // A - (A & B)  ->  A & (~B)
-  if (N1.getOpcode() == ISD::AND) {
-    SDValue A = N1.getOperand(0);
-    SDValue B = N1.getOperand(1);
-    if (A != N0)
-      std::swap(A, B);
-    if (A == N0 &&
-        (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
-      SDValue InvB =
-          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
-      return DAG.getNode(ISD::AND, DL, VT, A, InvB);
-    }
-  }
+  if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
+      (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
+    return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
 
-  // fold (X - (-Y * Z)) -> (X + (Y * Z))
-  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
-    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
-        isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
-      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
-                                N1.getOperand(0).getOperand(1),
-                                N1.getOperand(1));
-      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
-    }
-    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
-        isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
-      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
-                                N1.getOperand(0),
-                                N1.getOperand(1).getOperand(1));
-      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
-    }
-  }
+  // fold (A - (-B * C)) -> (A + (B * C))
+  if (sd_match(N1, m_OneUse(m_Mul(m_Sub(m_Zero(), m_Value(B)), m_Value(C)))))
+    return DAG.getNode(ISD::ADD, DL, VT, N0,
+                       DAG.getNode(ISD::MUL, DL, VT, B, C));
 
   // If either operand of a sub is undef, the result is undef
   if (N0.isUndef())
@@ -3865,12 +3834,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldSubToUSubSat(VT, N))
     return V;
 
-  // (x - y) - 1  ->  add (xor y, -1), x
-  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
-    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
-                              DAG.getAllOnesConstant(DL, VT));
-    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
-  }
+  // (A - B) - 1  ->  add (xor B, -1), A
+  if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
+    return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
 
   // Look for:
   //   sub y, (xor x, -1)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c2430359..b8c7d08 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -37,6 +37,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
@@ -81,6 +82,7 @@
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::SDPatternMatch;
 
 /// makeVTList - Return an instance of the SDVTList struct initialized with the
 /// specified members.
@@ -4290,21 +4292,15 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
     return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) &&
            isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1);
 
-  if (Val.getOpcode() == ISD::AND) {
-    // Looking for `x & -x` pattern:
-    // If x == 0:
-    //    x & -x -> 0
-    // If x != 0:
-    //    x & -x -> non-zero pow2
-    // so if we find the pattern return whether we know `x` is non-zero.
-    for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
-      SDValue NegOp = Val.getOperand(OpIdx);
-      if (NegOp.getOpcode() == ISD::SUB &&
-          NegOp.getOperand(1) == Val.getOperand(1 - OpIdx) &&
-          isNullOrNullSplat(NegOp.getOperand(0)))
-        return isKnownNeverZero(Val.getOperand(1 - OpIdx), Depth);
-    }
-  }
+  // Looking for `x & -x` pattern:
+  // If x == 0:
+  //    x & -x -> 0
+  // If x != 0:
+  //    x & -x -> non-zero pow2
+  // so if we find the pattern return whether we know `x` is non-zero.
+  SDValue X;
+  if (sd_match(Val, m_And(m_Value(X), m_Sub(m_Zero(), m_Deferred(X)))))
+    return isKnownNeverZero(X, Depth);
 
   if (Val.getOpcode() == ISD::ZERO_EXTEND)
     return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 515b576..4bad57d 100644
--- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -150,9 +150,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
   if (!LiveBBs.insert(BB).second)
     return; // already been here.
 
-  df_iterator_default_set<BasicBlock*> Visited;
-
-  for (BasicBlock *B : inverse_depth_first_ext(BB, Visited))
+  for (BasicBlock *B : inverse_depth_first(BB))
     LiveBBs.insert(B);
 }
 
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp
index d346214..57ac991e 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.cpp
@@ -87,7 +87,7 @@ static void registerJITLoaderVTuneUnregisterImpl(
   for (auto &Method : UM) {
     JITEventWrapper::Wrapper->iJIT_NotifyEvent(
         iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
-        const_cast<unsigned long *>(&Method.first));
+        const_cast<uint64_t *>(&Method.first));
   }
 }
 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index d65ed8c1..c74c898 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2110,7 +2110,7 @@ Function *getFreshReductionFunc(Module &M) {
 
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
-    ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
+    ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait, bool IsByRef) {
   for (const ReductionInfo &RI : ReductionInfos) {
     (void)RI;
     assert(RI.Variable && "expected non-null variable");
@@ -2197,17 +2197,29 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
   for (auto En : enumerate(ReductionInfos)) {
     const ReductionInfo &RI = En.value();
     Type *ValueType = RI.ElementType;
-    Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
-                                         "red.value." + Twine(En.index()));
+    // We have one less load for by-ref case because that load is now inside of
+    // the reduction region
+    Value *RedValue = nullptr;
+    if (!IsByRef) {
+      RedValue = Builder.CreateLoad(ValueType, RI.Variable,
+                                    "red.value." + Twine(En.index()));
+    }
     Value *PrivateRedValue =
         Builder.CreateLoad(ValueType, RI.PrivateVariable,
                            "red.private.value." + Twine(En.index()));
     Value *Reduced;
-    Builder.restoreIP(
-        RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
+    if (IsByRef) {
+      Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RI.Variable,
+                                        PrivateRedValue, Reduced));
+    } else {
+      Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RedValue,
+                                        PrivateRedValue, Reduced));
+    }
     if (!Builder.GetInsertBlock())
       return InsertPointTy();
-    Builder.CreateStore(Reduced, RI.Variable);
+    // for by-ref case, the load is inside of the reduction region
+    if (!IsByRef)
+      Builder.CreateStore(Reduced, RI.Variable);
   }
   Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
       IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
@@ -2219,7 +2231,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
   // function. There are no loads/stores here because they will be happening
   // inside the atomic elementwise reduction.
   Builder.SetInsertPoint(AtomicRedBlock);
-  if (CanGenerateAtomic) {
+  if (CanGenerateAtomic && !IsByRef) {
     for (const ReductionInfo &RI : ReductionInfos) {
       Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType,
                                               RI.Variable, RI.PrivateVariable));
@@ -2257,7 +2269,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
     Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
     if (!Builder.GetInsertBlock())
       return InsertPointTy();
-    Builder.CreateStore(Reduced, LHSPtr);
+    // store is inside of the reduction region when using by-ref
+    if (!IsByRef)
+      Builder.CreateStore(Reduced, LHSPtr);
   }
   Builder.CreateRetVoid();
 
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 1beb4c0..11383ea 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -4592,7 +4592,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
 void AssemblyWriter::printDPMarker(const DPMarker &Marker) {
   // There's no formal representation of a DPMarker -- print purely as a
   // debugging aid.
-  for (const DbgRecord &DPR : Marker.StoredDPValues) {
+  for (const DbgRecord &DPR : Marker.StoredDbgRecords) {
     printDbgRecord(DPR);
     Out << "\n";
   }
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 7ead7ce..4dd1bdd 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -63,9 +63,9 @@ DPMarker *BasicBlock::createMarker(InstListType::iterator It) {
 void BasicBlock::convertToNewDbgValues() {
   IsNewDbgInfoFormat = true;
 
-  // Iterate over all instructions in the instruction list, collecting dbg.value
-  // instructions and converting them to DPValues. Once we find a "real"
-  // instruction, attach all those DPValues to a DPMarker in that instruction.
+  // Iterate over all instructions in the instruction list, collecting debug
+  // info intrinsics and converting them to DbgRecords. Once we find a "real"
+  // instruction, attach all those DbgRecords to a DPMarker in that instruction.
   SmallVector<DbgRecord *, 4> DPVals;
   for (Instruction &I : make_early_inc_range(InstList)) {
     assert(!I.DbgMarker && "DbgMarker already set on old-format instrs?");
@@ -86,7 +86,7 @@ void BasicBlock::convertToNewDbgValues() {
     if (DPVals.empty())
       continue;
 
-    // Create a marker to store DPValues in.
+    // Create a marker to store DbgRecords in.
     createMarker(&I);
     DPMarker *Marker = I.DbgMarker;
 
@@ -102,7 +102,7 @@ void BasicBlock::convertFromNewDbgValues() {
   IsNewDbgInfoFormat = false;
 
   // Iterate over the block, finding instructions annotated with DPMarkers.
-  // Convert any attached DPValues to dbg.values and insert ahead of the
+  // Convert any attached DbgRecords to debug intrinsics and insert ahead of the
   // instruction.
   for (auto &Inst : *this) {
     if (!Inst.DbgMarker)
@@ -116,7 +116,7 @@ void BasicBlock::convertFromNewDbgValues() {
     Marker.eraseFromParent();
   }
 
-  // Assume no trailing DPValues: we could technically create them at the end
+  // Assume no trailing DbgRecords: we could technically create them at the end
   // of the block, after a terminator, but this would be non-cannonical and
   // indicates that something else is broken somewhere.
   assert(!getTrailingDbgRecords());
@@ -691,15 +691,15 @@ void BasicBlock::renumberInstructions() {
   NumInstrRenumberings++;
 }
 
-void BasicBlock::flushTerminatorDbgValues() {
-  // If we erase the terminator in a block, any DPValues will sink and "fall
+void BasicBlock::flushTerminatorDbgRecords() {
+  // If we erase the terminator in a block, any DbgRecords will sink and "fall
   // off the end", existing after any terminator that gets inserted. With
   // dbg.value intrinsics we would just insert the terminator at end() and
-  // the dbg.values would come before the terminator. With DPValues, we must
+  // the dbg.values would come before the terminator. With DbgRecords, we must
   // do this manually.
   // To get out of this unfortunate form, whenever we insert a terminator,
-  // check whether there's anything trailing at the end and move those DPValues
-  // in front of the terminator.
+  // check whether there's anything trailing at the end and move those
+  // DbgRecords in front of the terminator.
 
   // Do nothing if we're not in new debug-info format.
   if (!IsNewDbgInfoFormat)
@@ -710,15 +710,15 @@ void BasicBlock::flushTerminatorDbgValues() {
   if (!Term)
     return;
 
-  // Are there any dangling DPValues?
-  DPMarker *TrailingDPValues = getTrailingDbgRecords();
-  if (!TrailingDPValues)
+  // Are there any dangling DbgRecords?
+  DPMarker *TrailingDbgRecords = getTrailingDbgRecords();
+  if (!TrailingDbgRecords)
     return;
 
-  // Transfer DPValues from the trailing position onto the terminator.
+  // Transfer DbgRecords from the trailing position onto the terminator.
   createMarker(Term);
-  Term->DbgMarker->absorbDebugValues(*TrailingDPValues, false);
-  TrailingDPValues->eraseFromParent();
+  Term->DbgMarker->absorbDebugValues(*TrailingDbgRecords, false);
+  TrailingDbgRecords->eraseFromParent();
   deleteTrailingDbgRecords();
 }
 
@@ -735,7 +735,7 @@ void BasicBlock::spliceDebugInfoEmptyBlock(BasicBlock::iterator Dest,
   // If an optimisation pass attempts to splice the contents of the block from
   // BB1->begin() to BB1->getTerminator(), then the dbg.value will be
   // transferred to the destination.
-  // However, in the "new" DPValue format for debug-info, that range is empty:
+  // However, in the "new" DbgRecord format for debug-info, that range is empty:
   // begin() returns an iterator to the terminator, as there will only be a
   // single instruction in the block. We must piece together from the bits set
   // in the iterators whether there was the intention to transfer any debug
@@ -750,16 +750,16 @@ void BasicBlock::spliceDebugInfoEmptyBlock(BasicBlock::iterator Dest,
   bool ReadFromHead = First.getHeadBit();
 
   // If the source block is completely empty, including no terminator, then
-  // transfer any trailing DPValues that are still hanging around. This can
+  // transfer any trailing DbgRecords that are still hanging around. This can
   // occur when a block is optimised away and the terminator has been moved
   // somewhere else.
   if (Src->empty()) {
-    DPMarker *SrcTrailingDPValues = Src->getTrailingDbgRecords();
-    if (!SrcTrailingDPValues)
+    DPMarker *SrcTrailingDbgRecords = Src->getTrailingDbgRecords();
+    if (!SrcTrailingDbgRecords)
       return;
 
     Dest->adoptDbgRecords(Src, Src->end(), InsertAtHead);
-    // adoptDbgRecords should have released the trailing DPValues.
+    // adoptDbgRecords should have released the trailing DbgRecords.
     assert(!Src->getTrailingDbgRecords());
     return;
   }
@@ -785,8 +785,8 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src,
   /* Do a quick normalisation before calling the real splice implementation. We
      might be operating on a degenerate basic block that has no instructions
      in it, a legitimate transient state. In that case, Dest will be end() and
-     any DPValues temporarily stored in the TrailingDPValues map in LLVMContext.
-     We might illustrate it thus:
+     any DbgRecords temporarily stored in the TrailingDbgRecords map in
+     LLVMContext. We might illustrate it thus:
 
                          Dest
                            |
@@ -795,35 +795,35 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src,
                                 |               |
                                First           Last
 
-     However: does the caller expect the "~" DPValues to end up before or after
-     the spliced segment? This is communciated in the "Head" bit of Dest, which
-     signals whether the caller called begin() or end() on this block.
+     However: does the caller expect the "~" DbgRecords to end up before or
+     after the spliced segment? This is communciated in the "Head" bit of Dest,
+     which signals whether the caller called begin() or end() on this block.
 
-     If the head bit is set, then all is well, we leave DPValues trailing just
+     If the head bit is set, then all is well, we leave DbgRecords trailing just
      like how dbg.value instructions would trail after instructions spliced to
      the beginning of this block.
 
-     If the head bit isn't set, then try to jam the "~" DPValues onto the front
-     of the First instruction, then splice like normal, which joins the "~"
-     DPValues with the "+" DPValues. However if the "+" DPValues are supposed to
-     be left behind in Src, then:
-      * detach the "+" DPValues,
-      * move the "~" DPValues onto First,
+     If the head bit isn't set, then try to jam the "~" DbgRecords onto the
+     front of the First instruction, then splice like normal, which joins the
+     "~" DbgRecords with the "+" DbgRecords. However if the "+" DbgRecords are
+     supposed to be left behind in Src, then:
+      * detach the "+" DbgRecords,
+      * move the "~" DbgRecords onto First,
       * splice like normal,
-      * replace the "+" DPValues onto the Last position.
+      * replace the "+" DbgRecords onto the Last position.
      Complicated, but gets the job done. */
 
-  // If we're inserting at end(), and not in front of dangling DPValues, then
-  // move the DPValues onto "First". They'll then be moved naturally in the
+  // If we're inserting at end(), and not in front of dangling DbgRecords, then
+  // move the DbgRecords onto "First". They'll then be moved naturally in the
   // splice process.
-  DPMarker *MoreDanglingDPValues = nullptr;
-  DPMarker *OurTrailingDPValues = getTrailingDbgRecords();
-  if (Dest == end() && !Dest.getHeadBit() && OurTrailingDPValues) {
-    // Are the "+" DPValues not supposed to move? If so, detach them
+  DPMarker *MoreDanglingDbgRecords = nullptr;
+  DPMarker *OurTrailingDbgRecords = getTrailingDbgRecords();
+  if (Dest == end() && !Dest.getHeadBit() && OurTrailingDbgRecords) {
+    // Are the "+" DbgRecords not supposed to move? If so, detach them
     // temporarily.
     if (!First.getHeadBit() && First->hasDbgRecords()) {
-      MoreDanglingDPValues = Src->getMarker(First);
-      MoreDanglingDPValues->removeFromParent();
+      MoreDanglingDbgRecords = Src->getMarker(First);
+      MoreDanglingDbgRecords->removeFromParent();
     }
 
     if (First->hasDbgRecords()) {
@@ -839,8 +839,8 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src,
       // No current marker, create one and absorb in. (FIXME: we can avoid an
       // allocation in the future).
       DPMarker *CurMarker = Src->createMarker(&*First);
-      CurMarker->absorbDebugValues(*OurTrailingDPValues, false);
-      OurTrailingDPValues->eraseFromParent();
+      CurMarker->absorbDebugValues(*OurTrailingDbgRecords, false);
+      OurTrailingDbgRecords->eraseFromParent();
     }
     deleteTrailingDbgRecords();
     First.setHeadBit(true);
@@ -849,16 +849,16 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src,
   // Call the main debug-info-splicing implementation.
   spliceDebugInfoImpl(Dest, Src, First, Last);
 
-  // Do we have some "+" DPValues hanging around that weren't supposed to move,
-  // and we detached to make things easier?
-  if (!MoreDanglingDPValues)
+  // Do we have some "+" DbgRecords hanging around that weren't supposed to
+  // move, and we detached to make things easier?
+  if (!MoreDanglingDbgRecords)
     return;
 
   // FIXME: we could avoid an allocation here sometimes. (adoptDbgRecords
   // requires an iterator).
   DPMarker *LastMarker = Src->createMarker(Last);
-  LastMarker->absorbDebugValues(*MoreDanglingDPValues, true);
-  MoreDanglingDPValues->eraseFromParent();
+  LastMarker->absorbDebugValues(*MoreDanglingDbgRecords, true);
+  MoreDanglingDbgRecords->eraseFromParent();
 }
 
 void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
@@ -870,15 +870,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
   bool InsertAtHead = Dest.getHeadBit();
   bool ReadFromHead = First.getHeadBit();
   // Use this flag to signal the abnormal case, where we don't want to copy the
-  // DPValues ahead of the "Last" position.
+  // DbgRecords ahead of the "Last" position.
   bool ReadFromTail = !Last.getTailBit();
   bool LastIsEnd = (Last == Src->end());
 
   /*
     Here's an illustration of what we're about to do. We have two blocks, this
     and Src, and two segments of list. Each instruction is marked by a capital
-    while potential DPValue debug-info is marked out by "-" characters and a few
-    other special characters (+:=) where I want to highlight what's going on.
+    while potential DbgRecord debug-info is marked out by "-" characters and a
+    few other special characters (+:=) where I want to highlight what's going
+    on.
 
                                                  Dest
                                                    |
@@ -889,18 +890,18 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
 
     The splice method is going to take all the instructions from First up to
     (but not including) Last and insert them in _front_ of Dest, forming one
-    long list. All the DPValues attached to instructions _between_ First and
+    long list. All the DbgRecords attached to instructions _between_ First and
     Last need no maintenence. However, we have to do special things with the
-    DPValues marked with the +:= characters. We only have three positions:
-    should the "+" DPValues be transferred, and if so to where? Do we move the
-    ":" DPValues? Would they go in front of the "=" DPValues, or should the "="
-    DPValues go before "+" DPValues?
+    DbgRecords marked with the +:= characters. We only have three positions:
+    should the "+" DbgRecords be transferred, and if so to where? Do we move the
+    ":" DbgRecords? Would they go in front of the "=" DbgRecords, or should the
+    "=" DbgRecords go before "+" DbgRecords?
 
     We're told which way it should be by the bits carried in the iterators. The
     "Head" bit indicates whether the specified position is supposed to be at the
-    front of the attached DPValues (true) or not (false). The Tail bit is true
-    on the other end of a range: is the range intended to include DPValues up to
-    the end (false) or not (true).
+    front of the attached DbgRecords (true) or not (false). The Tail bit is true
+    on the other end of a range: is the range intended to include DbgRecords up
+    to the end (false) or not (true).
 
     FIXME: the tail bit doesn't need to be distinct from the head bit, we could
     combine them.
@@ -934,15 +935,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
 
    */
 
-  // Detach the marker at Dest -- this lets us move the "====" DPValues around.
+  // Detach the marker at Dest -- this lets us move the "====" DbgRecords
+  // around.
   DPMarker *DestMarker = nullptr;
   if (Dest != end()) {
     if ((DestMarker = getMarker(Dest)))
       DestMarker->removeFromParent();
   }
 
-  // If we're moving the tail range of DPValues (":::"), absorb them into the
-  // front of the DPValues at Dest.
+  // If we're moving the tail range of DbgRecords (":::"), absorb them into the
+  // front of the DbgRecords at Dest.
   if (ReadFromTail && Src->getMarker(Last)) {
     DPMarker *FromLast = Src->getMarker(Last);
     if (LastIsEnd) {
@@ -956,7 +958,7 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
     }
   }
 
-  // If we're _not_ reading from the head of First, i.e. the "++++" DPValues,
+  // If we're _not_ reading from the head of First, i.e. the "++++" DbgRecords,
   // move their markers onto Last. They remain in the Src block. No action
   // needed.
   if (!ReadFromHead && First->hasDbgRecords()) {
@@ -970,16 +972,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
     }
   }
 
-  // Finally, do something with the "====" DPValues we detached.
+  // Finally, do something with the "====" DbgRecords we detached.
   if (DestMarker) {
     if (InsertAtHead) {
-      // Insert them at the end of the DPValues at Dest. The "::::" DPValues
+      // Insert them at the end of the DbgRecords at Dest. The "::::" DbgRecords
       // might be in front of them.
       DPMarker *NewDestMarker = createMarker(Dest);
       NewDestMarker->absorbDebugValues(*DestMarker, false);
     } else {
       // Insert them right at the start of the range we moved, ahead of First
-      // and the "++++" DPValues.
+      // and the "++++" DbgRecords.
       DPMarker *FirstMarker = createMarker(First);
       FirstMarker->absorbDebugValues(*DestMarker, true);
     }
@@ -990,10 +992,10 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
     // any trailing debug-info at the end of the block would "normally" have
     // been pushed in front of "First". Move it there now.
     DPMarker *FirstMarker = getMarker(First);
-    DPMarker *TrailingDPValues = getTrailingDbgRecords();
-    if (TrailingDPValues) {
-      FirstMarker->absorbDebugValues(*TrailingDPValues, true);
-      TrailingDPValues->eraseFromParent();
+    DPMarker *TrailingDbgRecords = getTrailingDbgRecords();
+    if (TrailingDbgRecords) {
+      FirstMarker->absorbDebugValues(*TrailingDbgRecords, true);
+      TrailingDbgRecords->eraseFromParent();
       deleteTrailingDbgRecords();
     }
   }
@@ -1024,7 +1026,7 @@ void BasicBlock::splice(iterator Dest, BasicBlock *Src, iterator First,
   // And move the instructions.
   getInstList().splice(Dest, Src->getInstList(), First, Last);
 
-  flushTerminatorDbgValues();
+  flushTerminatorDbgRecords();
 }
 
 void BasicBlock::insertDbgRecordAfter(DbgRecord *DPV, Instruction *I) {
@@ -1057,38 +1059,40 @@ DPMarker *BasicBlock::getMarker(InstListType::iterator It) {
 }
 
 void BasicBlock::reinsertInstInDbgRecords(
-    Instruction *I, std::optional<DPValue::self_iterator> Pos) {
+    Instruction *I, std::optional<DbgRecord::self_iterator> Pos) {
   // "I" was originally removed from a position where it was
-  // immediately in front of Pos. Any DPValues on that position then "fell down"
-  // onto Pos. "I" has been re-inserted at the front of that wedge of DPValues,
-  // shuffle them around to represent the original positioning. To illustrate:
+  // immediately in front of Pos. Any DbgRecords on that position then "fell
+  // down" onto Pos. "I" has been re-inserted at the front of that wedge of
+  // DbgRecords, shuffle them around to represent the original positioning. To
+  // illustrate:
   //
   //   Instructions:  I1---I---I0
-  //       DPValues:    DDD DDD
+  //       DbgRecords:    DDD DDD
   //
   // Instruction "I" removed,
   //
   //   Instructions:  I1------I0
-  //       DPValues:    DDDDDD
+  //       DbgRecords:    DDDDDD
   //                       ^Pos
   //
   // Instruction "I" re-inserted (now):
   //
   //   Instructions:  I1---I------I0
-  //       DPValues:        DDDDDD
+  //       DbgRecords:        DDDDDD
   //                           ^Pos
   //
   // After this method completes:
   //
   //   Instructions:  I1---I---I0
-  //       DPValues:    DDD DDD
+  //       DbgRecords:    DDD DDD
 
-  // This happens if there were no DPValues on I0. Are there now DPValues there?
+  // This happens if there were no DbgRecords on I0. Are there now DbgRecords
+  // there?
   if (!Pos) {
     DPMarker *NextMarker = getNextMarker(I);
     if (!NextMarker)
       return;
-    if (NextMarker->StoredDPValues.empty())
+    if (NextMarker->StoredDbgRecords.empty())
       return;
     // There are DPMarkers there now -- they fell down from "I".
     DPMarker *ThisMarker = createMarker(I);
@@ -1096,15 +1100,15 @@ void BasicBlock::reinsertInstInDbgRecords(
     return;
   }
 
-  // Is there even a range of DPValues to move?
+  // Is there even a range of DbgRecords to move?
   DPMarker *DPM = (*Pos)->getMarker();
-  auto Range = make_range(DPM->StoredDPValues.begin(), (*Pos));
+  auto Range = make_range(DPM->StoredDbgRecords.begin(), (*Pos));
   if (Range.begin() == Range.end())
     return;
 
   // Otherwise: splice.
   DPMarker *ThisMarker = createMarker(I);
-  assert(ThisMarker->StoredDPValues.empty());
+  assert(ThisMarker->StoredDbgRecords.empty());
   ThisMarker->absorbDebugValues(Range, *DPM, true);
 }
 
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index e63b1e6..d168950 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -895,7 +895,7 @@ bool llvm::stripNonLineTableDebugInfo(Module &M) {
         if (I.hasMetadataOtherThanDebugLoc())
           I.setMetadata("heapallocsite", nullptr);
 
-        // Strip any DPValues attached.
+        // Strip any DbgRecords attached.
         I.dropDbgRecords();
       }
     }
diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp
index 019b00c..f34d3ae 100644
--- a/llvm/lib/IR/DebugProgramInstruction.cpp
+++ b/llvm/lib/IR/DebugProgramInstruction.cpp
@@ -1,4 +1,4 @@
-//======-- DebugProgramInstruction.cpp - Implement DPValues/DPMarkers --======//
+//=====-- DebugProgramInstruction.cpp - Implement DbgRecords/DPMarkers --=====//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -541,21 +541,21 @@ void DbgRecord::moveAfter(DbgRecord *MoveAfter) {
 ///////////////////////////////////////////////////////////////////////////////
 
 // An empty, global, DPMarker for the purpose of describing empty ranges of
-// DPValues.
+// DbgRecords.
 DPMarker DPMarker::EmptyDPMarker;
 
 void DPMarker::dropDbgRecords() {
-  while (!StoredDPValues.empty()) {
-    auto It = StoredDPValues.begin();
+  while (!StoredDbgRecords.empty()) {
+    auto It = StoredDbgRecords.begin();
     DbgRecord *DR = &*It;
-    StoredDPValues.erase(It);
+    StoredDbgRecords.erase(It);
     DR->deleteRecord();
   }
 }
 
 void DPMarker::dropOneDbgRecord(DbgRecord *DR) {
   assert(DR->getMarker() == this);
-  StoredDPValues.erase(DR->getIterator());
+  StoredDbgRecords.erase(DR->getIterator());
   DR->deleteRecord();
 }
 
@@ -566,15 +566,15 @@ const BasicBlock *DPMarker::getParent() const {
 BasicBlock *DPMarker::getParent() { return MarkedInstr->getParent(); }
 
 void DPMarker::removeMarker() {
-  // Are there any DPValues in this DPMarker? If not, nothing to preserve.
+  // Are there any DbgRecords in this DPMarker? If not, nothing to preserve.
   Instruction *Owner = MarkedInstr;
-  if (StoredDPValues.empty()) {
+  if (StoredDbgRecords.empty()) {
     eraseFromParent();
     Owner->DbgMarker = nullptr;
     return;
   }
 
-  // The attached DPValues need to be preserved; attach them to the next
+  // The attached DbgRecords need to be preserved; attach them to the next
   // instruction. If there isn't a next instruction, put them on the
   // "trailing" list.
   DPMarker *NextMarker = Owner->getParent()->getNextMarker(Owner);
@@ -610,15 +610,15 @@ void DPMarker::eraseFromParent() {
 }
 
 iterator_range<DbgRecord::self_iterator> DPMarker::getDbgRecordRange() {
-  return make_range(StoredDPValues.begin(), StoredDPValues.end());
+  return make_range(StoredDbgRecords.begin(), StoredDbgRecords.end());
 }
 iterator_range<DbgRecord::const_self_iterator>
 DPMarker::getDbgRecordRange() const {
-  return make_range(StoredDPValues.begin(), StoredDPValues.end());
+  return make_range(StoredDbgRecords.begin(), StoredDbgRecords.end());
 }
 
 void DbgRecord::removeFromParent() {
-  getMarker()->StoredDPValues.erase(getIterator());
+  getMarker()->StoredDbgRecords.erase(getIterator());
   Marker = nullptr;
 }
 
@@ -628,29 +628,29 @@ void DbgRecord::eraseFromParent() {
 }
 
 void DPMarker::insertDbgRecord(DbgRecord *New, bool InsertAtHead) {
-  auto It = InsertAtHead ? StoredDPValues.begin() : StoredDPValues.end();
-  StoredDPValues.insert(It, *New);
+  auto It = InsertAtHead ? StoredDbgRecords.begin() : StoredDbgRecords.end();
+  StoredDbgRecords.insert(It, *New);
   New->setMarker(this);
 }
 void DPMarker::insertDbgRecord(DbgRecord *New, DbgRecord *InsertBefore) {
   assert(InsertBefore->getMarker() == this &&
-         "DPValue 'InsertBefore' must be contained in this DPMarker!");
-  StoredDPValues.insert(InsertBefore->getIterator(), *New);
+         "DbgRecord 'InsertBefore' must be contained in this DPMarker!");
+  StoredDbgRecords.insert(InsertBefore->getIterator(), *New);
   New->setMarker(this);
 }
 void DPMarker::insertDbgRecordAfter(DbgRecord *New, DbgRecord *InsertAfter) {
   assert(InsertAfter->getMarker() == this &&
-         "DPValue 'InsertAfter' must be contained in this DPMarker!");
-  StoredDPValues.insert(++(InsertAfter->getIterator()), *New);
+         "DbgRecord 'InsertAfter' must be contained in this DPMarker!");
+  StoredDbgRecords.insert(++(InsertAfter->getIterator()), *New);
   New->setMarker(this);
 }
 
 void DPMarker::absorbDebugValues(DPMarker &Src, bool InsertAtHead) {
-  auto It = InsertAtHead ? StoredDPValues.begin() : StoredDPValues.end();
-  for (DbgRecord &DPV : Src.StoredDPValues)
+  auto It = InsertAtHead ? StoredDbgRecords.begin() : StoredDbgRecords.end();
+  for (DbgRecord &DPV : Src.StoredDbgRecords)
     DPV.setMarker(this);
 
-  StoredDPValues.splice(It, Src.StoredDPValues);
+  StoredDbgRecords.splice(It, Src.StoredDbgRecords);
 }
 
 void DPMarker::absorbDebugValues(iterator_range<DbgRecord::self_iterator> Range,
@@ -659,45 +659,45 @@ void DPMarker::absorbDebugValues(iterator_range<DbgRecord::self_iterator> Range,
     DR.setMarker(this);
 
   auto InsertPos =
-      (InsertAtHead) ? StoredDPValues.begin() : StoredDPValues.end();
+      (InsertAtHead) ? StoredDbgRecords.begin() : StoredDbgRecords.end();
 
-  StoredDPValues.splice(InsertPos, Src.StoredDPValues, Range.begin(),
-                        Range.end());
+  StoredDbgRecords.splice(InsertPos, Src.StoredDbgRecords, Range.begin(),
+                          Range.end());
 }
 
 iterator_range<simple_ilist<DbgRecord>::iterator> DPMarker::cloneDebugInfoFrom(
     DPMarker *From, std::optional<simple_ilist<DbgRecord>::iterator> from_here,
     bool InsertAtHead) {
   DbgRecord *First = nullptr;
-  // Work out what range of DPValues to clone: normally all the contents of the
-  // "From" marker, optionally we can start from the from_here position down to
-  // end().
+  // Work out what range of DbgRecords to clone: normally all the contents of
+  // the "From" marker, optionally we can start from the from_here position down
+  // to end().
   auto Range =
-      make_range(From->StoredDPValues.begin(), From->StoredDPValues.end());
+      make_range(From->StoredDbgRecords.begin(), From->StoredDbgRecords.end());
   if (from_here.has_value())
-    Range = make_range(*from_here, From->StoredDPValues.end());
+    Range = make_range(*from_here, From->StoredDbgRecords.end());
 
   // Clone each DPValue and insert into StoreDPValues; optionally place them at
   // the start or the end of the list.
-  auto Pos = (InsertAtHead) ? StoredDPValues.begin() : StoredDPValues.end();
+  auto Pos = (InsertAtHead) ? StoredDbgRecords.begin() : StoredDbgRecords.end();
   for (DbgRecord &DR : Range) {
     DbgRecord *New = DR.clone();
     New->setMarker(this);
-    StoredDPValues.insert(Pos, *New);
+    StoredDbgRecords.insert(Pos, *New);
     if (!First)
       First = New;
   }
 
   if (!First)
-    return {StoredDPValues.end(), StoredDPValues.end()};
+    return {StoredDbgRecords.end(), StoredDbgRecords.end()};
 
   if (InsertAtHead)
     // If InsertAtHead is set, we cloned a range onto the front of of the
-    // StoredDPValues collection, return that range.
-    return {StoredDPValues.begin(), Pos};
+    // StoredDbgRecords collection, return that range.
+    return {StoredDbgRecords.begin(), Pos};
   else
     // We inserted a block at the end, return that range.
-    return {First->getIterator(), StoredDPValues.end()};
+    return {First->getIterator(), StoredDbgRecords.end()};
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index e089239..7a677d7 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -143,7 +143,7 @@ void Instruction::insertBefore(BasicBlock &BB,
     return;
 
   // We've inserted "this": if InsertAtHead is set then it comes before any
-  // DPValues attached to InsertPos. But if it's not set, then any DPValues
+  // DPValues attached to InsertPos. But if it's not set, then any DbgRecords
   // should now come before "this".
   bool InsertAtHead = InsertPos.getHeadBit();
   if (!InsertAtHead) {
@@ -166,10 +166,10 @@ void Instruction::insertBefore(BasicBlock &BB,
   }
 
   // If we're inserting a terminator, check if we need to flush out
-  // TrailingDPValues. Inserting instructions at the end of an incomplete
+  // TrailingDbgRecords. Inserting instructions at the end of an incomplete
   // block is handled by the code block above.
   if (isTerminator())
-    getParent()->flushTerminatorDbgValues();
+    getParent()->flushTerminatorDbgRecords();
 }
 
 /// Unlink this instruction from its current basic block and insert it into the
@@ -212,12 +212,12 @@ void Instruction::moveBeforeImpl(BasicBlock &BB, InstListType::iterator I,
   assert(I == BB.end() || I->getParent() == &BB);
   bool InsertAtHead = I.getHeadBit();
 
-  // If we've been given the "Preserve" flag, then just move the DPValues with
+  // If we've been given the "Preserve" flag, then just move the DbgRecords with
   // the instruction, no more special handling needed.
   if (BB.IsNewDbgInfoFormat && DbgMarker && !Preserve) {
     if (I != this->getIterator() || InsertAtHead) {
       // "this" is definitely moving in the list, or it's moving ahead of its
-      // attached DPValues. Detach any existing DPValues.
+      // attached DPValues. Detach any existing DbgRecords.
       handleMarkerRemoval();
     }
   }
@@ -229,15 +229,15 @@ void Instruction::moveBeforeImpl(BasicBlock &BB, InstListType::iterator I,
   if (BB.IsNewDbgInfoFormat && !Preserve) {
     DPMarker *NextMarker = getParent()->getNextMarker(this);
 
-    // If we're inserting at point I, and not in front of the DPValues attached
-    // there, then we should absorb the DPValues attached to I.
+    // If we're inserting at point I, and not in front of the DbgRecords
+    // attached there, then we should absorb the DbgRecords attached to I.
     if (!InsertAtHead && NextMarker && !NextMarker->empty()) {
       adoptDbgRecords(&BB, I, false);
     }
   }
 
   if (isTerminator())
-    getParent()->flushTerminatorDbgValues();
+    getParent()->flushTerminatorDbgRecords();
 }
 
 iterator_range<DbgRecord::self_iterator> Instruction::cloneDebugInfoFrom(
@@ -263,11 +263,11 @@ Instruction::getDbgReinsertionPosition() {
   if (!NextMarker)
     return std::nullopt;
 
-  // Are there any DPValues in the next marker?
-  if (NextMarker->StoredDPValues.empty())
+  // Are there any DbgRecords in the next marker?
+  if (NextMarker->StoredDbgRecords.empty())
     return std::nullopt;
 
-  return NextMarker->StoredDPValues.begin();
+  return NextMarker->StoredDbgRecords.begin();
 }
 
 bool Instruction::hasDbgRecords() const { return !getDbgRecordRange().empty(); }
@@ -275,20 +275,20 @@ bool Instruction::hasDbgRecords() const { return !getDbgRecordRange().empty(); }
 void Instruction::adoptDbgRecords(BasicBlock *BB, BasicBlock::iterator It,
                                   bool InsertAtHead) {
   DPMarker *SrcMarker = BB->getMarker(It);
-  auto ReleaseTrailingDPValues = [BB, It, SrcMarker]() {
+  auto ReleaseTrailingDbgRecords = [BB, It, SrcMarker]() {
     if (BB->end() == It) {
       SrcMarker->eraseFromParent();
       BB->deleteTrailingDbgRecords();
     }
   };
 
-  if (!SrcMarker || SrcMarker->StoredDPValues.empty()) {
-    ReleaseTrailingDPValues();
+  if (!SrcMarker || SrcMarker->StoredDbgRecords.empty()) {
+    ReleaseTrailingDbgRecords();
     return;
   }
 
   // If we have DPMarkers attached to this instruction, we have to honour the
-  // ordering of DPValues between this and the other marker. Fall back to just
+  // ordering of DbgRecords between this and the other marker. Fall back to just
   // absorbing from the source.
   if (DbgMarker || It == BB->end()) {
     // Ensure we _do_ have a marker.
@@ -304,10 +304,11 @@ void Instruction::adoptDbgRecords(BasicBlock *BB, BasicBlock::iterator It,
     // block, it's important to not leave the empty marker trailing. It will
     // give a misleading impression that some debug records have been left
     // trailing.
-    ReleaseTrailingDPValues();
+    ReleaseTrailingDbgRecords();
   } else {
-    // Optimisation: we're transferring all the DPValues from the source marker
-    // onto this empty location: just adopt the other instructions marker.
+    // Optimisation: we're transferring all the DbgRecords from the source
+    // marker onto this empty location: just adopt the other instructions
+    // marker.
     DbgMarker = SrcMarker;
     DbgMarker->MarkedInstr = this;
     It->DbgMarker = nullptr;
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index a0bf9ca..a471314 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -50,7 +50,7 @@ LLVMContextImpl::~LLVMContextImpl() {
   // when it's terminator was removed were eventually replaced. This assertion
   // firing indicates that DPValues went missing during the lifetime of the
   // LLVMContext.
-  assert(TrailingDPValues.empty() && "DPValue records in blocks not cleaned");
+  assert(TrailingDbgRecords.empty() && "DbgRecords in blocks not cleaned");
 #endif
 
   // NOTE: We need to delete the contents of OwnedModules, but Module's dtor
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index c841b28..b1dcb26 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -1684,19 +1684,19 @@ public:
   /// such a way. These are stored in LLVMContext because typically LLVM only
   /// edits a small number of blocks at a time, so there's no need to bloat
   /// BasicBlock with such a data structure.
-  SmallDenseMap<BasicBlock *, DPMarker *> TrailingDPValues;
+  SmallDenseMap<BasicBlock *, DPMarker *> TrailingDbgRecords;
 
-  // Set, get and delete operations for TrailingDPValues.
+  // Set, get and delete operations for TrailingDbgRecords.
   void setTrailingDbgRecords(BasicBlock *B, DPMarker *M) {
-    assert(!TrailingDPValues.count(B));
-    TrailingDPValues[B] = M;
+    assert(!TrailingDbgRecords.count(B));
+    TrailingDbgRecords[B] = M;
   }
 
   DPMarker *getTrailingDbgRecords(BasicBlock *B) {
-    return TrailingDPValues.lookup(B);
+    return TrailingDbgRecords.lookup(B);
   }
 
-  void deleteTrailingDbgRecords(BasicBlock *B) { TrailingDPValues.erase(B); }
+  void deleteTrailingDbgRecords(BasicBlock *B) { TrailingDbgRecords.erase(B); }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp
index 95d32e3..609ef09 100644
--- a/llvm/lib/MC/MCSectionXCOFF.cpp
+++ b/llvm/lib/MC/MCSectionXCOFF.cpp
@@ -87,8 +87,7 @@ void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
     if (getKind().isCommon() && !getKind().isBSSLocal())
       return;
 
-    assert((getKind().isBSSExtern() || getKind().isBSSLocal()) &&
-           "Unexepected section kind for toc-data");
+    assert(getKind().isBSS() && "Unexpected section kind for toc-data");
     printCsectDirective(OS);
     return;
   }
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 9000e9a..6139d99 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -969,12 +969,19 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
   Err = Error::success();
 }
 
+object::Archive::Kind Archive::getDefaultKindForTriple(Triple &T) {
+  if (T.isOSDarwin())
+    return object::Archive::K_DARWIN;
+  if (T.isOSAIX())
+    return object::Archive::K_AIXBIG;
+  if (T.isOSWindows())
+    return object::Archive::K_COFF;
+  return object::Archive::K_GNU;
+}
+
 object::Archive::Kind Archive::getDefaultKind() {
   Triple HostTriple(sys::getDefaultTargetTriple());
-  return HostTriple.isOSDarwin()
-             ? object::Archive::K_DARWIN
-             : (HostTriple.isOSAIX() ? object::Archive::K_AIXBIG
-                                     : object::Archive::K_GNU);
+  return getDefaultKindForTriple(HostTriple);
 }
 
 Archive::child_iterator Archive::child_begin(Error &Err,
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index e062974..aa57e55 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -62,12 +62,16 @@ object::Archive::Kind NewArchiveMember::detectKindFromObject() const {
   Expected<std::unique_ptr<object::ObjectFile>> OptionalObject =
       object::ObjectFile::createObjectFile(MemBufferRef);
 
-  if (OptionalObject)
-    return isa<object::MachOObjectFile>(**OptionalObject)
-               ? object::Archive::K_DARWIN
-               : (isa<object::XCOFFObjectFile>(**OptionalObject)
-                      ? object::Archive::K_AIXBIG
-                      : object::Archive::K_GNU);
+  if (OptionalObject) {
+    if (isa<object::MachOObjectFile>(**OptionalObject))
+      return object::Archive::K_DARWIN;
+    if (isa<object::XCOFFObjectFile>(**OptionalObject))
+      return object::Archive::K_AIXBIG;
+    if (isa<object::COFFObjectFile>(**OptionalObject) ||
+        isa<object::COFFImportFile>(**OptionalObject))
+      return object::Archive::K_COFF;
+    return object::Archive::K_GNU;
+  }
 
   // Squelch the error in case we had a non-object file.
   consumeError(OptionalObject.takeError());
@@ -80,10 +84,7 @@ object::Archive::Kind NewArchiveMember::detectKindFromObject() const {
             MemBufferRef, file_magic::bitcode, &Context)) {
       auto &IRObject = cast<object::IRObjectFile>(**ObjOrErr);
       auto TargetTriple = Triple(IRObject.getTargetTriple());
-      return TargetTriple.isOSDarwin()
-                 ? object::Archive::K_DARWIN
-                 : (TargetTriple.isOSAIX() ? object::Archive::K_AIXBIG
-                                           : object::Archive::K_GNU);
+      return object::Archive::getDefaultKindForTriple(TargetTriple);
     } else {
       // Squelch the error in case this was not a SymbolicFile.
       consumeError(ObjOrErr.takeError());
@@ -976,10 +977,12 @@ static Error writeArchiveToStream(raw_ostream &Out,
   SmallString<0> StringTableBuf;
   raw_svector_ostream StringTable(StringTableBuf);
   SymMap SymMap;
+  bool ShouldWriteSymtab = WriteSymtab != SymtabWritingMode::NoSymtab;
 
   // COFF symbol map uses 16-bit indexes, so we can't use it if there are too
-  // many members.
-  if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe)
+  // many members. COFF format also requires symbol table presence, so use
+  // GNU format when NoSymtab is requested.
+  if (isCOFFArchive(Kind) && (NewMembers.size() > 0xfffe || !ShouldWriteSymtab))
     Kind = object::Archive::K_GNU;
 
   // In the scenario when LLVMContext is populated SymbolicFile will contain a
@@ -1008,7 +1011,6 @@ static Error writeArchiveToStream(raw_ostream &Out,
   uint64_t LastMemberHeaderOffset = 0;
   uint64_t NumSyms = 0;
   uint64_t NumSyms32 = 0; // Store symbol number of 32-bit member files.
-  bool ShouldWriteSymtab = WriteSymtab != SymtabWritingMode::NoSymtab;
 
   for (const auto &M : Data) {
     // Record the start of the member's offset
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 054311d..9665ae5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7751,7 +7751,7 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
   // register allocator to pass call args in callee saved regs, without extra
   // copies to avoid these fake clobbers of actually-preserved GPRs.
   if (MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
-      MI.getOpcode() == AArch64::MSRpstatePseudo)
+      MI.getOpcode() == AArch64::MSRpstatePseudo) {
     for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)
       if (MachineOperand &MO = MI.getOperand(I);
           MO.isReg() && MO.isImplicit() && MO.isDef() &&
@@ -7759,6 +7759,16 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
            AArch64::GPR64RegClass.contains(MO.getReg())))
         MI.removeOperand(I);
 
+    // The SVE vector length can change when entering/leaving streaming mode.
+    if (MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
+        MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {
+      MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false,
+                                              /*IsImplicit=*/true));
+      MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/true,
+                                              /*IsImplicit=*/true));
+    }
+  }
+
   // Add an implicit use of 'VG' for ADDXri/SUBXri, which are instructions that
   // have nothing to do with VG, were it not that they are used to materialise a
   // frame-address. If they contain a frame-index to a scalable vector, this
@@ -21413,12 +21423,8 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
-  // Only implemented on little-endian subtargets.
-  bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
-
-  // This optimization only works on little endian.
-  if (!IsLittleEndian)
+  // These optimizations only work on little endian.
+  if (!DAG.getDataLayout().isLittleEndian())
     return SDValue();
 
   // uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y)
@@ -21437,21 +21443,28 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
   if (ResVT != MVT::v2i32 && ResVT != MVT::v4i16 && ResVT != MVT::v8i8)
     return SDValue();
 
-  auto getSourceOp = [](SDValue Operand) -> SDValue {
-    const unsigned Opcode = Operand.getOpcode();
-    if (Opcode == ISD::TRUNCATE)
-      return Operand->getOperand(0);
-    if (Opcode == ISD::BITCAST &&
-        Operand->getOperand(0).getOpcode() == ISD::TRUNCATE)
-      return Operand->getOperand(0)->getOperand(0);
-    return SDValue();
-  };
+  SDValue SourceOp0 = peekThroughBitcasts(Op0);
+  SDValue SourceOp1 = peekThroughBitcasts(Op1);
 
-  SDValue SourceOp0 = getSourceOp(Op0);
-  SDValue SourceOp1 = getSourceOp(Op1);
+  // truncating uzp1(x, y) -> xtn(concat (x, y))
+  if (SourceOp0.getValueType() == SourceOp1.getValueType()) {
+    EVT Op0Ty = SourceOp0.getValueType();
+    if ((ResVT == MVT::v4i16 && Op0Ty == MVT::v2i32) ||
+        (ResVT == MVT::v8i8 && Op0Ty == MVT::v4i16)) {
+      SDValue Concat =
+          DAG.getNode(ISD::CONCAT_VECTORS, DL,
+                      Op0Ty.getDoubleNumVectorElementsVT(*DAG.getContext()),
+                      SourceOp0, SourceOp1);
+      return DAG.getNode(ISD::TRUNCATE, DL, ResVT, Concat);
+    }
+  }
 
-  if (!SourceOp0 || !SourceOp1)
+  // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
+  if (SourceOp0.getOpcode() != ISD::TRUNCATE ||
+      SourceOp1.getOpcode() != ISD::TRUNCATE)
     return SDValue();
+  SourceOp0 = SourceOp0.getOperand(0);
+  SourceOp1 = SourceOp1.getOperand(0);
 
   if (SourceOp0.getValueType() != SourceOp1.getValueType() ||
       !SourceOp0.getValueType().isSimple())
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6254e68..b4b975c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6153,26 +6153,39 @@ defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
 defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
 defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
 
-def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
-                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
-          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
-def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
-                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
-          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
-def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
-                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
-          (UZP1v4i32 V128:$Vn, V128:$Vm)>;
-// These are the same as above, with an optional assertzext node that can be
-// generated from fptoi lowering.
-def : Pat<(v16i8 (concat_vectors (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))),
-                                 (v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))),
-          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
-def : Pat<(v8i16 (concat_vectors (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))),
-                                 (v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))),
-          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
-def : Pat<(v4i32 (concat_vectors (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))),
-                                 (v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))),
-          (UZP1v4i32 V128:$Vn, V128:$Vm)>;
+def trunc_optional_assert_ext : PatFrags<(ops node:$op0),
+                                         [(trunc node:$op0),
+                                          (assertzext (trunc node:$op0)),
+                                          (assertsext (trunc node:$op0))]>;
+
+// concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
+// concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
+// concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
+class concat_trunc_to_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy>
+  : Pat<(ConcatTy (concat_vectors (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
+                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))),
+        (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm)>;
+def : concat_trunc_to_uzp1_pat<v8i16, v8i8, v16i8>;
+def : concat_trunc_to_uzp1_pat<v4i32, v4i16, v8i16>;
+def : concat_trunc_to_uzp1_pat<v2i64, v2i32, v4i32>;
+
+// trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
+// trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
+// trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
+class trunc_concat_trunc_to_xtn_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy,
+                                         ValueType Ty>
+  : Pat<(Ty (trunc_optional_assert_ext
+                    (ConcatTy (concat_vectors
+                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
+                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))))),
+        (!cast<Instruction>("XTN"#Ty) (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm))>;
+def : trunc_concat_trunc_to_xtn_uzp1_pat<v4i32, v4i16, v8i16, v8i8>;
+def : trunc_concat_trunc_to_xtn_uzp1_pat<v2i64, v2i32, v4i32, v4i16>;
+
+def : Pat<(v8i8 (trunc (concat_vectors (v4i16 V64:$Vn), (v4i16 V64:$Vm)))),
+          (UZP1v8i8 V64:$Vn, V64:$Vm)>;
+def : Pat<(v4i16 (trunc (concat_vectors (v2i32 V64:$Vn), (v2i32 V64:$Vm)))),
+          (UZP1v4i16 V64:$Vn, V64:$Vm)>;
 
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 33cb5f9..44d9a8a 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -223,8 +223,6 @@ def MSRpstatesvcrImm1
   let Inst{8} = imm;
   let Inst{7-5} = 0b011; // op2
   let hasPostISelHook = 1;
-  let Uses = [VG];
-  let Defs = [VG];
 }
 
 def : InstAlias<"smstart",    (MSRpstatesvcrImm1 0b011, 0b1)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 5ed82c0..86f77f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -194,7 +194,25 @@ class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
   }];
 }
 
-class is_canonicalized<SDPatternOperator op> : PatFrag<
+class is_canonicalized_1<SDPatternOperator op> : PatFrag<
+  (ops node:$src0),
+  (op $src0),
+  [{
+    const SITargetLowering &Lowering =
+              *static_cast<const SITargetLowering *>(getTargetLowering());
+
+    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0));
+   }]> {
+
+  let GISelPredicateCode = [{
+    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
+      MF.getSubtarget().getTargetLowering());
+
+    return TLI->isCanonicalized(MI.getOperand(1).getReg(), MF);
+  }];
+}
+
+class is_canonicalized_2<SDPatternOperator op> : PatFrag<
   (ops node:$src0, node:$src1),
   (op $src0, $src1),
   [{
@@ -210,8 +228,8 @@ class is_canonicalized<SDPatternOperator op> : PatFrag<
     const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
       MF.getSubtarget().getTargetLowering());
 
-    return TLI->isCanonicalized(MI.getOperand(1).getReg(), const_cast<MachineFunction&>(MF)) &&
-      TLI->isCanonicalized(MI.getOperand(2).getReg(), const_cast<MachineFunction&>(MF));
+    return TLI->isCanonicalized(MI.getOperand(1).getReg(), MF) &&
+      TLI->isCanonicalized(MI.getOperand(2).getReg(), MF);
   }];
 }
 
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index c709102..4ae514f 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -2616,7 +2616,6 @@ defm BUFFER_ATOMIC_CMPSWAP_X2     : MUBUF_Real_Atomic_gfx11_gfx12<0x042, "buffer
 defm BUFFER_ATOMIC_FCMPSWAP       : MUBUF_Real_Atomic_gfx11<0x050, "buffer_atomic_cmpswap_f32">;
 defm BUFFER_ATOMIC_COND_SUB_U32   : MUBUF_Real_Atomic_gfx12<0x050>;
 defm BUFFER_ATOMIC_CSUB           : MUBUF_Real_Atomic_gfx11_gfx12<0x037, "buffer_atomic_sub_clamp_u32", "buffer_atomic_csub_u32">;
-def : Mnem_gfx11_gfx12<"buffer_atomic_csub", "buffer_atomic_csub_u32">;
 defm BUFFER_ATOMIC_DEC            : MUBUF_Real_Atomic_gfx11_gfx12<0x040, "buffer_atomic_dec_u32">;
 defm BUFFER_ATOMIC_DEC_X2         : MUBUF_Real_Atomic_gfx11_gfx12<0x04D, "buffer_atomic_dec_u64">;
 defm BUFFER_ATOMIC_INC            : MUBUF_Real_Atomic_gfx11_gfx12<0x03F, "buffer_atomic_inc_u32">;
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 87ace01..e944dde 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1735,14 +1735,12 @@ def DS_WRITE_B128_vi      : DS_Real_vi<0xdf, DS_WRITE_B128>;
 def DS_READ_B96_vi        : DS_Real_vi<0xfe, DS_READ_B96>;
 def DS_READ_B128_vi       : DS_Real_vi<0xff, DS_READ_B128>;
 
-let SubtargetPredicate = isGFX90APlus in {
-  def DS_ADD_F64_vi     : DS_Real_vi<0x5c, DS_ADD_F64>;
-  def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
-} // End SubtargetPredicate = isGFX90APlus
-
-let SubtargetPredicate = isGFX940Plus in {
-  def DS_PK_ADD_F16_vi     : DS_Real_vi<0x17, DS_PK_ADD_F16>;
-  def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
-  def DS_PK_ADD_BF16_vi     : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
-  def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>;
-} // End SubtargetPredicate = isGFX940Plus
+// GFX90A+.
+def DS_ADD_F64_vi     : DS_Real_vi<0x5c, DS_ADD_F64>;
+def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
+
+// GFX940+.
+def DS_PK_ADD_F16_vi     : DS_Real_vi<0x17, DS_PK_ADD_F16>;
+def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
+def DS_PK_ADD_BF16_vi     : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
+def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9bc1b8e..5ccf21f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12572,6 +12572,10 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
   case ISD::FREM:
   case ISD::FP_ROUND:
   case ISD::FP_EXTEND:
+  case ISD::FP16_TO_FP:
+  case ISD::FP_TO_FP16:
+  case ISD::BF16_TO_FP:
+  case ISD::FP_TO_BF16:
   case ISD::FLDEXP:
   case AMDGPUISD::FMUL_LEGACY:
   case AMDGPUISD::FMAD_FTZ:
@@ -12591,6 +12595,9 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
   case AMDGPUISD::CVT_F32_UBYTE1:
   case AMDGPUISD::CVT_F32_UBYTE2:
   case AMDGPUISD::CVT_F32_UBYTE3:
+  case AMDGPUISD::FP_TO_FP16:
+  case AMDGPUISD::SIN_HW:
+  case AMDGPUISD::COS_HW:
     return true;
 
   // It can/will be lowered or combined as a bit operation.
@@ -12600,6 +12607,20 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
   case ISD::FCOPYSIGN:
     return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
 
+  case ISD::AND:
+    if (Op.getValueType() == MVT::i32) {
+      // Be careful as we only know it is a bitcast floating point type. It
+      // could be f32, v2f16, we have no way of knowing. Luckily the constant
+      // value that we optimize for, which comes up in fp32 to bf16 conversions,
+      // is valid to optimize for all types.
+      if (auto *RHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+        if (RHS->getZExtValue() == 0xffff0000) {
+          return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
+        }
+      }
+    }
+    break;
+
   case ISD::FSIN:
   case ISD::FCOS:
   case ISD::FSINCOS:
@@ -12665,6 +12686,9 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
     return false;
 
   case ISD::BITCAST:
+    // TODO: This is incorrect as it loses track of the operand's type. We may
+    // end up effectively bitcasting from f32 to v2f16 or vice versa, and the
+    // same bits that are canonicalized in one type need not be in the other.
     return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
   case ISD::TRUNCATE: {
     // Hack round the mess we make when legalizing extract_vector_elt
@@ -12694,25 +12718,26 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
     case Intrinsic::amdgcn_trig_preop:
     case Intrinsic::amdgcn_log:
     case Intrinsic::amdgcn_exp2:
+    case Intrinsic::amdgcn_sqrt:
       return true;
     default:
       break;
     }
 
-    [[fallthrough]];
+    break;
   }
   default:
-    // FIXME: denormalsEnabledForType is broken for dynamic
-    return denormalsEnabledForType(DAG, Op.getValueType()) &&
-           DAG.isKnownNeverSNaN(Op);
+    break;
   }
 
-  llvm_unreachable("invalid operation");
+  // FIXME: denormalsEnabledForType is broken for dynamic
+  return denormalsEnabledForType(DAG, Op.getValueType()) &&
+         DAG.isKnownNeverSNaN(Op);
 }
 
-bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
+bool SITargetLowering::isCanonicalized(Register Reg, const MachineFunction &MF,
                                        unsigned MaxDepth) const {
-  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
   MachineInstr *MI = MRI.getVRegDef(Reg);
   unsigned Opcode = MI->getOpcode();
 
@@ -12931,27 +12956,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
     }
   }
 
-  unsigned SrcOpc = N0.getOpcode();
-
-  // If it's free to do so, push canonicalizes further up the source, which may
-  // find a canonical source.
-  //
-  // TODO: More opcodes. Note this is unsafe for the _ieee minnum/maxnum for
-  // sNaNs.
-  if (SrcOpc == ISD::FMINNUM || SrcOpc == ISD::FMAXNUM) {
-    auto *CRHS = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
-    if (CRHS && N0.hasOneUse()) {
-      SDLoc SL(N);
-      SDValue Canon0 = DAG.getNode(ISD::FCANONICALIZE, SL, VT,
-                                   N0.getOperand(0));
-      SDValue Canon1 = getCanonicalConstantFP(DAG, SL, VT, CRHS->getValueAPF());
-      DCI.AddToWorklist(Canon0.getNode());
-
-      return DAG.getNode(N0.getOpcode(), SL, VT, Canon0, Canon1);
-    }
-  }
-
-  return isCanonicalized(DAG, N0) ? N0 : SDValue();
+  return SDValue();
 }
 
 static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
@@ -15939,8 +15944,8 @@ bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG,
   }
 }
 
-bool SITargetLowering::denormalsEnabledForType(LLT Ty,
-                                               MachineFunction &MF) const {
+bool SITargetLowering::denormalsEnabledForType(
+    LLT Ty, const MachineFunction &MF) const {
   switch (Ty.getScalarSizeInBits()) {
   case 32:
     return !denormalModeIsFlushAllF32(MF);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index a20442e..89da442 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -523,10 +523,10 @@ public:
 
   bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
                        unsigned MaxDepth = 5) const;
-  bool isCanonicalized(Register Reg, MachineFunction &MF,
+  bool isCanonicalized(Register Reg, const MachineFunction &MF,
                        unsigned MaxDepth = 5) const;
   bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
-  bool denormalsEnabledForType(LLT Ty, MachineFunction &MF) const;
+  bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;
 
   bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
                                  const TargetRegisterInfo *TRI,
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 33c93cd..3ab7884 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2944,6 +2944,34 @@ def : GCNPat<
    (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1))), sub0,
    (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0))), sub1)>;
 
+// If fcanonicalize's operand is implicitly canonicalized, we only need a copy.
+let AddedComplexity = 1000 in {
+def : GCNPat<
+  (is_canonicalized_1<fcanonicalize> f16:$src),
+  (COPY f16:$src)
+>;
+
+def : GCNPat<
+  (is_canonicalized_1<fcanonicalize> v2f16:$src),
+  (COPY v2f16:$src)
+>;
+
+def : GCNPat<
+  (is_canonicalized_1<fcanonicalize> f32:$src),
+  (COPY f32:$src)
+>;
+
+def : GCNPat<
+  (is_canonicalized_1<fcanonicalize> v2f32:$src),
+  (COPY v2f32:$src)
+>;
+
+def : GCNPat<
+  (is_canonicalized_1<fcanonicalize> f64:$src),
+  (COPY f64:$src)
+>;
+}
+
 // Prefer selecting to max when legal, but using mul is always valid.
 let AddedComplexity = -5 in {
 
@@ -3277,8 +3305,8 @@ def : GCNPat <
 
 let AddedComplexity = 5 in {
 def : GCNPat <
-  (v2f16 (is_canonicalized<build_vector> (f16 (VOP3Mods (f16 VGPR_32:$src0), i32:$src0_mods)),
-                                         (f16 (VOP3Mods (f16 VGPR_32:$src1), i32:$src1_mods)))),
+  (v2f16 (is_canonicalized_2<build_vector> (f16 (VOP3Mods (f16 VGPR_32:$src0), i32:$src0_mods)),
+                                           (f16 (VOP3Mods (f16 VGPR_32:$src1), i32:$src1_mods)))),
   (V_PACK_B32_F16_e64 $src0_mods, VGPR_32:$src0, $src1_mods, VGPR_32:$src1)
 >;
 }
@@ -3590,6 +3618,17 @@ FPMinMaxPat<Instruction minmaxInst, ValueType vt, SDPatternOperator min_or_max,
               DSTCLAMP.NONE, DSTOMOD.NONE)
 >;
 
+class
+FPMinCanonMaxPat<Instruction minmaxInst, ValueType vt, SDPatternOperator min_or_max,
+                 SDPatternOperator max_or_min_oneuse> : GCNPat <
+  (min_or_max (is_canonicalized_1<fcanonicalize>
+                  (max_or_min_oneuse (VOP3Mods vt:$src0, i32:$src0_mods),
+                                     (VOP3Mods vt:$src1, i32:$src1_mods))),
+               (vt (VOP3Mods vt:$src2, i32:$src2_mods))),
+  (minmaxInst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
+              DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
 let OtherPredicates = [isGFX11Plus] in {
 def : IntMinMaxPat<V_MAXMIN_I32_e64, smin, smax_oneuse>;
 def : IntMinMaxPat<V_MINMAX_I32_e64, smax, smin_oneuse>;
@@ -3599,6 +3638,10 @@ def : FPMinMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
 def : FPMinMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
 def : FPMinMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
 def : FPMinMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
 }
 
 let OtherPredicates = [isGFX9Plus] in {
@@ -3612,6 +3655,10 @@ def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fmi
 def : FPMinMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
 def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
 def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
+def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
+def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
+def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
+def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
 }
 
 // Convert a floating-point power of 2 to the integer exponent.
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
index 3fd7a1a..98cd3a8 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
@@ -695,8 +695,8 @@ bool PPCInstructionSelector::selectConstantPool(
                .addReg(HaAddrReg)
                .addMemOperand(MMO);
     else
-      // For medium code model, generate ADDItocL(CPI, ADDIStocHA8(X2, CPI))
-      MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::ADDItocL), DstReg)
+      // For medium code model, generate ADDItocL8(CPI, ADDIStocHA8(X2, CPI))
+      MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::ADDItocL8), DstReg)
                .addReg(HaAddrReg)
                .addConstantPoolIndex(CPI);
   }
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 3bbc5a6..5015ba8 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -881,7 +881,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
 // 3 Cycles ALU operations, 1 input operands
 def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
       (instrs
-    ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
+    ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL8, LI, LI8,
     ADDIC, ADDIC8,
     ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
     ADDME, ADDME8,
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 9396ca2..542854e 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1236,8 +1236,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
   }
-  case PPC::ADDItocL: {
-    // Transform %xd = ADDItocL %xs, @sym
+  case PPC::ADDItocL8: {
+    // Transform %xd = ADDItocL8 %xs, @sym
     LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
 
     // Change the opcode to ADDI8. If the global address is external, then
@@ -1246,7 +1246,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDI8);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL.");
+    assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL8.");
 
     LLVM_DEBUG(assert(
         !(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
@@ -2659,6 +2659,8 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
   // If the Global Variable has the toc-data attribute, it needs to be emitted
   // when we emit the .toc section.
   if (GV->hasAttribute("toc-data")) {
+    unsigned PointerSize = GV->getParent()->getDataLayout().getPointerSize();
+    Subtarget->tocDataChecks(PointerSize, GV);
     TOCDataGlobalVars.push_back(GV);
     return;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
index 8bbe315..6bb66bc 100644
--- a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
@@ -29,7 +29,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
     ADDIStocHA8,
     ADDIdtprelL32,
     ADDItlsldLADDR32,
-    ADDItocL,
+    ADDItocL8,
     ADDME,
     ADDME8,
     ADDME8O,
@@ -518,7 +518,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
     ADDIStocHA8,
     ADDIdtprelL32,
     ADDItlsldLADDR32,
-    ADDItocL,
+    ADDItocL8,
     ADDME,
     ADDME8,
     ADDME8O,
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 56af80f..6e31cda 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -2094,7 +2094,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
     // for large code model, we generate:
     //       LDtocL(GV, ADDIStocHA8(%x2, GV))
     // Otherwise we generate:
-    //       ADDItocL(ADDIStocHA8(%x2, GV), GV)
+    //       ADDItocL8(ADDIStocHA8(%x2, GV), GV)
     // Either way, start with the ADDIStocHA8:
     Register HighPartReg = createResultReg(RC);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
@@ -2104,9 +2104,11 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
               DestReg).addGlobalAddress(GV).addReg(HighPartReg);
     } else {
-      // Otherwise generate the ADDItocL.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL),
-              DestReg).addReg(HighPartReg).addGlobalAddress(GV);
+      // Otherwise generate the ADDItocL8.
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8),
+              DestReg)
+          .addReg(HighPartReg)
+          .addGlobalAddress(GV);
     }
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 9e5f0b3..0c25accd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -521,40 +521,6 @@ static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
 
   if (!GV->hasAttribute("toc-data"))
     return false;
-
-  // TODO: These asserts should be updated as more support for the toc data
-  // transformation is added (struct support, etc.).
-
-  assert(
-      PointerSize >= GV->getAlign().valueOrOne().value() &&
-      "GlobalVariables with an alignment requirement stricter than TOC entry "
-      "size not supported by the toc data transformation.");
-
-  Type *GVType = GV->getValueType();
-
-  assert(GVType->isSized() && "A GlobalVariable's size must be known to be "
-                              "supported by the toc data transformation.");
-
-  if (GVType->isVectorTy())
-    report_fatal_error("A GlobalVariable of Vector type is not currently "
-                       "supported by the toc data transformation.");
-
-  if (GVType->isArrayTy())
-    report_fatal_error("A GlobalVariable of Array type is not currently "
-                       "supported by the toc data transformation.");
-
-  if (GVType->isStructTy())
-    report_fatal_error("A GlobalVariable of Struct type is not currently "
-                       "supported by the toc data transformation.");
-
-  assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&
-         "A GlobalVariable with size larger than a TOC entry is not currently "
-         "supported by the toc data transformation.");
-
-  if (GV->hasPrivateLinkage())
-    report_fatal_error("A GlobalVariable with private linkage is not "
-                       "currently supported by the toc data transformation.");
-
   return true;
 }
 
@@ -6168,7 +6134,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
     // [64-bit ELF/AIX]
     //   LDtocL(@sym, ADDIStocHA8(%x2, @sym))
     // Otherwise we generate:
-    //   ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
+    //   ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
     SDValue GA = N->getOperand(0);
     SDValue TOCbase = N->getOperand(1);
 
@@ -6188,7 +6154,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
     }
 
     // Build the address relative to the TOC-pointer.
-    ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
+    ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64,
                                           SDValue(Tmp, 0), GA));
     return;
   }
@@ -7741,7 +7707,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
     // target flags on the immediate operand when we fold it into the
     // load instruction.
     //
-    // For something like ADDItocL, the relocation information is
+    // For something like ADDItocL8, the relocation information is
     // inferred from the opcode; when we process it in the AsmPrinter,
     // we add the necessary relocation there.  A load, though, can receive
     // relocation from various flavors of ADDIxxx, so we need to carry
@@ -7762,7 +7728,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
     case PPC::ADDItlsldL:
       Flags = PPCII::MO_TLSLD_LO;
       break;
-    case PPC::ADDItocL:
+    case PPC::ADDItocL8:
       Flags = PPCII::MO_TOC_LO;
       break;
     }
@@ -7789,7 +7755,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
         // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
         // one use, then we can do this for any offset, we just need to also
         // update the offset (i.e. the symbol addend) on the addis also.
-        if (Base.getMachineOpcode() != PPC::ADDItocL)
+        if (Base.getMachineOpcode() != PPC::ADDItocL8)
           continue;
 
         if (!HBase.isMachineOpcode() ||
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 2949d58..a935979 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1480,8 +1480,8 @@ let hasSideEffects = 0 in {
 let isReMaterializable = 1 in {
 def ADDIStocHA8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
                        "#ADDIStocHA8", []>, isPPC64;
-def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
-                     "#ADDItocL", []>, isPPC64;
+def ADDItocL8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+                     "#ADDItocL8", []>, isPPC64;
 }
 
 // Local Data Transform
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 5d37e92..5f5eb31 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1077,7 +1077,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(
   case PPC::LIS8:
   case PPC::ADDIStocHA:
   case PPC::ADDIStocHA8:
-  case PPC::ADDItocL:
+  case PPC::ADDItocL8:
   case PPC::LOAD_STACK_GUARD:
   case PPC::PPCLdFixedAddr:
   case PPC::XXLXORz:
@@ -3453,7 +3453,7 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
             break;
           case PPC::LI:
           case PPC::LI8:
-          case PPC::ADDItocL:
+          case PPC::ADDItocL8:
           case PPC::ADDI:
           case PPC::ADDI8:
             OpNoForForwarding = i;
@@ -4420,7 +4420,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
                                                MachineOperand *&ImmMO,
                                                MachineOperand *&RegMO) const {
   unsigned Opc = DefMI.getOpcode();
-  if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
+  if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
     return false;
 
   assert(DefMI.getNumOperands() >= 3 &&
@@ -4485,8 +4485,8 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
                                              int64_t &Imm,
                                              int64_t BaseImm) const {
   assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
-  if (DefMI.getOpcode() == PPC::ADDItocL) {
-    // The operand for ADDItocL is CPI, which isn't imm at compiling time,
+  if (DefMI.getOpcode() == PPC::ADDItocL8) {
+    // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
     // However, we know that, it is 16-bit width, and has the alignment of 4.
     // Check if the instruction met the requirement.
     if (III.ImmMustBeMultipleOf > 4 ||
@@ -4899,7 +4899,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
     // register with ImmMO.
     // Before that, we need to fixup the target flags for imm.
     // For some reason, we miss to set the flag for the ImmMO if it is CPI.
-    if (DefMI.getOpcode() == PPC::ADDItocL)
+    if (DefMI.getOpcode() == PPC::ADDItocL8)
       ImmMO->setTargetFlags(PPCII::MO_TOC_LO);
 
     // MI didn't have the interface such as MI.setOperand(i) though
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
index 6b8ad22..fb6e656 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -32,7 +32,7 @@
 // {addi} followed by one of these {lxvd2x, lxvw4x, lxvdsx, lvebx, lvehx,
 // lvewx, lvx, lxsdx}
 FUSION_FEATURE(AddiLoad, hasAddiLoadFusion, 2, \
-               FUSION_OP_SET(ADDI, ADDI8, ADDItocL), \
+               FUSION_OP_SET(ADDI, ADDI8, ADDItocL8), \
                FUSION_OP_SET(LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX, \
                              LVX, LXSDX))
 
@@ -135,11 +135,11 @@ FUSION_FEATURE(XorisXori, hasWideImmFusion, 1, FUSION_OP_SET(XORIS, XORIS8),
 // addis rx,ra,si - addi rt,rx,SI, SI >= 0
 FUSION_FEATURE(AddisAddi, hasWideImmFusion, 1,
                FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8),
-               FUSION_OP_SET(ADDI, ADDI8, ADDItocL))
+               FUSION_OP_SET(ADDI, ADDI8, ADDItocL8))
 
 // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
 FUSION_FEATURE(AddiAddis, hasWideImmFusion, 1,
-               FUSION_OP_SET(ADDI, ADDI8, ADDItocL),
+               FUSION_OP_SET(ADDI, ADDI8, ADDItocL8),
                FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8))
 
 // mtctr - { bcctr,bcctrl }
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 5380ec1..884f2f5 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -185,6 +185,28 @@ bool PPCSubtarget::enableSubRegLiveness() const {
   return UseSubRegLiveness;
 }
 
+void PPCSubtarget::tocDataChecks(unsigned PointerSize,
+                                 const GlobalVariable *GV) const {
+  // TODO: These asserts should be updated as more support for the toc data
+  // transformation is added (struct support, etc.).
+  assert(
+      PointerSize >= GV->getAlign().valueOrOne().value() &&
+      "GlobalVariables with an alignment requirement stricter than TOC entry "
+      "size not supported by the toc data transformation.");
+
+  Type *GVType = GV->getValueType();
+  assert(GVType->isSized() && "A GlobalVariable's size must be known to be "
+                              "supported by the toc data transformation.");
+  if (GV->getParent()->getDataLayout().getTypeSizeInBits(GVType) >
+      PointerSize * 8)
+    report_fatal_error(
+        "A GlobalVariable with size larger than a TOC entry is not currently "
+        "supported by the toc data transformation.");
+  if (GV->hasPrivateLinkage())
+    report_fatal_error("A GlobalVariable with private linkage is not "
+                       "currently supported by the toc data transformation.");
+}
+
 bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
   // Large code model always uses the TOC even for local symbols.
   if (TM.getCodeModel() == CodeModel::Large)
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 306a52d..d913f22 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -245,6 +245,8 @@ public:
   /// True if the GV will be accessed via an indirect symbol.
   bool isGVIndirectSymbol(const GlobalValue *GV) const;
 
+  void tocDataChecks(unsigned PointerSize, const GlobalVariable *GV) const;
+
   /// True if the ABI is descriptor based.
   bool usesFunctionDescriptors() const {
     // Both 32-bit and 64-bit AIX are descriptor based. For ELF only the 64-bit
diff --git a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 81f078a..0527991 100644
--- a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -94,8 +94,7 @@ namespace {
 
 protected:
     bool hasTOCLoReloc(const MachineInstr &MI) {
-      if (MI.getOpcode() == PPC::LDtocL ||
-          MI.getOpcode() == PPC::ADDItocL ||
+      if (MI.getOpcode() == PPC::LDtocL || MI.getOpcode() == PPC::ADDItocL8 ||
           MI.getOpcode() == PPC::LWZtocL)
         return true;
 
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index d83979a..2da75bd 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2716,7 +2716,7 @@ ParseStatus RISCVAsmParser::parseDirective(AsmToken DirectiveID) {
 
 bool RISCVAsmParser::resetToArch(StringRef Arch, SMLoc Loc, std::string &Result,
                                  bool FromOptionDirective) {
-  for (auto Feature : RISCVFeatureKV)
+  for (auto &Feature : RISCVFeatureKV)
     if (llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
       clearFeatureBits(Feature.Value, Feature.Key);
 
@@ -2735,7 +2735,7 @@ bool RISCVAsmParser::resetToArch(StringRef Arch, SMLoc Loc, std::string &Result,
   }
   auto &ISAInfo = *ParseResult;
 
-  for (auto Feature : RISCVFeatureKV)
+  for (auto &Feature : RISCVFeatureKV)
     if (ISAInfo->hasExtension(Feature.Key))
       setFeatureBits(Feature.Value, Feature.Key);
 
@@ -2823,9 +2823,8 @@ bool RISCVAsmParser::parseDirectiveOption() {
         break;
       }
 
-      ArrayRef<SubtargetFeatureKV> KVArray(RISCVFeatureKV);
-      auto Ext = llvm::lower_bound(KVArray, Arch);
-      if (Ext == KVArray.end() || StringRef(Ext->Key) != Arch ||
+      auto Ext = llvm::lower_bound(RISCVFeatureKV, Arch);
+      if (Ext == std::end(RISCVFeatureKV) || StringRef(Ext->Key) != Arch ||
           !RISCVISAInfo::isSupportedExtension(Arch)) {
         if (isDigit(Arch.back()))
           return Error(
@@ -2858,7 +2857,7 @@ bool RISCVAsmParser::parseDirectiveOption() {
         // It is invalid to disable an extension that there are other enabled
         // extensions depend on it.
         // TODO: Make use of RISCVISAInfo to handle this
-        for (auto Feature : KVArray) {
+        for (auto &Feature : RISCVFeatureKV) {
           if (getSTI().hasFeature(Feature.Value) &&
               Feature.Implies.test(Ext->Value))
             return Error(Loc,
@@ -3271,11 +3270,13 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
                             .addOperand(Inst.getOperand(0))
                             .addOperand(Inst.getOperand(1))
                             .addOperand(Inst.getOperand(2))
-                            .addReg(RISCV::NoRegister));
+                            .addReg(RISCV::NoRegister)
+                            .setLoc(IDLoc));
     emitToStreamer(Out, MCInstBuilder(RISCV::VMNAND_MM)
                             .addOperand(Inst.getOperand(0))
                             .addOperand(Inst.getOperand(0))
-                            .addOperand(Inst.getOperand(0)));
+                            .addOperand(Inst.getOperand(0))
+                            .setLoc(IDLoc));
   } else if (Inst.getNumOperands() == 4) {
     // masked va >= x, vd != v0
     //
@@ -3287,11 +3288,13 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
                             .addOperand(Inst.getOperand(0))
                             .addOperand(Inst.getOperand(1))
                             .addOperand(Inst.getOperand(2))
-                            .addOperand(Inst.getOperand(3)));
+                            .addOperand(Inst.getOperand(3))
+                            .setLoc(IDLoc));
     emitToStreamer(Out, MCInstBuilder(RISCV::VMXOR_MM)
                             .addOperand(Inst.getOperand(0))
                             .addOperand(Inst.getOperand(0))
-                            .addReg(RISCV::V0));
+                            .addReg(RISCV::V0)
+                            .setLoc(IDLoc));
   } else if (Inst.getNumOperands() == 5 &&
              Inst.getOperand(0).getReg() == RISCV::V0) {
     // masked va >= x, vd == v0
@@ -3306,11 +3309,13 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
                             .addOperand(Inst.getOperand(1))
                             .addOperand(Inst.getOperand(2))
                             .addOperand(Inst.getOperand(3))
-                            .addReg(RISCV::NoRegister));
+                            .addReg(RISCV::NoRegister)
+                            .setLoc(IDLoc));
     emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
                             .addOperand(Inst.getOperand(0))
                             .addOperand(Inst.getOperand(0))
-                            .addOperand(Inst.getOperand(1)));
+                            .addOperand(Inst.getOperand(1))
+                            .setLoc(IDLoc));
   } else if (Inst.getNumOperands() == 5) {
     // masked va >= x, any vd
     //
@@ -3323,19 +3328,23 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
                             .addOperand(Inst.getOperand(1))
                             .addOperand(Inst.getOperand(2))
                             .addOperand(Inst.getOperand(3))
-                            .addReg(RISCV::NoRegister));
+                            .addReg(RISCV::NoRegister)
+                            .setLoc(IDLoc));
     emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
                             .addOperand(Inst.getOperand(1))
                             .addReg(RISCV::V0)
-                            .addOperand(Inst.getOperand(1)));
+                            .addOperand(Inst.getOperand(1))
+                            .setLoc(IDLoc));
     emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
                             .addOperand(Inst.getOperand(0))
                             .addOperand(Inst.getOperand(0))
-                            .addReg(RISCV::V0));
+                            .addReg(RISCV::V0)
+                            .setLoc(IDLoc));
     emitToStreamer(Out, MCInstBuilder(RISCV::VMOR_MM)
                             .addOperand(Inst.getOperand(0))
                             .addOperand(Inst.getOperand(1))
-                            .addOperand(Inst.getOperand(0)));
+                            .addOperand(Inst.getOperand(0))
+                            .setLoc(IDLoc));
   }
 }
 
@@ -3637,7 +3646,8 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
                             .addOperand(Inst.getOperand(0))
                             .addOperand(Inst.getOperand(1))
                             .addImm(Imm - 1)
-                            .addOperand(Inst.getOperand(3)));
+                            .addOperand(Inst.getOperand(3))
+                            .setLoc(IDLoc));
     return false;
   }
   case RISCV::PseudoVMSGEU_VI:
@@ -3655,7 +3665,8 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
                               .addOperand(Inst.getOperand(0))
                               .addOperand(Inst.getOperand(1))
                               .addOperand(Inst.getOperand(1))
-                              .addOperand(Inst.getOperand(3)));
+                              .addOperand(Inst.getOperand(3))
+                              .setLoc(IDLoc));
     } else {
       // Other immediate values can subtract one like signed.
       unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGEU_VI
@@ -3665,7 +3676,8 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
                               .addOperand(Inst.getOperand(0))
                               .addOperand(Inst.getOperand(1))
                               .addImm(Imm - 1)
-                              .addOperand(Inst.getOperand(3)));
+                              .addOperand(Inst.getOperand(3))
+                              .setLoc(IDLoc));
     }
 
     return false;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 08678a8..803774f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10466,6 +10466,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
   SDValue BasePtr = MemSD->getBasePtr();
   SDValue Val, Mask, VL;
 
+  bool IsCompressingStore = false;
   if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
     Val = VPStore->getValue();
     Mask = VPStore->getMask();
@@ -10474,9 +10475,11 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
     const auto *MStore = cast<MaskedStoreSDNode>(Op);
     Val = MStore->getValue();
     Mask = MStore->getMask();
+    IsCompressingStore = MStore->isCompressingStore();
   }
 
-  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+  bool IsUnmasked =
+      ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
 
   MVT VT = Val.getSimpleValueType();
   MVT XLenVT = Subtarget.getXLenVT();
@@ -10486,7 +10489,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
     ContainerVT = getContainerForFixedLengthVector(VT);
 
     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
-    if (!IsUnmasked) {
+    if (!IsUnmasked || IsCompressingStore) {
       MVT MaskVT = getMaskTypeFor(ContainerVT);
       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
     }
@@ -10495,6 +10498,15 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
   if (!VL)
     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
 
+  if (IsCompressingStore) {
+    Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+                      DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
+                      DAG.getUNDEF(ContainerVT), Val, Mask, VL);
+    VL =
+        DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
+                    getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
+  }
+
   unsigned IntID =
       IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ecd3736..8f46fdc 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1620,3 +1620,13 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                   C2.NumIVMuls, C2.NumBaseAdds,
                   C2.ScaleCost, C2.ImmCost, C2.SetupCost);
 }
+
+bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
+  auto *VTy = dyn_cast<VectorType>(DataTy);
+  if (!VTy || VTy->isScalableTy())
+    return false;
+
+  if (!isLegalMaskedLoadStore(DataTy, Alignment))
+    return false;
+  return true;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index af36e9d..8daf684 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -261,6 +261,8 @@ public:
     return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment);
   }
 
+  bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment);
+
   bool isVScaleKnownToBeAPowerOfTwo() const {
     return TLI->isVScaleKnownToBeAPowerOfTwo();
   }
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 2d7a00b..f1fbe2b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -85,6 +85,42 @@ static ConstantInt *getConstInt(MDNode *MD, unsigned NumOp) {
   return nullptr;
 }
 
+// If the function has pointer arguments, we are forced to re-create this
+// function type from the very beginning, changing PointerType by
+// TypedPointerType for each pointer argument. Otherwise, the same `Type*`
+// potentially corresponds to different SPIR-V function type, effectively
+// invalidating logic behind global registry and duplicates tracker.
+static FunctionType *
+fixFunctionTypeIfPtrArgs(SPIRVGlobalRegistry *GR, const Function &F,
+                         FunctionType *FTy, const SPIRVType *SRetTy,
+                         const SmallVector<SPIRVType *, 4> &SArgTys) {
+  if (F.getParent()->getNamedMetadata("spv.cloned_funcs"))
+    return FTy;
+
+  bool hasArgPtrs = false;
+  for (auto &Arg : F.args()) {
+    // check if it's an instance of a non-typed PointerType
+    if (Arg.getType()->isPointerTy()) {
+      hasArgPtrs = true;
+      break;
+    }
+  }
+  if (!hasArgPtrs) {
+    Type *RetTy = FTy->getReturnType();
+    // check if it's an instance of a non-typed PointerType
+    if (!RetTy->isPointerTy())
+      return FTy;
+  }
+
+  // re-create function type, using TypedPointerType instead of PointerType to
+  // properly trace argument types
+  const Type *RetTy = GR->getTypeForSPIRVType(SRetTy);
+  SmallVector<Type *, 4> ArgTys;
+  for (auto SArgTy : SArgTys)
+    ArgTys.push_back(const_cast<Type *>(GR->getTypeForSPIRVType(SArgTy)));
+  return FunctionType::get(const_cast<Type *>(RetTy), ArgTys, false);
+}
+
 // This code restores function args/retvalue types for composite cases
 // because the final types should still be aggregate whereas they're i32
 // during the translation to cope with aggregate flattening etc.
@@ -162,7 +198,7 @@ static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx,
 
   // If OriginalArgType is non-pointer, use the OriginalArgType (the type cannot
   // be legally reassigned later).
-  if (!OriginalArgType->isPointerTy())
+  if (!isPointerTy(OriginalArgType))
     return GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder, ArgAccessQual);
 
   // In case OriginalArgType is of pointer type, there are three possibilities:
@@ -179,8 +215,7 @@ static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx,
     SPIRVType *ElementType = GR->getOrCreateSPIRVType(ByValRefType, MIRBuilder);
     return GR->getOrCreateSPIRVPointerType(
         ElementType, MIRBuilder,
-        addressSpaceToStorageClass(Arg->getType()->getPointerAddressSpace(),
-                                   ST));
+        addressSpaceToStorageClass(getPointerAddressSpace(Arg->getType()), ST));
   }
 
   for (auto User : Arg->users()) {
@@ -240,7 +275,6 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
       static_cast<const SPIRVSubtarget *>(&MIRBuilder.getMF().getSubtarget());
 
   // Assign types and names to all args, and store their types for later.
-  FunctionType *FTy = getOriginalFunctionType(F);
   SmallVector<SPIRVType *, 4> ArgTypeVRegs;
   if (VRegs.size() > 0) {
     unsigned i = 0;
@@ -255,7 +289,7 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
 
       if (Arg.hasName())
         buildOpName(VRegs[i][0], Arg.getName(), MIRBuilder);
-      if (Arg.getType()->isPointerTy()) {
+      if (isPointerTy(Arg.getType())) {
         auto DerefBytes = static_cast<unsigned>(Arg.getDereferenceableBytes());
         if (DerefBytes != 0)
           buildOpDecorate(VRegs[i][0], MIRBuilder,
@@ -322,7 +356,9 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
   MRI->setRegClass(FuncVReg, &SPIRV::IDRegClass);
   if (F.isDeclaration())
     GR->add(&F, &MIRBuilder.getMF(), FuncVReg);
+  FunctionType *FTy = getOriginalFunctionType(F);
   SPIRVType *RetTy = GR->getOrCreateSPIRVType(FTy->getReturnType(), MIRBuilder);
+  FTy = fixFunctionTypeIfPtrArgs(GR, F, FTy, RetTy, ArgTypeVRegs);
   SPIRVType *FuncTy = GR->getOrCreateOpTypeFunctionWithArgs(
       FTy, RetTy, ArgTypeVRegs, MIRBuilder);
   uint32_t FuncControl = getFunctionControl(F);
@@ -429,7 +465,6 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     return false;
   MachineFunction &MF = MIRBuilder.getMF();
   GR->setCurrentFunc(MF);
-  FunctionType *FTy = nullptr;
   const Function *CF = nullptr;
   std::string DemangledName;
   const Type *OrigRetTy = Info.OrigRet.Ty;
@@ -444,7 +479,7 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     // TODO: support constexpr casts and indirect calls.
     if (CF == nullptr)
       return false;
-    if ((FTy = getOriginalFunctionType(*CF)) != nullptr)
+    if (FunctionType *FTy = getOriginalFunctionType(*CF))
       OrigRetTy = FTy->getReturnType();
   }
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 575e903..c5b9012 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -57,8 +57,14 @@ class SPIRVEmitIntrinsics
   bool TrackConstants = true;
   DenseMap<Instruction *, Constant *> AggrConsts;
   DenseSet<Instruction *> AggrStores;
+
+  // deduce values type
+  DenseMap<Value *, Type *> DeducedElTys;
+  Type *deduceElementType(Value *I);
+
   void preprocessCompositeConstants(IRBuilder<> &B);
   void preprocessUndefs(IRBuilder<> &B);
+
   CallInst *buildIntrWithMD(Intrinsic::ID IntrID, ArrayRef<Type *> Types,
                             Value *Arg, Value *Arg2, ArrayRef<Constant *> Imms,
                             IRBuilder<> &B) {
@@ -72,6 +78,7 @@ class SPIRVEmitIntrinsics
       Args.push_back(Imm);
     return B.CreateIntrinsic(IntrID, {Types}, Args);
   }
+
   void replaceMemInstrUses(Instruction *Old, Instruction *New, IRBuilder<> &B);
   void processInstrAfterVisit(Instruction *I, IRBuilder<> &B);
   void insertAssignPtrTypeIntrs(Instruction *I, IRBuilder<> &B);
@@ -156,6 +163,48 @@ static inline void reportFatalOnTokenType(const Instruction *I) {
                        false);
 }
 
+// Deduce and return a successfully deduced Type of the Instruction,
+// or nullptr otherwise.
+static Type *deduceElementTypeHelper(Value *I,
+                                     std::unordered_set<Value *> &Visited,
+                                     DenseMap<Value *, Type *> &DeducedElTys) {
+  // maybe already known
+  auto It = DeducedElTys.find(I);
+  if (It != DeducedElTys.end())
+    return It->second;
+
+  // maybe a cycle
+  if (Visited.find(I) != Visited.end())
+    return nullptr;
+  Visited.insert(I);
+
+  // fallback value in case when we fail to deduce a type
+  Type *Ty = nullptr;
+  // look for known basic patterns of type inference
+  if (auto *Ref = dyn_cast<AllocaInst>(I))
+    Ty = Ref->getAllocatedType();
+  else if (auto *Ref = dyn_cast<GetElementPtrInst>(I))
+    Ty = Ref->getResultElementType();
+  else if (auto *Ref = dyn_cast<GlobalValue>(I))
+    Ty = Ref->getValueType();
+  else if (auto *Ref = dyn_cast<AddrSpaceCastInst>(I))
+    Ty = deduceElementTypeHelper(Ref->getPointerOperand(), Visited,
+                                 DeducedElTys);
+
+  // remember the found relationship
+  if (Ty)
+    DeducedElTys[I] = Ty;
+
+  return Ty;
+}
+
+Type *SPIRVEmitIntrinsics::deduceElementType(Value *I) {
+  std::unordered_set<Value *> Visited;
+  if (Type *Ty = deduceElementTypeHelper(I, Visited, DeducedElTys))
+    return Ty;
+  return IntegerType::getInt8Ty(I->getContext());
+}
+
 void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
                                               Instruction *New,
                                               IRBuilder<> &B) {
@@ -280,7 +329,7 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
   // varying element types. In case of IR coming from older versions of LLVM
   // such bitcasts do not provide sufficient information, should be just skipped
   // here, and handled in insertPtrCastOrAssignTypeInstr.
-  if (I.getType()->isPointerTy()) {
+  if (isPointerTy(I.getType())) {
     I.replaceAllUsesWith(Source);
     I.eraseFromParent();
     return nullptr;
@@ -333,20 +382,10 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
   while (BitCastInst *BC = dyn_cast<BitCastInst>(Pointer))
     Pointer = BC->getOperand(0);
 
-  // Do not emit spv_ptrcast if Pointer is a GlobalValue of expected type.
-  GlobalValue *GV = dyn_cast<GlobalValue>(Pointer);
-  if (GV && GV->getValueType() == ExpectedElementType)
-    return;
-
-  // Do not emit spv_ptrcast if Pointer is a result of alloca with expected
-  // type.
-  AllocaInst *A = dyn_cast<AllocaInst>(Pointer);
-  if (A && A->getAllocatedType() == ExpectedElementType)
-    return;
-
-  // Do not emit spv_ptrcast if Pointer is a result of GEP of expected type.
-  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Pointer);
-  if (GEPI && GEPI->getResultElementType() == ExpectedElementType)
+  // Do not emit spv_ptrcast if Pointer's element type is ExpectedElementType
+  std::unordered_set<Value *> Visited;
+  Type *PointerElemTy = deduceElementTypeHelper(Pointer, Visited, DeducedElTys);
+  if (PointerElemTy == ExpectedElementType)
     return;
 
   setInsertPointSkippingPhis(B, I);
@@ -356,7 +395,7 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
       ValueAsMetadata::getConstant(ExpectedElementTypeConst);
   MDTuple *TyMD = MDNode::get(F->getContext(), CM);
   MetadataAsValue *VMD = MetadataAsValue::get(F->getContext(), TyMD);
-  unsigned AddressSpace = Pointer->getType()->getPointerAddressSpace();
+  unsigned AddressSpace = getPointerAddressSpace(Pointer->getType());
   bool FirstPtrCastOrAssignPtrType = true;
 
   // Do not emit new spv_ptrcast if equivalent one already exists or when
@@ -401,9 +440,11 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
   // spv_assign_ptr_type instead.
   if (FirstPtrCastOrAssignPtrType &&
       (isa<Instruction>(Pointer) || isa<Argument>(Pointer))) {
-    buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {Pointer->getType()},
-                    ExpectedElementTypeConst, Pointer,
-                    {B.getInt32(AddressSpace)}, B);
+    CallInst *CI = buildIntrWithMD(
+        Intrinsic::spv_assign_ptr_type, {Pointer->getType()},
+        ExpectedElementTypeConst, Pointer, {B.getInt32(AddressSpace)}, B);
+    DeducedElTys[CI] = ExpectedElementType;
+    DeducedElTys[Pointer] = ExpectedElementType;
     return;
   }
 
@@ -419,7 +460,7 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
   // Handle basic instructions:
   StoreInst *SI = dyn_cast<StoreInst>(I);
   if (SI && F->getCallingConv() == CallingConv::SPIR_KERNEL &&
-      SI->getValueOperand()->getType()->isPointerTy() &&
+      isPointerTy(SI->getValueOperand()->getType()) &&
       isa<Argument>(SI->getValueOperand())) {
     return replacePointerOperandWithPtrCast(
         I, SI->getValueOperand(), IntegerType::getInt8Ty(F->getContext()), 0,
@@ -440,9 +481,34 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
   if (!CI || CI->isIndirectCall() || CI->getCalledFunction()->isIntrinsic())
     return;
 
+  // collect information about formal parameter types
+  Function *CalledF = CI->getCalledFunction();
+  SmallVector<Type *, 4> CalledArgTys;
+  bool HaveTypes = false;
+  for (auto &CalledArg : CalledF->args()) {
+    if (!isPointerTy(CalledArg.getType())) {
+      CalledArgTys.push_back(nullptr);
+      continue;
+    }
+    auto It = DeducedElTys.find(&CalledArg);
+    Type *ParamTy = It != DeducedElTys.end() ? It->second : nullptr;
+    if (!ParamTy) {
+      for (User *U : CalledArg.users()) {
+        if (Instruction *Inst = dyn_cast<Instruction>(U)) {
+          std::unordered_set<Value *> Visited;
+          ParamTy = deduceElementTypeHelper(Inst, Visited, DeducedElTys);
+          if (ParamTy)
+            break;
+        }
+      }
+    }
+    HaveTypes |= ParamTy != nullptr;
+    CalledArgTys.push_back(ParamTy);
+  }
+
   std::string DemangledName =
       getOclOrSpirvBuiltinDemangledName(CI->getCalledFunction()->getName());
-  if (DemangledName.empty())
+  if (DemangledName.empty() && !HaveTypes)
     return;
 
   for (unsigned OpIdx = 0; OpIdx < CI->arg_size(); OpIdx++) {
@@ -455,8 +521,11 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
     if (!isa<Instruction>(ArgOperand) && !isa<Argument>(ArgOperand))
       continue;
 
-    Type *ExpectedType = SPIRV::parseBuiltinCallArgumentBaseType(
-        DemangledName, OpIdx, I->getContext());
+    Type *ExpectedType =
+        OpIdx < CalledArgTys.size() ? CalledArgTys[OpIdx] : nullptr;
+    if (!ExpectedType && !DemangledName.empty())
+      ExpectedType = SPIRV::parseBuiltinCallArgumentBaseType(
+          DemangledName, OpIdx, I->getContext());
     if (!ExpectedType)
       continue;
 
@@ -639,30 +708,25 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV,
 void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I,
                                                    IRBuilder<> &B) {
   reportFatalOnTokenType(I);
-  if (!I->getType()->isPointerTy() || !requireAssignType(I) ||
+  if (!isPointerTy(I->getType()) || !requireAssignType(I) ||
       isa<BitCastInst>(I))
     return;
 
   setInsertPointSkippingPhis(B, I->getNextNode());
 
-  Constant *EltTyConst;
-  unsigned AddressSpace = I->getType()->getPointerAddressSpace();
-  if (auto *AI = dyn_cast<AllocaInst>(I))
-    EltTyConst = UndefValue::get(AI->getAllocatedType());
-  else if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
-    EltTyConst = UndefValue::get(GEP->getResultElementType());
-  else
-    EltTyConst = UndefValue::get(IntegerType::getInt8Ty(I->getContext()));
-
-  buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {I->getType()}, EltTyConst, I,
-                  {B.getInt32(AddressSpace)}, B);
+  Type *ElemTy = deduceElementType(I);
+  Constant *EltTyConst = UndefValue::get(ElemTy);
+  unsigned AddressSpace = getPointerAddressSpace(I->getType());
+  CallInst *CI = buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {I->getType()},
+                                 EltTyConst, I, {B.getInt32(AddressSpace)}, B);
+  DeducedElTys[CI] = ElemTy;
 }
 
 void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
                                                 IRBuilder<> &B) {
   reportFatalOnTokenType(I);
   Type *Ty = I->getType();
-  if (!Ty->isVoidTy() && !Ty->isPointerTy() && requireAssignType(I)) {
+  if (!Ty->isVoidTy() && !isPointerTy(Ty) && requireAssignType(I)) {
     setInsertPointSkippingPhis(B, I->getNextNode());
     Type *TypeToAssign = Ty;
     if (auto *II = dyn_cast<IntrinsicInst>(I)) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 8556581..bda9c57 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -750,7 +750,7 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(
 SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(
     const Type *Ty, MachineIRBuilder &MIRBuilder,
     SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) {
-  if (TypesInProcessing.count(Ty) && !Ty->isPointerTy())
+  if (TypesInProcessing.count(Ty) && !isPointerTy(Ty))
     return nullptr;
   TypesInProcessing.insert(Ty);
   SPIRVType *SpirvType = createSPIRVType(Ty, MIRBuilder, AccessQual, EmitIR);
@@ -762,11 +762,15 @@ SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(
   // will be added later. For special types it is already added to DT.
   if (SpirvType->getOpcode() != SPIRV::OpTypeForwardPointer && !Reg.isValid() &&
       !isSpecialOpaqueType(Ty)) {
-    if (!Ty->isPointerTy())
+    if (!isPointerTy(Ty))
       DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType));
+    else if (isTypedPointerTy(Ty))
+      DT.add(cast<TypedPointerType>(Ty)->getElementType(),
+             getPointerAddressSpace(Ty), &MIRBuilder.getMF(),
+             getSPIRVTypeID(SpirvType));
     else
       DT.add(Type::getInt8Ty(MIRBuilder.getMF().getFunction().getContext()),
-             Ty->getPointerAddressSpace(), &MIRBuilder.getMF(),
+             getPointerAddressSpace(Ty), &MIRBuilder.getMF(),
              getSPIRVTypeID(SpirvType));
   }
 
@@ -787,12 +791,15 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(
     const Type *Ty, MachineIRBuilder &MIRBuilder,
     SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) {
   Register Reg;
-  if (!Ty->isPointerTy())
+  if (!isPointerTy(Ty))
     Reg = DT.find(Ty, &MIRBuilder.getMF());
+  else if (isTypedPointerTy(Ty))
+    Reg = DT.find(cast<TypedPointerType>(Ty)->getElementType(),
+                  getPointerAddressSpace(Ty), &MIRBuilder.getMF());
   else
     Reg =
         DT.find(Type::getInt8Ty(MIRBuilder.getMF().getFunction().getContext()),
-                Ty->getPointerAddressSpace(), &MIRBuilder.getMF());
+                getPointerAddressSpace(Ty), &MIRBuilder.getMF());
 
   if (Reg.isValid() && !isSpecialOpaqueType(Ty))
     return getSPIRVTypeForVReg(Reg);
@@ -836,11 +843,16 @@ bool SPIRVGlobalRegistry::isScalarOrVectorOfType(Register VReg,
 
 unsigned
 SPIRVGlobalRegistry::getScalarOrVectorComponentCount(Register VReg) const {
-  if (SPIRVType *Type = getSPIRVTypeForVReg(VReg))
-    return Type->getOpcode() == SPIRV::OpTypeVector
-               ? static_cast<unsigned>(Type->getOperand(2).getImm())
-               : 1;
-  return 0;
+  return getScalarOrVectorComponentCount(getSPIRVTypeForVReg(VReg));
+}
+
+unsigned
+SPIRVGlobalRegistry::getScalarOrVectorComponentCount(SPIRVType *Type) const {
+  if (!Type)
+    return 0;
+  return Type->getOpcode() == SPIRV::OpTypeVector
+             ? static_cast<unsigned>(Type->getOperand(2).getImm())
+             : 1;
 }
 
 unsigned
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index 9c0061d..25d82ebf 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -198,9 +198,10 @@ public:
   // opcode (e.g. OpTypeBool, or OpTypeVector %x 4, where %x is OpTypeBool).
   bool isScalarOrVectorOfType(Register VReg, unsigned TypeOpcode) const;
 
-  // Return number of elements in a vector if the given VReg is associated with
+  // Return number of elements in a vector if the argument is associated with
   // a vector type. Return 1 for a scalar type, and 0 for a missing type.
   unsigned getScalarOrVectorComponentCount(Register VReg) const;
+  unsigned getScalarOrVectorComponentCount(SPIRVType *Type) const;
 
   // For vectors or scalars of booleans, integers and floats, return the scalar
   // type's bitwidth. Otherwise calls llvm_unreachable().
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 74df8de..fd19b74 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -125,6 +125,8 @@ private:
 
   bool selectConstVector(Register ResVReg, const SPIRVType *ResType,
                          MachineInstr &I) const;
+  bool selectSplatVector(Register ResVReg, const SPIRVType *ResType,
+                         MachineInstr &I) const;
 
   bool selectCmp(Register ResVReg, const SPIRVType *ResType,
                  unsigned comparisonOpcode, MachineInstr &I) const;
@@ -313,6 +315,8 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
 
   case TargetOpcode::G_BUILD_VECTOR:
     return selectConstVector(ResVReg, ResType, I);
+  case TargetOpcode::G_SPLAT_VECTOR:
+    return selectSplatVector(ResVReg, ResType, I);
 
   case TargetOpcode::G_SHUFFLE_VECTOR: {
     MachineBasicBlock &BB = *I.getParent();
@@ -1185,6 +1189,43 @@ bool SPIRVInstructionSelector::selectConstVector(Register ResVReg,
   return MIB.constrainAllUses(TII, TRI, RBI);
 }
 
+bool SPIRVInstructionSelector::selectSplatVector(Register ResVReg,
+                                                 const SPIRVType *ResType,
+                                                 MachineInstr &I) const {
+  if (ResType->getOpcode() != SPIRV::OpTypeVector)
+    report_fatal_error("Cannot select G_SPLAT_VECTOR with a non-vector result");
+  unsigned N = GR.getScalarOrVectorComponentCount(ResType);
+  unsigned OpIdx = I.getNumExplicitDefs();
+  if (!I.getOperand(OpIdx).isReg())
+    report_fatal_error("Unexpected argument in G_SPLAT_VECTOR");
+
+  // check if we may construct a constant vector
+  Register OpReg = I.getOperand(OpIdx).getReg();
+  bool IsConst = false;
+  if (SPIRVType *OpDef = MRI->getVRegDef(OpReg)) {
+    if (OpDef->getOpcode() == SPIRV::ASSIGN_TYPE &&
+        OpDef->getOperand(1).isReg()) {
+      if (SPIRVType *RefDef = MRI->getVRegDef(OpDef->getOperand(1).getReg()))
+        OpDef = RefDef;
+    }
+    IsConst = OpDef->getOpcode() == TargetOpcode::G_CONSTANT ||
+              OpDef->getOpcode() == TargetOpcode::G_FCONSTANT;
+  }
+
+  if (!IsConst && N < 2)
+    report_fatal_error(
+        "There must be at least two constituent operands in a vector");
+
+  auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                     TII.get(IsConst ? SPIRV::OpConstantComposite
+                                     : SPIRV::OpCompositeConstruct))
+                 .addDef(ResVReg)
+                 .addUse(GR.getSPIRVTypeID(ResType));
+  for (unsigned i = 0; i < N; ++i)
+    MIB.addUse(OpReg);
+  return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
 bool SPIRVInstructionSelector::selectCmp(Register ResVReg,
                                          const SPIRVType *ResType,
                                          unsigned CmpOpc,
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index f815487..4b871bd 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -149,7 +149,9 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
   getActionDefinitionsBuilder(G_GLOBAL_VALUE).alwaysLegal();
 
   // TODO: add proper rules for vectors legalization.
-  getActionDefinitionsBuilder({G_BUILD_VECTOR, G_SHUFFLE_VECTOR}).alwaysLegal();
+  getActionDefinitionsBuilder(
+      {G_BUILD_VECTOR, G_SHUFFLE_VECTOR, G_SPLAT_VECTOR})
+      .alwaysLegal();
 
   // Vector Reduction Operations
   getActionDefinitionsBuilder(
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index e5f35aa..d5ed501 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -15,6 +15,7 @@
 
 #include "MCTargetDesc/SPIRVBaseInfo.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/TypedPointerType.h"
 #include <string>
 
 namespace llvm {
@@ -100,5 +101,30 @@ bool isEntryPoint(const Function &F);
 
 // Parse basic scalar type name, substring TypeName, and return LLVM type.
 Type *parseBasicTypeName(StringRef TypeName, LLVMContext &Ctx);
+
+// True if this is an instance of TypedPointerType.
+inline bool isTypedPointerTy(const Type *T) {
+  return T->getTypeID() == Type::TypedPointerTyID;
+}
+
+// True if this is an instance of PointerType.
+inline bool isUntypedPointerTy(const Type *T) {
+  return T->getTypeID() == Type::PointerTyID;
+}
+
+// True if this is an instance of PointerType or TypedPointerType.
+inline bool isPointerTy(const Type *T) {
+  return isUntypedPointerTy(T) || isTypedPointerTy(T);
+}
+
+// Get the address space of this pointer or pointer vector type for instances of
+// PointerType or TypedPointerType.
+inline unsigned getPointerAddressSpace(const Type *T) {
+  Type *SubT = T->getScalarType();
+  return SubT->getTypeID() == Type::PointerTyID
+             ? cast<PointerType>(SubT)->getAddressSpace()
+             : cast<TypedPointerType>(SubT)->getAddressSpace();
+}
+
 } // namespace llvm
 #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index ee4fd04..f65ed25 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -196,14 +196,24 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
         .Case("0xb36", "arm1136j-s")
         .Case("0xb56", "arm1156t2-s")
         .Case("0xb76", "arm1176jz-s")
+        .Case("0xc05", "cortex-a5")
+        .Case("0xc07", "cortex-a7")
         .Case("0xc08", "cortex-a8")
         .Case("0xc09", "cortex-a9")
         .Case("0xc0f", "cortex-a15")
+        .Case("0xc0e", "cortex-a17")
         .Case("0xc20", "cortex-m0")
         .Case("0xc23", "cortex-m3")
         .Case("0xc24", "cortex-m4")
+        .Case("0xc27", "cortex-m7")
+        .Case("0xd20", "cortex-m23")
+        .Case("0xd21", "cortex-m33")
         .Case("0xd24", "cortex-m52")
         .Case("0xd22", "cortex-m55")
+        .Case("0xd23", "cortex-m85")
+        .Case("0xc18", "cortex-r8")
+        .Case("0xd13", "cortex-r52")
+        .Case("0xd15", "cortex-r82")
         .Case("0xd02", "cortex-a34")
         .Case("0xd04", "cortex-a35")
         .Case("0xd03", "cortex-a53")
@@ -211,13 +221,17 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
         .Case("0xd46", "cortex-a510")
         .Case("0xd80", "cortex-a520")
         .Case("0xd07", "cortex-a57")
+        .Case("0xd06", "cortex-a65")
+        .Case("0xd43", "cortex-a65ae")
         .Case("0xd08", "cortex-a72")
         .Case("0xd09", "cortex-a73")
         .Case("0xd0a", "cortex-a75")
         .Case("0xd0b", "cortex-a76")
+        .Case("0xd0e", "cortex-a76ae")
         .Case("0xd0d", "cortex-a77")
         .Case("0xd41", "cortex-a78")
         .Case("0xd42", "cortex-a78ae")
+        .Case("0xd4b", "cortex-a78c")
         .Case("0xd47", "cortex-a710")
         .Case("0xd4d", "cortex-a715")
         .Case("0xd81", "cortex-a720")
@@ -226,6 +240,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
         .Case("0xd48", "cortex-x2")
         .Case("0xd4e", "cortex-x3")
         .Case("0xd82", "cortex-x4")
+        .Case("0xd4a", "neoverse-e1")
         .Case("0xd0c", "neoverse-n1")
         .Case("0xd49", "neoverse-n2")
         .Case("0xd40", "neoverse-v1")
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 488a6f0..f98833b 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12371,7 +12371,7 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
           SplitBlockAndInsertIfThen(LastCmp, IP, /* Unreachable */ false);
       BasicBlock *CBBB = CB->getParent();
       A.registerManifestAddedBasicBlock(*ThenTI->getParent());
-      A.registerManifestAddedBasicBlock(*CBBB);
+      A.registerManifestAddedBasicBlock(*IP->getParent());
       auto *SplitTI = cast<BranchInst>(LastCmp->getNextNode());
       BasicBlock *ElseBB;
       if (&*IP == CB) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f5f3716..694b180 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -504,6 +504,11 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
     return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
   }
 
+  // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
+  if (II.hasOneUse() && match(Op1, m_Zero()) &&
+      match(II.user_back(), m_Shift(m_Value(), m_Specific(&II))))
+    return IC.replaceOperand(II, 1, IC.Builder.getTrue());
+
   Constant *C;
 
   if (IsTZ) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 45afa63..a9817f1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1121,6 +1121,10 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
   Value *Src = Zext.getOperand(0);
   Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
 
+  // zext nneg bool x -> 0
+  if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
+    return replaceInstUsesWith(Zext, Constant::getNullValue(Zext.getType()));
+
   // Try to extend the entire expression tree to the wide destination type.
   unsigned BitsToClear;
   if (shouldChangeType(SrcTy, DestTy) &&
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1688005..c9bbe43 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -5202,7 +5202,8 @@ static bool combineInstructionsOverFunction(
     if (Iteration > Opts.MaxIterations) {
       report_fatal_error(
           "Instruction Combining did not reach a fixpoint after " +
-          Twine(Opts.MaxIterations) + " iterations");
+              Twine(Opts.MaxIterations) + " iterations",
+          /*GenCrashDiag=*/false);
     }
   }
 
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 11a5c29c..87584da 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -260,6 +260,10 @@ static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
 
 namespace {
 
+template <typename T> T optOr(cl::opt<T> &Opt, T Other) {
+  return Opt.getNumOccurrences() ? Opt : Other;
+}
+
 bool shouldUsePageAliases(const Triple &TargetTriple) {
   return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
 }
@@ -269,14 +273,11 @@ bool shouldInstrumentStack(const Triple &TargetTriple) {
 }
 
 bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
-  return ClInstrumentWithCalls.getNumOccurrences()
-             ? ClInstrumentWithCalls
-             : TargetTriple.getArch() == Triple::x86_64;
+  return optOr(ClInstrumentWithCalls, TargetTriple.getArch() == Triple::x86_64);
 }
 
 bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
-  return ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
-                                              : !DisableOptimization;
+  return optOr(ClUseStackSafety, !DisableOptimization);
 }
 
 bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
@@ -296,10 +297,8 @@ public:
   HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
                      const StackSafetyGlobalInfo *SSI)
       : M(M), SSI(SSI) {
-    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
-    this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0
-                              ? ClEnableKhwasan
-                              : CompileKernel;
+    this->Recover = optOr(ClRecover, Recover);
+    this->CompileKernel = optOr(ClEnableKhwasan, CompileKernel);
     this->Rng =
         ClRandomSkipRate.getNumOccurrences() ? M.createRNG("hwasan") : nullptr;
 
@@ -625,19 +624,14 @@ void HWAddressSanitizer::initializeModule() {
   bool NewRuntime =
       !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
 
-  UseShortGranules =
-      ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
-  OutlinedChecks =
-      (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
-      TargetTriple.isOSBinFormatELF() &&
-      (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);
+  UseShortGranules = optOr(ClUseShortGranules, NewRuntime);
+  OutlinedChecks = (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
+                   TargetTriple.isOSBinFormatELF() &&
+                   !optOr(ClInlineAllChecks, Recover);
 
-  InlineFastPath =
-      (ClInlineFastPathChecks.getNumOccurrences()
-           ? ClInlineFastPathChecks
-           : !(TargetTriple.isAndroid() ||
-               TargetTriple.isOSFuchsia())); // These platforms may prefer less
-                                             // inlining to reduce binary size.
+  // These platforms may prefer less inlining to reduce binary size.
+  InlineFastPath = optOr(ClInlineFastPathChecks, !(TargetTriple.isAndroid() ||
+                                                   TargetTriple.isOSFuchsia()));
 
   if (ClMatchAllTag.getNumOccurrences()) {
     if (ClMatchAllTag != -1) {
@@ -649,22 +643,17 @@ void HWAddressSanitizer::initializeModule() {
   UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value();
 
   // If we don't have personality function support, fall back to landing pads.
-  InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
-                              ? ClInstrumentLandingPads
-                              : !NewRuntime;
+  InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime);
 
   if (!CompileKernel) {
     createHwasanCtorComdat();
-    bool InstrumentGlobals =
-        ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
+    bool InstrumentGlobals = optOr(ClGlobals, NewRuntime);
 
     if (InstrumentGlobals && !UsePageAliases)
       instrumentGlobals();
 
     bool InstrumentPersonalityFunctions =
-        ClInstrumentPersonalityFunctions.getNumOccurrences()
-            ? ClInstrumentPersonalityFunctions
-            : NewRuntime;
+        optOr(ClInstrumentPersonalityFunctions, NewRuntime);
     if (InstrumentPersonalityFunctions)
       instrumentPersonalityFunctions();
   }
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index ccca8bc..6ad4be169 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -311,7 +311,7 @@ void Float2IntPass::walkForwards() {
 }
 
 // If there is a valid transform to be done, do it.
-bool Float2IntPass::validateAndTransform() {
+bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
   bool MadeChange = false;
 
   // Iterate over every disjoint partition of the def-use graph.
@@ -376,15 +376,23 @@ bool Float2IntPass::validateAndTransform() {
       LLVM_DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n");
       continue;
     }
-    if (MinBW > 64) {
-      LLVM_DEBUG(
-          dbgs() << "F2I: Value requires more than 64 bits to represent!\n");
-      continue;
-    }
 
-    // OK, R is known to be representable. Now pick a type for it.
-    // FIXME: Pick the smallest legal type that will fit.
-    Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx);
+    // OK, R is known to be representable.
+    // Pick the smallest legal type that will fit.
+    Type *Ty = DL.getSmallestLegalIntType(*Ctx, MinBW);
+    if (!Ty) {
+      // Every supported target supports 64-bit and 32-bit integers,
+      // so fallback to a 32 or 64-bit integer if the value fits.
+      if (MinBW <= 32) {
+        Ty = Type::getInt32Ty(*Ctx);
+      } else if (MinBW <= 64) {
+        Ty = Type::getInt64Ty(*Ctx);
+      } else {
+        LLVM_DEBUG(dbgs() << "F2I: Value requires more than bits to represent "
+                             "than the target supports!\n");
+        continue;
+      }
+    }
 
     for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
          MI != ME; ++MI)
@@ -491,7 +499,8 @@ bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
   walkBackwards();
   walkForwards();
 
-  bool Modified = validateAndTransform();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  bool Modified = validateAndTransform(DL);
   if (Modified)
     cleanup();
   return Modified;
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 7b74caa..a87e5a3 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -2848,7 +2848,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA,
       Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
     DTU->applyUpdates(Updates);
   }
-  BB->flushTerminatorDbgValues();
+  BB->flushTerminatorDbgRecords();
   return NumInstrsRemoved;
 }
 
diff --git a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
index 81545ef..d9832ee 100644
--- a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
+++ b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
@@ -42,8 +42,11 @@ static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
   ICmpInst::Predicate BoundPred =
       IsSigned ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT;
 
+  auto StartLG = SE.applyLoopGuards(Start, L);
+  auto BoundLG = SE.applyLoopGuards(BoundSCEV, L);
+
   if (LatchBrExitIdx == 1)
-    return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);
+    return SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, BoundLG);
 
   assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be either 0 or 1");
 
@@ -54,10 +57,10 @@ static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
   const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Min), StepPlusOne);
 
   const SCEV *MinusOne =
-      SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType()));
+      SE.getMinusSCEV(BoundLG, SE.getOne(BoundLG->getType()));
 
-  return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, MinusOne) &&
-         SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit);
+  return SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, MinusOne) &&
+         SE.isLoopEntryGuardedByCond(L, BoundPred, BoundLG, Limit);
 }
 
 /// Given a loop with an increasing induction variable, is it possible to
@@ -86,8 +89,11 @@ static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
   ICmpInst::Predicate BoundPred =
       IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
 
+  auto StartLG = SE.applyLoopGuards(Start, L);
+  auto BoundLG = SE.applyLoopGuards(BoundSCEV, L);
+
   if (LatchBrExitIdx == 1)
-    return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);
+    return SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, BoundLG);
 
   assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1");
 
@@ -97,9 +103,9 @@ static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
                        : APInt::getMaxValue(BitWidth);
   const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne);
 
-  return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start,
-                                      SE.getAddExpr(BoundSCEV, Step)) &&
-          SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));
+  return (SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG,
+                                      SE.getAddExpr(BoundLG, Step)) &&
+          SE.isLoopEntryGuardedByCond(L, BoundPred, BoundLG, Limit));
 }
 
 /// Returns estimate for max latch taken count of the loop of the narrowest
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 8c6af7a..acfd87c 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -577,28 +577,28 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
 
     Module *M = OrigHeader->getModule();
 
-    // Track the next DPValue to clone. If we have a sequence where an
+    // Track the next DbgRecord to clone. If we have a sequence where an
     // instruction is hoisted instead of being cloned:
-    //    DPValue blah
+    //    DbgRecord blah
     //    %foo = add i32 0, 0
-    //    DPValue xyzzy
+    //    DbgRecord xyzzy
     //    %bar = call i32 @foobar()
-    // where %foo is hoisted, then the DPValue "blah" will be seen twice, once
+    // where %foo is hoisted, then the DbgRecord "blah" will be seen twice, once
     // attached to %foo, then when %foo his hoisted it will "fall down" onto the
     // function call:
-    //    DPValue blah
-    //    DPValue xyzzy
+    //    DbgRecord blah
+    //    DbgRecord xyzzy
     //    %bar = call i32 @foobar()
     // causing it to appear attached to the call too.
     //
     // To avoid this, cloneDebugInfoFrom takes an optional "start cloning from
-    // here" position to account for this behaviour. We point it at any DPValues
-    // on the next instruction, here labelled xyzzy, before we hoist %foo.
-    // Later, we only only clone DPValues from that position (xyzzy) onwards,
-    // which avoids cloning DPValue "blah" multiple times.
-    // (Stored as a range because it gives us a natural way of testing whether
-    //  there were DPValues on the next instruction before we hoisted things).
-    iterator_range<DPValue::self_iterator> NextDbgInsts =
+    // here" position to account for this behaviour. We point it at any
+    // DbgRecords on the next instruction, here labelled xyzzy, before we hoist
+    // %foo. Later, we only only clone DbgRecords from that position (xyzzy)
+    // onwards, which avoids cloning DbgRecord "blah" multiple times. (Stored as
+    // a range because it gives us a natural way of testing whether
+    //  there were DbgRecords on the next instruction before we hoisted things).
+    iterator_range<DbgRecord::self_iterator> NextDbgInsts =
         (I != E) ? I->getDbgRecordRange() : DPMarker::getEmptyDbgRecordRange();
 
     while (I != E) {
@@ -777,7 +777,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
     // OrigPreHeader's old terminator (the original branch into the loop), and
     // remove the corresponding incoming values from the PHI nodes in OrigHeader.
     LoopEntryBranch->eraseFromParent();
-    OrigPreheader->flushTerminatorDbgValues();
+    OrigPreheader->flushTerminatorDbgRecords();
 
     // Update MemorySSA before the rewrite call below changes the 1:1
     // instruction:cloned_instruction_or_value mapping.
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 0f3d140..6d2a6a3 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1572,7 +1572,8 @@ hoistLockstepIdenticalDPValues(Instruction *TI, Instruction *I1,
   while (none_of(Itrs, atEnd)) {
     bool HoistDPVs = allIdentical(Itrs);
     for (CurrentAndEndIt &Pair : Itrs) {
-      // Increment Current iterator now as we may be about to move the DPValue.
+      // Increment Current iterator now as we may be about to move the
+      // DbgRecord.
       DbgRecord &DR = *Pair.first++;
       if (HoistDPVs) {
         DR.removeFromParent();
@@ -5304,7 +5305,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
   // Ensure that any debug-info records that used to occur after the Unreachable
   // are moved to in front of it -- otherwise they'll "dangle" at the end of
   // the block.
-  BB->flushTerminatorDbgValues();
+  BB->flushTerminatorDbgRecords();
 
   // Debug-info records on the unreachable inst itself should be deleted, as
   // below we delete everything past the final executable instruction.
@@ -5326,8 +5327,8 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
     // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
     // and we can therefore guarantee this block will be erased.
 
-    // If we're deleting this, we're deleting any subsequent dbg.values, so
-    // delete DPValue records of variable information.
+    // If we're deleting this, we're deleting any subsequent debug info, so
+    // delete DbgRecords.
     BBI->dropDbgRecords();
 
     // Delete this instruction (any uses are guaranteed to be dead)
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 3da1610..abb7a44 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -146,7 +146,7 @@ public:
   Value *mapValue(const Value *V);
   void remapInstruction(Instruction *I);
   void remapFunction(Function &F);
-  void remapDPValue(DbgRecord &DPV);
+  void remapDbgRecord(DbgRecord &DPV);
 
   Constant *mapConstant(const Constant *C) {
     return cast_or_null<Constant>(mapValue(C));
@@ -537,7 +537,7 @@ Value *Mapper::mapValue(const Value *V) {
   return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
 }
 
-void Mapper::remapDPValue(DbgRecord &DR) {
+void Mapper::remapDbgRecord(DbgRecord &DR) {
   if (DPLabel *DPL = dyn_cast<DPLabel>(&DR)) {
     DPL->setLabel(cast<DILabel>(mapMetadata(DPL->getLabel())));
     return;
@@ -1067,7 +1067,7 @@ void Mapper::remapFunction(Function &F) {
     for (Instruction &I : BB) {
       remapInstruction(&I);
       for (DbgRecord &DR : I.getDbgRecordRange())
-        remapDPValue(DR);
+        remapDbgRecord(DR);
     }
   }
 }
@@ -1234,7 +1234,7 @@ void ValueMapper::remapInstruction(Instruction &I) {
 }
 
 void ValueMapper::remapDPValue(Module *M, DPValue &V) {
-  FlushingMapper(pImpl)->remapDPValue(V);
+  FlushingMapper(pImpl)->remapDbgRecord(V);
 }
 
 void ValueMapper::remapDPValueRange(
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index a7ebf78..e86705e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -79,6 +79,13 @@ public:
   VPBasicBlock *getInsertBlock() const { return BB; }
   VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
 
+  /// Create a VPBuilder to insert after \p R.
+  static VPBuilder getToInsertAfter(VPRecipeBase *R) {
+    VPBuilder B;
+    B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
+    return B;
+  }
+
   /// InsertPoint - A saved insertion point.
   class VPInsertPoint {
     VPBasicBlock *Block = nullptr;
@@ -131,8 +138,9 @@ public:
 
   /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
   /// its underlying Instruction.
-  VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
-                        Instruction *Inst = nullptr, const Twine &Name = "") {
+  VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
+                              Instruction *Inst = nullptr,
+                              const Twine &Name = "") {
     DebugLoc DL;
     if (Inst)
       DL = Inst->getDebugLoc();
@@ -140,8 +148,8 @@ public:
     NewVPInst->setUnderlyingValue(Inst);
     return NewVPInst;
   }
-  VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
-                        DebugLoc DL, const Twine &Name = "") {
+  VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
+                              DebugLoc DL, const Twine &Name = "") {
     return createInstruction(Opcode, Operands, DL, Name);
   }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 6474a96..877b5d4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -296,8 +296,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
       // recipes.
       if (Br->isConditional()) {
         VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
-        VPBB->appendRecipe(
-            new VPInstruction(VPInstruction::BranchOnCond, {Cond}));
+        VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst);
       }
 
       // Skip the rest of the Instruction processing for Branch instructions.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f6b564a..3b19db9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1192,10 +1192,10 @@ void VPlanTransforms::addActiveLaneMask(
     LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
         Plan, DataAndControlFlowWithoutRuntimeCheck);
   } else {
-    LaneMask = new VPInstruction(VPInstruction::ActiveLaneMask,
-                                 {WideCanonicalIV, Plan.getTripCount()},
-                                 nullptr, "active.lane.mask");
-    LaneMask->insertAfter(WideCanonicalIV);
+    VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
+    LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
+                              {WideCanonicalIV, Plan.getTripCount()}, nullptr,
+                              "active.lane.mask");
   }
 
   // Walk users of WideCanonicalIV and replace all compares of the form